1 package com.atlassian.mail;
2
3 import org.apache.log4j.Category;
4
5 import javax.swing.text.html.HTML;
6 import javax.swing.text.html.HTMLEditorKit;
7 import javax.swing.text.html.parser.ParserDelegator;
8 import java.io.*;
9 import java.util.ArrayList;
10
11
12
13
14
15
16 public class HtmlToTextConverter
17 {
18 private static final Category log = Category.getInstance(HtmlToTextConverter.class);
19
20 private class HTMLCallbackHandler extends HTMLEditorKit.ParserCallback {
21
22 Writer out;
23 boolean started = false;
24 boolean inBody = false;
25 boolean inList = false;
26 boolean firstTD = true;
27 int listCount = -1;
28 ArrayList links = new ArrayList();
29
30 private static final String NEWLINE = "\n";
31 private static final String TAB = "\t";
32 private static final String STAR = "*";
33 private static final String SPACE = " ";
34 private static final String PERIOD = ".";
35 private static final String OPEN_BRACKET = "[";
36 private static final String CLOSE_BRACKET = "]";
37 private static final String DASH_LINE = "----------------------------------------------------------------------------------------";
38
39
40
41 public HTMLCallbackHandler(Writer writer) {
42 out = writer;
43 }
44
45 public void handleStartTag(HTML.Tag tag, javax.swing.text.MutableAttributeSet set, int position) {
46 try
47 {
48 if (inBody && started && tag.equals(HTML.Tag.P))
49 {
50 out.write(NEWLINE + NEWLINE);
51 }
52 else if (inBody && started && tag.equals(HTML.Tag.OL) || tag.equals(HTML.Tag.UL))
53 {
54 inList = true;
55 out.write(NEWLINE + NEWLINE);
56 if(tag.equals(HTML.Tag.OL))
57 listCount = 1;
58 }
59 else if (inBody && started && inList && tag.equals(HTML.Tag.LI))
60 {
61 out.write(NEWLINE);
62 if(listCount != -1)
63 {
64 out.write(listCount + PERIOD + SPACE);
65 listCount++;
66 }
67 else
68 out.write(STAR);
69 }
70 else if (inBody && started && tag.equals(HTML.Tag.TABLE))
71 {
72 out.write(NEWLINE);
73 }
74 else if (inBody && started && tag.equals(HTML.Tag.TR))
75 {
76 out.write(NEWLINE);
77 firstTD = true;
78 }
79 else if (inBody && started && tag.equals(HTML.Tag.TD) || tag.equals(HTML.Tag.TH))
80 {
81 if(!firstTD)
82 {
83 out.write(TAB);
84 }
85 else
86 {
87 firstTD = false;
88 }
89 }
90 else if (inBody && started && tag.equals(HTML.Tag.PRE))
91 {
92 out.write(NEWLINE);
93 }
94 else if (inBody && started && tag.equals(HTML.Tag.IMG))
95 {
96
97 handleLink((String)set.getAttribute(HTML.Attribute.SRC));
98 }
99 else if (inBody && started && tag.equals(HTML.Tag.A))
100 {
101
102 handleLink((String)set.getAttribute(HTML.Attribute.HREF));
103 }
104 else if (inBody && started && tag.equals(HTML.Tag.HR))
105 {
106 out.write(NEWLINE + DASH_LINE);
107 }
108 else if (inBody && started && tag.equals(HTML.Tag.H1) || tag.equals(HTML.Tag.H2) || tag.equals(HTML.Tag.H3) || tag.equals(HTML.Tag.H4) || tag.equals(HTML.Tag.H5) || tag.equals(HTML.Tag.H6))
109 {
110 out.write(NEWLINE);
111 }
112 else if (tag.equals(HTML.Tag.BODY))
113 {
114 inBody = true;
115 }
116 }
117 catch (IOException e)
118 {
119 log.warn("IO error converting HTML to text", e);
120 }
121
122 }
123
124 private void handleLink(String src) throws IOException
125 {
126 if(src != null)
127 {
128 links.add(src);
129 out.write(OPEN_BRACKET + links.size() + CLOSE_BRACKET);
130 }
131 }
132
133 public void handleEndTag(HTML.Tag tag, int position) {
134 if (inBody && started && tag.equals(HTML.Tag.OL) || tag.equals(HTML.Tag.UL))
135 {
136 inList = false;
137 if(tag.equals(HTML.Tag.OL))
138 listCount = -1;
139 }
140 else if (tag.equals(HTML.Tag.BODY))
141 {
142 if(links.size() != 0)
143 {
144
145 try
146 {
147 out.write(NEWLINE + DASH_LINE + NEWLINE);
148 for (int i = 0; i < links.size(); i++)
149 {
150 String src = (String)links.get(i);
151 out.write(OPEN_BRACKET + (i + 1) + CLOSE_BRACKET + SPACE + src);
152 if((i + 1) < links.size())
153 {
154 out.write(NEWLINE);
155 }
156 }
157 }
158 catch (IOException e)
159 {
160 e.printStackTrace();
161 }
162 }
163 inBody = false;
164 }
165 }
166
167 public void handleText(char[] aChar, int position) {
168 try
169 {
170 if (inBody)
171 {
172 out.write(aChar);
173 started = true;
174 }
175 }
176 catch (IOException e)
177 {
178 log.warn("IO error converting HTML to text", e);
179 }
180 }
181
182 public void handleSimpleTag(HTML.Tag tag, javax.swing.text.MutableAttributeSet a, int pos) {
183 try
184 {
185 if (inBody && started && tag.equals(HTML.Tag.BR))
186 out.write(NEWLINE);
187 }
188 catch (IOException e)
189 {
190 log.warn("IO error converting HTML to text", e);
191 }
192
193 }
194 }
195
196
197 public String convert(String html) throws IOException
198 {
199 StringWriter out = new StringWriter();
200 convert(new StringReader(html), out);
201 out.close();
202 return out.toString();
203 }
204
205 private void convert(Reader reader, Writer writer) throws IOException
206 {
207 HTMLCallbackHandler handler = new HTMLCallbackHandler(writer);
208 new ParserDelegator().parse(reader, handler, true);
209 }
210 }