View Javadoc

1   /*
2    * Created by IntelliJ IDEA.
3    * User: owen
4    * Date: Nov 27, 2002
5    * Time: 2:14:35 PM
6    * CVS Revision: $Revision: 1.1 $
7    * Last CVS Commit: $Date: 2002/12/02 05:26:32 $
8    * Author of last CVS Commit: $Author: mike $
9    * To change this template use Options | File Templates.
10   */
11  package com.atlassian.core.util;
12  
13  import java.util.ArrayList;
14  import java.util.List;
15  
/**
 * Static helpers for removing markup from HTML text.
 *
 * <p>All methods are stateless and work purely on the characters of the supplied string;
 * no HTML parser is involved, so malformed markup is handled on a best-effort basis.</p>
 */
public class HTMLUtils
{
    /** The tag at the inspected position is ordinary content. */
    private static final int ORDINARY = 0;
    /** The tag is a structural tag that is removed without changing state (body open/close). */
    private static final int DROP_TAG = 1;
    /** The tag opens the html element. */
    private static final int ENTER_HTML = 2;
    /** The tag opens the head element, whose whole content is discarded. */
    private static final int ENTER_HEAD = 3;
    /** The tag closes the head element. */
    private static final int LEAVE_HEAD = 4;
    /** The tag closes the html element; nothing after it is kept. */
    private static final int END_DOCUMENT = 5;

    /**
     * Removes everything between a '&lt;' and the next '&gt;' (inclusive) from the text.
     *
     * <p>This is a plain character filter: a '&gt;' that appears outside a tag is dropped as
     * well, and all text following an unclosed '&lt;' is discarded.</p>
     *
     * @param html the text to filter; may be <code>null</code>
     * @return the text without tags, or <code>null</code> if <code>html</code> was <code>null</code>
     */
    public static String stripTags(String html)
    {
        if (html == null)
            return null;

        StringBuffer detagged = new StringBuffer(html.length());
        boolean intag = false;
        for (int count = 0; count < html.length(); count++)
        {
            char current = html.charAt(count);

            if (current == '>')
                intag = false;
            else if (current == '<')
                intag = true;
            else if (!intag)
                detagged.append(current);
        }
        return detagged.toString();
    }

    /**
     * Strips the document wrapper from an HTML page, leaving the markup that was inside
     * the body.
     *
     * <p>Within the first html element the opening and closing html and body tags are
     * removed, and the head element is removed together with its content. Everything else
     * inside the html element is kept verbatim. Text in front of the opening html tag is
     * kept, text after the closing html tag is dropped, and the result is trimmed. Input
     * that contains no html element is returned unchanged apart from trimming.</p>
     *
     * <p>Tag names are matched case-insensitively and must be complete names, so tags such
     * as thead, tbody or header are never mistaken for head or body. Structural tags may
     * carry attributes, including quoted values that contain '&gt;'.</p>
     *
     * @param html the document to unwrap; may be <code>null</code>
     * @return the unwrapped, trimmed content, or <code>null</code> if <code>html</code> was
     *         <code>null</code>
     */
    public static String stripOuterHtmlTags(String html)
    {
        if (html == null)
            return null;

        StringBuffer kept = new StringBuffer(html.length());
        boolean inHtml = false;
        boolean inHead = false;
        int pos = 0;

        while (pos < html.length())
        {
            char current = html.charAt(pos);

            if (current == '<')
            {
                int action = classify(html, pos, inHtml, inHead);
                if (action != ORDINARY)
                {
                    int tagEnd = findTagEnd(html, pos);
                    // A structural tag that is never closed by '>' is treated as plain text.
                    if (tagEnd != -1)
                    {
                        if (action == END_DOCUMENT)
                            break;

                        if (action == ENTER_HTML)
                            inHtml = true;
                        else if (action == LEAVE_HEAD)
                            inHead = false;
                        else if (action == ENTER_HEAD && html.charAt(tagEnd - 1) != '/')
                            inHead = true; // a self-closed head tag has no content to skip

                        // Resume on the character directly after the tag so nothing is lost.
                        pos = tagEnd + 1;
                        continue;
                    }
                }
            }

            if (!inHead)
                kept.append(current);
            pos++;
        }

        return kept.toString().trim();
    }

    /**
     * Decides what the '&lt;' at <code>start</code> means for the current scanner state.
     *
     * @return one of the action constants declared on this class
     */
    private static int classify(String html, int start, boolean inHtml, boolean inHead)
    {
        // Inside head only its own closing tag matters; everything else is discarded.
        if (inHead)
            return isTag(html, start, "head", true) ? LEAVE_HEAD : ORDINARY;

        // head and body are only structural once the html element has been opened.
        if (!inHtml)
            return isTag(html, start, "html", false) ? ENTER_HTML : ORDINARY;

        if (isTag(html, start, "html", true))
            return END_DOCUMENT;
        if (isTag(html, start, "head", false))
            return ENTER_HEAD;
        if (isTag(html, start, "body", false) || isTag(html, start, "body", true))
            return DROP_TAG;
        return ORDINARY;
    }

    /**
     * Tests whether the text at <code>start</code> (which must be a '&lt;') begins an opening
     * or closing tag with exactly the given name, ignoring case. The name has to be followed
     * by whitespace, '/' or '&gt;'.
     */
    private static boolean isTag(String html, int start, String name, boolean closing)
    {
        int nameStart = start + 1;
        if (closing)
        {
            if (nameStart >= html.length() || html.charAt(nameStart) != '/')
                return false;
            nameStart++;
        }

        // regionMatches returns false (rather than throwing) when the region runs off the end.
        if (!html.regionMatches(true, nameStart, name, 0, name.length()))
            return false;

        int nameEnd = nameStart + name.length();
        if (nameEnd >= html.length())
            return false;

        char next = html.charAt(nameEnd);
        return next == '>' || next == '/' || Character.isWhitespace(next);
    }

    /**
     * Finds the '&gt;' that closes the tag starting at <code>start</code>, ignoring any
     * '&gt;' inside single- or double-quoted attribute values.
     *
     * @return the index of the closing '&gt;', or -1 if the tag is not closed
     */
    private static int findTagEnd(String html, int start)
    {
        char quote = 0;
        for (int i = start + 1; i < html.length(); i++)
        {
            char c = html.charAt(i);
            if (quote != 0)
            {
                if (c == quote)
                    quote = 0;
            }
            else if (c == '"' || c == '\'')
            {
                quote = c;
            }
            else if (c == '>')
            {
                return i;
            }
        }
        return -1;
    }
}