1   /*
2    * Created by IntelliJ IDEA.
3    * User: Administrator
4    * Date: 20/02/2002
5    * Time: 16:48:36
6    * To change template for new class use
7    * Code Style | Class Templates options (Tools | IDE Options).
8    */
9   package com.atlassian.core.util;
10  
11  import org.w3c.dom.Element;
12  import org.w3c.dom.Node;
13  import org.w3c.dom.NodeList;
14  import org.w3c.dom.Text;
15  
/**
 * Static helpers for reading values out of DOM trees and for escaping text so that it can be written into
 * XML documents (as text content, attribute values or CDATA sections).
 */
public class XMLUtils
{
    /**
     * Our default policy is to replace unmappable characters with the replacement char.
     */
    private static final TransformPolicy DEFAULT_POLICY = new ReplacePolicy();

    /**
     * Identifies the last printable character in the Unicode range that is written out literally by
     * {@link #escape(String)}. For 8-bit encodings this would be either 0x7E or 0xFF, for 16-bit encodings 0xFFFF;
     * this class always uses 0x7E. Valid characters above it are escaped using numeric character references.
     */
    private static final int LAST_PRINTABLE = 0x7E;

    /**
     * With a given parent XML Element, find the text contents of the child element with supplied name.
     * <p>
     * Only the first descendant element with the given tag name is considered, and only if its first child is a
     * text node.
     *
     * @param parent the node to search below; must be an {@link Element} for anything to be found
     * @param childTagName the tag name of the descendant element to look up
     * @return the data of the first child text node of the first matching element, or <tt>null</tt> if the parent
     * is not an element, no such element exists, its first child is not a text node, or the lookup fails
     */
    public static String getContainedText(Node parent, String childTagName)
    {
        if (!(parent instanceof Element))
        {
            return null;
        }

        try
        {
            final Node tag = ((Element) parent).getElementsByTagName(childTagName).item(0);
            if (tag == null)
            {
                return null;
            }

            final Node firstChild = tag.getFirstChild();
            return (firstChild instanceof Text) ? ((Text) firstChild).getData() : null;
        }
        catch (RuntimeException ignored)
        {
            // This lookup is deliberately best-effort: any failure raised by the DOM implementation is reported
            // to the caller as "no text found" rather than propagated.
            return null;
        }
    }

    /**
     * Given one element, get a single named child element of it.
     *
     * @param el the element whose direct children are searched
     * @param name the node name the child element must have
     * @return the first direct child element with the given name, or <tt>null</tt> if there is none
     */
    public static Element getSingleChildElement(Element el, String name)
    {
        final NodeList children = el.getChildNodes();
        final int count = children.getLength();
        for (int i = 0; i < count; i++)
        {
            final Node node = children.item(i);
            if (node.getNodeType() == Node.ELEMENT_NODE && name.equals(node.getNodeName()))
            {
                return (Element) node;
            }
        }

        return null;
    }

    /**
     * Get an attribute of a given element, with a default value if the attribute is not present or blank.
     *
     * @param element the element to read the attribute from
     * @param attributeName the name of the attribute
     * @param defaultValue the value to return when the attribute value is null, empty or only whitespace
     * @return the (untrimmed) attribute value, or <tt>defaultValue</tt>
     */
    public static String getAttributeWithDefault(Element element, String attributeName, String defaultValue)
    {
        final String value = element.getAttribute(attributeName);

        if (value == null || value.trim().length() == 0)
        {
            return defaultValue;
        }

        return value;
    }

    /**
     * Escapes a string so it may be returned as text content or attribute value. Non printable characters are escaped
     * using character references. Where the format specifies a default entity reference, that reference is used (e.g.
     * <tt>&amp;lt;</tt>).
     * <p>
     * A well-formed surrogate pair is treated as the single supplementary character it encodes and is written as
     * one numeric character reference. An unpaired surrogate is not a valid XML character and is handed to the
     * policy like any other invalid character.
     *
     * @param source the string to escape; <tt>null</tt> is treated as the empty string
     * @param policy how to handle invalid XML characters
     * @return the escaped string, or "" if the source is <tt>null</tt>
     * @since 3.19 / 3.10.1
     */
    public static String escape(final String source, final TransformPolicy policy)
    {
        if (source == null)
        {
            return "";
        }

        final int length = source.length();
        // Escaping only ever grows the text, so start with a little head room over the source length.
        final StringBuilder sb = new StringBuilder(length + 30);
        for (int i = 0; i < length; ++i)
        {
            final char ch = source.charAt(i);
            if (startsSurrogatePair(source, i))
            {
                // Every code point encoded by a surrogate pair (0x10000 - 0x10FFFF) is a valid XML character,
                // and it is always above LAST_PRINTABLE, so it is written as a character reference.
                sb.append("&#").append(Character.toCodePoint(ch, source.charAt(i + 1))).append(";");
                ++i; // the low surrogate has been consumed as well
            }
            else
            {
                transform(sb, ch, policy);
            }
        }
        return sb.toString();
    }

    /**
     * Escape an XML string using a default policy of replacing invalid XML characters.
     *
     * @param source the string to escape; <tt>null</tt> is treated as the empty string
     * @return the escaped string, or "" if the source is <tt>null</tt>
     */
    public static String escape(final String source)
    {
        return escape(source, DEFAULT_POLICY);
    }

    /**
     * Prepares a string for inclusion inside a CDATA section. Every occurrence of the CDATA terminator
     * <tt>]]&gt;</tt> is split across two adjacent CDATA sections, and invalid XML characters are replaced using
     * the default policy. The surrounding <tt>&lt;![CDATA[</tt> and <tt>]]&gt;</tt> are not added.
     * <p>
     * Note that, unlike {@link #escape(String)}, a <tt>null</tt> source yields <tt>null</tt>.
     *
     * @param source the text to place inside a CDATA section
     * @return the escaped text, or <tt>null</tt> if the source is <tt>null</tt>
     */
    public static String escapeForCdata(String source)
    {
        if (source == null)
        {
            return null;
        }
        final StringBuilder sb = new StringBuilder(source.length() + 30);

        int index;
        int oldIndex = 0;
        while ((index = source.indexOf("]]>", oldIndex)) > -1)
        {
            transformCData(sb, source.substring(oldIndex, index), DEFAULT_POLICY);

            oldIndex = index + 3;
            // Close the current section after "]]" and open a new one that starts with ">".
            sb.append("]]]]><![CDATA[>");
        }

        transformCData(sb, source.substring(oldIndex), DEFAULT_POLICY);

        return sb.toString();
    }

    /**
     * Returns the name of the predefined entity for a special XML character. These five are defined by default for
     * all XML documents: '&lt;', '&gt;', '"', '\'' and '&amp;' map to "lt", "gt", "quot", "apos" and "amp".
     *
     * @param ch the character to look up
     * @return the entity name (without the leading ampersand and trailing semicolon), or <tt>null</tt> if the
     * character has no predefined entity
     */
    private static String getEntityRef(char ch)
    {
        switch (ch)
        {
            case '<':
                return "lt";
            case '>':
                return "gt";
            case '"':
                return "quot";
            case '\'':
                return "apos";
            case '&':
                return "amp";
            default:
                return null;
        }
    }

    /**
     * If there is a suitable entity reference for this character, return it. The list of available entity references is
     * almost but not identical between XML and HTML. This uses a default transformation policy of replacing invalid XML
     * characters.
     *
     * @param ch the character to escape
     * @return the escaped form of the character
     */
    public static String escape(final char ch)
    {
        return escape(ch, DEFAULT_POLICY);
    }

    /**
     * Escape XML characters with a user specified transformation policy for invalid characters.
     *
     * @param ch the character to escape
     * @param policy how to handle an invalid XML character
     * @return the escaped form of the character
     * @since 3.19 / 3.10.1
     */
    public static String escape(final char ch, final TransformPolicy policy)
    {
        final StringBuilder sb = new StringBuilder();
        transform(sb, ch, policy);
        return sb.toString();
    }

    /**
     * Tells whether the characters at <tt>index</tt> and <tt>index + 1</tt> form a well-formed surrogate pair.
     */
    private static boolean startsSurrogatePair(final String text, final int index)
    {
        return Character.isHighSurrogate(text.charAt(index))
                && index + 1 < text.length()
                && Character.isLowSurrogate(text.charAt(index + 1));
    }

    /**
     * Append escaped version of character to the end of the StringBuilder
     */
    private static void transform(final StringBuilder sb, final char ch, final TransformPolicy policy)
    {
        if (!validXml(ch))
        {
            sb.append(policy.handle(ch));
            return;
        }

        final String entity = getEntityRef(ch);
        if (entity != null)
        {
            sb.append("&").append(entity).append(";");
        }
        else if ((ch >= ' ' && ch <= LAST_PRINTABLE && ch != 0xF7) ||
                ch == '\n' || ch == '\r' || ch == '\t')
        {
            // Printable characters, tab and the line terminators are written as they are.
            sb.append(ch);
        }
        else
        {
            // Everything else (ASCII delete and anything above the printable threshold) is written as a
            // decimal character reference.
            sb.append("&#").append((int) ch).append(";");
        }
    }

    /**
     * Append escaped version of CData "character data" to the end of the StringBuilder. Valid characters,
     * including well-formed surrogate pairs, are copied unchanged because character references are not
     * interpreted inside a CDATA section; invalid characters are handed to the policy.
     */
    private static void transformCData(final StringBuilder sb, final String cdata, final TransformPolicy policy)
    {
        final int length = cdata.length();
        for (int i = 0; i < length; i++)
        {
            final char ch = cdata.charAt(i);
            if (startsSurrogatePair(cdata, i))
            {
                sb.append(ch).append(cdata.charAt(i + 1));
                i++; // the low surrogate has been consumed as well
            }
            else if (!validXml(ch))
            {
                sb.append(policy.handle(ch));
            }
            else
            {
                sb.append(ch);
            }
        }
    }

    /**
     * Tells whether a single UTF-16 code unit is, on its own, a character allowed in XML 1.0 documents.
     * <p>
     * Surrogates (0xD800 - 0xDFFF) are reported as invalid here because a lone surrogate is not a character;
     * supplementary characters (0x10000 - 0x10FFFF) cannot be represented by one <tt>char</tt> and are handled
     * as surrogate pairs by the string escaping methods of this class.
     *
     * @param ch the character to test
     * @return true if the character matches the XML 1.0 <tt>Char</tt> production
     * @since 3.19 / 3.10.1
     */
    // http://www.w3.org/TR/REC-xml/#charsets
    public static boolean validXml(final char ch)
    {
        return ((ch == 0x9) ||
                (ch == 0xA) ||
                (ch == 0xD) ||
                ((ch >= 0x20) && (ch <= 0xD7FF)) ||
                ((ch >= 0xE000) && (ch <= 0xFFFD)));
    }

    /**
     * Strategy deciding what is written in place of a character that is not valid in XML.
     *
     * @since 3.19 / 3.10.1
     */
    public interface TransformPolicy
    {
        /**
         * @param input the invalid character that was encountered
         * @return the text to write instead of the invalid character
         */
        String handle(final char input);
    }

    /**
     * Replace bad XML input with a fixed character.
     */
    public static class ReplacePolicy implements TransformPolicy
    {
        public String handle(final char input)
        {
            // the unicode REPLACEMENT CHARACTER (U+FFFD)
            return "\uFFFD";
        }
    }

}