1   package com.atlassian.core.util;
2   
3   import junit.framework.TestCase;
4   
5   /**
6    * @since v3.19
7    */
8   public class XMLUtilsTest extends TestCase
9   {
10      private final char[] BAD_CO_CONTROL_CHARS = new char[]
11              {
12                      '\u0000',
13                      '\u0007',
14                      '\u0011',
15                      '\u0014'
16              };
17  
18      private final char[] BAD_SURROGATE_CHARS = new char[]
19              {
20                      '\uD84C',
21                      '\uDF84'
22              };
23  
24      private final char[] BAD_OTHER_CHARS = new char[]
25              {
26                      '\uFFFE',
27                      '\uFFFF',
28              };
29  
30      public void testNullInputHandling()
31      {
32          assertEquals("", XMLUtils.escape(null));
33          //
34          // this is inconsistent however I don't want to fix this inconsistency and break any applications
35          // its been this way since 14/09/05 and its gonna continue so.  At least we have a test to reflect its
36          // behaviour.
37          //
38          assertEquals(null, XMLUtils.escapeForCdata(null));
39      }
40  
41      public void testEscape()
42      {
43          // these two MUST be escaped
44          assertEquals("&lt;", XMLUtils.escape("<"));
45          assertEquals("&lt;", XMLUtils.escape('<'));
46          assertEquals("&amp;", XMLUtils.escape("&"));
47          assertEquals("&amp;", XMLUtils.escape('&'));
48  
49          // the rest MAY be
50          assertEquals("&gt;", XMLUtils.escape(">"));
51          assertEquals("&gt;", XMLUtils.escape('>'));
52          assertEquals("&quot;", XMLUtils.escape("\""));
53          assertEquals("&quot;", XMLUtils.escape('"'));
54          assertEquals("&apos;", XMLUtils.escape("'"));
55          assertEquals("&apos;", XMLUtils.escape('\''));
56  
57          // no C0 control characters allowed
58          assertCharIsReplaced(BAD_CO_CONTROL_CHARS);
59  
60          // no surrogates allowed
61          assertCharIsReplaced(BAD_SURROGATE_CHARS);
62  
63          assertCharIsReplaced(BAD_OTHER_CHARS);
64          
65          // C1 control character should be escaped
66          // http://www.cafeconleche.org/books/effectivexml/chapters/03.html
67          assertEquals("&#128;", XMLUtils.escape("\u0080"));
68          assertEquals("&#128;", XMLUtils.escape('\u0080'));
69          assertEquals("&#159;", XMLUtils.escape("\u009f"));
70          assertEquals("&#159;", XMLUtils.escape('\u009f'));
71  
72          // valid white space is left alone
73          assertEquals(" ", XMLUtils.escape(" "));
74          assertEquals(" ", XMLUtils.escape(' '));
75          assertEquals("\t", XMLUtils.escape("\t"));
76          assertEquals("\t", XMLUtils.escape('\t'));
77          assertEquals("\r", XMLUtils.escape("\r"));
78          assertEquals("\r", XMLUtils.escape('\r'));
79          assertEquals("\n", XMLUtils.escape("\n"));
80          assertEquals("\n", XMLUtils.escape('\n'));
81  
82      }
83  
84      public void testCharEscape()
85      {
86          for (char c : BAD_CO_CONTROL_CHARS)
87          {
88              assertEquals("\uFFFD", XMLUtils.escape(c));
89          }
90          for (char c : BAD_SURROGATE_CHARS)
91          {
92              assertEquals("\uFFFD", XMLUtils.escape(c));
93          }
94          for (char c : BAD_OTHER_CHARS)
95          {
96              assertEquals("\uFFFD", XMLUtils.escape(c));
97          }
98      }
99  
100     private void assertCharIsReplaced(final char[] bad_chars)
101     {
102         for (char bad_char : bad_chars)
103         {
104             String badStr = new StringBuilder().append(bad_char).toString();
105             assertEquals("Expecting uFFFD but got " + (int) bad_char, "\uFFFD", XMLUtils.escape(badStr));
106         }
107     }
108 
109     public void testEscapeCDataSection()
110     {
111         // general input
112         assertEquals("general input", XMLUtils.escapeForCdata("general input"));
113 
114         // when ]] is present it starts a new CDATA section
115         assertEquals("cdata with with ]]]]><![CDATA[> in it", XMLUtils.escapeForCdata("cdata with with ]]> in it"));
116 
117         // when entity references are not resolved
118         assertEquals("here is some <tags> that are &lt; eft alone", XMLUtils.escapeForCdata("here is some <tags> that are &lt; eft alone"));
119         assertEquals("here is some <tags>]]]]><![CDATA[> that are &lt; eft alone", XMLUtils.escapeForCdata("here is some <tags>]]> that are &lt; eft alone"));
120 
121 
122         // no C0 control characters allowed
123         assertCDataCharIsReplaced(BAD_CO_CONTROL_CHARS);
124         // no surrogates allowed
125         assertCDataCharIsReplaced(BAD_SURROGATE_CHARS);
126         // others no good
127         assertCDataCharIsReplaced(BAD_OTHER_CHARS);
128 
129         // put it all together        
130         assertEquals("here is[\uFFFD\uFFFD] some\r\t\n <tags>]]]]><![CDATA[> that are &lt; eft alone", XMLUtils.escapeForCdata("here is[\u0007\uD84C] some\r\t\n <tags>]]> that are &lt; eft alone"));
131 
132     }
133 
134     private void assertCDataCharIsReplaced(final char[] bad_chars)
135     {
136         final String replacmentStr = "\uFFFD";
137         for (char bad_char : bad_chars)
138         {
139             String badStr = new StringBuilder().append(bad_char).toString();
140             assertEquals("Expecting uFFFD but got " + (int) bad_char, replacmentStr, XMLUtils.escapeForCdata(badStr));
141         }
142     }
143 }