View Javadoc

1   package com.atlassian.core.util.xml;
2   
3   
4   import org.apache.log4j.Logger;
5   
6   import java.io.FilterReader;
7   import java.io.IOException;
8   import java.io.Reader;
9   
10  /**
11   * Class to read XML streams and filter out invalid UTF-8 characters
12   */
13  public class XMLCleaningReader extends FilterReader
14  {
15      private static final Logger log = Logger.getLogger(XMLCleaningReader.class);
16  
17      public XMLCleaningReader(Reader reader)
18      {
19          super(reader);
20      }
21  
22      public int read(char cbuf[], int off, int len) throws IOException
23      {
24          final int charsRead = super.read(cbuf, off, len);
25  
26          if (charsRead > -1)
27          {
28              int limit = charsRead + off;
29              for (int j = off; j < limit; j++)
30              {
31                  char c = cbuf[j];
32                  if (c > -1 && c != 9 && c != 10 && c != 13)
33                  {
34                      if (c < 32 || (c > 55295 && c < 57344))
35                      {
36                          log.warn("Replaced invalid XML character " + c + " (" + (int) c + ").");
37                          cbuf[j] = '\uFFFD';
38                      }
39                  }
40              }
41          }
42  
43          return charsRead;
44      }
45  
46      public int read() throws IOException
47      {
48          final int i = super.read();
49          if (i < 32 && i > -1 && i != 9 && i != 10 && i != 13)
50          {
51              return '\uFFFD';
52          }
53          return i;
54      }
55  }