View Javadoc

1   package com.atlassian.core.util.xml;
2   
3   import com.atlassian.core.util.DataUtils;
4   
5   import java.io.*;
6   import java.util.zip.ZipInputStream;
7   
8   /**
9    * An input streams that handles Unicode Byte-Order Mark (BOM) marker within a normal file as well as a ZIP file.
10   * Distilled and adapted from http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6206835
11   */
12  public class BOMZipFileInputStream extends InputStream
13  {
14      // ------------------------------------------------------------------------------------------------------- Constants
15      public final static byte[] UTF32BEBOMBYTES = new byte[]{(byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF,};
16      public final static byte[] UTF32LEBOMBYTES = new byte[]{(byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00,};
17      public final static byte[] UTF16BEBOMBYTES = new byte[]{(byte) 0xFE, (byte) 0xFF,};
18      public final static byte[] UTF16LEBOMBYTES = new byte[]{(byte) 0xFF, (byte) 0xFE,};
19      public final static byte[] UTF8BOMBYTES = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF,};
20      public final static byte[][] BOMBYTES = new byte[][]{
21              UTF32BEBOMBYTES,
22              UTF32LEBOMBYTES,
23              UTF16BEBOMBYTES,
24              UTF16LEBOMBYTES,
25              UTF8BOMBYTES,
26      };
27      public final static int NONE = -1;
28  
29      /**
30       * No bom sequence is longer than 4 bytes
31       */
32      public final static int MAXBOMBYTES = 4;
33  
34      // ------------------------------------------------------------------------------------------------- Type Properties
35      private InputStream daStream;
36  
37      // ---------------------------------------------------------------------------------------------------- Dependencies
38      // ---------------------------------------------------------------------------------------------------- Constructors
39      public BOMZipFileInputStream(String fileName) throws IOException, FileNotFoundException
40      {
41          int BOMType = getBOMType(fileName);
42          int skipBytes = getSkipBytes(BOMType);
43          InputStream fIn = getFileInputStream(fileName);
44          if (skipBytes > 0)
45          {
46              fIn.skip(skipBytes);
47          }
48          daStream = fIn;
49      }
50  
51      // -------------------------------------------------------------------------------------------------- Public Methods
52      public int read() throws IOException
53      {
54          return daStream.read();
55      }
56  
57      // -------------------------------------------------------------------------------------------------- Helper Methods
58  
59      private InputStream getFileInputStream(String filename) throws IOException
60      {
61          InputStream is = null;
62          FileInputStream fileInputStream = new FileInputStream(filename);
63          if (filename != null && filename.trim().endsWith(DataUtils.SUFFIX_ZIP))
64          {
65              ZipInputStream input = new ZipInputStream(new BufferedInputStream(fileInputStream));
66              input.getNextEntry();
67              is = input;
68          }
69          else
70          {
71              is = new BufferedInputStream(fileInputStream);
72          }
73          return is;
74      }
75  
76      private int getBOMType(String _f) throws IOException
77      {
78          InputStream fileInputStream = getFileInputStream(_f);
79          byte[] buff = new byte[MAXBOMBYTES];
80          int read = fileInputStream.read(buff);
81          int bomType = getBOMType(buff, read);
82          fileInputStream.close();
83          return bomType;
84      }
85  
86      private int getSkipBytes(int bomType)
87      {
88          if (bomType < 0 || bomType >= BOMBYTES.length) return 0;
89          return BOMBYTES[bomType].length;
90      }
91  
92      private int getBOMType(byte[] _bomBytes, int _length)
93      {
94          for (int i = 0; i < BOMBYTES.length; i++)
95          {
96              for (int j = 0; j < _length && j < BOMBYTES[i].length; j++)
97              {
98                  if (_bomBytes[j] != BOMBYTES[i][j]) break;
99                  if (_bomBytes[j] == BOMBYTES[i][j] && j == BOMBYTES[i].length - 1) return i;
100             }
101         }
102         return NONE;
103     }
104 }