1   package com.atlassian.core.util;
2   
3   import java.util.*;
4   import java.util.regex.Pattern;
5   import java.io.UnsupportedEncodingException;
6   import static org.apache.commons.lang.StringUtils.*;
7   
/**
 * Static helpers for cleaning up, comparing, splitting and joining strings.
 */
public class StringUtils
{
    /**
     * Size of every per-encoding replacement table. Characters whose numeric value is at or above
     * this limit have no slot in the table and are therefore never replaced.
     */
    private static final int MAX_LENGTH = 9000;

    /** Matches a single comma; compiled once instead of on every split call. */
    private static final Pattern COMMA_PATTERN = Pattern.compile(",");

    /**
     * Cache of replacement tables, keyed by the encoding name exactly as the caller passed it.
     * Each value is a <code>char[][]</code> indexed by character code; a non-null slot holds the
     * replacement for that character.
     * <p/>
     * The map is wrapped in a synchronized view so that concurrent callers cannot corrupt it.
     * Two threads may still build the table for the same encoding at the same time; that is
     * harmless because both tables have identical content and the last one stored wins.
     */
    protected static Map stringCharMappings = Collections.synchronizedMap(new HashMap<String, char[][]>(4));


    /**
     * Returns the replacement table for the given encoding, building and caching it on first use.
     *
     * @param encoding encoding name, compared case-insensitively against the known encodings;
     *                 unknown (or null) encodings only get the control-character mappings
     * @return table indexed by character code; null slots mean "leave the character alone"
     */
    @SuppressWarnings("unchecked") // the cache field is kept as a raw Map so its declared type is unchanged
    private static char[][] getMappings(String encoding)
    {
        char[][] cached = (char[][]) stringCharMappings.get(encoding);
        if (cached != null)
        {
            return cached;
        }

        char[][] table = new char[MAX_LENGTH][];
        if ("UTF-8".equalsIgnoreCase(encoding)
                || "Big5".equalsIgnoreCase(encoding)
                || "Windows-1252".equalsIgnoreCase(encoding))
        {
            // FIXME: These characters are valid in utf-8
            addMapping(8216, "'", table);
            addMapping(8217, "'", table);
            addMapping(8220, "\"", table);
            addMapping(8221, "\"", table);
            addMapping(8230, "...", table);
            addMapping(8211, "-", table);
            addMapping(183, "- ", table);  // replace bullets
        }
        else if ("ISO-8859-1".equalsIgnoreCase(encoding))
        {
            addMapping(145, "'", table);
            addMapping(146, "'", table);
            addMapping(147, "\"", table);
            addMapping(148, "\"", table);
            addMapping(133, "...", table);
            addMapping(150, "-", table);
            addMapping(183, "- ", table);  // replace bullets
        }

        // Control characters below 32 are stripped (mapped to nothing) for every encoding,
        // except 9 (tab), 10 (line feed) and 13 (carriage return), which are kept.
        for (int code = 0; code < 32; code++)
        {
            if (code != 9 && code != 10 && code != 13)
            {
                addMapping(code, "", table);
            }
        }

        stringCharMappings.put(encoding, table);
        return table;
    }

    /**
     * Registers <code>replaceStr</code> as the replacement for the character with the given code.
     */
    private static void addMapping(int charsNumericValue, String replaceStr, char[][] mappings)
    {
        mappings[charsNumericValue] = replaceStr.toCharArray();
    }

    /**
     * Looks up the replacement for a character, or null when the character is outside the table
     * or has no mapping.
     */
    private static char[] replacementFor(char[][] mappings, char c)
    {
        return c < mappings.length ? mappings[c] : null;
    }

    /**
     * Replaces "smart quotes" and other problematic characters that appear in JIRA when data is cut and pasted
     * from a Microsoft Word document. <p>
     * These include smart single and double quotes, ellipses, dashes and bullets
     * (these characters belong to the Windows Code Page 1252 encoding). Control characters below 32,
     * other than tab, line feed and carriage return, are removed regardless of the encoding.
     *
     * @param s        string to simplify
     * @param encoding eg. UTF-8, Big5, ISO-8859-1 etc.
     * @return null if <code>s</code> is null; the trimmed string if <code>s</code> is blank or trims to
     *         <code>""</code> (two double-quote characters); <code>s</code> itself if nothing needs
     *         replacing; otherwise a new string with every mapped character substituted
     */
    public static final String escapeCP1252(String s, String encoding)
    {
        if (s == null)
        {
            return null;
        }

        int len = s.length();
        if (len == 0)
        {
            return s;
        }

        // An "extended empty" string (only whitespace, or just a pair of quotes) is returned trimmed.
        String trimmed = s.trim();
        if (trimmed.length() == 0 || "\"\"".equals(trimmed))
        {
            return trimmed;
        }

        char[][] mappings = getMappings(encoding);

        // Cheap first pass: find the first character that needs replacing without allocating anything.
        int first = 0;
        while (first < len && replacementFor(mappings, s.charAt(first)) == null)
        {
            first++;
        }
        if (first == len)
        {
            return s;
        }

        // Second pass: copy untouched runs verbatim and splice in the replacements.
        StringBuilder out = new StringBuilder(len + 40);
        out.append(s, 0, first);
        int runStart = first;
        for (int i = first; i < len; i++)
        {
            char[] subst = replacementFor(mappings, s.charAt(i));
            if (subst != null)
            {
                out.append(s, runStart, i);
                out.append(subst);
                runStart = i + 1;
            }
        }
        out.append(s, runStart, len);
        return out.toString();
    }


    /**
     * Crop a string if it is longer than a certain length, adding the specified suffix.
     * <p/>
     * If the string is not longer than the cropAt length, then it is returned unchanged.
     *
     * @param original string to crop; null is returned as null
     * @param cropAt   number of leading characters to keep
     * @param suffix   text appended after the kept characters when cropping happens
     * @return the original string, or its first <code>cropAt</code> characters followed by the suffix
     */
    public static String crop(String original, int cropAt, String suffix)
    {
        if (original == null)
        {
            return null;
        }
        if (original.length() <= cropAt)
        {
            return original;
        }
        return original.substring(0, cropAt) + suffix;
    }

    /**
     * Tests to see if a string contains any of the strings in the list passed.
     * <p/>
     * A null value only "contains" a null or empty list; a non-null value never contains a null or
     * empty list.
     *
     * @param value            string to search in
     * @param possiblyContains list of String candidates
     * @return true if at least one candidate occurs in <code>value</code>, or if both are null/empty
     */
    public static boolean contains(String value, List possiblyContains)
    {
        boolean noCandidates = possiblyContains == null || possiblyContains.isEmpty();
        if (value == null)
        {
            return noCandidates;
        }
        if (noCandidates)
        {
            return false;
        }

        for (Object candidate : possiblyContains)
        {
            if (value.indexOf((String) candidate) > -1)
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces all occurrences of one string with another. Matching is literal (no regular
     * expressions) and proceeds left to right over non-overlapping occurrences.
     *
     * @param str        string to search; null is returned as null
     * @param oldPattern text to look for; if null or empty, <code>str</code> is returned unchanged
     * @param newPattern text to insert in place of each occurrence
     * @return the string with every occurrence replaced
     */
    public static String replaceAll(final String str, final String oldPattern, final String newPattern)
    {
        if (str == null)
        {
            return null;
        }
        if (oldPattern == null || oldPattern.equals(""))
        {
            return str;
        }

        int match = str.indexOf(oldPattern);
        if (match == -1)
        {
            return str;
        }

        // Scan forward with a moving offset instead of re-copying the remainder after each match.
        StringBuilder buf = new StringBuilder(str.length());
        int from = 0;
        while (match != -1)
        {
            buf.append(str, from, match);
            buf.append(newPattern);
            from = match + oldPattern.length();
            match = str.indexOf(oldPattern, from);
        }
        buf.append(str, from, str.length());
        return buf.toString();
    }

    /**
     * Tests if all the characters in the string are ASCII characters (code 127 or below).
     *
     * @return true for null, for the empty string, and for strings made only of ASCII characters
     */
    public static boolean isStringAllASCII(String str)
    {
        if (str == null)
        {
            return true;
        }

        for (int i = 0; i < str.length(); i++)
        {
            // a char is unsigned, so only the upper bound needs checking
            if (str.charAt(i) > 127)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if all the characters in the string are from the specified character set, by encoding
     * the string to bytes in that charset and checking that decoding gives the same string back.
     *
     * @param string  string to test; null counts as matching
     * @param charset name of the charset
     * @return true if the string survives the round trip; false if it does not or the charset is unsupported
     */
    public static boolean isStringOfCharSet(String string, String charset)
    {
        if (string == null)
        {
            return true;
        }

        try
        {
            byte[] encoded = string.getBytes(charset);
            return string.equals(new String(encoded, charset));
        }
        catch (UnsupportedEncodingException e)
        {
            // An unsupported charset is reported as "not of this charset" rather than propagated.
            return false;
        }
    }

    /**
     * Checks if all the characters in the string can be represented in ISO-8859-1.
     */
    public static boolean isStringISO_8859_1(String string)
    {
        return isStringOfCharSet(string, "ISO-8859-1");
    }

    /**
     * Compares two strings after normalising their line terminators, so that "\r\n", "\r" and "\n"
     * are treated as equal. Null is treated the same as the empty string.
     */
    public static boolean equalsIgnoreLineTerminators(String s1, String s2)
    {
        String left = normalise(s1 == null ? "" : s1);
        String right = normalise(s2 == null ? "" : s2);

        return left.equals(right);
    }

    /**
     * Converts every line terminator in the value to a single '\n'.
     *
     * @param value string to normalise; null is returned as null
     */
    public static String normalise(String value)
    {
        // Collapse '\r\n' first so that the pair does not turn into two line feeds,
        // then convert any remaining lone '\r' characters.
        String withoutCrLf = replaceAll(value, "\r\n", "\n");
        return replaceAll(withoutCrLf, "\r", "\n");
    }

    /**
     * Method will turn a String of comma separated entities into a String Array.
     * Spaces before or after the comma will be cropped.
     *
     * @param entryString A comma separated String
     * @return String Array of trimmed entries; an empty array if <code>entryString</code> is null
     */
    public static String[] splitCommaSeparatedString(String entryString)
    {
        if (entryString == null)
        {
            return new String[0];
        }

        String[] parsed = COMMA_PATTERN.split(entryString);
        for (int i = 0; i < parsed.length; i++)
        {
            // write the trimmed value back; trimming a local copy would leave the array untouched
            parsed[i] = parsed[i].trim();
        }
        return parsed;
    }

    /**
     * Create a String of comma separated entries from a Collection.
     *
     * @param entries A collection of String entries
     * @return Comma separated String, or null if <code>entries</code> is null
     */
    public static String createCommaSeperatedString(Collection entries)
    {
        if (entries == null)
        {
            return null;
        }

        StringBuilder joined = new StringBuilder();
        for (Iterator iterator = entries.iterator(); iterator.hasNext();)
        {
            joined.append((String) iterator.next());
            if (iterator.hasNext())
            {
                joined.append(",");
            }
        }
        return joined.toString();
    }
}