View Javadoc

1   package com.atlassian.core.util;
2   
3   import com.opensymphony.util.TextUtils;
4   
5   import java.util.*;
6   import java.util.regex.Pattern;
7   import java.io.UnsupportedEncodingException;
8   
/**
 * Static helpers for cleaning up, comparing, splitting and joining strings.
 * <p>
 * All methods are stateless apart from the replacement-table cache used by
 * {@link #escapeCP1252(String, String)}.
 */
public class StringUtils
{
    /** Size of every replacement table; characters whose code is at or above this value are never replaced. */
    private static final int MAX_LENGTH = 9000;

    /** Compiled once instead of on every call to {@link #splitCommaSeparatedString(String)}. */
    private static final Pattern COMMA_PATTERN = Pattern.compile(",");

    /**
     * Cache of replacement tables, keyed by the encoding name exactly as it was passed in
     * (the key may be <code>null</code>).
     * <p>
     * The map is wrapped with {@link Collections#synchronizedMap(Map)} so concurrent callers cannot
     * corrupt it. Two threads may still build the table for the same encoding at the same time; both
     * tables have identical contents, so whichever one is stored last simply wins.
     */
    protected static Map<String, char[][]> stringCharMappings =
            Collections.synchronizedMap(new HashMap<String, char[][]>(4));


    /**
     * Returns the replacement table for the given encoding, building and caching it on first use.
     * <p>
     * The table is indexed by character code; a non-null entry holds the characters that replace it.
     *
     * @param encoding encoding name, compared case-insensitively; may be <code>null</code>
     * @return the table of length {@link #MAX_LENGTH} for that encoding
     */
    private static char[][] getMappings(String encoding)
    {
        char[][] stringChars = stringCharMappings.get(encoding);
        if (stringChars != null)
        {
            return stringChars;
        }

        stringChars = new char[MAX_LENGTH][];
        if ("UTF-8".equalsIgnoreCase(encoding) ||
                "Big5".equalsIgnoreCase(encoding) ||
                "Windows-1252".equalsIgnoreCase(encoding))
        {
            // FIXME: These characters are valid in utf-8
            addMapping(8216, "'", stringChars);    // U+2018 left single quote
            addMapping(8217, "'", stringChars);    // U+2019 right single quote
            addMapping(8220, "\"", stringChars);   // U+201C left double quote
            addMapping(8221, "\"", stringChars);   // U+201D right double quote
            addMapping(8230, "...", stringChars);  // U+2026 ellipsis
            addMapping(8211, "-", stringChars);    // U+2013 en dash
            addMapping(183, "- ", stringChars);    // U+00B7 middle dot: replace bullets
        }
        else if ("ISO-8859-1".equalsIgnoreCase(encoding))
        {
            // Same punctuation, addressed by its Windows-1252 byte value
            addMapping(145, "'", stringChars);
            addMapping(146, "'", stringChars);
            addMapping(147, "\"", stringChars);
            addMapping(148, "\"", stringChars);
            addMapping(133, "...", stringChars);
            addMapping(150, "-", stringChars);
            addMapping(183, "- ", stringChars);    // replace bullets
        }

        // Control characters below 32 are removed, except tab (9), line feed (10) and carriage return (13)
        for (int i = 0; i < 32; i++)
        {
            if (i != 9 && i != 10 && i != 13)
            {
                addMapping(i, "", stringChars);
            }
        }

        stringCharMappings.put(encoding, stringChars);
        return stringChars;
    }

    /**
     * Registers <code>replaceStr</code> as the replacement for the character with the given code.
     */
    private static void addMapping(int charsNumericValue, String replaceStr, char[][] mappings)
    {
        mappings[charsNumericValue] = replaceStr.toCharArray();
    }

    /**
     * Looks up the replacement for a character, treating codes beyond the table as "no replacement".
     */
    private static char[] replacementFor(char c, char[][] mappings)
    {
        return c < mappings.length ? mappings[c] : null;
    }

    /**
     * Replaces "smart quotes" and other problematic characters that appear in JIRA when data is cut and pasted
     * from a Microsoft Word document. <p>
     * These include smart single and double quotes, ellipses, dashes and bullets
     * (these characters belong to the Windows Code Page 1252 encoding). Control characters below 32 other than
     * tab, line feed and carriage return are removed for every encoding.
     *
     * @param s        string to simplify
     * @param encoding eg. UTF-8, Big5, ISO-8859-1 etc.
     * @return <code>null</code> if <code>s</code> is <code>null</code>; the trimmed string if <code>s</code> is
     *         only whitespace or trims to <code>""</code> (two double quotes); <code>s</code> itself if nothing
     *         needs replacing; otherwise a new string with the replacements applied
     */
    public static final String escapeCP1252(String s, String encoding)
    {
        if (s == null)
        {
            return null;
        }

        final int len = s.length();
        if (len == 0)
        {
            return s;
        }

        // An "extended empty" string (blank, or just a pair of double quotes) is returned trimmed
        String trimmed = s.trim();
        if (trimmed.length() == 0 || "\"\"".equals(trimmed))
        {
            return trimmed;
        }

        char[][] mappings = getMappings(encoding);

        // Find the first character that needs replacing; nothing is copied during this scan
        int first = 0;
        while (first < len && replacementFor(s.charAt(first), mappings) == null)
        {
            first++;
        }
        if (first == len)
        {
            return s;
        }

        StringBuilder sb = new StringBuilder(len + 40);
        sb.append(s, 0, first);

        // 'last' marks the start of the run of unmodified characters not yet copied to sb
        int last = first;
        for (int i = first; i < len; i++)
        {
            char[] subst = replacementFor(s.charAt(i), mappings);
            if (subst != null)
            {
                sb.append(s, last, i);
                sb.append(subst);
                last = i + 1;
            }
        }
        sb.append(s, last, len);
        return sb.toString();
    }


    /**
     * Crop a string if it is longer than a certain length, adding the specified suffix.
     * <p>
     * If the string is not longer than the cropAt length, then it is returned unchanged.
     *
     * @param original string to crop; may be <code>null</code>
     * @param cropAt   maximum number of characters kept from <code>original</code>
     * @param suffix   text appended after the kept characters when cropping happens
     * @return the cropped string with the suffix, the original string, or <code>null</code> for <code>null</code> input
     */
    public static String crop(String original, int cropAt, String suffix)
    {
        if (original == null)
        {
            return null;
        }
        if (original.length() <= cropAt)
        {
            return original;
        }
        return original.substring(0, cropAt) + suffix;
    }

    /**
     * Tests whether a string contains any of the strings in the list passed.
     * <p>
     * A <code>null</code> value only "contains" a <code>null</code> or empty list. Null entries in the list
     * are skipped.
     *
     * @param value            string to search in; may be <code>null</code>
     * @param possiblyContains candidate substrings; may be <code>null</code>
     * @return <code>true</code> if at least one candidate occurs in <code>value</code>
     */
    public static boolean contains(String value, List<String> possiblyContains)
    {
        boolean noCandidates = possiblyContains == null || possiblyContains.isEmpty();
        if (value == null)
        {
            return noCandidates;
        }
        if (noCandidates)
        {
            return false;
        }

        for (String candidate : possiblyContains)
        {
            // A null candidate cannot be contained; skip it rather than fail with a NullPointerException
            if (candidate != null && value.contains(candidate))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces all occurrences of one string with another. The pattern is matched literally, not as a regex.
     *
     * @param str        string to search in; may be <code>null</code>
     * @param oldPattern literal text to replace; if <code>null</code> or empty, <code>str</code> is returned as is
     * @param newPattern replacement text
     * @return the string with every occurrence replaced, or <code>null</code> if <code>str</code> is <code>null</code>
     */
    public static String replaceAll(final String str, final String oldPattern, final String newPattern)
    {
        if (str == null)
        {
            return null;
        }
        if (oldPattern == null || oldPattern.length() == 0)
        {
            return str;
        }

        int match = str.indexOf(oldPattern);
        if (match == -1)
        {
            return str;
        }

        // Scan forward with an offset instead of repeatedly copying the remainder of the string
        StringBuilder buf = new StringBuilder(str.length() * 2);
        int from = 0;
        while (match != -1)
        {
            buf.append(str, from, match);
            buf.append(newPattern);
            from = match + oldPattern.length();
            match = str.indexOf(oldPattern, from);
        }
        buf.append(str, from, str.length());
        return buf.toString();
    }

    /**
     * Tests if all the characters in the string are ASCII characters (code 127 or below).
     *
     * @return <code>true</code> for a <code>null</code> or empty string, or when every character is ASCII
     */
    public static boolean isStringAllASCII(String str)
    {
        if (str == null)
        {
            return true;
        }

        for (int i = 0; i < str.length(); i++)
        {
            if (str.charAt(i) > 127)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if all the characters in the string are from the specified character set, by encoding the
     * string to bytes in that character set and checking that decoding gives the same string back.
     *
     * @param string  string to check; <code>null</code> is treated as valid
     * @param charset name of the character set
     * @return <code>true</code> if the string survives the round trip; <code>false</code> if it does not or if
     *         the character set is not supported
     */
    public static boolean isStringOfCharSet(String string, String charset)
    {
        if (string == null)
        {
            return true;
        }

        try
        {
            return string.equals(new String(string.getBytes(charset), charset));
        }
        catch (UnsupportedEncodingException ignored)
        {
            // An unknown character set cannot represent the string, so the answer is simply "no"
            return false;
        }
    }

    /**
     * Checks if all the characters in the string can be represented in ISO-8859-1.
     */
    public static boolean isStringISO_8859_1(String string)
    {
        return isStringOfCharSet(string, "ISO-8859-1");
    }

    /**
     * Compares two strings, treating <code>\r\n</code>, <code>\r</code> and <code>\n</code> as the same line
     * terminator. A <code>null</code> argument is treated as the empty string.
     */
    public static boolean equalsIgnoreLineTerminators(String s1, String s2)
    {
        String normalisedValue = normalise(s1 == null ? "" : s1);
        String normalisedCurrentValue = normalise(s2 == null ? "" : s2);

        return normalisedValue.equals(normalisedCurrentValue);
    }

    /**
     * Converts every <code>\r\n</code> pair and every remaining single <code>\r</code> to <code>\n</code>.
     *
     * @return the normalised string, or <code>null</code> if <code>value</code> is <code>null</code>
     */
    public static String normalise(String value)
    {
        // Replace the pairs first so that the second pass only sees stand-alone '\r' characters
        String normalised = replaceAll(value, "\r\n", "\n");
        return replaceAll(normalised, "\r", "\n");
    }

    /**
     * Method will turn a String of comma separated entities into a String Array.
     * Spaces before or after the comma will be cropped.
     *
     * @param entryString A comma separated String; must not be <code>null</code>
     * @return String Array of the trimmed entries
     * @throws NullPointerException if <code>entryString</code> is <code>null</code>
     */
    public static String[] splitCommaSeparatedString(String entryString)
    {
        String[] parsed = COMMA_PATTERN.split(entryString);
        for (int i = 0; i < parsed.length; i++)
        {
            // Store the trimmed value back; trimming a local copy would leave the array untouched
            parsed[i] = parsed[i].trim();
        }
        return parsed;
    }

    /**
     * Create a String of comma separated entries from a Collection.
     *
     * @param entries A collection of entries
     * @return Comma separated String, or <code>null</code> if <code>entries</code> is <code>null</code>
     */
    public static String createCommaSeperatedString(Iterable<String> entries)
    {
        if (entries == null)
        {
            return null;
        }

        StringBuilder sb = new StringBuilder();
        for (Iterator<String> iterator = entries.iterator(); iterator.hasNext();)
        {
            sb.append(iterator.next());
            if (iterator.hasNext())
            {
                sb.append(",");
            }
        }
        return sb.toString();
    }
}