1   package com.atlassian.bonnie.search.extractor;
2   
3   import com.atlassian.bonnie.search.SearchableAttachment;
4   
5   import java.io.InputStream;
6   import java.io.IOException;
7   
8   import org.apache.commons.io.IOUtils;
9   import org.slf4j.Logger;
10  import org.slf4j.LoggerFactory;
11  
12  public class DefaultTextContentExtractor extends BaseAttachmentContentExtractor
13  {
14      private static final Logger log = LoggerFactory.getLogger(DefaultTextContentExtractor.class);
15  
16      /**
17       * Extract text from mime types like 'text/*', 'application/xml*' and 'application/*+xml'
18       */
19      protected boolean shouldExtractFrom(String fileName, String contentType)
20      {
21          return contentType.startsWith("text/") || contentType.startsWith("application/xml") ||
22                  (contentType.startsWith("application/") && contentType.endsWith("+xml"));
23      }
24  
25      protected String extractText(InputStream is, SearchableAttachment attachment)
26      {
27          try
28          {
29              return IOUtils.toString(is, "UTF-8");
30          }
31          catch (IOException e)
32          {
33              log.error("Couldn't extract text from attachment: " + attachment, e);
34              return null;
35          }
36      }
37  }