View Javadoc

1   package com.atlassian.bonnie.search.extractor;
2   
3   import com.atlassian.bonnie.search.SearchableAttachment;
4   
5   import java.io.InputStream;
6   
7   import org.apache.poi.hwpf.extractor.WordExtractor;
8   import org.slf4j.Logger;
9   import org.slf4j.LoggerFactory;
10  
11  @Deprecated
12  public class MsWordContentExtractor extends BaseAttachmentContentExtractor
13  {
14      private static final String[] CONTENT_TYPES = { "application/msword" };
15      private static final String[] EXTENSIONS = { "doc" };
16  
17      protected String[] getMatchingContentTypes()
18      {
19          return CONTENT_TYPES;
20      }
21  
22      protected String[] getMatchingFileExtensions()
23      {
24          return EXTENSIONS;
25      }
26  
27      protected String extractText(InputStream is, SearchableAttachment attachment) throws ExtractorException
28      {
29          try
30          {
31              WordExtractor extractor = new WordExtractor(is);
32              return extractor.getText();
33          }
34          catch (Exception e)
35          {
36              throw new ExtractorException("Error reading content of Word document: " + e.getMessage(), e);
37          }
38      }
39  }