1 package com.atlassian.bonnie.search.extractor;
2
3 import com.atlassian.bonnie.search.SearchableAttachment;
4
5 import java.io.InputStream;
6
7 import org.apache.poi.hwpf.extractor.WordExtractor;
8 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory;
10
11 @Deprecated
12 public class MsWordContentExtractor extends BaseAttachmentContentExtractor
13 {
14 private static final String[] CONTENT_TYPES = { "application/msword" };
15 private static final String[] EXTENSIONS = { "doc" };
16
17 protected String[] getMatchingContentTypes()
18 {
19 return CONTENT_TYPES;
20 }
21
22 protected String[] getMatchingFileExtensions()
23 {
24 return EXTENSIONS;
25 }
26
27 protected String extractText(InputStream is, SearchableAttachment attachment) throws ExtractorException
28 {
29 try
30 {
31 WordExtractor extractor = new WordExtractor(is);
32 return extractor.getText();
33 }
34 catch (Exception e)
35 {
36 throw new ExtractorException("Error reading content of Word document: " + e.getMessage(), e);
37 }
38 }
39 }