1 package com.atlassian.bonnie.search.extractor;
2
3 import com.atlassian.bonnie.search.SearchableAttachment;
4
5 import java.io.InputStream;
6 import java.io.IOException;
7
8 import org.apache.commons.io.IOUtils;
9 import org.slf4j.Logger;
10 import org.slf4j.LoggerFactory;
11
12 public class DefaultTextContentExtractor extends BaseAttachmentContentExtractor
13 {
14 private static final Logger log = LoggerFactory.getLogger(DefaultTextContentExtractor.class);
15
16
17
18
19 protected boolean shouldExtractFrom(String fileName, String contentType)
20 {
21 return contentType.startsWith("text/") || contentType.startsWith("application/xml") ||
22 (contentType.startsWith("application/") && contentType.endsWith("+xml"));
23 }
24
25 protected String extractText(InputStream is, SearchableAttachment attachment)
26 {
27 try
28 {
29 return IOUtils.toString(is, "UTF-8");
30 }
31 catch (IOException e)
32 {
33 log.error("Couldn't extract text from attachment: " + attachment, e);
34 return null;
35 }
36 }
37 }