1 package com.atlassian.bonnie.search.extractor;
2
3 import com.atlassian.bonnie.search.SearchableAttachment;
4 import org.apache.commons.io.IOUtils;
5 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
6 import org.apache.poi.hssf.eventusermodel.HSSFListener;
7 import org.apache.poi.hssf.eventusermodel.HSSFRequest;
8 import org.apache.poi.hssf.record.LabelSSTRecord;
9 import org.apache.poi.hssf.record.NumberRecord;
10 import org.apache.poi.hssf.record.Record;
11 import org.apache.poi.hssf.record.SSTRecord;
12 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
13
14 import java.io.IOException;
15 import java.io.InputStream;
16
17 @Deprecated
18 public class MsExcelContentExtractor extends BaseAttachmentContentExtractor
19 {
20 private static final String[] CONTENT_TYPES = {"application/excel", "application/x-excel",
21 "application/x-msexcel", "application/vnd.ms-excel"};
22 private static final String[] EXTENSIONS = {"xls"};
23
24 private static class ExcelEventListener implements HSSFListener
25 {
26 private final StringBuffer buff;
27 private SSTRecord sstrec;
28 private static final char SPACE = ' ';
29
30 public ExcelEventListener(StringBuffer buff)
31 {
32 this.buff = buff;
33 }
34
35 public void processRecord(Record record)
36 {
37 switch (record.getSid())
38 {
39 case NumberRecord.sid:
40 NumberRecord numrec = (NumberRecord) record;
41 final double numberValue = numrec.getValue();
42 if (isInteger(numberValue))
43 buff.append((int) numberValue).append(SPACE);
44 else
45 buff.append(numberValue).append(SPACE);
46 break;
47 case SSTRecord.sid:
48 sstrec = (SSTRecord) record;
49 break;
50 case LabelSSTRecord.sid:
51 LabelSSTRecord lrec = (LabelSSTRecord) record;
52 buff.append(sstrec.getString(lrec.getSSTIndex())).append(SPACE);
53 break;
54 }
55 }
56
57
58
59
60 private boolean isInteger(double doubleValue)
61 {
62 double floored = Math.floor(doubleValue);
63 return doubleValue - floored == 0;
64 }
65 }
66
67 protected String[] getMatchingContentTypes()
68 {
69 return CONTENT_TYPES;
70 }
71
72 protected String[] getMatchingFileExtensions()
73 {
74 return EXTENSIONS;
75 }
76
77 protected String extractText(InputStream is, SearchableAttachment attachment) throws ExtractorException
78 {
79 StringBuffer content = new StringBuffer();
80
81 InputStream din = null;
82
83 try
84 {
85 POIFSFileSystem poifs = new POIFSFileSystem(is);
86
87
88 din = poifs.createDocumentInputStream("Workbook");
89
90 HSSFRequest req = new HSSFRequest();
91
92 req.addListenerForAllRecords(new ExcelEventListener(content));
93
94 HSSFEventFactory factory = new HSSFEventFactory();
95 factory.processEvents(req, din);
96 }
97 catch (IOException e)
98 {
99 throw new ExtractorException("Error reading content of Excel document: " + e.getMessage(), e);
100 }
101 finally
102 {
103 IOUtils.closeQuietly(din);
104 }
105
106
107 return content.toString();
108 }
109 }