1   package com.atlassian.bonnie.search.extractor;
2   
3   import com.atlassian.bonnie.search.Extractor;
4   import com.atlassian.bonnie.search.MockSearchableAttachment;
5   import org.apache.lucene.document.Document;
6   
7   public class TestMsExcelContentExtractor extends BaseAttachmentContentExtractorTest
8   {
9   	private MockSearchableAttachment excelAttachment;
10  
11  	public Extractor getExtractor()
12      {
13          return new MsExcelContentExtractor();
14      }
15  
16  	/**
17  	 * This will test that long values like 354259021 are properly casted from doubles (default storage type {@see NumberRecord}) back into int's so they can be searched on (CONF-10016).
18  	 * The problem before was that they were being indexed as 3.54259021E8 causing a search for 354259021 to fail.
19  	 * Also test that actual doubles with decimal values are preserved (assert for 1.5 etc).
20  	 */
21  	public void testSimpleExcelDoc()
22  	{
23  		assertOnExtractedTextOf(excelAttachment, new String[]{"Apples", "Quantity", "10", "20", "354259021", "1.5", "2.75"}, new String[0]);
24  	}
25  
26  	public void testMultipleOccurrencesOfWordsAreExtracted()
27  	{
28  		StringBuffer searchableContent = new StringBuffer();
29  		extractor.addFields(new Document(), searchableContent, excelAttachment);
30  
31  		// there are 3 occurences of the word "Apples" is this excel spreadsheet
32  		int startIndex = 0;
33  		int occurrences = 0;
34  
35  		while ((startIndex = searchableContent.indexOf("Apples", startIndex)) != -1)
36  		{
37  			occurrences++;
38  			startIndex++; // starting searching 1 after the last occurrence
39  		}
40  		
41  		assertEquals(3, occurrences);
42  	}
43  
44  	protected void setUp() throws Exception
45  	{
46  		super.setUp();
47  		excelAttachment = createSearchableAttachment("test-attachment-search.xls", "application/excel");
48  	}
49  
50  	protected void tearDown() throws Exception
51  	{
52  		excelAttachment = null;
53  		super.tearDown();
54  	}
55  }