1   package com.atlassian.bonnie.search.summary;
2   
3   import com.atlassian.bonnie.LuceneConnection;
4   import com.atlassian.bonnie.search.BaseDocumentBuilder;
5   import junit.framework.TestCase;
6   import org.apache.lucene.analysis.Analyzer;
7   import org.apache.lucene.analysis.SimpleAnalyzer;
8   import org.apache.lucene.document.Document;
9   import org.apache.lucene.document.Field;
10  import org.apache.lucene.index.IndexWriter;
11  import org.apache.lucene.store.RAMDirectory;
12  import org.slf4j.Logger;
13  import org.slf4j.LoggerFactory;
14  
15  import java.io.IOException;
16  
17  public class TestSummarizer extends TestCase
18  {
19      private static final Analyzer ANALYZER = new SimpleAnalyzer();
20  
21      private static final Logger LOG = LoggerFactory.getLogger(TestSummarizer.class);
22  
23      public void testBlankSummary() throws IOException
24      {
25          Summarizer summarizer = new Summarizer(ANALYZER);
26          Summary summary = summarizer.getSummary("", "test");
27          assertEquals(0, summary.getFragments().length);
28      }
29  
30      public void testSimpleSummary() throws IOException
31      {
32          Summarizer summarizer = new Summarizer(ANALYZER);
33          Summary summary = summarizer.getSummary("A fragment of text.", "fragment");
34          printSummary(summary);
35      }
36  
37      public void testLotsofFragments() throws IOException
38      {
39          Summarizer summarizer = new Summarizer(ANALYZER, 5, 30, null);
40          Summary summary = summarizer.getSummary("More fragments more fragment and and more more more more more and more fragment and more and " +
41                  "more more and a fragment of text.", "fragment");
42          printSummary(summary);
43      }
44  
45      public void testStopWordsAtStart() throws IOException
46      {
47          Summarizer summarizer = new Summarizer(ANALYZER, 10, 30, null);
48          Summary summary = summarizer.getSummary("This is the home page for the Mail Room space", "home");
49          printSummary(summary);
50      }
51  
52      public void testMultipleFragments() throws IOException
53      {
54          Summarizer summarizer = new Summarizer(ANALYZER, 1, 10, null);
55          Summary summary = summarizer.getSummary("More fragments and more more more more more more more and a fragment of text.", "fragment");
56          printSummary(summary);
57      }
58  
59      public void testSummarizeShortText() throws IOException
60      {
61          Summarizer summarize = new Summarizer(ANALYZER, 1, 10, null);
62          assertEquals("I like cheese", summarize.getSummary("I like cheese").toString());
63      }
64  
65      public void testNoEarlyTruncation() throws IOException
66      {
67          // Tests that the summarizer does not truncate the text too early for either long or short pieces of text and
68          // regardless of the position of the query string.
69  
70          Summarizer summarize = new Summarizer(ANALYZER, 20, 30, null);
71          assertEquals("short excerpt that's fun", summarize.getSummary("short excerpt that's fun", "excerpt").toString());
72          assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "excerpt").toString());
73          assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "just").toString());
74      }
75  //TODO lucene upgrade, I think this test is possibly broken and probably not testing what it implies it is testing. Needs a review
76      public void testHighlightWildcards() throws IOException
77      {
78          // build an in-memory index for expanding wildcard terms
79          RAMDirectory dir = new RAMDirectory();
80          IndexWriter w = new IndexWriter(dir, ANALYZER, true);
81          Document doc = new Document();
82          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheese", Field.Store.YES, Field.Index.TOKENIZED));
83          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheeseboy", Field.Store.YES, Field.Index.TOKENIZED));
84          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheep", Field.Store.YES, Field.Index.TOKENIZED));
85          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "management", Field.Store.YES, Field.Index.TOKENIZED));
86          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "managing", Field.Store.YES, Field.Index.TOKENIZED));
87          doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "manage", Field.Store.YES, Field.Index.TOKENIZED));
88          w.addDocument(doc);
89          w.close();
90  
91          Summarizer summarize = new Summarizer(ANALYZER, 1, 10, new LuceneConnection(dir, ANALYZER));
92          Summary summary1 = summarize.getSummary("I like cheese", "cheese");
93          assertEquals(3, summary1.getFragments().length);
94          assertEquals("cheese", summary1.getFragments()[2].getText());
95          assertTrue(summary1.getFragments()[2].isHighlight());
96  
97          Summary summary2 = summarize.getSummary("I like cheese", "che*");
98          assertEquals(3, summary2.getFragments().length);
99          assertEquals("cheese", summary2.getFragments()[2].toString());
100         assertTrue(summary2.getFragments()[2].isHighlight());
101 
102         Summary summary3 = summarize.getSummary("I think Management sucks!", "manag* sucks");
103         assertEquals(5, summary3.getFragments().length);
104         assertEquals("Management", summary3.getFragments()[2].toString());
105         assertTrue(summary3.getFragments()[2].isHighlight());
106 
107     }
108 
109     private void printSummary(Summary summary)
110     {
111         StringBuffer buffer = new StringBuffer();
112         buffer.append("Summary:");
113         for (int idx = 0; idx < summary.getFragments().length; idx++)
114         {
115             Summary.Fragment fragment = summary.getFragments()[idx];
116             if (fragment instanceof Summary.Highlight)
117                 buffer.append("_");
118             buffer.append(fragment.toString());
119             if (fragment instanceof Summary.Highlight)
120                 buffer.append("_");
121             if ((idx + 1) < summary.getFragments().length)
122                 buffer.append("|");
123         }
124         LOG.info(buffer.toString());
125     }
126 }