1   package com.atlassian.bonnie.search.summary;
2   
3   import com.atlassian.bonnie.LuceneConnection;
4   import junit.framework.TestCase;
5   import org.apache.log4j.Category;
6   import org.apache.lucene.analysis.Analyzer;
7   import org.apache.lucene.analysis.SimpleAnalyzer;
8   import org.apache.lucene.document.Document;
9   import org.apache.lucene.document.Field;
10  import org.apache.lucene.index.IndexWriter;
11  import org.apache.lucene.store.RAMDirectory;
12  
13  import java.io.IOException;
14  
15  public class TestSummarizer extends TestCase
16  {
17      private static final Analyzer ANALYZER = new SimpleAnalyzer();
18  
19      public static final Category LOG = Category.getInstance(TestSummarizer.class);
20  
21      public void testBlankSummary() throws IOException
22      {
23          Summarizer summarizer = new Summarizer(ANALYZER);
24          Summary summary = summarizer.getSummary("", "test");
25          assertEquals(0, summary.getFragments().length);
26      }
27  
28      public void testSimpleSummary() throws IOException
29      {
30          Summarizer summarizer = new Summarizer(ANALYZER);
31          Summary summary = summarizer.getSummary("A fragment of text.", "fragment");
32          printSummary(summary);
33      }
34  
35      public void testLotsofFragments() throws IOException
36      {
37          Summarizer summarizer = new Summarizer(ANALYZER, 5, 30, null);
38          Summary summary = summarizer.getSummary("More fragments more fragment and and more more more more more and more fragment and more and " +
39                  "more more and a fragment of text.", "fragment");
40          printSummary(summary);
41      }
42  
43      public void testStopWordsAtStart() throws IOException
44      {
45          Summarizer summarizer = new Summarizer(ANALYZER, 10, 30, null);
46          Summary summary = summarizer.getSummary("This is the home page for the Mail Room space", "home");
47          printSummary(summary);
48      }
49  
50      public void testMultipleFragments() throws IOException
51      {
52          Summarizer summarizer = new Summarizer(ANALYZER, 1, 10, null);
53          Summary summary = summarizer.getSummary("More fragments and more more more more more more more and a fragment of text.", "fragment");
54          printSummary(summary);
55      }
56  
57      public void testSummarizeShortText() throws IOException
58      {
59          Summarizer summarize = new Summarizer(ANALYZER, 1, 10, null);
60          assertEquals("I like cheese", summarize.getSummary("I like cheese").toString());
61      }
62  
63      public void testNoEarlyTruncation() throws IOException
64      {
65          // Tests that the summarizer does not truncate the text too early for either long or short pieces of text and
66          // regardless of the position of the query string.
67  
68          Summarizer summarize = new Summarizer(ANALYZER, 20, 30, null);
69          assertEquals("short excerpt that's fun", summarize.getSummary("short excerpt that's fun", "excerpt").toString());
70          assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "excerpt").toString());
71          assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "just").toString());
72      }
73  
74      public void testHighlightWildcards() throws IOException
75      {
76          // build an in-memory index for expanding wildcard terms
77          RAMDirectory dir = new RAMDirectory();
78          IndexWriter w = new IndexWriter(dir, ANALYZER, true);
79          Document doc = new Document();
80          doc.add(new Field("contentBody", "cheese", Field.Store.YES, Field.Index.TOKENIZED));
81          doc.add(new Field("contentBody", "cheeseboy", Field.Store.YES, Field.Index.TOKENIZED));
82          doc.add(new Field("contentBody", "cheep", Field.Store.YES, Field.Index.TOKENIZED));
83          doc.add(new Field("contentBody", "management", Field.Store.YES, Field.Index.TOKENIZED));
84          doc.add(new Field("contentBody", "managing", Field.Store.YES, Field.Index.TOKENIZED));
85          doc.add(new Field("contentBody", "manage", Field.Store.YES, Field.Index.TOKENIZED));
86          w.addDocument(doc);
87          w.close();
88  
89          Summarizer summarize = new Summarizer(ANALYZER, 1, 10, new LuceneConnection(dir, ANALYZER));
90          Summary summary1 = summarize.getSummary("I like cheese", "cheese");
91          assertEquals(3, summary1.getFragments().length);
92          assertEquals("cheese", summary1.getFragments()[2].getText());
93          assertTrue(summary1.getFragments()[2].isHighlight());
94  
95          Summary summary2 = summarize.getSummary("I like cheese", "che*");
96          assertEquals(3, summary2.getFragments().length);
97          assertEquals("cheese", summary2.getFragments()[2].toString());
98          assertTrue(summary2.getFragments()[2].isHighlight());
99  
100         Summary summary3 = summarize.getSummary("I think Management sucks!", "manag* sucks");
101         assertEquals(5, summary3.getFragments().length);
102         assertEquals("Management", summary3.getFragments()[2].toString());
103         assertTrue(summary3.getFragments()[2].isHighlight());
104 
105     }
106 
107     private void printSummary(Summary summary)
108     {
109         StringBuffer buffer = new StringBuffer();
110         buffer.append("Summary:");
111         for (int idx = 0; idx < summary.getFragments().length; idx++)
112         {
113             Summary.Fragment fragment = summary.getFragments()[idx];
114             if (fragment instanceof Summary.Highlight)
115                 buffer.append("_");
116             buffer.append(fragment.toString());
117             if (fragment instanceof Summary.Highlight)
118                 buffer.append("_");
119             if ((idx + 1) < summary.getFragments().length)
120                 buffer.append("|");
121         }
122         LOG.info(buffer.toString());
123     }
124 }