1 package com.atlassian.bonnie.search.summary;
2
3 import com.atlassian.bonnie.LuceneConnection;
4 import com.atlassian.bonnie.search.BaseDocumentBuilder;
5 import junit.framework.TestCase;
6 import org.apache.lucene.analysis.Analyzer;
7 import org.apache.lucene.analysis.SimpleAnalyzer;
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.store.RAMDirectory;
12 import org.slf4j.Logger;
13 import org.slf4j.LoggerFactory;
14
15 import java.io.IOException;
16
17 public class TestSummarizer extends TestCase
18 {
19 private static final Analyzer ANALYZER = new SimpleAnalyzer();
20
21 private static final Logger LOG = LoggerFactory.getLogger(TestSummarizer.class);
22
23 public void testBlankSummary() throws IOException
24 {
25 Summarizer summarizer = new Summarizer(ANALYZER);
26 Summary summary = summarizer.getSummary("", "test");
27 assertEquals(0, summary.getFragments().length);
28 }
29
30 public void testSimpleSummary() throws IOException
31 {
32 Summarizer summarizer = new Summarizer(ANALYZER);
33 Summary summary = summarizer.getSummary("A fragment of text.", "fragment");
34 printSummary(summary);
35 }
36
37 public void testLotsofFragments() throws IOException
38 {
39 Summarizer summarizer = new Summarizer(ANALYZER, 5, 30, null);
40 Summary summary = summarizer.getSummary("More fragments more fragment and and more more more more more and more fragment and more and " +
41 "more more and a fragment of text.", "fragment");
42 printSummary(summary);
43 }
44
45 public void testStopWordsAtStart() throws IOException
46 {
47 Summarizer summarizer = new Summarizer(ANALYZER, 10, 30, null);
48 Summary summary = summarizer.getSummary("This is the home page for the Mail Room space", "home");
49 printSummary(summary);
50 }
51
52 public void testMultipleFragments() throws IOException
53 {
54 Summarizer summarizer = new Summarizer(ANALYZER, 1, 10, null);
55 Summary summary = summarizer.getSummary("More fragments and more more more more more more more and a fragment of text.", "fragment");
56 printSummary(summary);
57 }
58
59 public void testSummarizeShortText() throws IOException
60 {
61 Summarizer summarize = new Summarizer(ANALYZER, 1, 10, null);
62 assertEquals("I like cheese", summarize.getSummary("I like cheese").toString());
63 }
64
65 public void testNoEarlyTruncation() throws IOException
66 {
67
68
69
70 Summarizer summarize = new Summarizer(ANALYZER, 20, 30, null);
71 assertEquals("short excerpt that's fun", summarize.getSummary("short excerpt that's fun", "excerpt").toString());
72 assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "excerpt").toString());
73 assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "just").toString());
74 }
75
76 public void testHighlightWildcards() throws IOException
77 {
78
79 RAMDirectory dir = new RAMDirectory();
80 IndexWriter w = new IndexWriter(dir, ANALYZER, true);
81 Document doc = new Document();
82 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheese", Field.Store.YES, Field.Index.TOKENIZED));
83 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheeseboy", Field.Store.YES, Field.Index.TOKENIZED));
84 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "cheep", Field.Store.YES, Field.Index.TOKENIZED));
85 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "management", Field.Store.YES, Field.Index.TOKENIZED));
86 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "managing", Field.Store.YES, Field.Index.TOKENIZED));
87 doc.add(new Field(BaseDocumentBuilder.FieldName.CONTENT_BODY, "manage", Field.Store.YES, Field.Index.TOKENIZED));
88 w.addDocument(doc);
89 w.close();
90
91 Summarizer summarize = new Summarizer(ANALYZER, 1, 10, new LuceneConnection(dir, ANALYZER));
92 Summary summary1 = summarize.getSummary("I like cheese", "cheese");
93 assertEquals(3, summary1.getFragments().length);
94 assertEquals("cheese", summary1.getFragments()[2].getText());
95 assertTrue(summary1.getFragments()[2].isHighlight());
96
97 Summary summary2 = summarize.getSummary("I like cheese", "che*");
98 assertEquals(3, summary2.getFragments().length);
99 assertEquals("cheese", summary2.getFragments()[2].toString());
100 assertTrue(summary2.getFragments()[2].isHighlight());
101
102 Summary summary3 = summarize.getSummary("I think Management sucks!", "manag* sucks");
103 assertEquals(5, summary3.getFragments().length);
104 assertEquals("Management", summary3.getFragments()[2].toString());
105 assertTrue(summary3.getFragments()[2].isHighlight());
106
107 }
108
109 private void printSummary(Summary summary)
110 {
111 StringBuffer buffer = new StringBuffer();
112 buffer.append("Summary:");
113 for (int idx = 0; idx < summary.getFragments().length; idx++)
114 {
115 Summary.Fragment fragment = summary.getFragments()[idx];
116 if (fragment instanceof Summary.Highlight)
117 buffer.append("_");
118 buffer.append(fragment.toString());
119 if (fragment instanceof Summary.Highlight)
120 buffer.append("_");
121 if ((idx + 1) < summary.getFragments().length)
122 buffer.append("|");
123 }
124 LOG.info(buffer.toString());
125 }
126 }