1 package com.atlassian.bonnie.search.summary;
2
3 import com.atlassian.bonnie.LuceneConnection;
4 import junit.framework.TestCase;
5 import org.apache.log4j.Category;
6 import org.apache.lucene.analysis.Analyzer;
7 import org.apache.lucene.analysis.SimpleAnalyzer;
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.store.RAMDirectory;
12
13 import java.io.IOException;
14
15 public class TestSummarizer extends TestCase
16 {
17 private static final Analyzer ANALYZER = new SimpleAnalyzer();
18
19 public static final Category LOG = Category.getInstance(TestSummarizer.class);
20
21 public void testBlankSummary() throws IOException
22 {
23 Summarizer summarizer = new Summarizer(ANALYZER);
24 Summary summary = summarizer.getSummary("", "test");
25 assertEquals(0, summary.getFragments().length);
26 }
27
28 public void testSimpleSummary() throws IOException
29 {
30 Summarizer summarizer = new Summarizer(ANALYZER);
31 Summary summary = summarizer.getSummary("A fragment of text.", "fragment");
32 printSummary(summary);
33 }
34
35 public void testLotsofFragments() throws IOException
36 {
37 Summarizer summarizer = new Summarizer(ANALYZER, 5, 30, null);
38 Summary summary = summarizer.getSummary("More fragments more fragment and and more more more more more and more fragment and more and " +
39 "more more and a fragment of text.", "fragment");
40 printSummary(summary);
41 }
42
43 public void testStopWordsAtStart() throws IOException
44 {
45 Summarizer summarizer = new Summarizer(ANALYZER, 10, 30, null);
46 Summary summary = summarizer.getSummary("This is the home page for the Mail Room space", "home");
47 printSummary(summary);
48 }
49
50 public void testMultipleFragments() throws IOException
51 {
52 Summarizer summarizer = new Summarizer(ANALYZER, 1, 10, null);
53 Summary summary = summarizer.getSummary("More fragments and more more more more more more more and a fragment of text.", "fragment");
54 printSummary(summary);
55 }
56
57 public void testSummarizeShortText() throws IOException
58 {
59 Summarizer summarize = new Summarizer(ANALYZER, 1, 10, null);
60 assertEquals("I like cheese", summarize.getSummary("I like cheese").toString());
61 }
62
63 public void testNoEarlyTruncation() throws IOException
64 {
65
66
67
68 Summarizer summarize = new Summarizer(ANALYZER, 20, 30, null);
69 assertEquals("short excerpt that's fun", summarize.getSummary("short excerpt that's fun", "excerpt").toString());
70 assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "excerpt").toString());
71 assertEquals("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", summarize.getSummary("This is a much, much longer test text excerpt that's really, really, ridiculously long but just as fun", "just").toString());
72 }
73
74 public void testHighlightWildcards() throws IOException
75 {
76
77 RAMDirectory dir = new RAMDirectory();
78 IndexWriter w = new IndexWriter(dir, ANALYZER, true);
79 Document doc = new Document();
80 doc.add(new Field("contentBody", "cheese", Field.Store.YES, Field.Index.TOKENIZED));
81 doc.add(new Field("contentBody", "cheeseboy", Field.Store.YES, Field.Index.TOKENIZED));
82 doc.add(new Field("contentBody", "cheep", Field.Store.YES, Field.Index.TOKENIZED));
83 doc.add(new Field("contentBody", "management", Field.Store.YES, Field.Index.TOKENIZED));
84 doc.add(new Field("contentBody", "managing", Field.Store.YES, Field.Index.TOKENIZED));
85 doc.add(new Field("contentBody", "manage", Field.Store.YES, Field.Index.TOKENIZED));
86 w.addDocument(doc);
87 w.close();
88
89 Summarizer summarize = new Summarizer(ANALYZER, 1, 10, new LuceneConnection(dir, ANALYZER));
90 Summary summary1 = summarize.getSummary("I like cheese", "cheese");
91 assertEquals(3, summary1.getFragments().length);
92 assertEquals("cheese", summary1.getFragments()[2].getText());
93 assertTrue(summary1.getFragments()[2].isHighlight());
94
95 Summary summary2 = summarize.getSummary("I like cheese", "che*");
96 assertEquals(3, summary2.getFragments().length);
97 assertEquals("cheese", summary2.getFragments()[2].toString());
98 assertTrue(summary2.getFragments()[2].isHighlight());
99
100 Summary summary3 = summarize.getSummary("I think Management sucks!", "manag* sucks");
101 assertEquals(5, summary3.getFragments().length);
102 assertEquals("Management", summary3.getFragments()[2].toString());
103 assertTrue(summary3.getFragments()[2].isHighlight());
104
105 }
106
107 private void printSummary(Summary summary)
108 {
109 StringBuffer buffer = new StringBuffer();
110 buffer.append("Summary:");
111 for (int idx = 0; idx < summary.getFragments().length; idx++)
112 {
113 Summary.Fragment fragment = summary.getFragments()[idx];
114 if (fragment instanceof Summary.Highlight)
115 buffer.append("_");
116 buffer.append(fragment.toString());
117 if (fragment instanceof Summary.Highlight)
118 buffer.append("_");
119 if ((idx + 1) < summary.getFragments().length)
120 buffer.append("|");
121 }
122 LOG.info(buffer.toString());
123 }
124 }