1 package com.atlassian.bonnie.search.extractor;
2
import com.atlassian.bonnie.search.Extractor;
import com.atlassian.bonnie.search.MockSearchableAttachment;

import java.io.IOException;
import java.io.InputStream;
7
8 public class TestPdfContentExtractor extends BaseAttachmentContentExtractorTest
9 {
10 public Extractor getExtractor()
11 {
12 return new PdfContentExtractor();
13 }
14
15 public void testSimplePdf()
16 {
17 assertOnExtractedTextOf(createSearchableAttachment("test-attachment-search.pdf", "application/pdf"), new String[]{"feature"}, new String[]{"apples"});
18 }
19
20
21
22 public void testPdfWithDifferentContentCreators()
23 {
24
25 assertOnExtractedTextOf(createSearchableAttachment("test-v1_3.pdf", "application/pdf"), new String[]{"grasses", "colleagues"}, new String[]{});
26
27 assertOnExtractedTextOf(createSearchableAttachment("test-v1_4.pdf", "application/pdf"), new String[]{}, new String[]{});
28
29 }
30
31 public void testInternationalisedPdf()
32 {
33
34 assertOnExtractedTextOf(createSearchableAttachment("chinese-characters.pdf", "application/pdf"), new String[]{"\u5c0f\u96de"}, new String[]{});
35
36 assertOnExtractedTextOf(createSearchableAttachment("arabic-characters.pdf", "application/pdf"), new String[]{"Romanization", "\u0637\u0648\u064a\u0644\u0629"}, new String[]{});
37 }
38
39 public void testExtractorExceptionThrownOnError() throws IOException
40 {
41 MockSearchableAttachment attachment = createSearchableAttachment("test-attachment-search.txt", "text/plain");
42 attachment.setContentType("application/pdf");
43 try
44 {
45 ((PdfContentExtractor) extractor).extractText(attachment.getContentsAsStream(), attachment);
46 fail("Exception expected");
47 }
48 catch (ExtractorException e)
49 {
50
51 }
52 }
53 }