View Javadoc

1   /*
2    * Copyright (c) 2003 by Atlassian Software Systems Pty. Ltd.
3    * All rights reserved.
4    */
5   package com.atlassian.bonnie.search;
6   
7   import com.atlassian.bonnie.LuceneConnection;
8   import com.atlassian.bonnie.LuceneException;
9   import com.atlassian.bonnie.LuceneUtils;
10  import com.atlassian.bonnie.Searcher;
11  import com.atlassian.bonnie.analyzer.LuceneAnalyzerFactory;
12  import org.apache.log4j.Logger;
13  import org.apache.lucene.document.Document;
14  import org.apache.lucene.index.IndexReader;
15  import org.apache.lucene.index.Term;
16  import org.apache.lucene.index.TermEnum;
17  import org.apache.lucene.queryParser.MultiFieldQueryParser;
18  import org.apache.lucene.queryParser.ParseException;
19  import org.apache.lucene.queryParser.QueryParser;
20  import org.apache.lucene.search.*;
21  
22  import java.io.IOException;
23  import java.util.*;
24  
25  public class LuceneSearcher implements Searcher
26  {
27      private static final Logger log = Logger.getLogger(LuceneSearcher.class);
28      private LuceneConnection luceneConnection;
29      private LuceneAnalyzerFactory luceneAnalyzerFactory;
30      private static final String[] HANDLE_ONLY_FIELDS = new String[] { DocumentBuilder.HANDLE_FIELD_NAME };
31  
32      public void setLuceneConnection(LuceneConnection luceneConnection)
33      {
34          this.luceneConnection = luceneConnection;
35      }
36  
37      public void setLuceneAnalyzerFactory(LuceneAnalyzerFactory luceneAnalyzerFactory)
38      {
39          this.luceneAnalyzerFactory = luceneAnalyzerFactory;
40      }
41  
42      public void setBooleanQueryMaxClause(int max)
43      {
44          BooleanQuery.setMaxClauseCount(max);
45      }
46  
47      /**
48       * Different from term query in that the query parameter specified is passed through an analyzer that may
49       * remove certain stop words before constructing a Query. Desirable for full text search fields. Undesirable for keyword
50       * searches (build a Term query instead).
51       *
52       * @param searchFields
53       * @param query
54       */
55      public Query buildStandardQuery(String[] searchFields, String query)
56      {
57          Query myquery;
58  
59          try
60          {
61              QueryParser qp = makeQueryParserForSearchFields(searchFields);
62              qp.setDefaultOperator(QueryParser.Operator.AND);
63              myquery = qp.parse(query);
64          }
65          catch (ParseException e)
66          {
67              throw new LuceneException("Couldn't parse the query successfully:" + e.getMessage());
68          }
69  
70          return myquery;
71      }
72  
73      private QueryParser makeQueryParserForSearchFields(String[] searchFields)
74      {
75          if (searchFields.length == 1)
76              return new QueryParser(searchFields[0], luceneAnalyzerFactory.createAnalyzer());
77          else
78              return new MultiFieldQueryParser(searchFields, luceneAnalyzerFactory.createAnalyzer());
79      }
80  
81      public Query buildStandardQuery(String defaultSearchField, String query)
82      {
83          return buildStandardQuery(new String[]{defaultSearchField}, query);
84      }
85  
86      public List search(final Query myquery)
87      {
88          return search(myquery, null);
89      }
90  
91      public List search(final Query myquery, final Sort sort)
92      {
93          final List result = new LinkedList();
94          luceneConnection.withSearch(new LuceneConnection.SearcherAction()
95          {
96              public boolean perform(IndexSearcher searcher) throws IOException
97              {
98                  Hits hits = searcher.search(myquery, sort);
99  
100                 for (int i = 0, x = hits.length(); i < x; i++)
101                 {
102                     Document doc = hits.doc(i);
103                     String handle = doc.get(DocumentBuilder.HANDLE_FIELD_NAME);
104                     result.add(handle);
105                 }
106                 return true;
107             }
108         });
109         return result;
110     }
111 
112     public Query rewrite(final Query query)
113     {
114         return (Query) luceneConnection.withReader(new LuceneConnection.ReaderAction()
115         {
116             public Object perform(IndexReader reader) throws IOException
117             {
118                 return query.rewrite(reader);
119             }
120         });
121     }
122 
123     public String explain(final Query myquery, final int docid)
124     {
125         final StringBuffer sb = new StringBuffer();
126         luceneConnection.withSearch(new LuceneConnection.SearcherAction()
127         {
128             public boolean perform(IndexSearcher searcher) throws IOException
129             {
130                 Explanation e = searcher.explain(myquery, docid);
131                 sb.append(e.toHtml());
132                 return true;
133             }
134         });
135         return sb.toString();
136     }
137 
138     public int searchCount(final Query myquery)
139     {
140         final int[] totalHits = new int[1];
141         luceneConnection.withSearch(new LuceneConnection.SearcherAction()
142         {
143             public boolean perform(IndexSearcher searcher) throws IOException
144             {
145                 searcher.search(myquery, new HitCollector()
146                 {
147                     public void collect(int i, float v)
148                     {
149                         totalHits[0]++;
150                     }
151                 });
152                 return false;
153             }
154         });
155         return totalHits[0];
156     }
157 
158     public int searchCount(final Query query, final Filter filter)
159     {
160         final int[] totalHits = new int[1];
161         luceneConnection.withSearch(new LuceneConnection.SearcherAction()
162         {
163             public boolean perform(IndexSearcher searcher) throws IOException
164             {
165                 searcher.search(query, filter, new HitCollector()
166                 {
167                     public void collect(int i, float v)
168                     {
169                         totalHits[0]++;
170                     }
171                 });
172 
173                 return false;
174             }
175         });
176         return totalHits[0];
177     }
178 
179     public List searchForFields(Query myquery, Set fieldsToExtract, int startIndex, int numItems)
180     {
181         return searchForFields(myquery, fieldsToExtract, startIndex, numItems, null, new int[1]);
182     }
183 
184     public List searchForFields(final Query myquery, Set fieldsToExtract, final int startIndex, final int numItems, final Filter filter, final int[] filteredcount)
185     {
186         return searchForFields(myquery, fieldsToExtract, startIndex, numItems, null, null, new int[1]);
187     }
188 
189     public List searchForFields(final Query myquery, Set fieldsToExtract, final int startIndex, final int numItems, final Filter filter, final Sort sort, final int[] filteredcount)
190     {
191         String[] fieldsToExtractArr;
192 
193         if (fieldsToExtract != null && fieldsToExtract.size() > 0)
194             fieldsToExtractArr = (String[]) fieldsToExtract.toArray(new String[fieldsToExtract.size()]);
195         else
196             fieldsToExtractArr = HANDLE_ONLY_FIELDS;
197 
198         final String[] fieldsToExtractArr1 = fieldsToExtractArr;
199 
200         final ArrayList results = new ArrayList();
201         luceneConnection.withSearch(new LuceneConnection.SearcherAction()
202         {
203             public boolean perform(IndexSearcher searcher) throws IOException
204             {
205                 Hits hits = searcher.search(myquery, filter, sort);
206 
207                 if (searcher instanceof FilterCountingSearcher)
208                 {
209                     FilterCountingSearcher filterCountingSearcher = ((FilterCountingSearcher) searcher);
210                     for (int i = 0; i < filterCountingSearcher.getFilteredCounts().length; i++)
211                     {
212                         if (i > filteredcount.length - 1)
213                         {
214                             log.error("Array passed in to store filter counts is too small. Actual: " + filteredcount.length 
215                                     + ". Expected: " + filterCountingSearcher.getFilteredCounts().length);
216                             break;
217                         }
218 
219                         filteredcount[i] = filterCountingSearcher.getFilteredCounts()[i];
220                     }
221                 }
222 
223                 results.ensureCapacity(startIndex + numItems);
224                 for (int i = 0, x = hits.length(); i < x; i++)
225                 {
226                     if (i < startIndex || i >= startIndex + numItems)
227                     {
228                         results.add(null);
229                         continue;
230                     }
231                     else
232                     {
233                         Document doc = hits.doc(i);
234                         Map result;
235                         if (fieldsToExtractArr1 == null)
236                         {
237                             result = LuceneUtils.buildMapFromDocument(doc);
238                         }
239                         else
240                         {
241                             result = new HashMap(fieldsToExtractArr1.length);
242                             for (int j = 0; j < fieldsToExtractArr1.length; j++)
243                             {
244                                 String fieldname = fieldsToExtractArr1[j];
245                                 result.put(fieldname, doc.get(fieldname));
246                             }
247                         }
248                         result.put("docid", new Integer(hits.id(i)));   // add docid so explanation is possible
249                         results.add(result);
250                     }
251                 }
252 
253                 return true;
254             }
255         });
256 
257         return results;
258     }
259 
260     public List getAllFieldValues(final String fieldName)
261     {
262         return (List) luceneConnection.withReader(new LuceneConnection.ReaderAction()
263         {
264             public Object perform(IndexReader reader) throws IOException
265             {
266                 List values = new ArrayList();
267                 TermEnum terms = reader.terms(new Term(fieldName, ""));
268 
269                 // Terms starts on the first result, and advances on next(),
270                 // so we have to loop this way. -c
271                 do
272                 {
273                     Term term = terms.term();
274                     if (term == null)
275                         continue;
276 
277                     if (!fieldName.equals(term.field()))
278                         break;
279 
280                     values.add(term.text());
281 
282                 } while (terms.next());
283 
284                 if (terms != null)
285                 {
286                     terms.close();
287                 }
288                 return values;
289             }
290         });
291     }
292 
293 }