View Javadoc

1   package com.atlassian.bonnie.search.extractor;
2   
3   import java.lang.reflect.Array;
4   import java.lang.reflect.InvocationTargetException;
5   import java.util.ArrayList;
6   import java.util.Collection;
7   import java.util.Date;
8   import java.util.Iterator;
9   import java.util.List;
10  
11  import org.apache.commons.beanutils.PropertyUtils;
12  import org.apache.commons.lang.StringUtils;
13  import org.apache.lucene.document.Document;
14  import org.apache.lucene.document.Field;
15  
16  import com.atlassian.bonnie.LuceneException;
17  import com.atlassian.bonnie.LuceneUtils;
18  import com.atlassian.bonnie.Searchable;
19  import com.atlassian.bonnie.search.Extractor;
20  import com.atlassian.bonnie.search.HibernateUnwrapper;
21  
22  /**
23   * <p>
24   * An indexing extractor that is configured via XML. You should note that earlier versions of this extractor were
25   * hard-coded to append the data from the contentBody field to the defaultSearchableText supplied to
26   * {@link #addFields(Document, StringBuffer, Searchable)} instead of to the Document. <strong>This behaviour has been
27   * changed.</strong>
28   * </p>
29   * <p>
30   * The XmlConfiguredExtractor will now append all extracted content to the Document and will only add to the
31   * defaultSearchableText if it has been configured to.
32   * </p>
33   */
34  public class XmlConfiguredExtractor implements Extractor
35  {
36      public void addFields(Document document, StringBuffer defaultSearchableText, Searchable searchable)
37      {
38          XmlClassConfigurations.ClassConfiguration classConfig = XmlClassConfigurations.getClassConfiguration(HibernateUnwrapper.getUnderlyingClass(searchable));
39  
40          if (classConfig != null)
41          {
42              for (XmlClassConfigurations.FieldConfiguration fieldConfiguration : classConfig.getFieldConfigurations())
43              {
44                  String attributeNames[] = fieldConfiguration.getAttributeName().split(",");
45                  List<String> indexedValues = new ArrayList<String>(attributeNames.length);
46                  for (String attributeName : attributeNames)
47                  {
48                      String indexedValue;
49                      Object o = getContentOfAttribute(searchable, attributeName);
50                      if (o.getClass().isArray())
51                      {
52                          indexedValue = indexArrayField(document, fieldConfiguration, o);
53                      }
54                      else if (o instanceof Collection)
55                      {
56                          indexedValue = indexCollectionField(document, fieldConfiguration, (Collection) o);
57                      }
58                      else if (o instanceof Date
59                              && fieldConfiguration.getType().equals(
60                                      XmlClassConfigurations.FieldConfiguration.TYPE_KEYWORD))
61                      {
62                          indexedValue = indexDateField(document, fieldConfiguration, (Date) o);
63                      }
64                      else
65                      {
66                          indexedValue = indexStringField(document, fieldConfiguration, String.valueOf(o));
67                      }
68  
69                      indexedValues.add(indexedValue);
70                  }
71  
72                  if (fieldConfiguration.isAppendToDefaultSearchableText() && !indexedValues.isEmpty())
73                  {
74                      defaultSearchableText.append(StringUtils.join(indexedValues, ','));
75                  }
76              }
77          }
78      }
79  
80      /**
81       * 
82       * @param doc
83       * @param fieldConfiguration
84       * @param date
85       * @return a String representing the data that was indexed
86       */
87      private String indexDateField(Document doc, XmlClassConfigurations.FieldConfiguration fieldConfiguration, Date date)
88      {
89          String dateStr = LuceneUtils.dateToString(date);
90          Field field = new Field(fieldConfiguration.getFieldName(), dateStr, Field.Store.YES, Field.Index.UN_TOKENIZED);
91          doc.add(field);
92          return dateStr;
93      }
94  
95      /**
96       * @param doc
97       * @param fieldConfiguration
98       * @param arr
99       * @return a String representing the values indexed, with each separated by a space character.
100      */
101     private String indexArrayField(Document doc, XmlClassConfigurations.FieldConfiguration fieldConfiguration,
102             Object arr)
103     {
104         int length = Array.getLength(arr);
105         List<String> indexedValues = new ArrayList<String>(length);
106         for (int i = 0; i < length; i++)
107         {
108             Object o = Array.get(arr, i);
109             indexedValues.add(indexStringField(doc, fieldConfiguration, String.valueOf(o)));
110         }
111 
112         return StringUtils.join(indexedValues, ' ');
113     }
114 
115     /**
116      * 
117      * @param doc
118      * @param fieldConfiguration
119      * @param collection
120      * @return a String representing the values indexed, with each separated by a space character.
121      */
122     private String indexCollectionField(Document doc, XmlClassConfigurations.FieldConfiguration fieldConfiguration,
123             Collection collection)
124     {
125         List<String> indexedValues = new ArrayList<String>(collection.size());
126         for (Iterator it = collection.iterator(); it.hasNext();)
127         {
128             Object o = it.next();
129             indexedValues.add(indexStringField(doc, fieldConfiguration, String.valueOf(o)));
130         }
131 
132         return StringUtils.join(indexedValues, ' ');
133     }
134 
135     /**
136      * 
137      * @param doc
138      * @param fieldConfiguration
139      * @param strContent
140      * @return the String that was indexed.
141      */
142     private String indexStringField(Document doc, XmlClassConfigurations.FieldConfiguration fieldConfiguration,
143             String strContent)
144     {
145         Field field;
146         if (fieldConfiguration.getType().equals(XmlClassConfigurations.FieldConfiguration.TYPE_TEXT))
147         {
148             field = new Field(fieldConfiguration.getFieldName(), strContent, Field.Store.YES, Field.Index.TOKENIZED);
149         }
150         else if (fieldConfiguration.getType().equals(XmlClassConfigurations.FieldConfiguration.TYPE_KEYWORD))
151         {
152             field = new Field(fieldConfiguration.getFieldName(), strContent, Field.Store.YES, Field.Index.UN_TOKENIZED);
153         }
154         else if (fieldConfiguration.getType().equals(XmlClassConfigurations.FieldConfiguration.TYPE_UNINDEXED))
155         {
156             field = new Field(fieldConfiguration.getFieldName(), strContent, Field.Store.YES, Field.Index.NO);
157         }
158         else if (fieldConfiguration.getType().equals(XmlClassConfigurations.FieldConfiguration.TYPE_UNSTORED))
159         {
160             field = new Field(fieldConfiguration.getFieldName(), strContent, Field.Store.NO, Field.Index.TOKENIZED);
161         }
162         else
163         {
164             throw new LuceneException("Unknown type for a field, fieldName=" + fieldConfiguration.getFieldName());
165         }
166 
167         doc.add(field);
168         return strContent;
169     }
170 
171     private Object getContentOfAttribute(Object obj, String attributeName)
172     {
173         try
174         {
175             String[] attributes = attributeName.split("\\.");
176             Object o = null;
177             for (int i = 0; i < attributes.length; ++i)
178                 o = PropertyUtils.getProperty(obj, attributeName);
179             return (o == null) ? "" : o;
180         }
181         catch (IllegalAccessException iae)
182         {
183             throw new LuceneException("Couldn't get string content of attribute, as property accessor method for " + attributeName + " cannot be accessed");
184         }
185         catch (NoSuchMethodException e)
186         {
187             throw new LuceneException("Couldn't get string content of attribute, as no such property accessor method for " + attributeName + " exists");
188         }
189         catch (InvocationTargetException e)
190         {
191             throw new LuceneException("Calling property accessor method for attribute " + attributeName + " threw an exception", e);
192         }
193     }
194 }