1   package com.atlassian.bonnie.search;
2   
3   import java.lang.reflect.InvocationTargetException;
4   import java.util.ArrayList;
5   import java.util.Collections;
6   import java.util.List;
7   
8   import org.apache.commons.beanutils.PropertyUtils;
9   import org.apache.lucene.document.Document;
10  import org.apache.lucene.document.Field;
11  import org.slf4j.Logger;
12  import org.slf4j.LoggerFactory;
13  
14  import com.atlassian.bonnie.Handle;
15  import com.atlassian.bonnie.HandleResolver;
16  import com.atlassian.bonnie.Searchable;
17  
18  public class BaseDocumentBuilder implements DocumentBuilder
19  {
20  	private static final Logger log = LoggerFactory.getLogger(BaseDocumentBuilder.class);
21  
22  	private final HandleResolver handleResolver;
23  	private final List<Extractor> extractors;
24  	private final List<DocumentPostProcessor> postProcessors;
25  	public static final String TYPE_FIELD = "type";
26  	public static final String URL_PATH_FIELD = "urlPath";
27      private static final int CONTENT_BODY_MAX_SIZE = new ContentBodyMaxSizeSystemProperty().getValue();
28  
29      static class ContentBodyMaxSizeSystemProperty
30      {
31          public static final int DEFAULT = 1048576;
32          private final int value;
33  
34          public ContentBodyMaxSizeSystemProperty()
35          {
36              int value;
37              final String contentBodyMaxSize = System.getProperty("atlassian.indexing.contentbody.maxsize");
38  
39              if (contentBodyMaxSize != null)
40              {
41                  try
42                  {
43                      value = Integer.parseInt(contentBodyMaxSize);
44                  }
45                  catch (NumberFormatException e)
46                  {
47                      value = DEFAULT;
48                  }
49              }
50              else
51              {
52                  value = DEFAULT;
53              }
54              this.value = value;
55          }
56  
57          public int getValue()
58          {
59              return value;
60          }
61      }
62  
63      /**
64  	 * This may change, don't consider this part of a public API
65  	 */
66  	public static class FieldName
67  	{
68  		public static final String HANDLE = "handle";
69  		public static final String CLASS_NAME = "classname";
70  		public static final String CONTENT_BODY = "contentBody";
71  	}
72  
73  	public BaseDocumentBuilder(HandleResolver handleResolver, List<Extractor> extractors, List<DocumentPostProcessor> postProcessors)
74  	{
75  		if (extractors == null)
76  			throw new IllegalArgumentException("extractors is required.");
77  		if (postProcessors == null)
78  			throw new IllegalArgumentException("postProcessors is required.");
79  		if (handleResolver == null)
80  			throw new IllegalArgumentException("handleResolver is required.");
81  
82  		this.handleResolver = handleResolver;
83  		this.extractors = Collections.unmodifiableList(new ArrayList<Extractor>(extractors));
84  		this.postProcessors = Collections.unmodifiableList(new ArrayList<DocumentPostProcessor>(postProcessors));
85  	}
86  
87      /**
88       * Generates a Lucene document using the extractors defined by construction of BaseDocumentBuilder.
89       * <em>Note:</em> The 'contentBody' field will only be used if the data is below the value defined by system property
90       * 'atlassian.indexing.contentbody.maxsize'. If this property is not defined, threshold will default to 1 MiB.
91       *
92       * @param searchable The object to be processed 
93       * @return a Lucene Document with extracted data from searchable object
94       */
95      public Document getDocument(Searchable searchable)
96  	{
97  		Document document = getInitialDocument(searchable);
98  		StringBuffer contentBody = new StringBuffer();
99  
100 		for (Extractor extractor : extractors)
101         {
102 			try
103 			{
104 				extractor.addFields(document, contentBody, searchable);
105 			}
106 			catch (RuntimeException e)
107 			{
108 				log.error("Error extracting search fields from " + searchable + " using " + extractor  + ": " + e.getMessage(), e);
109 			}
110 		}
111 
112 		if (contentBody.length() > 0)
113         {
114             Field.Store store;
115             if (contentBody.length() > CONTENT_BODY_MAX_SIZE)
116                 store = Field.Store.NO;
117             else
118                 store = Field.Store.YES;
119 
120             document.add(new Field(FieldName.CONTENT_BODY, contentBody.toString(), store, Field.Index.TOKENIZED));
121         }
122 
123 		for (DocumentPostProcessor documentPostProcessor : postProcessors)
124         {
125 			documentPostProcessor.process(document);
126 		}
127 
128 		return document;
129 	}
130 
131 	/**
132 	 * Get the initial document that will be passed through the chain of extractors
133 	 *
134 	 * @param searchable the object the document is being created for
135 	 * @return a new Document pre-filled with the absolute minimum necessary data for it to
136 	 *         be added.
137 	 */
138 	protected Document getInitialDocument(Searchable searchable)
139 	{
140 		Document document = new Document();
141 
142 		Field handleField = new Field(FieldName.HANDLE, getHandle(searchable).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED);
143 		document.add(handleField);
144 
145         Field classNameField = new Field(FieldName.CLASS_NAME,
146             HibernateUnwrapper.getUnderlyingClass(searchable).getName(),
147             Field.Store.NO, Field.Index.UN_TOKENIZED);
148         document.add(classNameField);
149 
150 		Object type = getProperty(searchable, "type");
151         if (type != null)
152         {
153             document.add(new Field(TYPE_FIELD, type.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
154         }
155 
156         Object urlPath = getProperty(searchable, "urlPath");
157 		if (urlPath != null)
158 		{
159 			document.add(new Field(URL_PATH_FIELD, urlPath.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
160 		}
161 		return document;
162 	}
163 
164     private Object getProperty(Object object, String field)
165     {
166 		try
167 		{
168 			return PropertyUtils.getProperty(object, field);
169 		}
170 		catch (NoSuchMethodException e)
171 		{
172 			log.info("Unable to find field '" + field + "' on " + object, e);
173 		}
174         catch (IllegalAccessException e)
175         {
176             log.info("Unable to access field '" + field + "' on " + object, e);
177         }
178         catch (InvocationTargetException e)
179         {
180             log.info("Problem accessing field '" + field + "' on " + object, e);
181         }
182         return null;
183     }
184 
185 	public Handle getHandle(Object obj)
186 	{
187 		return handleResolver.getHandle(obj);
188 	}
189 }