View Javadoc

1   package com.atlassian.bonnie.index;
2   
3   import org.apache.log4j.Category;
4   import org.apache.lucene.analysis.Analyzer;
5   import org.apache.lucene.document.Document;
6   import org.apache.lucene.index.IndexWriter;
7   import org.apache.lucene.store.Directory;
8   import org.apache.lucene.store.FSDirectory;
9   
10  import java.io.File;
11  import java.io.IOException;
12  import java.util.*;
13  
14  /**
15   * Writes documents to multiple temporary indices.
16   */
17  public class TempIndexWriter
18  {
19      protected static Category log = Category.getInstance(TempIndexWriter.class);
20  
21      private final Map writers = Collections.synchronizedMap(new HashMap());
22      private Analyzer analyzerForIndexing;
23      private String tmpDir;
24  
25      public TempIndexWriter(Analyzer analyzerForIndexing, String tmpDir)
26      {
27          this.analyzerForIndexing = analyzerForIndexing;
28          this.tmpDir = tmpDir;
29      }
30  
31      /**
32       * Add a document to an index.
33       * @param key key representing which index to write to. A new writer will be created if no writer exists for the key.
34       * @param doc document to add
35       * @throws IOException
36       */
37      public void addDocument(String key, Document doc) throws IOException
38      {
39          WriterData writerData = (WriterData) writers.get(key);
40          if (writerData == null)
41          {
42              writerData = createData();
43              writers.put(key, writerData);
44          }
45          writerData.writer.addDocument(doc);
46      }
47  
48      /**
49       * Delete the respective temp indices.
50       * @param prefix
51       * @throws IOException
52       */
53      public void close(String prefix) throws IOException
54      {
55          for (Iterator it = writers.keySet().iterator(); it.hasNext();)
56          {
57              String key = it.next().toString();
58              if (key.startsWith(prefix))
59              {
60                  WriterData writerData = (WriterData) writers.get(key);
61                  if (writerData != null) {
62                      delete(writerData.tmpIndexDir);
63                  }
64              }
65          }
66      }
67  
68      /**
69       * Delete the respective temp indices.
70       * @throws IOException
71       */
72      public void closeAll() throws IOException
73      {
74          for (Iterator it = writers.keySet().iterator(); it.hasNext();)
75          {
76              WriterData writerData = (WriterData) writers.get(it.next());
77              delete(writerData.tmpIndexDir);
78          }
79          writers.clear();
80      }
81  
82      /**
83       * Close all temporary writers, and merge all temporary indices to the writer provided.
84       * @param writer
85       * @throws IOException
86       */
87      public void merge(IndexWriter writer) throws IOException
88      {
89          final List indexDirectories = new LinkedList();
90          for (Iterator it = writers.keySet().iterator(); it.hasNext();)
91          {
92              WriterData writerData = (WriterData) writers.get(it.next());
93              writerData.writer.close();
94              indexDirectories.add(writerData.dir);
95          }
96          writer.addIndexes((Directory[]) indexDirectories.toArray(new Directory[indexDirectories.size()]));
97      }
98  
99      /**
100      * Close relevant temporary writers, and merge temporary indices identified by keys which start with the provided prefix to the
101      * writer provided.
102      * @param prefix key prefix
103      * @param writer
104      * @throws IOException
105      */
106     public void merge(String prefix, IndexWriter writer) throws IOException
107     {
108         final List indexDirectories = new LinkedList();
109         for (Iterator it = writers.keySet().iterator(); it.hasNext();)
110         {
111             String key = it.next().toString();
112             if (key.startsWith(prefix))
113             {
114                 WriterData writerData = (WriterData) writers.get(key);
115                 writerData.writer.close();
116                 indexDirectories.add(writerData.dir);
117             }
118         }
119         System.out.println("Index directories:" + indexDirectories);
120         if (indexDirectories.size() > 0)
121             writer.addIndexes((Directory[]) indexDirectories.toArray(new Directory[indexDirectories.size()]));
122     }
123 
124     protected final boolean delete(File directory)
125     {
126         File[] files = directory.listFiles();
127         for (int i = 0; i < files.length; i++)
128         {
129             if (!files[i].delete())
130             {
131                 log.error("Failed to delete index file: " + files[i].getAbsolutePath());
132                 return false;
133             }
134         }
135         return directory.delete();
136     }
137 
138     private WriterData createData() throws IOException
139     {
140         // create the temp index directory.
141         File tmpIndexDir = getTmpDir() == null ? File.createTempFile("lucene", "index") :
142                 File.createTempFile("lucene", "index", new File(getTmpDir()));
143         if (!tmpIndexDir.delete() || !tmpIndexDir.mkdirs())
144         {
145             throw new IOException("Unable to create temporary index directory: " + tmpIndexDir);
146         }
147         Directory dir = FSDirectory.getDirectory(tmpIndexDir);
148         //TODO: This probably doesn't need to be here. We should just construct the next IndexWriter with create=true
149         new IndexWriter(dir, null, true).close();
150         IndexWriter writer = new IndexWriter(dir, analyzerForIndexing, false);
151         writer.setMergeFactor(50);
152         writer.setMaxBufferedDocs(300);
153         writer.setMaxMergeDocs(Integer.MAX_VALUE);    // better for fast searches and batch indexing, but not if we need small segments
154 
155         return new WriterData(writer, dir, tmpIndexDir);
156     }
157 
158     public String getTmpDir()
159     {
160         return tmpDir;
161     }
162 
163     protected class WriterData
164     {
165         IndexWriter writer;
166         Directory dir;
167         File tmpIndexDir;
168 
169         protected WriterData(IndexWriter writer, Directory dir, File tmpIndexDir)
170         {
171             this.writer = writer;
172             this.dir = dir;
173             this.tmpIndexDir = tmpIndexDir;
174         }
175     }
176 }