1 package org.apache.lucene.index;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.document.Document;
21 import org.apache.lucene.document.FieldSelector;
22 import org.apache.lucene.document.FieldSelectorResult;
23 import org.apache.lucene.document.Fieldable;
24 import org.apache.lucene.util.MapBackedSet;
26 import java.io.IOException;
28 import java.util.concurrent.ConcurrentHashMap;
31 /** An IndexReader which reads multiple, parallel indexes. Each index added
32 * must have the same number of documents, but typically each contains
33 * different fields. Each document contains the union of the fields of all
34 * documents with the same document number. When searching, matches for a
35 * query term are from the first index added that has the field.
37 * <p>This is useful, e.g., with collections that have large fields which
38 * change rarely and small fields that change more frequently. The smaller
39 * fields may be re-indexed in a new index and both indexes may be searched
42 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
43 * are created and modified the same way. For example, if you add
44 * documents to one index, you need to add the same documents in the
45 * same order to the other indexes. <em>Failure to do so will result in
 * undefined behavior</em>.
 */
48 public class ParallelReader extends IndexReader {
49 private List<IndexReader> readers = new ArrayList<IndexReader>();
50 private List<Boolean> decrefOnClose = new ArrayList<Boolean>(); // remember which subreaders to decRef on close
51 boolean incRefReaders = false;
52 private SortedMap<String,IndexReader> fieldToReader = new TreeMap<String,IndexReader>();
53 private Map<IndexReader,Collection<String>> readerToFields = new HashMap<IndexReader,Collection<String>>();
54 private List<IndexReader> storedFieldReaders = new ArrayList<IndexReader>();
58 private boolean hasDeletions;
60 /** Construct a ParallelReader.
61 * <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
63 public ParallelReader() throws IOException { this(true); }
65 /** Construct a ParallelReader.
66 * @param closeSubReaders indicates whether the subreaders should be closed
67 * when this ParallelReader is closed
69 public ParallelReader(boolean closeSubReaders) throws IOException {
71 this.incRefReaders = !closeSubReaders;
72 readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
77 public String toString() {
78 final StringBuilder buffer = new StringBuilder("ParallelReader(");
79 final Iterator<IndexReader> iter = readers.iterator();
81 buffer.append(iter.next());
83 while (iter.hasNext()) {
84 buffer.append(", ").append(iter.next());
87 return buffer.toString();
90 /** Add an IndexReader.
91 * @throws IOException if there is a low-level IO error
93 public void add(IndexReader reader) throws IOException {
98 /** Add an IndexReader whose stored fields will not be returned. This can
99 * accelerate search when stored fields are only needed from a subset of
102 * @throws IllegalArgumentException if not all indexes contain the same number
104 * @throws IllegalArgumentException if not all indexes have the same value
105 * of {@link IndexReader#maxDoc()}
106 * @throws IOException if there is a low-level IO error
108 public void add(IndexReader reader, boolean ignoreStoredFields)
112 if (readers.size() == 0) {
113 this.maxDoc = reader.maxDoc();
114 this.numDocs = reader.numDocs();
115 this.hasDeletions = reader.hasDeletions();
118 if (reader.maxDoc() != maxDoc) // check compatibility
119 throw new IllegalArgumentException
120 ("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
121 if (reader.numDocs() != numDocs)
122 throw new IllegalArgumentException
123 ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());
125 Collection<String> fields = reader.getFieldNames(IndexReader.FieldOption.ALL);
126 readerToFields.put(reader, fields);
127 for (final String field : fields) { // update fieldToReader map
128 if (fieldToReader.get(field) == null)
129 fieldToReader.put(field, reader);
132 if (!ignoreStoredFields)
133 storedFieldReaders.add(reader); // add to storedFieldReaders
139 decrefOnClose.add(Boolean.valueOf(incRefReaders));
143 public synchronized Object clone() {
145 return doReopen(true);
146 } catch (Exception ex) {
147 throw new RuntimeException(ex);
152 * Tries to reopen the subreaders.
154 * If one or more subreaders could be re-opened (i. e. subReader.reopen()
155 * returned a new instance != subReader), then a new ParallelReader instance
156 * is returned, otherwise this instance is returned.
158 * A re-opened instance might share one or more subreaders with the old
159 * instance. Index modification operations result in undefined behavior
160 * when performed before the old instance is closed.
161 * (see {@link IndexReader#reopen()}).
163 * If subreaders are shared, then the reference count of those
164 * readers is increased to ensure that the subreaders remain open
165 * until the last referring reader is closed.
167 * @throws CorruptIndexException if the index is corrupt
168 * @throws IOException if there is a low-level IO error
171 public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
172 return doReopen(false);
175 protected IndexReader doReopen(boolean doClone) throws CorruptIndexException, IOException {
178 boolean reopened = false;
179 List<IndexReader> newReaders = new ArrayList<IndexReader>();
181 boolean success = false;
184 for (final IndexReader oldReader : readers) {
185 IndexReader newReader = null;
187 newReader = (IndexReader) oldReader.clone();
189 newReader = oldReader.reopen();
191 newReaders.add(newReader);
192 // if at least one of the subreaders was updated we remember that
193 // and return a new ParallelReader
194 if (newReader != oldReader) {
200 if (!success && reopened) {
201 for (int i = 0; i < newReaders.size(); i++) {
202 IndexReader r = newReaders.get(i);
203 if (r != readers.get(i)) {
206 } catch (IOException ignore) {
207 // keep going - we want to clean up as much as possible
215 List<Boolean> newDecrefOnClose = new ArrayList<Boolean>();
216 ParallelReader pr = new ParallelReader();
217 for (int i = 0; i < readers.size(); i++) {
218 IndexReader oldReader = readers.get(i);
219 IndexReader newReader = newReaders.get(i);
220 if (newReader == oldReader) {
221 newDecrefOnClose.add(Boolean.TRUE);
224 // this is a new subreader instance, so on close() we don't
225 // decRef but close it
226 newDecrefOnClose.add(Boolean.FALSE);
228 pr.add(newReader, !storedFieldReaders.contains(oldReader));
230 pr.decrefOnClose = newDecrefOnClose;
231 pr.incRefReaders = incRefReaders;
234 // No subreader was refreshed
241 public int numDocs() {
242 // Don't call ensureOpen() here (it could affect performance)
247 public int maxDoc() {
248 // Don't call ensureOpen() here (it could affect performance)
253 public boolean hasDeletions() {
254 // Don't call ensureOpen() here (it could affect performance)
258 // check first reader
260 public boolean isDeleted(int n) {
261 // Don't call ensureOpen() here (it could affect performance)
262 if (readers.size() > 0)
263 return readers.get(0).isDeleted(n);
267 // delete in all readers
269 protected void doDelete(int n) throws CorruptIndexException, IOException {
270 for (final IndexReader reader : readers) {
271 reader.deleteDocument(n);
276 // undeleteAll in all readers
278 protected void doUndeleteAll() throws CorruptIndexException, IOException {
279 for (final IndexReader reader : readers) {
280 reader.undeleteAll();
282 hasDeletions = false;
285 // append fields from storedFieldReaders
287 public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
289 Document result = new Document();
290 for (final IndexReader reader: storedFieldReaders) {
292 boolean include = (fieldSelector==null);
294 Collection<String> fields = readerToFields.get(reader);
295 for (final String field : fields)
296 if (fieldSelector.accept(field) != FieldSelectorResult.NO_LOAD) {
302 List<Fieldable> fields = reader.document(n, fieldSelector).getFields();
303 for (Fieldable field : fields) {
313 public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
315 ArrayList<TermFreqVector> results = new ArrayList<TermFreqVector>();
316 for (final Map.Entry<String,IndexReader> e: fieldToReader.entrySet()) {
318 String field = e.getKey();
319 IndexReader reader = e.getValue();
320 TermFreqVector vector = reader.getTermFreqVector(n, field);
324 return results.toArray(new TermFreqVector[results.size()]);
328 public TermFreqVector getTermFreqVector(int n, String field)
331 IndexReader reader = fieldToReader.get(field);
332 return reader==null ? null : reader.getTermFreqVector(n, field);
337 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
339 IndexReader reader = fieldToReader.get(field);
340 if (reader != null) {
341 reader.getTermFreqVector(docNumber, field, mapper);
346 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
349 for (final Map.Entry<String,IndexReader> e : fieldToReader.entrySet()) {
351 String field = e.getKey();
352 IndexReader reader = e.getValue();
353 reader.getTermFreqVector(docNumber, field, mapper);
359 public boolean hasNorms(String field) throws IOException {
361 IndexReader reader = fieldToReader.get(field);
362 return reader==null ? false : reader.hasNorms(field);
366 public byte[] norms(String field) throws IOException {
368 IndexReader reader = fieldToReader.get(field);
369 return reader==null ? null : reader.norms(field);
373 public void norms(String field, byte[] result, int offset)
376 IndexReader reader = fieldToReader.get(field);
378 reader.norms(field, result, offset);
382 protected void doSetNorm(int n, String field, byte value)
383 throws CorruptIndexException, IOException {
384 IndexReader reader = fieldToReader.get(field);
386 reader.doSetNorm(n, field, value);
390 public TermEnum terms() throws IOException {
392 return new ParallelTermEnum();
396 public TermEnum terms(Term term) throws IOException {
398 return new ParallelTermEnum(term);
402 public int docFreq(Term term) throws IOException {
404 IndexReader reader = fieldToReader.get(term.field());
405 return reader==null ? 0 : reader.docFreq(term);
409 public TermDocs termDocs(Term term) throws IOException {
411 return new ParallelTermDocs(term);
415 public TermDocs termDocs() throws IOException {
417 return new ParallelTermDocs();
421 public TermPositions termPositions(Term term) throws IOException {
423 return new ParallelTermPositions(term);
427 public TermPositions termPositions() throws IOException {
429 return new ParallelTermPositions();
433 * Checks recursively if all subreaders are up to date.
436 public boolean isCurrent() throws CorruptIndexException, IOException {
437 for (final IndexReader reader : readers) {
438 if (!reader.isCurrent()) {
443 // all subreaders are up to date
448 * Checks recursively if all subindexes are optimized
451 public boolean isOptimized() {
452 for (final IndexReader reader : readers) {
453 if (!reader.isOptimized()) {
458 // all subindexes are optimized
464 * @throws UnsupportedOperationException
467 public long getVersion() {
468 throw new UnsupportedOperationException("ParallelReader does not support this method.");
472 IndexReader[] getSubReaders() {
473 return readers.toArray(new IndexReader[readers.size()]);
477 protected void doCommit(Map<String,String> commitUserData) throws IOException {
478 for (final IndexReader reader : readers)
479 reader.commit(commitUserData);
483 protected synchronized void doClose() throws IOException {
484 for (int i = 0; i < readers.size(); i++) {
485 if (decrefOnClose.get(i).booleanValue()) {
486 readers.get(i).decRef();
488 readers.get(i).close();
494 public Collection<String> getFieldNames (IndexReader.FieldOption fieldNames) {
496 Set<String> fieldSet = new HashSet<String>();
497 for (final IndexReader reader : readers) {
498 Collection<String> names = reader.getFieldNames(fieldNames);
499 fieldSet.addAll(names);
504 private class ParallelTermEnum extends TermEnum {
505 private String field;
506 private Iterator<String> fieldIterator;
507 private TermEnum termEnum;
509 public ParallelTermEnum() throws IOException {
511 field = fieldToReader.firstKey();
512 } catch(NoSuchElementException e) {
513 // No fields, so keep field == null, termEnum == null
517 termEnum = fieldToReader.get(field).terms();
520 public ParallelTermEnum(Term term) throws IOException {
521 field = term.field();
522 IndexReader reader = fieldToReader.get(field);
524 termEnum = reader.terms(term);
528 public boolean next() throws IOException {
532 // another term in this field?
533 if (termEnum.next() && termEnum.term().field()==field)
534 return true; // yes, keep going
536 termEnum.close(); // close old termEnum
538 // find the next field with terms, if any
539 if (fieldIterator==null) {
540 fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
541 fieldIterator.next(); // Skip field to get next one
543 while (fieldIterator.hasNext()) {
544 field = fieldIterator.next();
545 termEnum = fieldToReader.get(field).terms(new Term(field));
546 Term term = termEnum.term();
547 if (term!=null && term.field()==field)
553 return false; // no more fields
561 return termEnum.term();
565 public int docFreq() {
569 return termEnum.docFreq();
573 public void close() throws IOException {
580 // wrap a TermDocs in order to support seek(Term)
581 private class ParallelTermDocs implements TermDocs {
582 protected TermDocs termDocs;
584 public ParallelTermDocs() {}
585 public ParallelTermDocs(Term term) throws IOException {
587 termDocs = readers.isEmpty() ? null : readers.get(0).termDocs(null);
592 public int doc() { return termDocs.doc(); }
593 public int freq() { return termDocs.freq(); }
595 public void seek(Term term) throws IOException {
596 IndexReader reader = fieldToReader.get(term.field());
597 termDocs = reader!=null ? reader.termDocs(term) : null;
600 public void seek(TermEnum termEnum) throws IOException {
601 seek(termEnum.term());
604 public boolean next() throws IOException {
608 return termDocs.next();
611 public int read(final int[] docs, final int[] freqs) throws IOException {
615 return termDocs.read(docs, freqs);
618 public boolean skipTo(int target) throws IOException {
622 return termDocs.skipTo(target);
625 public void close() throws IOException {
632 private class ParallelTermPositions
633 extends ParallelTermDocs implements TermPositions {
635 public ParallelTermPositions() {}
636 public ParallelTermPositions(Term term) throws IOException { seek(term); }
639 public void seek(Term term) throws IOException {
640 IndexReader reader = fieldToReader.get(term.field());
641 termDocs = reader!=null ? reader.termPositions(term) : null;
644 public int nextPosition() throws IOException {
645 // It is an error to call this if there is no next position, e.g. if termDocs==null
646 return ((TermPositions)termDocs).nextPosition();
649 public int getPayloadLength() {
650 return ((TermPositions)termDocs).getPayloadLength();
653 public byte[] getPayload(byte[] data, int offset) throws IOException {
654 return ((TermPositions)termDocs).getPayload(data, offset);
658 // TODO: Remove warning after API has been finalized
659 public boolean isPayloadAvailable() {
660 return ((TermPositions) termDocs).isPayloadAvailable();
665 public void addReaderFinishedListener(ReaderFinishedListener listener) {
666 super.addReaderFinishedListener(listener);
667 for (IndexReader reader : readers) {
668 reader.addReaderFinishedListener(listener);
673 public void removeReaderFinishedListener(ReaderFinishedListener listener) {
674 super.removeReaderFinishedListener(listener);
675 for (IndexReader reader : readers) {
676 reader.removeReaderFinishedListener(listener);