1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Closeable;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.util.Collection;
26 import java.util.concurrent.atomic.AtomicInteger;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.FieldSelector;
30 import org.apache.lucene.search.FieldCache; // javadocs
31 import org.apache.lucene.search.Similarity;
32 import org.apache.lucene.store.*;
33 import org.apache.lucene.util.ArrayUtil;
34 import org.apache.lucene.util.ReaderUtil; // for javadocs
35 import org.apache.lucene.util.VirtualMethod;
37 /** IndexReader is an abstract class, providing an interface for accessing an
38 index. Search of an index is done entirely through this abstract interface,
39 so that any subclass which implements it is searchable.
41 <p> Concrete subclasses of IndexReader are usually constructed with a call to
42 one of the static <code>open()</code> methods, e.g. {@link
43 #open(Directory, boolean)}.
45 <p> For efficiency, in this API documents are often referred to via
46 <i>document numbers</i>, non-negative integers which each name a unique
47 document in the index. These document numbers are ephemeral--they may change
48 as documents are added to and deleted from an index. Clients should thus not
49 rely on a given document having the same number between sessions.
51 <p> An IndexReader can be opened on a directory for which an IndexWriter is
52 opened already, but it cannot be used to delete documents from the index then.
55 <b>NOTE</b>: for backwards API compatibility, several methods are not listed
56 as abstract, but have no useful implementations in this base class and
57 instead always throw UnsupportedOperationException. Subclasses are
58 strongly encouraged to override these methods, but in many cases may not need to.
64 <b>NOTE</b>: as of 2.4, it's possible to open a read-only
65 IndexReader using the static open methods that accept the
66 boolean readOnly parameter. Such a reader has better
67 concurrency as it's not necessary to synchronize on the
68 isDeleted method. You must specify false if you want to
69 make changes with the resulting IndexReader.
72 <a name="thread-safety"></a><p><b>NOTE</b>: {@link
73 IndexReader} instances are completely thread
74 safe, meaning multiple threads can call any of its methods,
75 concurrently. If your application requires external
76 synchronization, you should <b>not</b> synchronize on the
77 <code>IndexReader</code> instance; use your own
78 (non-Lucene) objects instead.
80 public abstract class IndexReader implements Cloneable,Closeable {
83 * A custom listener that's invoked when the IndexReader
86 * <p>For a SegmentReader, this listener is called only
87 * once all SegmentReaders sharing the same core are
88 * closed. At this point it is safe for apps to evict
89 * this reader from any caches keyed on {@link
90 * #getCoreCacheKey}. This is the same interface that
91 * {@link FieldCache} uses, internally, to evict
94 * <p>For other readers, this listener is called when they
97 * @lucene.experimental
99 public static interface ReaderFinishedListener {
100 public void finished(IndexReader reader);
103 // Impls must set this if they may call add/removeReaderFinishedListener:
104 protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
106 /** Expert: adds a {@link ReaderFinishedListener}. The
107 * provided listener is also added to any sub-readers, if
108 * this is a composite reader. Also, any reader reopened
109 * or cloned from this one will also copy the listeners at
110 * the time of reopen.
112 * @lucene.experimental */
113 public void addReaderFinishedListener(ReaderFinishedListener listener) {
115 readerFinishedListeners.add(listener);
118 /** Expert: remove a previously added {@link ReaderFinishedListener}.
120 * @lucene.experimental */
121 public void removeReaderFinishedListener(ReaderFinishedListener listener) {
123 readerFinishedListeners.remove(listener);
126 protected void notifyReaderFinishedListeners() {
127 // Defensive (should never be null -- all impls must set
129 if (readerFinishedListeners != null) {
130 for(ReaderFinishedListener listener : readerFinishedListeners) {
131 listener.finished(this);
136 protected void readerFinished() {
137 notifyReaderFinishedListeners();
141 * Constants describing field properties, for example used for
142 * {@link IndexReader#getFieldNames(FieldOption)}.
144 public static enum FieldOption {
147 /** All indexed fields */
149 /** All fields that store payloads */
151 /** All fields that omit tf */
152 OMIT_TERM_FREQ_AND_POSITIONS,
153 /** All fields that omit positions */
155 /** All fields which are not indexed */
157 /** All fields which are indexed with termvectors enabled */
158 INDEXED_WITH_TERMVECTOR,
159 /** All fields which are indexed but don't have termvectors enabled */
160 INDEXED_NO_TERMVECTOR,
161 /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
163 /** All fields with termvectors with position values enabled */
164 TERMVECTOR_WITH_POSITION,
165 /** All fields with termvectors with offset values enabled */
166 TERMVECTOR_WITH_OFFSET,
167 /** All fields with termvectors with offset values and position values enabled */
168 TERMVECTOR_WITH_POSITION_OFFSET,
171 private volatile boolean closed;
172 protected boolean hasChanges;
174 private final AtomicInteger refCount = new AtomicInteger();
176 static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
178 /** Expert: returns the current refCount for this reader */
179 public int getRefCount() {
180 return refCount.get();
184 * Expert: increments the refCount of this IndexReader
185 * instance. RefCounts are used to determine when a
186 * reader can be closed safely, i.e. as soon as there are
187 * no more references. Be sure to always call a
188 * corresponding {@link #decRef}, in a finally clause;
189 * otherwise the reader may never be closed. Note that
190 * {@link #close} simply calls decRef(), which means that
191 * the IndexReader will not really be closed until {@link
192 * #decRef} has been called for all outstanding references.
198 public void incRef() {
200 refCount.incrementAndGet();
204 * Expert: increments the refCount of this IndexReader
205 * instance only if the IndexReader has not been closed yet
206 * and returns <code>true</code> iff the refCount was
207 * successfully incremented, otherwise <code>false</code>.
208 * If this method returns <code>false</code> the reader is either
209 * already closed or is currently being closed. Either way this
210 * reader instance shouldn't be used by an application unless
211 * <code>true</code> is returned.
213 * RefCounts are used to determine when a
214 * reader can be closed safely, i.e. as soon as there are
215 * no more references. Be sure to always call a
216 * corresponding {@link #decRef}, in a finally clause;
217 * otherwise the reader may never be closed. Note that
218 * {@link #close} simply calls decRef(), which means that
219 * the IndexReader will not really be closed until {@link
220 * #decRef} has been called for all outstanding references.
226 public boolean tryIncRef() {
228 while ((count = refCount.get()) > 0) {
229 if (refCount.compareAndSet(count, count+1)) {
238 public String toString() {
239 final StringBuilder buffer = new StringBuilder();
243 buffer.append(getClass().getSimpleName());
245 final IndexReader[] subReaders = getSequentialSubReaders();
246 if ((subReaders != null) && (subReaders.length > 0)) {
247 buffer.append(subReaders[0]);
248 for (int i = 1; i < subReaders.length; ++i) {
249 buffer.append(" ").append(subReaders[i]);
253 return buffer.toString();
257 * Expert: decreases the refCount of this IndexReader
258 * instance. If the refCount drops to 0, then pending
259 * changes (if any) are committed to the index and this
260 * reader is closed. If an exception is hit, the refCount is left unchanged.
263 * @throws IOException in case an IOException occurs in commit() or doClose()
267 public void decRef() throws IOException {
269 final int rc = refCount.getAndDecrement();
271 boolean success = false;
278 // Put reference back on failure
279 refCount.incrementAndGet();
283 } else if (rc <= 0) {
284 throw new IllegalStateException("too many decRef calls: refCount was " + rc + " before decrement");
288 protected IndexReader() {
293 * @throws AlreadyClosedException if this IndexReader is closed
295 protected final void ensureOpen() throws AlreadyClosedException {
296 if (refCount.get() <= 0) {
297 throw new AlreadyClosedException("this IndexReader is closed");
301 /** Returns a IndexReader reading the index in the given
302 * Directory, with readOnly=true.
303 * @param directory the index directory
304 * @throws CorruptIndexException if the index is corrupt
305 * @throws IOException if there is a low-level IO error
307 public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
308 return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR);
311 /** Returns an IndexReader reading the index in the given
312 * Directory. You should pass readOnly=true, since it
313 * gives much better concurrent performance, unless you
314 * intend to do write operations (delete documents or
315 * change norms) with the reader.
316 * @param directory the index directory
317 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
318 * @throws CorruptIndexException if the index is corrupt
319 * @throws IOException if there is a low-level IO error
321 public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
322 return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
326 * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
328 * @param writer The IndexWriter to open from
329 * @param applyAllDeletes If true, all buffered deletes will
330 * be applied (made visible) in the returned reader. If
331 * false, the deletes are not applied but remain buffered
332 * (in IndexWriter) so that they will be applied in the
333 * future. Applying deletes can be costly, so if your app
334 * can tolerate deleted documents being returned you might
335 * gain some performance by passing false.
336 * @return The new IndexReader
337 * @throws CorruptIndexException
338 * @throws IOException if there is a low-level IO error
340 * @see #openIfChanged(IndexReader,IndexWriter,boolean)
342 * @lucene.experimental
344 public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
345 return writer.getReader(applyAllDeletes);
348 /** Expert: returns an IndexReader reading the index in the given
349 * {@link IndexCommit}. You should pass readOnly=true, since it
350 * gives much better concurrent performance, unless you
351 * intend to do write operations (delete documents or
352 * change norms) with the reader.
353 * @param commit the commit point to open
354 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
355 * @throws CorruptIndexException if the index is corrupt
356 * @throws IOException if there is a low-level IO error
358 public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException {
359 return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
362 /** Expert: returns an IndexReader reading the index in
363 * the given Directory, with a custom {@link
364 * IndexDeletionPolicy}. You should pass readOnly=true,
365 * since it gives much better concurrent performance,
366 * unless you intend to do write operations (delete
367 * documents or change norms) with the reader.
368 * @param directory the index directory
369 * @param deletionPolicy a custom deletion policy (only used
370 * if you use this reader to perform deletes or to set
371 * norms); see {@link IndexWriter} for details.
372 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
373 * @throws CorruptIndexException if the index is corrupt
374 * @throws IOException if there is a low-level IO error
376 public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
377 return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
380 /** Expert: returns an IndexReader reading the index in
381 * the given Directory, with a custom {@link
382 * IndexDeletionPolicy}. You should pass readOnly=true,
383 * since it gives much better concurrent performance,
384 * unless you intend to do write operations (delete
385 * documents or change norms) with the reader.
386 * @param directory the index directory
387 * @param deletionPolicy a custom deletion policy (only used
388 * if you use this reader to perform deletes or to set
389 * norms); see {@link IndexWriter} for details.
390 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
391 * @param termInfosIndexDivisor Subsamples which indexed
392 * terms are loaded into RAM. This has the same effect as {@link
393 * IndexWriter#setTermIndexInterval} except that setting
394 * must be done at indexing time while this setting can be
395 * set per reader. When set to N, then one in every
396 * N*termIndexInterval terms in the index is loaded into
397 * memory. By setting this to a value > 1 you can reduce
398 * memory usage, at the expense of higher latency when
399 * loading a TermInfo. The default value is 1. Set this
400 * to -1 to skip loading the terms index entirely.
401 * @throws CorruptIndexException if the index is corrupt
402 * @throws IOException if there is a low-level IO error
404 public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
405 return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
408 /** Expert: returns an IndexReader reading the index in
409 * the given Directory, using a specific commit and with
410 * a custom {@link IndexDeletionPolicy}. You should pass
411 * readOnly=true, since it gives much better concurrent
412 * performance, unless you intend to do write operations
413 * (delete documents or change norms) with the reader.
414 * @param commit the specific {@link IndexCommit} to open;
415 * see {@link IndexReader#listCommits} to list all commits
417 * @param deletionPolicy a custom deletion policy (only used
418 * if you use this reader to perform deletes or to set
419 * norms); see {@link IndexWriter} for details.
420 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
421 * @throws CorruptIndexException if the index is corrupt
422 * @throws IOException if there is a low-level IO error
424 public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
425 return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
428 /** Expert: returns an IndexReader reading the index in
429 * the given Directory, using a specific commit and with
430 * a custom {@link IndexDeletionPolicy}. You should pass
431 * readOnly=true, since it gives much better concurrent
432 * performance, unless you intend to do write operations
433 * (delete documents or change norms) with the reader.
434 * @param commit the specific {@link IndexCommit} to open;
435 * see {@link IndexReader#listCommits} to list all commits
437 * @param deletionPolicy a custom deletion policy (only used
438 * if you use this reader to perform deletes or to set
439 * norms); see {@link IndexWriter} for details.
440 * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
441 * @param termInfosIndexDivisor Subsamples which indexed
442 * terms are loaded into RAM. This has the same effect as {@link
443 * IndexWriter#setTermIndexInterval} except that setting
444 * must be done at indexing time while this setting can be
445 * set per reader. When set to N, then one in every
446 * N*termIndexInterval terms in the index is loaded into
447 * memory. By setting this to a value > 1 you can reduce
448 * memory usage, at the expense of higher latency when
449 * loading a TermInfo. The default value is 1. Set this
450 * to -1 to skip loading the terms index entirely. This is only useful in
451 * advanced situations when you will only .next() through all terms;
452 * attempts to seek will hit an exception.
454 * @throws CorruptIndexException if the index is corrupt
455 * @throws IOException if there is a low-level IO error
457 public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
458 return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
461 private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
462 return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
466 * If the index has changed since the provided reader was
467 * opened, open and return a new reader; else, return
468 * null. The new reader, if not null, will be the same
469 * type of reader as the previous one, ie an NRT reader
470 * will open a new NRT reader, a MultiReader will open a
471 * new MultiReader, etc.
473 * <p>This method is typically far less costly than opening a
474 * fully new <code>IndexReader</code> as it shares
475 * resources (for example sub-readers) with the provided
476 * <code>IndexReader</code>, when possible.
478 * <p>The provided reader is not closed (you are responsible
479 * for doing so); if a new reader is returned you also
480 * must eventually close it. Be sure to never close a
481 * reader while other threads are still using it; see
482 * <code>SearcherManager</code> in
483 * <code>contrib/misc</code> to simplify managing this.
485 * <p>If a new reader is returned, it's safe to make changes
486 * (deletions, norms) with it. All shared mutable state
487 * with the old reader uses "copy on write" semantics to
488 * ensure the changes are not seen by other readers.
490 * @throws CorruptIndexException if the index is corrupt
491 * @throws IOException if there is a low-level IO error
492 * @return null if there are no changes; else, a new
493 * IndexReader instance which you must eventually close
495 public static IndexReader openIfChanged(IndexReader oldReader) throws IOException {
496 if (oldReader.hasNewReopenAPI1) {
497 final IndexReader newReader = oldReader.doOpenIfChanged();
498 assert newReader != oldReader;
501 final IndexReader newReader = oldReader.reopen();
502 if (newReader == oldReader) {
511 * If the index has changed since the provided reader was
512 * opened, open and return a new reader, with the
513 * specified <code>readOnly</code>; else, return
516 * @see #openIfChanged(IndexReader)
518 public static IndexReader openIfChanged(IndexReader oldReader, boolean readOnly) throws IOException {
519 if (oldReader.hasNewReopenAPI2) {
520 final IndexReader newReader = oldReader.doOpenIfChanged(readOnly);
521 assert newReader != oldReader;
524 final IndexReader newReader = oldReader.reopen(readOnly);
525 if (newReader == oldReader) {
534 * If the IndexCommit differs from what the
535 * provided reader is searching, or the provided reader is
536 * not already read-only, open and return a new
537 * <code>readOnly=true</code> reader; else, return null.
539 * @see #openIfChanged(IndexReader)
541 // TODO: should you be able to specify readOnly?
542 public static IndexReader openIfChanged(IndexReader oldReader, IndexCommit commit) throws IOException {
543 if (oldReader.hasNewReopenAPI3) {
544 final IndexReader newReader = oldReader.doOpenIfChanged(commit);
545 assert newReader != oldReader;
548 final IndexReader newReader = oldReader.reopen(commit);
549 if (newReader == oldReader) {
558 * Expert: If there are changes (committed or not) in the
559 * {@link IndexWriter} versus what the provided reader is
560 * searching, then open and return a new read-only
561 * IndexReader searching both committed and uncommitted
562 * changes from the writer; else, return null (though, the
563 * current implementation never returns null).
565 * <p>This provides "near real-time" searching, in that
566 * changes made during an {@link IndexWriter} session can be
567 * quickly made available for searching without closing
568 * the writer nor calling {@link #commit}.
570 * <p>It's <i>near</i> real-time because there is no hard
571 * guarantee on how quickly you can get a new reader after
572 * making changes with IndexWriter. You'll have to
573 * experiment in your situation to determine if it's
574 * fast enough. As this is a new and experimental
575 * feature, please report back on your findings so we can
576 * learn, improve and iterate.</p>
578 * <p>The very first time this method is called, this
579 * writer instance will make every effort to pool the
580 * readers that it opens for doing merges, applying
581 * deletes, etc. This means additional resources (RAM,
582 * file descriptors, CPU time) will be consumed.</p>
584 * <p>For lower latency on reopening a reader, you should
585 * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
586 * pre-warm a newly merged segment before it's committed
587 * to the index. This is important for minimizing
588 * index-to-search delay after a large merge. </p>
590 * <p>If an addIndexes* call is running in another thread,
591 * then this reader will only search those segments from
592 * the foreign index that have been successfully copied
595 * <p><b>NOTE</b>: Once the writer is closed, any
596 * outstanding readers may continue to be used. However,
597 * if you attempt to reopen any of those readers, you'll
598 * hit an {@link AlreadyClosedException}.</p>
600 * @return IndexReader that covers entire index plus all
601 * changes made so far by this IndexWriter instance, or
602 * null if there are no new changes
604 * @param writer The IndexWriter to open from
606 * @param applyAllDeletes If true, all buffered deletes will
607 * be applied (made visible) in the returned reader. If
608 * false, the deletes are not applied but remain buffered
609 * (in IndexWriter) so that they will be applied in the
610 * future. Applying deletes can be costly, so if your app
611 * can tolerate deleted documents being returned you might
612 * gain some performance by passing false.
614 * @throws IOException
616 * @lucene.experimental
618 public static IndexReader openIfChanged(IndexReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException {
619 if (oldReader.hasNewReopenAPI4) {
620 final IndexReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes);
621 assert newReader != oldReader;
624 final IndexReader newReader = oldReader.reopen(writer, applyAllDeletes);
625 if (newReader == oldReader) {
634 * Refreshes an IndexReader if the index has changed since this instance was (re)opened.
637 * Opening an IndexReader is an expensive operation. This method can be used
638 * to refresh an existing IndexReader to reduce these costs. This method
639 * tries to only load segments that have changed or were created after the
640 * IndexReader was (re)opened.
642 * If the index has not changed since this instance was (re)opened, then this
643 * call is a NOOP and returns this instance. Otherwise, a new instance is
644 * returned. The old instance is <b>not</b> closed and remains usable.<br>
646 * If the reader is reopened, even though they share
647 * resources internally, it's safe to make changes
648 * (deletions, norms) with the new reader. All shared
649 * mutable state obeys "copy on write" semantics to ensure
650 * the changes are not seen by other readers.
652 * You can determine whether a reader was actually reopened by comparing the
653 * old instance with the instance returned by this method:
655 * IndexReader reader = ...
657 * IndexReader newReader = reader.reopen();
658 * if (newReader != reader) {
659 * ... // reader was reopened
662 * reader = newReader;
666 * Be sure to synchronize that code so that other threads,
667 * if present, can never use reader after it has been
668 * closed and before it's switched to newReader.
670 * <p><b>NOTE</b>: If this reader is a near real-time
671 * reader (obtained from {@link IndexWriter#getReader()}),
672 * reopen() will simply call writer.getReader() again for
673 * you, though this may change in the future.
675 * @throws CorruptIndexException if the index is corrupt
676 * @throws IOException if there is a low-level IO error
677 * @deprecated Use IndexReader#openIfChanged(IndexReader) instead
680 public IndexReader reopen() throws CorruptIndexException, IOException {
681 final IndexReader newReader = IndexReader.openIfChanged(this);
682 if (newReader == null) {
689 /** Just like {@link #reopen()}, except you can change the
690 * readOnly of the original reader. If the index is
691 * unchanged but readOnly is different then a new reader will be returned.
694 * @deprecated Use IndexReader#openIfChanged(IndexReader,boolean) instead */
696 public IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
697 final IndexReader newReader = IndexReader.openIfChanged(this, openReadOnly);
698 if (newReader == null) {
705 /** Expert: reopen this reader on a specific commit point.
706 * This always returns a readOnly reader. If the
707 * specified commit point matches what this reader is
708 * already on, and this reader is already readOnly, then
709 * this same instance is returned; if it is not already
710 * readOnly, a readOnly clone is returned.
711 * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexCommit) instead
714 public IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException {
715 final IndexReader newReader = IndexReader.openIfChanged(this, commit);
716 if (newReader == null) {
724 * Expert: returns a readonly reader, covering all
725 * committed as well as un-committed changes to the index.
726 * This provides "near real-time" searching, in that
727 * changes made during an IndexWriter session can be
728 * quickly made available for searching without closing
729 * the writer nor calling {@link #commit}.
731 * <p>Note that this is functionally equivalent to calling
732 * <code>flush</code> (an internal IndexWriter operation) and then using {@link IndexReader#open} to
733 * open a new reader. But the turnaround time of this
734 * method should be faster since it avoids the potentially
735 * costly {@link #commit}.</p>
737 * <p>You must close the {@link IndexReader} returned by
738 * this method once you are done using it.</p>
740 * <p>It's <i>near</i> real-time because there is no hard
741 * guarantee on how quickly you can get a new reader after
742 * making changes with IndexWriter. You'll have to
743 * experiment in your situation to determine if it's
744 * fast enough. As this is a new and experimental
745 * feature, please report back on your findings so we can
746 * learn, improve and iterate.</p>
748 * <p>The resulting reader supports {@link
749 * IndexReader#reopen}, but that call will simply forward
750 * back to this method (though this may change in the future).
753 * <p>The very first time this method is called, this
754 * writer instance will make every effort to pool the
755 * readers that it opens for doing merges, applying
756 * deletes, etc. This means additional resources (RAM,
757 * file descriptors, CPU time) will be consumed.</p>
759 * <p>For lower latency on reopening a reader, you should
760 * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
761 * pre-warm a newly merged segment before it's committed
762 * to the index. This is important for minimizing
763 * index-to-search delay after a large merge. </p>
765 * <p>If an addIndexes* call is running in another thread,
766 * then this reader will only search those segments from
767 * the foreign index that have been successfully copied
770 * <p><b>NOTE</b>: Once the writer is closed, any
771 * outstanding readers may continue to be used. However,
772 * if you attempt to reopen any of those readers, you'll
773 * hit an {@link AlreadyClosedException}.</p>
775 * @return IndexReader that covers entire index plus all
776 * changes made so far by this IndexWriter instance
778 * @param writer The IndexWriter to open from
779 * @param applyAllDeletes If true, all buffered deletes will
780 * be applied (made visible) in the returned reader. If
781 * false, the deletes are not applied but remain buffered
782 * (in IndexWriter) so that they will be applied in the
783 * future. Applying deletes can be costly, so if your app
784 * can tolerate deleted documents being returned you might
785 * gain some performance by passing false.
787 * @throws IOException
789 * @lucene.experimental
790 * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexWriter,boolean) instead
793 public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
794 final IndexReader newReader = IndexReader.openIfChanged(this, writer, applyAllDeletes);
795 if (newReader == null) {
803 * If the index has changed since it was opened, open and return a new reader;
804 * else, return {@code null}.
806 * @see #openIfChanged(IndexReader)
808 protected IndexReader doOpenIfChanged() throws CorruptIndexException, IOException {
809 throw new UnsupportedOperationException("This reader does not support reopen().");
813 * If the index has changed since it was opened, open and return a new reader;
814 * else, return {@code null}.
816 * @see #openIfChanged(IndexReader, boolean)
818 protected IndexReader doOpenIfChanged(boolean openReadOnly) throws CorruptIndexException, IOException {
819 throw new UnsupportedOperationException("This reader does not support reopen().");
823 * If the index has changed since it was opened, open and return a new reader;
824 * else, return {@code null}.
826 * @see #openIfChanged(IndexReader, IndexCommit)
828 protected IndexReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
829 throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit).");
833 * If the index has changed since it was opened, open and return a new reader;
834 * else, return {@code null}.
836 * @see #openIfChanged(IndexReader, IndexWriter, boolean)
838 protected IndexReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
839 return writer.getReader(applyAllDeletes);
843 * Efficiently clones the IndexReader (sharing most
846 * On cloning a reader with pending changes (deletions,
847 * norms), the original reader transfers its write lock to
848 * the cloned reader. This means only the cloned reader
849 * may make further changes to the index, and commit the
850 * changes to the index on close, but the old reader still
851 * reflects all changes made up until it was cloned.
853 * Like {@link #openIfChanged(IndexReader)}, it's safe to make changes to
854 * either the original or the cloned reader: all shared
855 * mutable state obeys "copy on write" semantics to ensure
856 * the changes are not seen by other readers.
// Base-class default: always throws; the message tells callers that this reader
// type has not implemented clone().
860 public synchronized Object clone() {
861 throw new UnsupportedOperationException("This reader does not implement clone()");
865 * Clones the IndexReader and optionally changes readOnly. A readOnly
866 * reader cannot open a writeable reader.
867 * @throws CorruptIndexException if the index is corrupt
868 * @throws IOException if there is a low-level IO error
// Base-class default: always throws, regardless of openReadOnly. The message is the
// same one used by the no-argument clone().
870 public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
871 throw new UnsupportedOperationException("This reader does not implement clone()");
875 * Returns the directory associated with this index. The default
876 * implementation returns the directory specified by subclasses when
877 * delegating to the IndexReader(Directory) constructor, or throws an
878 * UnsupportedOperationException if one was not specified.
879 * @throws UnsupportedOperationException if no directory
// Base-class default for the directory accessor.
// NOTE(review): the statement below only throws; the Javadoc's mention of returning a
// directory handed to an IndexReader(Directory) constructor is not reflected in this
// body - confirm whether the Javadoc is stale.
881 public Directory directory() {
883 throw new UnsupportedOperationException("This reader does not support this method.");
887 * Returns the time the index in the named directory was last modified.
888 * Do not use this to check whether the reader is still up-to-date, use
889 * {@link #isCurrent()} instead.
890 * @throws CorruptIndexException if the index is corrupt
891 * @throws IOException if there is a low-level IO error
// Runs an anonymous SegmentInfos.FindSegmentsFile over directory2. run() hands the
// segments file name to doBody, which reports that file's modification time.
893 public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
894 return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
896 public Object doBody(String segmentFileName) throws IOException {
// doBody must return Object, so the long is boxed here and unboxed by the caller below.
897 return Long.valueOf(directory2.fileModified(segmentFileName));
899 }.run()).longValue();
903 * Reads version number from segments files. The version number is
904 * initialized with a timestamp and then increased by one for each change of
907 * @param directory where the index resides.
908 * @return version number.
909 * @throws CorruptIndexException if the index is corrupt
910 * @throws IOException if there is a low-level IO error
// Thin static wrapper: the lookup is delegated entirely to SegmentInfos.readCurrentVersion.
912 public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
913 return SegmentInfos.readCurrentVersion(directory);
917 * Reads commitUserData, previously passed to {@link
918 * IndexWriter#commit(Map)}, from current index
919 * segments file. This will return null if {@link
920 * IndexWriter#commit(Map)} has never been called for
923 * @param directory where the index resides.
924 * @return commit userData.
925 * @throws CorruptIndexException if the index is corrupt
926 * @throws IOException if there is a low-level IO error
928 * @see #getCommitUserData()
// Thin static wrapper: the lookup is delegated entirely to SegmentInfos.readCurrentUserData.
930 public static Map<String,String> getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
931 return SegmentInfos.readCurrentUserData(directory);
935 * Version number when this IndexReader was opened. Not
936 * implemented in the IndexReader base class.
938 * <p>If this reader is based on a Directory (ie, was
939 * created by calling {@link #open}, or {@link #openIfChanged} on
940 * a reader based on a Directory), then this method
941 * returns the version recorded in the commit that the
942 * reader opened. This version is advanced every time
943 * {@link IndexWriter#commit} is called.</p>
945 * <p>If instead this reader is a near real-time reader
946 * (ie, obtained by a call to {@link
947 * IndexWriter#getReader}, or by calling {@link #openIfChanged}
948 * on a near real-time reader), then this method returns
949 * the version of the last commit done by the writer.
950 * Note that even as further changes are made with the
951 * writer, the version will not change until a commit is
952 * completed. Thus, you should not rely on this method to
953 * determine when a near real-time reader should be
954 * opened. Use {@link #isCurrent} instead.</p>
956 * @throws UnsupportedOperationException unless overridden in subclass
// Base-class default: always throws; readers that track a version must override.
958 public long getVersion() {
959 throw new UnsupportedOperationException("This reader does not support this method.");
963 * Retrieve the userData map optionally passed to
964 * IndexWriter#commit. This will return null if {@link
965 * IndexWriter#commit(Map)} has never been called for
968 * @see #getCommitUserData(Directory)
// Base-class default: always throws; the instance-level user data is only available
// from readers that override this method.
970 public Map<String,String> getCommitUserData() {
971 throw new UnsupportedOperationException("This reader does not support this method.");
976 * Check whether any new changes have occurred to the
977 * index since this reader was opened.
979 * <p>If this reader is based on a Directory (ie, was
980 * created by calling {@link #open}, or {@link #openIfChanged} on
981 * a reader based on a Directory), then this method checks
982 * if any further commits (see {@link IndexWriter#commit})
983 * have occurred in that directory.</p>
985 * <p>If instead this reader is a near real-time reader
986 * (ie, obtained by a call to {@link
987 * IndexWriter#getReader}, or by calling {@link #openIfChanged}
988 * on a near real-time reader), then this method checks if
989 * either a new commit has occurred, or any new
990 * uncommitted changes have taken place via the writer.
991 * Note that even if the writer has only performed
992 * merging, this method will still return false.</p>
994 * <p>In any event, if this returns false, you should call
995 * {@link #openIfChanged} to get a new reader that sees the
998 * @throws CorruptIndexException if the index is corrupt
999 * @throws IOException if there is a low-level IO error
1000 * @throws UnsupportedOperationException unless overridden in subclass
// Base-class default: always throws; the freshness check itself lives in subclasses.
1002 public boolean isCurrent() throws CorruptIndexException, IOException {
1003 throw new UnsupportedOperationException("This reader does not support this method.");
1006 /** @deprecated Check segment count using {@link
1007 * #getSequentialSubReaders} instead. */
// Deprecated accessor (see the Javadoc above); the base-class default always throws.
1009 public boolean isOptimized() {
1010 throw new UnsupportedOperationException("This reader does not support this method.");
1014 * Return an array of term frequency vectors for the specified document.
1015 * The array contains a vector for each vectorized field in the document.
1016 * Each vector contains terms and frequencies for all terms in a given vectorized field.
1017 * If no such fields existed, the method returns null. The term vectors that are
1018 * returned may either be of type {@link TermFreqVector}
1019 * or of type {@link TermPositionVector} if
1020 * positions or offsets have been stored.
1022 * @param docNumber document for which term frequency vectors are returned
1023 * @return array of term frequency vectors. May be null if no term vectors have been
1024 * stored for the specified document.
1025 * @throws IOException if index cannot be accessed
1026 * @see org.apache.lucene.document.Field.TermVector
// Abstract: each concrete reader supplies the per-document term-vector lookup.
// NOTE(review): modifier order here is "abstract public", while other declarations in
// this class use "public abstract".
1028 abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
1033 * Return a term frequency vector for the specified document and field. The
1034 * returned vector contains terms and frequencies for the terms in
1035 * the specified field of this document, if the field had the storeTermVector
1036 * flag set. If termvectors had been stored with positions or offsets, a
1037 * {@link TermPositionVector} is returned.
1039 * @param docNumber document for which the term frequency vector is returned
1040 * @param field field for which the term frequency vector is returned.
1041 * @return term frequency vector. May be null if field does not exist in the specified
1042 * document or term vector was not stored.
1043 * @throws IOException if index cannot be accessed
1044 * @see org.apache.lucene.document.Field.TermVector
// Abstract: single-field variant of the term-vector lookup, supplied by concrete readers.
1046 abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
1050 * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
1051 * the {@link TermFreqVector}.
1052 * @param docNumber The number of the document to load the vector for
1053 * @param field The name of the field to load
1054 * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
1055 * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
// Abstract: callback-style variant; results are delivered to the mapper, so nothing is returned.
1058 abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
1061 * Map all the term vectors for all fields in a Document
1062 * @param docNumber The number of the document to load the vector for
1063 * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
1064 * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
// Abstract: callback-style variant without a field argument; results go to the mapper.
1066 abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
1069 * Returns <code>true</code> if an index exists at the specified directory.
1070 * @param directory the directory to check for an index
1071 * @return <code>true</code> if an index exists; <code>false</code> otherwise
1072 * @throws IOException if there is a problem with accessing the index
// Probes for an index by attempting to read segment infos from the directory.
1074 public static boolean indexExists(Directory directory) throws IOException {
// The SegmentInfos instance is discarded; only whether read() succeeds matters.
1076 new SegmentInfos().read(directory);
// A failed read is handled locally in this catch block.
// NOTE(review): the signature still declares IOException - confirm whether any path propagates it.
1078 } catch (IOException ioe) {
1083 /** Returns the number of documents in this index. */
// Abstract: the count is supplied by each concrete reader; numDeletedDocs() builds on it.
1084 public abstract int numDocs();
1086 /** Returns one greater than the largest possible document number.
1087 * This may be used to, e.g., determine how big to allocate an array which
1088 * will have an element for every document number in an index.
// Abstract: supplied by each concrete reader; used in this class as the exclusive upper
// bound for document numbers (see document(int)) and by numDeletedDocs().
1090 public abstract int maxDoc();
1092 /** Returns the number of deleted documents. */
// Derived value with no state of its own: computed on each call from the two abstract counters.
1093 public int numDeletedDocs() {
1094 return maxDoc() - numDocs();
1098 * Returns the stored fields of the <code>n</code><sup>th</sup>
1099 * <code>Document</code> in this index.
1101 * <b>NOTE:</b> for performance reasons, this method does not check if the
1102 * requested document is deleted, and therefore asking for a deleted document
1103 * may yield unspecified results. Usually this is not required, however you
1104 * can call {@link #isDeleted(int)} with the requested document ID to verify
1105 * the document is not deleted.
1107 * @throws CorruptIndexException if the index is corrupt
1108 * @throws IOException if there is a low-level IO error
// Convenience overload: validates n, then loads the document with no FieldSelector.
1110 public Document document(int n) throws CorruptIndexException, IOException {
// Valid document numbers are 0 <= n < maxDoc(); anything else is rejected up front
// with a message that reports both the bound and the offending value.
1112 if (n < 0 || n >= maxDoc()) {
1113 throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")");
// A null selector is passed; per the two-argument overload's Javadoc this loads all fields.
1115 return document(n, null);
1119 * Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
1120 * <sup>th</sup> position. The {@link FieldSelector} may be used to determine
1121 * what {@link org.apache.lucene.document.Field}s to load and how they should
1122 * be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
1123 * <code>FieldsReader</code>) is closed before the lazy
1124 * {@link org.apache.lucene.document.Field} is loaded an exception may be
1125 * thrown. If you want the value of a lazy
1126 * {@link org.apache.lucene.document.Field} to be available after closing you
1127 * must explicitly load it or fetch the Document again with a new loader.
1129 * <b>NOTE:</b> for performance reasons, this method does not check if the
1130 * requested document is deleted, and therefore asking for a deleted document
1131 * may yield unspecified results. Usually this is not required, however you
1132 * can call {@link #isDeleted(int)} with the requested document ID to verify
1133 * the document is not deleted.
1135 * @param n Get the document at the <code>n</code><sup>th</sup> position
1136 * @param fieldSelector The {@link FieldSelector} to use to determine what
1137 * Fields should be loaded on the Document. May be null, in which case
1138 * all Fields will be loaded.
1139 * @return The stored fields of the
1140 * {@link org.apache.lucene.document.Document} at the nth position
1141 * @throws CorruptIndexException if the index is corrupt
1142 * @throws IOException if there is a low-level IO error
1143 * @see org.apache.lucene.document.Fieldable
1144 * @see org.apache.lucene.document.FieldSelector
1145 * @see org.apache.lucene.document.SetBasedFieldSelector
1146 * @see org.apache.lucene.document.LoadFirstFieldSelector
1148 // TODO (1.5): When we convert to JDK 1.5 make this Set<String>
// NOTE(review): this file already uses generics (e.g. Collection<String>,
// Map<String,String>), so the TODO above looks stale, and it is unclear which
// declaration "this" refers to - confirm and remove or reword.
// Abstract: concrete readers perform the actual stored-field loading.
1149 public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
1151 /** Returns true if document <i>n</i> has been deleted */
// Abstract: per-document deletion check supplied by concrete readers.
1152 public abstract boolean isDeleted(int n);
1154 /** Returns true if any documents have been deleted */
// Abstract: reader-wide deletion check supplied by concrete readers.
1155 public abstract boolean hasDeletions();
1157 /** Returns true if there are norms stored for this field. */
// Default answer derived from norms(String): the field has norms exactly when that
// call returns a non-null array.
1158 public boolean hasNorms(String field) throws IOException {
1159 // backward compatible implementation.
1160 // SegmentReader has an efficient implementation.
1162 return norms(field) != null;
1165 /** Returns the byte-encoded normalization factor for the named field of
1166 * every document. This is used by the search code to score documents.
1167 * Returns null if norms were not indexed for this field.
1169 * @see org.apache.lucene.document.Field#setBoost(float)
// Abstract: returns the norms array for the field; hasNorms() in this class relies on
// a null return meaning "no norms".
1171 public abstract byte[] norms(String field) throws IOException;
1173 /** Reads the byte-encoded normalization factor for the named field of every
1174 * document. This is used by the search code to score documents.
1176 * @see org.apache.lucene.document.Field#setBoost(float)
// Abstract: variant that takes a caller-supplied byte array and offset, and returns nothing.
1178 public abstract void norms(String field, byte[] bytes, int offset)
1181 /** Expert: Resets the normalization factor for the named field of the named
1182 * document. The norm represents the product of the field's {@link
1183 * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
1184 * int) length normalization}. Thus, to preserve the length normalization
1185 * values when resetting this, one should base the new value upon the old.
1187 * <b>NOTE:</b> If this field does not index norms, then
1188 * this method throws {@link IllegalStateException}.
1190 * @see #norms(String)
1191 * @see Similarity#decodeNormValue(byte)
1192 * @throws StaleReaderException if the index has changed
1193 * since this reader was opened
1194 * @throws CorruptIndexException if the index is corrupt
1195 * @throws LockObtainFailedException if another writer
1196 * has this index open (<code>write.lock</code> could not
1198 * @throws IOException if there is a low-level IO error
1199 * @throws IllegalStateException if the field does not index norms
// Public, synchronized entry point for norm updates; the reader-specific write is
// delegated to the abstract doSetNorm hook.
1201 public synchronized void setNorm(int doc, String field, byte value)
1202 throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1206 doSetNorm(doc, field, value);
1209 /** Implements setNorm in subclass.*/
// Abstract hook invoked by setNorm(int, String, byte). It declares a narrower set of
// checked exceptions than the public method.
1210 protected abstract void doSetNorm(int doc, String field, byte value)
1211 throws CorruptIndexException, IOException;
1213 /** Expert: Resets the normalization factor for the named field of the named
1216 * @see #norms(String)
1217 * @see Similarity#decodeNormValue(byte)
1219 * @throws StaleReaderException if the index has changed
1220 * since this reader was opened
1221 * @throws CorruptIndexException if the index is corrupt
1222 * @throws LockObtainFailedException if another writer
1223 * has this index open (<code>write.lock</code> could not
1225 * @throws IOException if there is a low-level IO error
1226 * @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the
1227 * float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}.
1228 * This method will be removed in Lucene 4.0
// Deprecated float overload: encodes the value and forwards to the byte overload.
1231 public void setNorm(int doc, String field, float value)
1232 throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
// Encoding always uses the global default Similarity (Similarity.getDefault()), not a
// caller-specific one - the reason the Javadoc steers callers to the byte overload.
1234 setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
1237 /** Returns an enumeration of all the terms in the index. The
1238 * enumeration is ordered by Term.compareTo(). Each term is greater
1239 * than all that precede it in the enumeration. Note that after
1240 * calling terms(), {@link TermEnum#next()} must be called
1241 * on the resulting enumeration before calling other methods such as
1242 * {@link TermEnum#term()}.
1243 * @throws IOException if there is a low-level IO error
// Abstract: concrete readers supply the term enumeration over the whole index.
1245 public abstract TermEnum terms() throws IOException;
1247 /** Returns an enumeration of all terms starting at a given term. If
1248 * the given term does not exist, the enumeration is positioned at the
1249 * first term greater than the supplied term. The enumeration is
1250 * ordered by Term.compareTo(). Each term is greater than all that
1251 * precede it in the enumeration.
1252 * @throws IOException if there is a low-level IO error
// Abstract: variant of terms() that takes a starting term; supplied by concrete readers.
1254 public abstract TermEnum terms(Term t) throws IOException;
1256 /** Returns the number of documents containing the term <code>t</code>.
1257 * @throws IOException if there is a low-level IO error
// Abstract: per-term document frequency supplied by concrete readers.
1259 public abstract int docFreq(Term t) throws IOException;
1261 /** Returns an enumeration of all the documents which contain
1262 * <code>term</code>. For each document, the document number and the frequency of
1263 * the term in that document are also provided, for use in
1264 * search scoring. If term is null, then all non-deleted
1265 * docs are returned with freq=1.
1266 * Thus, this method implements the mapping:
1268 * Term => <docNum, freq><sup>*</sup>
1270 * <p>The enumeration is ordered by document number. Each document number
1271 * is greater than all that precede it in the enumeration.
1272 * @throws IOException if there is a low-level IO error
// Convenience overload built on the abstract termDocs(): obtain an unpositioned
// enumerator, then position it on the requested term.
1274 public TermDocs termDocs(Term term) throws IOException {
1276 TermDocs termDocs = termDocs();
1277 termDocs.seek(term);
1281 /** Returns an unpositioned {@link TermDocs} enumerator.
1283 * Note: the TermDocs returned is unpositioned. Before using it, ensure
1284 * that you first position it with {@link TermDocs#seek(Term)} or
1285 * {@link TermDocs#seek(TermEnum)}.
1287 * @throws IOException if there is a low-level IO error
// Abstract factory for the enumerator; termDocs(Term) in this class seeks the result.
1289 public abstract TermDocs termDocs() throws IOException;
1291 /** Returns an enumeration of all the documents which contain
1292 * <code>term</code>. For each document, in addition to the document number
1293 * and frequency of the term in that document, a list of all of the ordinal
1294 * positions of the term in the document is available. Thus, this method
1295 * implements the mapping:
1298 * Term => <docNum, freq,
1299 * <pos<sub>1</sub>, pos<sub>2</sub>, ...
1300 * pos<sub>freq-1</sub>>
1303 * <p> This positional information facilitates phrase and proximity searching.
1304 * <p>The enumeration is ordered by document number. Each document number is
1305 * greater than all that precede it in the enumeration.
1306 * @throws IOException if there is a low-level IO error
// Convenience overload built on the abstract termPositions(): obtain an unpositioned
// enumerator, seek it to the requested term, and return it.
1308 public TermPositions termPositions(Term term) throws IOException {
1310 TermPositions termPositions = termPositions();
1311 termPositions.seek(term);
1312 return termPositions;
1315 /** Returns an unpositioned {@link TermPositions} enumerator.
1316 * @throws IOException if there is a low-level IO error
// Abstract factory for the enumerator; termPositions(Term) in this class seeks the result.
1318 public abstract TermPositions termPositions() throws IOException;
1322 /** Deletes the document numbered <code>docNum</code>. Once a document is
1323 * deleted it will not appear in TermDocs or TermPositions enumerations.
1324 * Attempts to read its field with the {@link #document}
1325 * method will result in an error. The presence of this document may still be
1326 * reflected in the {@link #docFreq} statistic, though
1327 * this will be corrected eventually as the index is further modified.
1329 * @throws StaleReaderException if the index has changed
1330 * since this reader was opened
1331 * @throws CorruptIndexException if the index is corrupt
1332 * @throws LockObtainFailedException if another writer
1333 * has this index open (<code>write.lock</code> could not
1335 * @throws IOException if there is a low-level IO error
// Public, synchronized entry point for deleting one document by number;
// deleteDocuments(Term) calls this once per matching document.
1337 public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1345 /** Implements deletion of the document numbered <code>docNum</code>.
1346 * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
// Abstract hook that performs the reader-specific deletion; it declares fewer checked
// exceptions than the public deleteDocument(int).
1348 protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException;
1351 /** Deletes all documents that have a given <code>term</code> indexed.
1352 * This is useful if one uses a document field to hold a unique ID string for
1353 * the document. Then to delete such a document, one merely constructs a
1354 * term with the appropriate field and the unique ID string as its text and
1355 * passes it to this method.
1356 * See {@link #deleteDocument(int)} for information about when this deletion will
1359 * @return the number of documents deleted
1360 * @throws StaleReaderException if the index has changed
1361 * since this reader was opened
1362 * @throws CorruptIndexException if the index is corrupt
1363 * @throws LockObtainFailedException if another writer
1364 * has this index open (<code>write.lock</code> could not
1366 * @throws IOException if there is a low-level IO error
// Deletes by term: enumerate the documents containing the term and delete each one
// through deleteDocument(int).
// This method is not itself synchronized; each deleteDocument(int) call synchronizes on
// its own, so the enumeration as a whole is not one atomic step.
1368 public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1370 TermDocs docs = termDocs(term);
// A null enumerator is treated as "nothing to delete".
1371 if (docs == null) return 0;
1374 while (docs.next()) {
1375 deleteDocument(docs.doc());
1384 /** Undeletes all documents currently marked as deleted in
1387 * <p>NOTE: this method can only recover documents marked
1388 * for deletion but not yet removed from the index; when
1389 * and how Lucene removes deleted documents is an
1390 * implementation detail, subject to change from release
1391 * to release. However, you can use {@link
1392 * #numDeletedDocs} on the current IndexReader instance to
1393 * see how many documents will be un-deleted.
1395 * @throws StaleReaderException if the index has changed
1396 * since this reader was opened
1397 * @throws LockObtainFailedException if another writer
1398 * has this index open (<code>write.lock</code> could not
1400 * @throws CorruptIndexException if the index is corrupt
1401 * @throws IOException if there is a low-level IO error
// Public, synchronized entry point for undeleting; the abstract doUndeleteAll hook is
// documented as the subclass implementation.
1403 public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1410 /** Implements actual undeleteAll() in subclass. */
// Abstract hook; it declares fewer checked exceptions than the public undeleteAll().
1411 protected abstract void doUndeleteAll() throws CorruptIndexException, IOException;
1413 /** Does nothing by default. Subclasses that require a write lock for
1414 * index modifications must implement this method. */
// Overridable hook (protected, non-final); synchronized on this reader like the public
// mutators in this class.
1415 protected synchronized void acquireWriteLock() throws IOException {
1421 * @throws IOException
// final: subclasses cannot override. Compare flush(Map), which forwards to commit(Map).
1423 public final synchronized void flush() throws IOException {
1429 * @param commitUserData Opaque Map (String -> String)
1430 * that's recorded into the segments file in the index,
1431 * and retrievable by {@link
1432 * IndexReader#getCommitUserData}.
1433 * @throws IOException
// final: subclasses cannot override. The user data is forwarded unchanged to commit(Map).
1435 public final synchronized void flush(Map<String, String> commitUserData) throws IOException {
1437 commit(commitUserData);
1441 * Commit changes resulting from delete, undeleteAll, or
1442 * setNorm operations
1444 * If an exception is hit, then either no changes or all
1445 * changes will have been committed to the index
1446 * (transactional semantics).
1447 * @throws IOException if there is a low-level IO error
// No-argument commit. It is protected, unlike the public commit(Map) overload, and
// final, so subclasses customize committing through doCommit instead.
1449 protected final synchronized void commit() throws IOException {
1454 * Commit changes resulting from delete, undeleteAll, or
1455 * setNorm operations
1457 * If an exception is hit, then either no changes or all
1458 * changes will have been committed to the index
1459 * (transactional semantics).
1460 * @throws IOException if there is a low-level IO error
// final template method: the reader-specific work is done by the abstract doCommit hook,
// which receives the user data unchanged.
1462 public final synchronized void commit(Map<String, String> commitUserData) throws IOException {
1463 // Don't call ensureOpen since we commit() on close
1464 doCommit(commitUserData);
1468 /** Implements commit. */
// Abstract hook invoked by commit(Map) with the caller's user data.
1469 protected abstract void doCommit(Map<String, String> commitUserData) throws IOException;
1472 * Closes files associated with this index.
1473 * Also saves any new deletions to disk.
1474 * No other methods should be called after this has been called.
1475 * @throws IOException if there is a low-level IO error
// final: subclasses cannot override; their cleanup belongs in the abstract doClose hook.
1477 public final synchronized void close() throws IOException {
1484 /** Implements close. */
// Abstract hook for reader-specific cleanup.
1485 protected abstract void doClose() throws IOException;
1489 * Get a list of unique field names that exist in this index and have the specified
1490 * field option information.
1491 * @param fldOption specifies which field option should be available for the returned fields
1492 * @return Collection of Strings indicating the names of the fields.
1493 * @see IndexReader.FieldOption
// Abstract: the field-name lookup for a given FieldOption is supplied by concrete readers.
1495 public abstract Collection<String> getFieldNames(FieldOption fldOption);
1498 * Expert: return the IndexCommit that this reader has
1499 * opened. This method is only implemented by those
1500 * readers that correspond to a Directory with its own
1503 * @lucene.experimental
// Base-class default: always throws; only readers that override this expose a commit.
1505 public IndexCommit getIndexCommit() throws IOException {
1506 throw new UnsupportedOperationException("This reader does not support this method.");
1510 * Prints the filename and size of each file within a given compound file.
1511 * Add the -extract flag to extract files to the current working directory.
1512 * In order to make the extracted version of the index work, you have to copy
1513 * the segments file from the compound index into the directory where the extracted files are stored.
1514 * @param args Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>
// Command-line utility: lists the entries of a compound file and, with -extract,
// copies each entry out to a file.
1516 public static void main(String [] args) {
1517 String filename = null;
1518 boolean extract = false;
// Argument scan: "-extract" is recognised as a flag; the filename == null test means
// only the first non-flag argument can be taken as the compound file name.
1520 for (int i = 0; i < args.length; ++i) {
1521 if (args[i].equals("-extract")) {
1523 } else if (filename == null) {
// No file name given: print usage.
1528 if (filename == null) {
1529 System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
1533 Directory dir = null;
1534 CompoundFileReader cfr = null;
// Split the argument into its parent directory (opened as an FSDirectory) and the
// bare file name (opened as a compound file inside that directory).
1537 File file = new File(filename);
1538 String dirname = file.getAbsoluteFile().getParent();
1539 filename = file.getName();
1540 dir = FSDirectory.open(new File(dirname));
1541 cfr = new CompoundFileReader(dir, filename);
1543 String [] files = cfr.listAll();
1544 ArrayUtil.mergeSort(files); // sort the array of file names so that the output is more readable
1546 for (int i = 0; i < files.length; ++i) {
1547 long len = cfr.fileLength(files[i]);
1550 System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
1551 IndexInput ii = cfr.openInput(files[i]);
// NOTE(review): the entry name read from the compound file is used directly as the
// output path; no validation of that name is visible here - confirm this is
// acceptable for untrusted .cfs files.
1553 FileOutputStream f = new FileOutputStream(files[i]);
1555 // read and write with a small buffer, which is more efficient than reading byte by byte
1556 byte[] buffer = new byte[1024];
1557 int chunk = buffer.length;
// Each copy step moves at most one buffer's worth, or fewer bytes when len is smaller.
1559 final int bufLen = (int) Math.min(chunk, len);
1560 ii.readBytes(buffer, 0, bufLen);
1561 f.write(buffer, 0, bufLen);
// Listing output: entry name and size.
1569 System.out.println(files[i] + ": " + len + " bytes");
// I/O failures are reported via printStackTrace rather than rethrown.
1571 } catch (IOException ioe) {
1572 ioe.printStackTrace();
1581 catch (IOException ioe) {
1582 ioe.printStackTrace();
1587 /** Returns all commit points that exist in the Directory.
1588 * Normally, because the default is {@link
1589 * KeepOnlyLastCommitDeletionPolicy}, there would be only
1590 * one commit point. But if you're using a custom {@link
1591 * IndexDeletionPolicy} then there could be many commits.
1592 * Once you have a given commit, you can open a reader on
1593 * it by calling {@link IndexReader#open(IndexCommit,boolean)}
1594 * There must be at least one commit in
1595 * the Directory, else this method throws {@link
1596 * IndexNotFoundException}. Note that if a commit is in
1597 * progress while this method is running, that commit
1598 * may or may not be returned.
1600 * @return a sorted list of {@link IndexCommit}s, from oldest
// Thin static wrapper: the listing is delegated entirely to DirectoryReader.listCommits.
1602 public static Collection<IndexCommit> listCommits(Directory dir) throws IOException {
1603 return DirectoryReader.listCommits(dir);
1606 /** Expert: returns the sequential sub readers that this
1607 * reader is logically composed of. For example,
1608 * IndexSearcher uses this API to drive searching by one
1609 * sub reader at a time. If this reader is not composed
1610 * of sequential child readers, it should return null.
1611 * If this method returns an empty array, that means this
1612 * reader is a null reader (for example a MultiReader
1613 * that has no sub readers).
1615 * NOTE: You should not try using sub-readers returned by
1616 * this method to make any changes (setNorm, deleteDocument,
1617 * etc.). While this might succeed for one composite reader
1618 * (like MultiReader), it will most likely lead to index
1619 * corruption for other readers (like DirectoryReader obtained
1620 * through {@link #open}). Use the parent reader directly. */
// Non-final and non-abstract, so subclasses may override it; the Javadoc above
// describes the null / empty-array conventions for the result.
1621 public IndexReader[] getSequentialSubReaders() {
// Returns an opaque key object for caches keyed on this reader.
1627 public Object getCoreCacheKey() {
1628 // Don't call ensureOpen since FC (presumably FieldCache) calls this (to evict)
1633 /** Expert. Warning: this returns null if the reader has
// Returns an opaque key object; per the Javadoc fragment above the result can be null,
// so callers must handle that.
1635 public Object getDeletesCacheKey() {
1639 /** Returns the number of unique terms (across all fields)
1642 * This method returns long, even though internally
1643 * Lucene cannot handle more than 2^31 unique terms, for
1644 * a possible future when this limitation is removed.
1646 * @throws UnsupportedOperationException if this count
1647 * cannot be easily determined (eg Multi*Readers).
1648 * Instead, you should call {@link
1649 * #getSequentialSubReaders} and ask each sub reader for
1650 * its unique term count. */
// Base-class default: always throws; readers that can count their terms override this.
1651 public long getUniqueTermCount() throws IOException {
1652 throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
// The four groups below follow one pattern, one group per reopen signature: a
// VirtualMethod handle for the deprecated reopen(...) overload, a handle for the
// matching doOpenIfChanged(...) overload, and a per-instance flag computed from
// getClass() by comparing the two.
// NOTE(review): presumably a result >= 0 means the concrete class overrides
// doOpenIfChanged at least as specifically as reopen - confirm against the
// VirtualMethod.compareImplementationDistance documentation.
1655 // Back compat for reopen()
1657 private static final VirtualMethod<IndexReader> reopenMethod1 =
1658 new VirtualMethod<IndexReader>(IndexReader.class, "reopen");
1660 private static final VirtualMethod<IndexReader> doOpenIfChangedMethod1 =
1661 new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged");
1663 private final boolean hasNewReopenAPI1 =
1664 VirtualMethod.compareImplementationDistance(getClass(),
1665 doOpenIfChangedMethod1, reopenMethod1) >= 0; // it's ok for both to be overridden
1667 // Back compat for reopen(boolean openReadOnly)
1669 private static final VirtualMethod<IndexReader> reopenMethod2 =
1670 new VirtualMethod<IndexReader>(IndexReader.class, "reopen", boolean.class);
1672 private static final VirtualMethod<IndexReader> doOpenIfChangedMethod2 =
1673 new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", boolean.class);
1675 private final boolean hasNewReopenAPI2 =
1676 VirtualMethod.compareImplementationDistance(getClass(),
1677 doOpenIfChangedMethod2, reopenMethod2) >= 0; // it's ok for both to be overridden
1679 // Back compat for reopen(IndexCommit commit)
1681 private static final VirtualMethod<IndexReader> reopenMethod3 =
1682 new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexCommit.class);
1684 private static final VirtualMethod<IndexReader> doOpenIfChangedMethod3 =
1685 new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexCommit.class);
1687 private final boolean hasNewReopenAPI3 =
1688 VirtualMethod.compareImplementationDistance(getClass(),
1689 doOpenIfChangedMethod3, reopenMethod3) >= 0; // it's ok for both to be overridden
1691 // Back compat for reopen(IndexWriter writer, boolean applyDeletes)
1693 private static final VirtualMethod<IndexReader> reopenMethod4 =
1694 new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexWriter.class, boolean.class);
1696 private static final VirtualMethod<IndexReader> doOpenIfChangedMethod4 =
1697 new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexWriter.class, boolean.class);
1699 private final boolean hasNewReopenAPI4 =
1700 VirtualMethod.compareImplementationDistance(getClass(),
1701 doOpenIfChangedMethod4, reopenMethod4) >= 0; // it's ok for both to be overridden
1703 /** For IndexReader implementations that use
1704 * TermInfosReader to read terms, this returns the
1705 * current indexDivisor as specified when the reader was
// Base-class default: always throws; only readers that track an index divisor override this.
1708 public int getTermInfosIndexDivisor() {
1709 throw new UnsupportedOperationException("This reader does not support this method.");