X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/IndexReader.java diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/IndexReader.java deleted file mode 100644 index ca689bf..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ /dev/null @@ -1,1386 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.FieldCache; // javadocs -import org.apache.lucene.search.Similarity; -import org.apache.lucene.store.*; -import org.apache.lucene.util.ArrayUtil; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.Closeable; -import java.util.Collection; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; - -/** IndexReader is an abstract class, providing an interface for accessing an - index. Search of an index is done entirely through this abstract interface, - so that any subclass which implements it is searchable. - -
Concrete subclasses of IndexReader are usually constructed with a call to - one of the static open() methods, e.g. {@link - #open(Directory, boolean)}. - -
For efficiency, in this API documents are often referred to via - document numbers, non-negative integers which each name a unique - document in the index. These document numbers are ephemeral--they may change - as documents are added to and deleted from an index. Clients should thus not - rely on a given document having the same number between sessions. - -
An IndexReader can be opened on a directory for which an IndexWriter is - opened already, but it cannot be used to delete documents from the index then. - -
- NOTE: for backwards API compatibility, several methods are not listed - as abstract, but have no useful implementations in this base class and - instead always throw UnsupportedOperationException. Subclasses are - strongly encouraged to override these methods, but in many cases may not - need to. -
- -
- - NOTE: as of 2.4, it's possible to open a read-only - IndexReader using the static open methods that accept the - boolean readOnly parameter. Such a reader has better - concurrency as it's not necessary to synchronize on the - isDeleted method. You must specify false if you want to - make changes with the resulting IndexReader. -
- -
NOTE: {@link - IndexReader} instances are completely thread - safe, meaning multiple threads can call any of its methods, - concurrently. If your application requires external - synchronization, you should not synchronize on the - IndexReader instance; use your own - (non-Lucene) objects instead. -*/ -public abstract class IndexReader implements Cloneable,Closeable { - - /** - * A custom listener that's invoked when the IndexReader - * is finished. - * - *
For a SegmentReader, this listener is called only - * once all SegmentReaders sharing the same core are - * closed. At this point it is safe for apps to evict - * this reader from any caches keyed on {@link - * #getCoreCacheKey}. This is the same interface that - * {@link FieldCache} uses, internally, to evict - * entries.
- * - *
For other readers, this listener is called when they - * are closed.
- * - * @lucene.experimental - */ - public static interface ReaderFinishedListener { - public void finished(IndexReader reader); - } - - // Impls must set this if they may call add/removeReaderFinishedListener: - protected volatile Collection readerFinishedListeners; - - /** Expert: adds a {@link ReaderFinishedListener}. The - * provided listener is also added to any sub-readers, if - * this is a composite reader. Also, any reader reopened - * or cloned from this one will also copy the listeners at - * the time of reopen. - * - * @lucene.experimental */ - public void addReaderFinishedListener(ReaderFinishedListener listener) { - readerFinishedListeners.add(listener); - } - - /** Expert: remove a previously added {@link ReaderFinishedListener}. - * - * @lucene.experimental */ - public void removeReaderFinishedListener(ReaderFinishedListener listener) { - readerFinishedListeners.remove(listener); - } - - protected void notifyReaderFinishedListeners() { - // Defensive (should never be null -- all impls must set - // this): - if (readerFinishedListeners != null) { - for(ReaderFinishedListener listener : readerFinishedListeners) { - listener.finished(this); - } - } - } - - protected void readerFinished() { - notifyReaderFinishedListeners(); - } - - /** - * Constants describing field properties, for example used for - * {@link IndexReader#getFieldNames(FieldOption)}. - */ - public static enum FieldOption { - /** All fields */ - ALL, - /** All indexed fields */ - INDEXED, - /** All fields that store payloads */ - STORES_PAYLOADS, - /** All fields that omit tf */ - OMIT_TERM_FREQ_AND_POSITIONS, - /** All fields that omit positions */ - OMIT_POSITIONS, - /** All fields which are not indexed */ - UNINDEXED, - /** All fields which are indexed with termvectors enabled */ - INDEXED_WITH_TERMVECTOR, - /** All fields which are indexed but don't have termvectors enabled */ - INDEXED_NO_TERMVECTOR, - /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */ - TERMVECTOR, - /** All fields with termvectors with position values enabled */ - TERMVECTOR_WITH_POSITION, - /** All fields with termvectors with offset values enabled */ - TERMVECTOR_WITH_OFFSET, - /** All fields with termvectors with offset values and position values enabled */ - TERMVECTOR_WITH_POSITION_OFFSET, - } - - private boolean closed; - protected boolean hasChanges; - - private final AtomicInteger refCount = new AtomicInteger(); - - static int DEFAULT_TERMS_INDEX_DIVISOR = 1; - - /** Expert: returns the current refCount for this reader */ - public int getRefCount() { - return refCount.get(); - } - - /** - * Expert: increments the refCount of this IndexReader - * instance. RefCounts are used to determine when a - * reader can be closed safely, i.e. as soon as there are - * no more references. Be sure to always call a - * corresponding {@link #decRef}, in a finally clause; - * otherwise the reader may never be closed. Note that - * {@link #close} simply calls decRef(), which means that - * the IndexReader will not really be closed until {@link - * #decRef} has been called for all outstanding - * references. 
- * - * @see #decRef - */ - public void incRef() { - ensureOpen(); - refCount.incrementAndGet(); - } - - /** {@inheritDoc} */ - @Override - public String toString() { - final StringBuilder buffer = new StringBuilder(); - if (hasChanges) { - buffer.append('*'); - } - buffer.append(getClass().getSimpleName()); - buffer.append('('); - final IndexReader[] subReaders = getSequentialSubReaders(); - if ((subReaders != null) && (subReaders.length > 0)) { - buffer.append(subReaders[0]); - for (int i = 1; i < subReaders.length; ++i) { - buffer.append(" ").append(subReaders[i]); - } - } - buffer.append(')'); - return buffer.toString(); - } - - /** - * Expert: decreases the refCount of this IndexReader - * instance. If the refCount drops to 0, then pending - * changes (if any) are committed to the index and this - * reader is closed. If an exception is hit, the refCount - * is unchanged. - * - * @throws IOException in case an IOException occurs in commit() or doClose() - * - * @see #incRef - */ - public void decRef() throws IOException { - ensureOpen(); - if (refCount.getAndDecrement() == 1) { - boolean success = false; - try { - commit(); - doClose(); - success = true; - } finally { - if (!success) { - // Put reference back on failure - refCount.incrementAndGet(); - } - } - readerFinished(); - } - } - - protected IndexReader() { - refCount.set(1); - } - - /** - * @throws AlreadyClosedException if this IndexReader is closed - */ - protected final void ensureOpen() throws AlreadyClosedException { - if (refCount.get() <= 0) { - throw new AlreadyClosedException("this IndexReader is closed"); - } - } - - /** Returns a IndexReader reading the index in the given - * Directory, with readOnly=true. - * @param directory the index directory - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Returns an IndexReader reading the index in the given - * Directory. You should pass readOnly=true, since it - * gives much better concurrent performance, unless you - * intend to do write operations (delete documents or - * change norms) with the reader. - * @param directory the index directory - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** - * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}. - * - * @param writer The IndexWriter to open from - * @param applyAllDeletes If true, all buffered deletes will - * be applied (made visible) in the returned reader. If - * false, the deletes are not applied but remain buffered - * (in IndexWriter) so that they will be applied in the - * future. Applying deletes can be costly, so if your app - * can tolerate deleted documents being returned you might - * gain some performance by passing false. 
- * @return The new IndexReader - * @throws CorruptIndexException - * @throws IOException if there is a low-level IO error - * - * @see #reopen(IndexWriter,boolean) - * - * @lucene.experimental - */ - public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { - return writer.getReader(applyAllDeletes); - } - - /** Expert: returns an IndexReader reading the index in the given - * {@link IndexCommit}. You should pass readOnly=true, since it - * gives much better concurrent performance, unless you - * intend to do write operations (delete documents or - * change norms) with the reader. - * @param commit the commit point to open - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Expert: returns an IndexReader reading the index in - * the given Directory, with a custom {@link - * IndexDeletionPolicy}. You should pass readOnly=true, - * since it gives much better concurrent performance, - * unless you intend to do write operations (delete - * documents or change norms) with the reader. - * @param directory the index directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Expert: returns an IndexReader reading the index in - * the given Directory, with a custom {@link - * IndexDeletionPolicy}. You should pass readOnly=true, - * since it gives much better concurrent performance, - * unless you intend to do write operations (delete - * documents or change norms) with the reader. - * @param directory the index directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @param termInfosIndexDivisor Subsamples which indexed - * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1. Set this - * to -1 to skip loading the terms index entirely. 
- * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); - } - - /** Expert: returns an IndexReader reading the index in - * the given Directory, using a specific commit and with - * a custom {@link IndexDeletionPolicy}. You should pass - * readOnly=true, since it gives much better concurrent - * performance, unless you intend to do write operations - * (delete documents or change norms) with the reader. - * @param commit the specific {@link IndexCommit} to open; - * see {@link IndexReader#listCommits} to list all commits - * in a directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - /** Expert: returns an IndexReader reading the index in - * the given Directory, using a specific commit and with - * a custom {@link IndexDeletionPolicy}. You should pass - * readOnly=true, since it gives much better concurrent - * performance, unless you intend to do write operations - * (delete documents or change norms) with the reader. - * @param commit the specific {@link IndexCommit} to open; - * see {@link IndexReader#listCommits} to list all commits - * in a directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @param termInfosIndexDivisor Subsamples which indexed - * terms are loaded into RAM. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1. Set this - * to -1 to skip loading the terms index entirely. This is only useful in - * advanced situations when you will only .next() through all terms; - * attempts to seek will hit an exception. 
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); - } - - private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { - return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); - } - - /** - * Refreshes an IndexReader if the index has changed since this instance - * was (re)opened. - *
- * Opening an IndexReader is an expensive operation. This method can be used - * to refresh an existing IndexReader to reduce these costs. This method - * tries to only load segments that have changed or were created after the - * IndexReader was (re)opened. - *
- * If the index has not changed since this instance was (re)opened, then this - * call is a NOOP and returns this instance. Otherwise, a new instance is - * returned. The old instance is not closed and remains usable.
- *
- * If the reader is reopened, even though they share - * resources internally, it's safe to make changes - * (deletions, norms) with the new reader. All shared - * mutable state obeys "copy on write" semantics to ensure - * the changes are not seen by other readers. - *
- * You can determine whether a reader was actually reopened by comparing the - * old instance with the instance returned by this method: - *
-   * IndexReader reader = ... 
-   * ...
-   * IndexReader newReader = r.reopen();
-   * if (newReader != reader) {
-   * ...     // reader was reopened
-   *   reader.close(); 
-   * }
-   * reader = newReader;
-   * ...
-   * 
- * - * Be sure to synchronize that code so that other threads, - * if present, can never use reader after it has been - * closed and before it's switched to newReader. - * - *
NOTE: If this reader is a near real-time - * reader (obtained from {@link IndexWriter#getReader()}, - * reopen() will simply call writer.getReader() again for - * you, though this may change in the future. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public synchronized IndexReader reopen() throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support reopen()."); - } - - - /** Just like {@link #reopen()}, except you can change the - * readOnly of the original reader. If the index is - * unchanged but readOnly is different then a new reader - * will be returned. */ - public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support reopen()."); - } - - /** Expert: reopen this reader on a specific commit point. - * This always returns a readOnly reader. If the - * specified commit point matches what this reader is - * already on, and this reader is already readOnly, then - * this same instance is returned; if it is not already - * readOnly, a readOnly clone is returned. */ - public synchronized IndexReader reopen(final IndexCommit commit) throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit)."); - } - - /** - * Expert: returns a readonly reader, covering all - * committed as well as un-committed changes to the index. - * This provides "near real-time" searching, in that - * changes made during an IndexWriter session can be - * quickly made available for searching without closing - * the writer nor calling {@link #commit}. - * - *
Note that this is functionally equivalent to calling - * {#flush} (an internal IndexWriter operation) and then using {@link IndexReader#open} to - * open a new reader. But the turnaround time of this - * method should be faster since it avoids the potentially - * costly {@link #commit}.
- * - *
You must close the {@link IndexReader} returned by - * this method once you are done using it.
- * - *
It's near real-time because there is no hard - * guarantee on how quickly you can get a new reader after - * making changes with IndexWriter. You'll have to - * experiment in your situation to determine if it's - * fast enough. As this is a new and experimental - * feature, please report back on your findings so we can - * learn, improve and iterate.
- * - *
The resulting reader supports {@link - * IndexReader#reopen}, but that call will simply forward - * back to this method (though this may change in the - * future).
- * - *
The very first time this method is called, this - * writer instance will make every effort to pool the - * readers that it opens for doing merges, applying - * deletes, etc. This means additional resources (RAM, - * file descriptors, CPU time) will be consumed.
- * - *
For lower latency on reopening a reader, you should - * call {@link IndexWriterConfig#setMergedSegmentWarmer} to - * pre-warm a newly merged segment before it's committed - * to the index. This is important for minimizing - * index-to-search delay after a large merge.
- * - *
If an addIndexes* call is running in another thread, - * then this reader will only search those segments from - * the foreign index that have been successfully copied - * over, so far
. - * - *
NOTE: Once the writer is closed, any - * outstanding readers may continue to be used. However, - * if you attempt to reopen any of those readers, you'll - * hit an {@link AlreadyClosedException}.
- * - * @return IndexReader that covers entire index plus all - * changes made so far by this IndexWriter instance - * - * @param writer The IndexWriter to open from - * @param applyAllDeletes If true, all buffered deletes will - * be applied (made visible) in the returned reader. If - * false, the deletes are not applied but remain buffered - * (in IndexWriter) so that they will be applied in the - * future. Applying deletes can be costly, so if your app - * can tolerate deleted documents being returned you might - * gain some performance by passing false. - * - * @throws IOException - * - * @lucene.experimental - */ - public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { - return writer.getReader(applyAllDeletes); - } - - /** - * Efficiently clones the IndexReader (sharing most - * internal state). - *
- * On cloning a reader with pending changes (deletions, - * norms), the original reader transfers its write lock to - * the cloned reader. This means only the cloned reader - * may make further changes to the index, and commit the - * changes to the index on close, but the old reader still - * reflects all changes made up until it was cloned. - *
- * Like {@link #reopen()}, it's safe to make changes to - * either the original or the cloned reader: all shared - * mutable state obeys "copy on write" semantics to ensure - * the changes are not seen by other readers. - *
- */ - @Override - public synchronized Object clone() { - throw new UnsupportedOperationException("This reader does not implement clone()"); - } - - /** - * Clones the IndexReader and optionally changes readOnly. A readOnly - * reader cannot open a writeable reader. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not implement clone()"); - } - - /** - * Returns the directory associated with this index. The Default - * implementation returns the directory specified by subclasses when - * delegating to the IndexReader(Directory) constructor, or throws an - * UnsupportedOperationException if one was not specified. - * @throws UnsupportedOperationException if no directory - */ - public Directory directory() { - ensureOpen(); - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Returns the time the index in the named directory was last modified. - * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException { - return ((Long) new SegmentInfos.FindSegmentsFile(directory2) { - @Override - public Object doBody(String segmentFileName) throws IOException { - return Long.valueOf(directory2.fileModified(segmentFileName)); - } - }.run()).longValue(); - } - - /** - * Reads version number from segments files. The version number is - * initialized with a timestamp and then increased by one for each change of - * the index. - * - * @param directory where the index resides. - * @return version number. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException { - return SegmentInfos.readCurrentVersion(directory); - } - - /** - * Reads commitUserData, previously passed to {@link - * IndexWriter#commit(Map)}, from current index - * segments file. This will return null if {@link - * IndexWriter#commit(Map)} has never been called for - * this index. - * - * @param directory where the index resides. - * @return commit userData. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * - * @see #getCommitUserData() - */ - public static Map getCommitUserData(Directory directory) throws CorruptIndexException, IOException { - return SegmentInfos.readCurrentUserData(directory); - } - - /** - * Version number when this IndexReader was opened. Not - * implemented in the IndexReader base class. - * - *
If this reader is based on a Directory (ie, was - * created by calling {@link #open}, or {@link #reopen} on - * a reader based on a Directory), then this method - * returns the version recorded in the commit that the - * reader opened. This version is advanced every time - * {@link IndexWriter#commit} is called.
- * - *
If instead this reader is a near real-time reader - * (ie, obtained by a call to {@link - * IndexWriter#getReader}, or by calling {@link #reopen} - * on a near real-time reader), then this method returns - * the version of the last commit done by the writer. - * Note that even as further changes are made with the - * writer, the version will not change until a commit is - * completed. Thus, you should not rely on this method to - * determine when a near real-time reader should be - * opened. Use {@link #isCurrent} instead.
- * - * @throws UnsupportedOperationException unless overridden in subclass - */ - public long getVersion() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Retrieve the String userData optionally passed to - * IndexWriter#commit. This will return null if {@link - * IndexWriter#commit(Map)} has never been called for - * this index. - * - * @see #getCommitUserData(Directory) - */ - public Map getCommitUserData() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - - /** - * Check whether any new changes have occurred to the - * index since this reader was opened. - * - *
If this reader is based on a Directory (ie, was - * created by calling {@link #open}, or {@link #reopen} on - * a reader based on a Directory), then this method checks - * if any further commits (see {@link IndexWriter#commit}) - * have occurred in that directory.
- * - *
If instead this reader is a near real-time reader - * (ie, obtained by a call to {@link - * IndexWriter#getReader}, or by calling {@link #reopen} - * on a near real-time reader), then this method checks if - * either a new commit has occurred, or any new - * uncommitted changes have taken place via the writer. - * Note that even if the writer has only performed - * merging, this method will still return false.
- * - *
In any event, if this returns false, you should call - * {@link #reopen} to get a new reader that sees the - * changes.
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @throws UnsupportedOperationException unless overridden in subclass - */ - public boolean isCurrent() throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Checks is the index is optimized (if it has a single segment and - * no deletions). Not implemented in the IndexReader base class. - * @return true if the index is optimized; false otherwise - * @throws UnsupportedOperationException unless overridden in subclass - */ - public boolean isOptimized() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Return an array of term frequency vectors for the specified document. - * The array contains a vector for each vectorized field in the document. - * Each vector contains terms and frequencies for all terms in a given vectorized field. - * If no such fields existed, the method returns null. The term vectors that are - * returned may either be of type {@link TermFreqVector} - * or of type {@link TermPositionVector} if - * positions or offsets have been stored. - * - * @param docNumber document for which term frequency vectors are returned - * @return array of term frequency vectors. May be null if no term vectors have been - * stored for the specified document. - * @throws IOException if index cannot be accessed - * @see org.apache.lucene.document.Field.TermVector - */ - abstract public TermFreqVector[] getTermFreqVectors(int docNumber) - throws IOException; - - - /** - * Return a term frequency vector for the specified document and field. The - * returned vector contains terms and frequencies for the terms in - * the specified field of this document, if the field had the storeTermVector - * flag set. If termvectors had been stored with positions or offsets, a - * {@link TermPositionVector} is returned. - * - * @param docNumber document for which the term frequency vector is returned - * @param field field for which the term frequency vector is returned. - * @return term frequency vector May be null if field does not exist in the specified - * document or term vector was not stored. - * @throws IOException if index cannot be accessed - * @see org.apache.lucene.document.Field.TermVector - */ - abstract public TermFreqVector getTermFreqVector(int docNumber, String field) - throws IOException; - - /** - * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of - * the {@link TermFreqVector}. - * @param docNumber The number of the document to load the vector for - * @param field The name of the field to load - * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null - * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. - * - */ - abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException; - - /** - * Map all the term vectors for all fields in a Document - * @param docNumber The number of the document to load the vector for - * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null - * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. 
- */ - abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException; - - /** - * Returns true if an index exists at the specified directory. - * @param directory the directory to check for an index - * @return true if an index exists; false otherwise - * @throws IOException if there is a problem with accessing the index - */ - public static boolean indexExists(Directory directory) throws IOException { - try { - new SegmentInfos().read(directory); - return true; - } catch (IOException ioe) { - return false; - } - } - - /** Returns the number of documents in this index. */ - public abstract int numDocs(); - - /** Returns one greater than the largest possible document number. - * This may be used to, e.g., determine how big to allocate an array which - * will have an element for every document number in an index. - */ - public abstract int maxDoc(); - - /** Returns the number of deleted documents. */ - public int numDeletedDocs() { - return maxDoc() - numDocs(); - } - - /** - * Returns the stored fields of the nth - * Document in this index. - *
- * NOTE: for performance reasons, this method does not check if the - * requested document is deleted, and therefore asking for a deleted document - * may yield unspecified results. Usually this is not required, however you - * can call {@link #isDeleted(int)} with the requested document ID to verify - * the document is not deleted. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public Document document(int n) throws CorruptIndexException, IOException { - ensureOpen(); - if (n < 0 || n >= maxDoc()) { - throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")"); - } - return document(n, null); - } - - /** - * Get the {@link org.apache.lucene.document.Document} at the n - * th position. The {@link FieldSelector} may be used to determine - * what {@link org.apache.lucene.document.Field}s to load and how they should - * be loaded. NOTE: If this Reader (more specifically, the underlying - * FieldsReader) is closed before the lazy - * {@link org.apache.lucene.document.Field} is loaded an exception may be - * thrown. If you want the value of a lazy - * {@link org.apache.lucene.document.Field} to be available after closing you - * must explicitly load it or fetch the Document again with a new loader. - *
- * NOTE: for performance reasons, this method does not check if the - * requested document is deleted, and therefore asking for a deleted document - * may yield unspecified results. Usually this is not required, however you - * can call {@link #isDeleted(int)} with the requested document ID to verify - * the document is not deleted. - * - * @param n Get the document at the nth position - * @param fieldSelector The {@link FieldSelector} to use to determine what - * Fields should be loaded on the Document. May be null, in which case - * all Fields will be loaded. - * @return The stored fields of the - * {@link org.apache.lucene.document.Document} at the nth position - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @see org.apache.lucene.document.Fieldable - * @see org.apache.lucene.document.FieldSelector - * @see org.apache.lucene.document.SetBasedFieldSelector - * @see org.apache.lucene.document.LoadFirstFieldSelector - */ - // TODO (1.5): When we convert to JDK 1.5 make this Set - public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; - - /** Returns true if document n has been deleted */ - public abstract boolean isDeleted(int n); - - /** Returns true if any documents have been deleted */ - public abstract boolean hasDeletions(); - - /** Returns true if there are norms stored for this field. */ - public boolean hasNorms(String field) throws IOException { - // backward compatible implementation. - // SegmentReader has an efficient implementation. - ensureOpen(); - return norms(field) != null; - } - - /** Returns the byte-encoded normalization factor for the named field of - * every document. This is used by the search code to score documents. - * Returns null if norms were not indexed for this field. - * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract byte[] norms(String field) throws IOException; - - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. - * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract void norms(String field, byte[] bytes, int offset) - throws IOException; - - /** Expert: Resets the normalization factor for the named field of the named - * document. The norm represents the product of the field's {@link - * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, - * int) length normalization}. Thus, to preserve the length normalization - * values when resetting this, one should base the new value upon the old. - * - * NOTE: If this field does not index norms, then - * this method throws {@link IllegalStateException}. 
- * - * @see #norms(String) - * @see Similarity#decodeNormValue(byte) - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - * @throws IllegalStateException if the field does not index norms - */ - public synchronized void setNorm(int doc, String field, byte value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doSetNorm(doc, field, value); - } - - /** Implements setNorm in subclass.*/ - protected abstract void doSetNorm(int doc, String field, byte value) - throws CorruptIndexException, IOException; - - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNormValue(byte) - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - * @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the - * float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}. - * This method will be removed in Lucene 4.0 - */ - @Deprecated - public void setNorm(int doc, String field, float value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); - } - - /** Returns an enumeration of all the terms in the index. The - * enumeration is ordered by Term.compareTo(). Each term is greater - * than all that precede it in the enumeration. Note that after - * calling terms(), {@link TermEnum#next()} must be called - * on the resulting enumeration before calling other methods such as - * {@link TermEnum#term()}. - * @throws IOException if there is a low-level IO error - */ - public abstract TermEnum terms() throws IOException; - - /** Returns an enumeration of all terms starting at a given term. If - * the given term does not exist, the enumeration is positioned at the - * first term greater than the supplied term. The enumeration is - * ordered by Term.compareTo(). Each term is greater than all that - * precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public abstract TermEnum terms(Term t) throws IOException; - - /** Returns the number of documents containing the term t. - * @throws IOException if there is a low-level IO error - */ - public abstract int docFreq(Term t) throws IOException; - - /** Returns an enumeration of all the documents which contain - * term. For each document, the document number, the frequency of - * the term in that document is also provided, for use in - * search scoring. If term is null, then all non-deleted - * docs are returned with freq=1. - * Thus, this method implements the mapping: - *
- *
The enumeration is ordered by document number. Each document number - * is greater than all that precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - TermDocs termDocs = termDocs(); - termDocs.seek(term); - return termDocs; - } - - /** Returns an unpositioned {@link TermDocs} enumerator. - *
- * Note: the TermDocs returned is unpositioned. Before using it, ensure - * that you first position it with {@link TermDocs#seek(Term)} or - * {@link TermDocs#seek(TermEnum)}. - * - * @throws IOException if there is a low-level IO error - */ - public abstract TermDocs termDocs() throws IOException; - - /** Returns an enumeration of all the documents which contain - * term. For each document, in addition to the document number - * and frequency of the term in that document, a list of all of the ordinal - * positions of the term in the document is available. Thus, this method - * implements the mapping: - * - *
- *
This positional information facilitates phrase and proximity searching. - *
The enumeration is ordered by document number. Each document number is - * greater than all that precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public TermPositions termPositions(Term term) throws IOException { - ensureOpen(); - TermPositions termPositions = termPositions(); - termPositions.seek(term); - return termPositions; - } - - /** Returns an unpositioned {@link TermPositions} enumerator. - * @throws IOException if there is a low-level IO error - */ - public abstract TermPositions termPositions() throws IOException; - - - - /** Deletes the document numbered docNum. Once a document is - * deleted it will not appear in TermDocs or TermPostitions enumerations. - * Attempts to read its field with the {@link #document} - * method will result in an error. The presence of this document may still be - * reflected in the {@link #docFreq} statistic, though - * this will be corrected eventually as the index is further modified. - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doDelete(docNum); - } - - - /** Implements deletion of the document numbered docNum. - * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. - */ - protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException; - - - /** Deletes all documents that have a given term indexed. - * This is useful if one uses a document field to hold a unique ID string for - * the document. Then to delete such a document, one merely constructs a - * term with the appropriate field and the unique ID string as its text and - * passes it to this method. - * See {@link #deleteDocument(int)} for information about when this deletion will - * become effective. - * - * @return the number of documents deleted - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - TermDocs docs = termDocs(term); - if (docs == null) return 0; - int n = 0; - try { - while (docs.next()) { - deleteDocument(docs.doc()); - n++; - } - } finally { - docs.close(); - } - return n; - } - - /** Undeletes all documents currently marked as deleted in - * this index. - * - *
NOTE: this method can only recover documents marked - * for deletion but not yet removed from the index; when - * and how Lucene removes deleted documents is an - * implementation detail, subject to change from release - * to release. However, you can use {@link - * #numDeletedDocs} on the current IndexReader instance to - * see how many documents will be un-deleted. - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doUndeleteAll(); - } - - /** Implements actual undeleteAll() in subclass. */ - protected abstract void doUndeleteAll() throws CorruptIndexException, IOException; - - /** Does nothing by default. Subclasses that require a write lock for - * index modifications must implement this method. */ - protected synchronized void acquireWriteLock() throws IOException { - /* NOOP */ - } - - /** - * - * @throws IOException - */ - public final synchronized void flush() throws IOException { - ensureOpen(); - commit(); - } - - /** - * @param commitUserData Opaque Map (String -> String) - * that's recorded into the segments file in the index, - * and retrievable by {@link - * IndexReader#getCommitUserData}. - * @throws IOException - */ - public final synchronized void flush(Map commitUserData) throws IOException { - ensureOpen(); - commit(commitUserData); - } - - /** - * Commit changes resulting from delete, undeleteAll, or - * setNorm operations - * - * If an exception is hit, then either no changes or all - * changes will have been committed to the index - * (transactional semantics). - * @throws IOException if there is a low-level IO error - */ - protected final synchronized void commit() throws IOException { - commit(null); - } - - /** - * Commit changes resulting from delete, undeleteAll, or - * setNorm operations - * - * If an exception is hit, then either no changes or all - * changes will have been committed to the index - * (transactional semantics). - * @throws IOException if there is a low-level IO error - */ - public final synchronized void commit(Map commitUserData) throws IOException { - if (hasChanges) { - doCommit(commitUserData); - } - hasChanges = false; - } - - /** Implements commit. */ - protected abstract void doCommit(Map commitUserData) throws IOException; - - /** - * Closes files associated with this index. - * Also saves any new deletions to disk. - * No other methods should be called after this has been called. - * @throws IOException if there is a low-level IO error - */ - public final synchronized void close() throws IOException { - if (!closed) { - decRef(); - closed = true; - } - } - - /** Implements close. */ - protected abstract void doClose() throws IOException; - - - /** - * Get a list of unique field names that exist in this index and have the specified - * field option information. - * @param fldOption specifies which field option should be available for the returned fields - * @return Collection of Strings indicating the names of the fields. 
- * @see IndexReader.FieldOption - */ - public abstract Collection getFieldNames(FieldOption fldOption); - - /** - * Expert: return the IndexCommit that this reader has - * opened. This method is only implemented by those - * readers that correspond to a Directory with its own - * segments_N file. - * - * @lucene.experimental - */ - public IndexCommit getIndexCommit() throws IOException { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Prints the filename and size of each file within a given compound file. - * Add the -extract flag to extract files to the current working directory. - * In order to make the extracted version of the index work, you have to copy - * the segments file from the compound index into the directory where the extracted files are stored. - * @param args Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> - */ - public static void main(String [] args) { - String filename = null; - boolean extract = false; - - for (int i = 0; i < args.length; ++i) { - if (args[i].equals("-extract")) { - extract = true; - } else if (filename == null) { - filename = args[i]; - } - } - - if (filename == null) { - System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] "); - return; - } - - Directory dir = null; - CompoundFileReader cfr = null; - - try { - File file = new File(filename); - String dirname = file.getAbsoluteFile().getParent(); - filename = file.getName(); - dir = FSDirectory.open(new File(dirname)); - cfr = new CompoundFileReader(dir, filename); - - String [] files = cfr.listAll(); - ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable - - for (int i = 0; i < files.length; ++i) { - long len = cfr.fileLength(files[i]); - - if (extract) { - System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); - IndexInput ii = cfr.openInput(files[i]); - - FileOutputStream f = new FileOutputStream(files[i]); - - // read and write with a small buffer, which is more effective than reading byte by byte - byte[] buffer = new byte[1024]; - int chunk = buffer.length; - while(len > 0) { - final int bufLen = (int) Math.min(chunk, len); - ii.readBytes(buffer, 0, bufLen); - f.write(buffer, 0, bufLen); - len -= bufLen; - } - - f.close(); - ii.close(); - } - else - System.out.println(files[i] + ": " + len + " bytes"); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - finally { - try { - if (dir != null) - dir.close(); - if (cfr != null) - cfr.close(); - } - catch (IOException ioe) { - ioe.printStackTrace(); - } - } - } - - /** Returns all commit points that exist in the Directory. - * Normally, because the default is {@link - * KeepOnlyLastCommitDeletionPolicy}, there would be only - * one commit point. But if you're using a custom {@link - * IndexDeletionPolicy} then there could be many commits. - * Once you have a given commit, you can open a reader on - * it by calling {@link IndexReader#open(IndexCommit,boolean)} - * There must be at least one commit in - * the Directory, else this method throws {@link - * IndexNotFoundException}. Note that if a commit is in - * progress while this method is running, that commit - * may or may not be returned. - * - * @return a sorted list of {@link IndexCommit}s, from oldest - * to latest. 
*/ - public static Collection listCommits(Directory dir) throws IOException { - return DirectoryReader.listCommits(dir); - } - - /** Expert: returns the sequential sub readers that this - * reader is logically composed of. For example, - * IndexSearcher uses this API to drive searching by one - * sub reader at a time. If this reader is not composed - * of sequential child readers, it should return null. - * If this method returns an empty array, that means this - * reader is a null reader (for example a MultiReader - * that has no sub readers). - *
- * NOTE: You should not try using sub-readers returned by - * this method to make any changes (setNorm, deleteDocument, - * etc.). While this might succeed for one composite reader - * (like MultiReader), it will most likely lead to index - * corruption for other readers (like DirectoryReader obtained - * through {@link #open}. Use the parent reader directly. */ - public IndexReader[] getSequentialSubReaders() { - return null; - } - - /** Expert */ - public Object getCoreCacheKey() { - return this; - } - - /** Expert. Warning: this returns null if the reader has - * no deletions */ - public Object getDeletesCacheKey() { - return this; - } - - /** Returns the number of unique terms (across all fields) - * in this reader. - * - * This method returns long, even though internally - * Lucene cannot handle more than 2^31 unique terms, for - * a possible future when this limitation is removed. - * - * @throws UnsupportedOperationException if this count - * cannot be easily determined (eg Multi*Readers). - * Instead, you should call {@link - * #getSequentialSubReaders} and ask each sub reader for - * its unique term count. */ - public long getUniqueTermCount() throws IOException { - throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()"); - } - - /** For IndexReader implementations that use - * TermInfosReader to read terms, this returns the - * current indexDivisor as specified when the reader was - * opened. - */ - public int getTermInfosIndexDivisor() { - throw new UnsupportedOperationException("This reader does not support this method."); - } -}
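
Usage note (not part of the diff above): the deleted IndexReader.java documents a read-only reader workflow: open a reader on a Directory, read stored documents and postings, call reopen() to pick up index changes, and close the reader when done. Below is a minimal sketch against the Lucene 3.4 API described above; the index path, field name, and term value are illustrative, and the IndexReaderUsage class name is made up.

// Hypothetical usage sketch (not from the original file). The path, field
// name and term value below are placeholders.
import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexReaderUsage {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    // readOnly=true: better concurrency, but no deletes or norm changes.
    IndexReader reader = IndexReader.open(dir, true);
    try {
      System.out.println("numDocs=" + reader.numDocs() + " maxDoc=" + reader.maxDoc());

      // Enumerate all documents containing the term id:42.
      TermDocs td = reader.termDocs(new Term("id", "42"));
      try {
        while (td.next()) {
          Document doc = reader.document(td.doc());
          System.out.println(td.doc() + " -> " + doc.get("id"));
        }
      } finally {
        td.close();
      }

      // Cheap refresh: reopen() only returns a new instance if the index changed.
      IndexReader newReader = reader.reopen();
      if (newReader != reader) {
        reader.close();
        reader = newReader;
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
}

Note that reopen() returns the same instance when the index is unchanged, so the old reader is closed only when a different instance comes back, matching the pattern shown in the reopen() javadoc.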