lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexReader.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.Closeable;
  21 import java.io.File;
  22 import java.io.FileOutputStream;
  23 import java.io.IOException;
  24 import java.util.Collection;
  25 import java.util.Map;
  26 import java.util.concurrent.atomic.AtomicInteger;
  27
  28 import org.apache.lucene.document.Document;
  29 import org.apache.lucene.document.FieldSelector;
  30 import org.apache.lucene.search.FieldCache; // javadocs
  31 import org.apache.lucene.search.Similarity;
  32 import org.apache.lucene.store.*;
  33 import org.apache.lucene.util.ArrayUtil;
  34 import org.apache.lucene.util.ReaderUtil;         // for javadocs
  35 import org.apache.lucene.util.VirtualMethod;
  36
  37 /** IndexReader is an abstract class, providing an interface for accessing an
  38  index.  Search of an index is done entirely through this abstract interface,
  39  so that any subclass which implements it is searchable.
  40
  41  <p> Concrete subclasses of IndexReader are usually constructed with a call to
  42  one of the static <code>open()</code> methods, e.g. {@link
  43  #open(Directory, boolean)}.
  44
  45  <p> For efficiency, in this API documents are often referred to via
  46  <i>document numbers</i>, non-negative integers which each name a unique
  47  document in the index.  These document numbers are ephemeral--they may change
  48  as documents are added to and deleted from an index.  Clients should thus not
  49  rely on a given document having the same number between sessions.
  50
  51  <p> An IndexReader can be opened on a directory for which an IndexWriter is
  52  opened already, but it cannot be used to delete documents from the index then.
  53
  54  <p>
  55  <b>NOTE</b>: for backwards API compatibility, several methods are not listed
  56  as abstract, but have no useful implementations in this base class and
  57  instead always throw UnsupportedOperationException.  Subclasses are
  58  strongly encouraged to override these methods, but in many cases may not
  59  need to.
  60  </p>
  61
  62  <p>
  63
  64  <b>NOTE</b>: as of 2.4, it's possible to open a read-only
  65  IndexReader using the static open methods that accept the
  66  boolean readOnly parameter.  Such a reader has better
  67  concurrency as it's not necessary to synchronize on the
  68  isDeleted method.  You must specify false if you want to
  69  make changes with the resulting IndexReader.
  70  </p>
  71
  72  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  73  IndexReader} instances are completely thread
  74  safe, meaning multiple threads can call any of its methods,
  75  concurrently.  If your application requires external
  76  synchronization, you should <b>not</b> synchronize on the
  77  <code>IndexReader</code> instance; use your own
  78  (non-Lucene) objects instead.
  79 */
  80 public abstract class IndexReader implements Cloneable,Closeable {
  81
  82   /**
  83    * A custom listener that's invoked when the IndexReader
  84    * is finished.
  85    *
  86    * <p>For a SegmentReader, this listener is called only
  87    * once all SegmentReaders sharing the same core are
  88    * closed.  At this point it is safe for apps to evict
  89    * this reader from any caches keyed on {@link
  90    * #getCoreCacheKey}.  This is the same interface that
  91    * {@link FieldCache} uses, internally, to evict
  92    * entries.</p>
  93    *
  94    * <p>For other readers, this listener is called when they
  95    * are closed.</p>
  96    *
  97    * @lucene.experimental
  98    */
   99   public static interface ReaderFinishedListener {
          /** Called with the reader that has finished. */
  100     public void finished(IndexReader reader);
  101   }
 102
  103   // Impls must set this if they may call add/removeReaderFinishedListener:
        // both dereference it without a null check, whereas
        // notifyReaderFinishedListeners() tolerates null.
  104   protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
 105
 106   /** Expert: adds a {@link ReaderFinishedListener}.  The
 107    * provided listener is also added to any sub-readers, if
 108    * this is a composite reader.  Also, any reader reopened
 109    * or cloned from this one will also copy the listeners at
 110    * the time of reopen.
 111    *
 112    * @lucene.experimental */
 113   public void addReaderFinishedListener(ReaderFinishedListener listener) {
 114     ensureOpen();
 115     readerFinishedListeners.add(listener);
 116   }
 117
 118   /** Expert: remove a previously added {@link ReaderFinishedListener}.
 119    *
 120    * @lucene.experimental */
 121   public void removeReaderFinishedListener(ReaderFinishedListener listener) {
 122     ensureOpen();
 123     readerFinishedListeners.remove(listener);
 124   }
 125
 126   protected void notifyReaderFinishedListeners() {
 127     // Defensive (should never be null -- all impls must set
 128     // this):
 129     if (readerFinishedListeners != null) {
 130       for(ReaderFinishedListener listener : readerFinishedListeners) {
 131         listener.finished(this);
 132       }
 133     }
 134   }
 135
  136   protected void readerFinished() {
          // Forwards to notifyReaderFinishedListeners(); decRef() calls this
          // only after commit() and doClose() completed without throwing.
  137     notifyReaderFinishedListeners();
  138   }
 139
 140   /**
 141    * Constants describing field properties, for example used for
 142    * {@link IndexReader#getFieldNames(FieldOption)}.
 143    */
 144   public static enum FieldOption {
 145     /** All fields */
 146     ALL,
 147     /** All indexed fields */
 148     INDEXED,
 149     /** All fields that store payloads */
 150     STORES_PAYLOADS,
 151     /** All fields that omit tf */
 152     OMIT_TERM_FREQ_AND_POSITIONS,
 153     /** All fields that omit positions */
 154     OMIT_POSITIONS,
 155     /** All fields which are not indexed */
 156     UNINDEXED,
 157     /** All fields which are indexed with termvectors enabled */
 158     INDEXED_WITH_TERMVECTOR,
 159     /** All fields which are indexed but don't have termvectors enabled */
 160     INDEXED_NO_TERMVECTOR,
 161     /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
 162     TERMVECTOR,
 163     /** All fields with termvectors with position values enabled */
 164     TERMVECTOR_WITH_POSITION,
 165     /** All fields with termvectors with offset values enabled */
 166     TERMVECTOR_WITH_OFFSET,
 167     /** All fields with termvectors with offset values and position values enabled */
 168     TERMVECTOR_WITH_POSITION_OFFSET,
 169   }
 170
  171   private volatile boolean closed; // NOTE(review): not referenced in the visible part of this file -- confirm where it is read/written
  172   protected boolean hasChanges; // when true, toString() prefixes the reader name with '*'
  173
  174   private final AtomicInteger refCount = new AtomicInteger(); // set to 1 by the constructor; ensureOpen() rejects the reader once this is <= 0
  175
  176   static int DEFAULT_TERMS_INDEX_DIVISOR = 1; // divisor passed by the open() overloads that take no termInfosIndexDivisor
 177
 178   /** Expert: returns the current refCount for this reader */
 179   public int getRefCount() {
 180     return refCount.get();
 181   }
 182
 183   /**
 184    * Expert: increments the refCount of this IndexReader
 185    * instance.  RefCounts are used to determine when a
 186    * reader can be closed safely, i.e. as soon as there are
 187    * no more references.  Be sure to always call a
 188    * corresponding {@link #decRef}, in a finally clause;
 189    * otherwise the reader may never be closed.  Note that
 190    * {@link #close} simply calls decRef(), which means that
 191    * the IndexReader will not really be closed until {@link
 192    * #decRef} has been called for all outstanding
 193    * references.
 194    *
 195    * @see #decRef
 196    * @see #tryIncRef
 197    */
 198   public void incRef() {
 199     ensureOpen();
 200     refCount.incrementAndGet();
 201   }
 202
 203   /**
 204    * Expert: increments the refCount of this IndexReader
 205    * instance only if the IndexReader has not been closed yet
 206    * and returns <code>true</code> iff the refCount was
 207    * successfully incremented, otherwise <code>false</code>.
 208    * If this method returns <code>false</code> the reader is either
  209    * already closed or is currently being closed. Either way this
 210    * reader instance shouldn't be used by an application unless
 211    * <code>true</code> is returned.
 212    * <p>
 213    * RefCounts are used to determine when a
 214    * reader can be closed safely, i.e. as soon as there are
 215    * no more references.  Be sure to always call a
 216    * corresponding {@link #decRef}, in a finally clause;
 217    * otherwise the reader may never be closed.  Note that
 218    * {@link #close} simply calls decRef(), which means that
 219    * the IndexReader will not really be closed until {@link
 220    * #decRef} has been called for all outstanding
 221    * references.
 222    *
 223    * @see #decRef
 224    * @see #incRef
 225    */
 226   public boolean tryIncRef() {
 227     int count;
 228     while ((count = refCount.get()) > 0) {
 229       if (refCount.compareAndSet(count, count+1)) {
 230         return true;
 231       }
 232     }
 233     return false;
 234   }
 235
 236   /** {@inheritDoc} */
 237   @Override
 238   public String toString() {
 239     final StringBuilder buffer = new StringBuilder();
 240     if (hasChanges) {
 241       buffer.append('*');
 242     }
 243     buffer.append(getClass().getSimpleName());
 244     buffer.append('(');
 245     final IndexReader[] subReaders = getSequentialSubReaders();
 246     if ((subReaders != null) && (subReaders.length > 0)) {
 247       buffer.append(subReaders[0]);
 248       for (int i = 1; i < subReaders.length; ++i) {
 249         buffer.append(" ").append(subReaders[i]);
 250       }
 251     }
 252     buffer.append(')');
 253     return buffer.toString();
 254   }
 255
 256   /**
 257    * Expert: decreases the refCount of this IndexReader
 258    * instance.  If the refCount drops to 0, then pending
 259    * changes (if any) are committed to the index and this
 260    * reader is closed.  If an exception is hit, the refCount
 261    * is unchanged.
 262    *
 263    * @throws IOException in case an IOException occurs in commit() or doClose()
 264    *
 265    * @see #incRef
 266    */
 267   public void decRef() throws IOException {
 268     ensureOpen();
 269     final int rc = refCount.getAndDecrement();
 270     if (rc == 1) {
 271       boolean success = false;
 272       try {
 273         commit();
 274         doClose();
 275         success = true;
 276       } finally {
 277         if (!success) {
 278           // Put reference back on failure
 279           refCount.incrementAndGet();
 280         }
 281       }
 282       readerFinished();
 283     } else if (rc <= 0) {
 284       throw new IllegalStateException("too many decRef calls: refCount was " + rc + " before decrement");
 285     }
 286   }
 287
  288   protected IndexReader() {
          // A new reader starts with a refCount of 1, so a single decRef()
          // is enough to trigger commit() and doClose().
  289     refCount.set(1);
  290   }
 291
 292   /**
 293    * @throws AlreadyClosedException if this IndexReader is closed
 294    */
 295   protected final void ensureOpen() throws AlreadyClosedException {
 296     if (refCount.get() <= 0) {
 297       throw new AlreadyClosedException("this IndexReader is closed");
 298     }
 299   }
 300
  301   /** Returns an IndexReader reading the index in the given
 302    *  Directory, with readOnly=true.
 303    * @param directory the index directory
 304    * @throws CorruptIndexException if the index is corrupt
 305    * @throws IOException if there is a low-level IO error
 306    */
 307   public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
 308     return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR);
 309   }
 310
 311   /** Returns an IndexReader reading the index in the given
 312    *  Directory.  You should pass readOnly=true, since it
 313    *  gives much better concurrent performance, unless you
 314    *  intend to do write operations (delete documents or
 315    *  change norms) with the reader.
 316    * @param directory the index directory
 317    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 318    * @throws CorruptIndexException if the index is corrupt
 319    * @throws IOException if there is a low-level IO error
 320    */
 321   public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
 322     return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 323   }
 324
 325   /**
 326    * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
 327    *
 328    * @param writer The IndexWriter to open from
 329    * @param applyAllDeletes If true, all buffered deletes will
 330    * be applied (made visible) in the returned reader.  If
 331    * false, the deletes are not applied but remain buffered
 332    * (in IndexWriter) so that they will be applied in the
 333    * future.  Applying deletes can be costly, so if your app
 334    * can tolerate deleted documents being returned you might
 335    * gain some performance by passing false.
 336    * @return The new IndexReader
 337    * @throws CorruptIndexException
 338    * @throws IOException if there is a low-level IO error
 339    *
 340    * @see #openIfChanged(IndexReader,IndexWriter,boolean)
 341    *
 342    * @lucene.experimental
 343    */
 344   public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 345     return writer.getReader(applyAllDeletes);
 346   }
 347
 348   /** Expert: returns an IndexReader reading the index in the given
 349    *  {@link IndexCommit}.  You should pass readOnly=true, since it
 350    *  gives much better concurrent performance, unless you
 351    *  intend to do write operations (delete documents or
 352    *  change norms) with the reader.
 353    * @param commit the commit point to open
 354    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 355    * @throws CorruptIndexException if the index is corrupt
 356    * @throws IOException if there is a low-level IO error
 357    */
 358   public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException {
 359     return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 360   }
 361
 362   /** Expert: returns an IndexReader reading the index in
 363    *  the given Directory, with a custom {@link
 364    *  IndexDeletionPolicy}.  You should pass readOnly=true,
 365    *  since it gives much better concurrent performance,
 366    *  unless you intend to do write operations (delete
 367    *  documents or change norms) with the reader.
 368    * @param directory the index directory
 369    * @param deletionPolicy a custom deletion policy (only used
 370    *  if you use this reader to perform deletes or to set
 371    *  norms); see {@link IndexWriter} for details.
 372    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 373    * @throws CorruptIndexException if the index is corrupt
 374    * @throws IOException if there is a low-level IO error
 375    */
 376   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
 377     return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 378   }
 379
 380   /** Expert: returns an IndexReader reading the index in
 381    *  the given Directory, with a custom {@link
 382    *  IndexDeletionPolicy}.  You should pass readOnly=true,
 383    *  since it gives much better concurrent performance,
 384    *  unless you intend to do write operations (delete
 385    *  documents or change norms) with the reader.
 386    * @param directory the index directory
 387    * @param deletionPolicy a custom deletion policy (only used
 388    *  if you use this reader to perform deletes or to set
 389    *  norms); see {@link IndexWriter} for details.
 390    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 391    * @param termInfosIndexDivisor Subsamples which indexed
 392    *  terms are loaded into RAM. This has the same effect as {@link
 393    *  IndexWriter#setTermIndexInterval} except that setting
 394    *  must be done at indexing time while this setting can be
 395    *  set per reader.  When set to N, then one in every
 396    *  N*termIndexInterval terms in the index is loaded into
 397    *  memory.  By setting this to a value > 1 you can reduce
 398    *  memory usage, at the expense of higher latency when
 399    *  loading a TermInfo.  The default value is 1.  Set this
 400    *  to -1 to skip loading the terms index entirely.
 401    * @throws CorruptIndexException if the index is corrupt
 402    * @throws IOException if there is a low-level IO error
 403    */
 404   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 405     return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
 406   }
 407
 408   /** Expert: returns an IndexReader reading the index in
 409    *  the given Directory, using a specific commit and with
 410    *  a custom {@link IndexDeletionPolicy}.  You should pass
 411    *  readOnly=true, since it gives much better concurrent
 412    *  performance, unless you intend to do write operations
 413    *  (delete documents or change norms) with the reader.
 414    * @param commit the specific {@link IndexCommit} to open;
 415    * see {@link IndexReader#listCommits} to list all commits
 416    * in a directory
 417    * @param deletionPolicy a custom deletion policy (only used
 418    *  if you use this reader to perform deletes or to set
 419    *  norms); see {@link IndexWriter} for details.
 420    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 421    * @throws CorruptIndexException if the index is corrupt
 422    * @throws IOException if there is a low-level IO error
 423    */
 424   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
 425     return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 426   }
 427
 428   /** Expert: returns an IndexReader reading the index in
 429    *  the given Directory, using a specific commit and with
 430    *  a custom {@link IndexDeletionPolicy}.  You should pass
 431    *  readOnly=true, since it gives much better concurrent
 432    *  performance, unless you intend to do write operations
 433    *  (delete documents or change norms) with the reader.
 434    * @param commit the specific {@link IndexCommit} to open;
 435    * see {@link IndexReader#listCommits} to list all commits
 436    * in a directory
 437    * @param deletionPolicy a custom deletion policy (only used
 438    *  if you use this reader to perform deletes or to set
 439    *  norms); see {@link IndexWriter} for details.
 440    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 441    * @param termInfosIndexDivisor Subsamples which indexed
 442    *  terms are loaded into RAM. This has the same effect as {@link
 443    *  IndexWriter#setTermIndexInterval} except that setting
 444    *  must be done at indexing time while this setting can be
 445    *  set per reader.  When set to N, then one in every
 446    *  N*termIndexInterval terms in the index is loaded into
 447    *  memory.  By setting this to a value > 1 you can reduce
 448    *  memory usage, at the expense of higher latency when
 449    *  loading a TermInfo.  The default value is 1.  Set this
 450    *  to -1 to skip loading the terms index entirely. This is only useful in
 451    *  advanced situations when you will only .next() through all terms;
 452    *  attempts to seek will hit an exception.
 453    *
 454    * @throws CorruptIndexException if the index is corrupt
 455    * @throws IOException if there is a low-level IO error
 456    */
 457   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 458     return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
 459   }
 460
 461   private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 462     return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
 463   }
 464
 465   /**
 466    * If the index has changed since the provided reader was
 467    * opened, open and return a new reader; else, return
 468    * null.  The new reader, if not null, will be the same
 469    * type of reader as the previous one, ie an NRT reader
 470    * will open a new NRT reader, a MultiReader will open a
 471    * new MultiReader,  etc.
 472    *
 473    * <p>This method is typically far less costly than opening a
 474    * fully new <code>IndexReader</code> as it shares
 475    * resources (for example sub-readers) with the provided
 476    * <code>IndexReader</code>, when possible.
 477    *
 478    * <p>The provided reader is not closed (you are responsible
 479    * for doing so); if a new reader is returned you also
 480    * must eventually close it.  Be sure to never close a
 481    * reader while other threads are still using it; see
 482    * <code>SearcherManager</code> in
 483    * <code>contrib/misc</code> to simplify managing this.
 484    *
 485    * <p>If a new reader is returned, it's safe to make changes
 486    * (deletions, norms) with it.  All shared mutable state
 487    * with the old reader uses "copy on write" semantics to
 488    * ensure the changes are not seen by other readers.
 489    *
 490    * @throws CorruptIndexException if the index is corrupt
 491    * @throws IOException if there is a low-level IO error
 492    * @return null if there are no changes; else, a new
 493    * IndexReader instance which you must eventually close
 494    */
 495   public static IndexReader openIfChanged(IndexReader oldReader) throws IOException {
 496     if (oldReader.hasNewReopenAPI1) {
 497       final IndexReader newReader = oldReader.doOpenIfChanged();
 498       assert newReader != oldReader;
 499       return newReader;
 500     } else {
 501       final IndexReader newReader = oldReader.reopen();
 502       if (newReader == oldReader) {
 503         return null;
 504       } else {
 505         return newReader;
 506       }
 507     }
 508   }
 509
 510   /**
 511    * If the index has changed since the provided reader was
 512    * opened, open and return a new reader, with the
 513    * specified <code>readOnly</code>; else, return
 514    * null.
 515    *
 516    * @see #openIfChanged(IndexReader)
 517    */
 518   public static IndexReader openIfChanged(IndexReader oldReader, boolean readOnly) throws IOException {
 519     if (oldReader.hasNewReopenAPI2) {
 520       final IndexReader newReader = oldReader.doOpenIfChanged(readOnly);
 521       assert newReader != oldReader;
 522       return newReader;
 523     } else {
 524       final IndexReader newReader = oldReader.reopen(readOnly);
 525       if (newReader == oldReader) {
 526         return null;
 527       } else {
 528         return newReader;
 529       }
 530     }
 531   }
 532
 533   /**
 534    * If the IndexCommit differs from what the
 535    * provided reader is searching, or the provided reader is
 536    * not already read-only, open and return a new
 537    * <code>readOnly=true</code> reader; else, return null.
 538    *
 539    * @see #openIfChanged(IndexReader)
 540    */
 541   // TODO: should you be able to specify readOnly?
 542   public static IndexReader openIfChanged(IndexReader oldReader, IndexCommit commit) throws IOException {
 543     if (oldReader.hasNewReopenAPI3) {
 544       final IndexReader newReader = oldReader.doOpenIfChanged(commit);
 545       assert newReader != oldReader;
 546       return newReader;
 547     } else {
 548       final IndexReader newReader = oldReader.reopen(commit);
 549       if (newReader == oldReader) {
 550         return null;
 551       } else {
 552         return newReader;
 553       }
 554     }
 555   }
 556
 557   /**
  558    * Expert: If there are changes (committed or not) in the
 559    * {@link IndexWriter} versus what the provided reader is
 560    * searching, then open and return a new read-only
 561    * IndexReader searching both committed and uncommitted
 562    * changes from the writer; else, return null (though, the
 563    * current implementation never returns null).
 564    *
 565    * <p>This provides "near real-time" searching, in that
 566    * changes made during an {@link IndexWriter} session can be
 567    * quickly made available for searching without closing
 568    * the writer nor calling {@link #commit}.
 569    *
 570    * <p>It's <i>near</i> real-time because there is no hard
 571    * guarantee on how quickly you can get a new reader after
 572    * making changes with IndexWriter.  You'll have to
 573    * experiment in your situation to determine if it's
 574    * fast enough.  As this is a new and experimental
 575    * feature, please report back on your findings so we can
 576    * learn, improve and iterate.</p>
 577    *
 578    * <p>The very first time this method is called, this
 579    * writer instance will make every effort to pool the
 580    * readers that it opens for doing merges, applying
 581    * deletes, etc.  This means additional resources (RAM,
 582    * file descriptors, CPU time) will be consumed.</p>
 583    *
 584    * <p>For lower latency on reopening a reader, you should
 585    * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
 586    * pre-warm a newly merged segment before it's committed
 587    * to the index.  This is important for minimizing
 588    * index-to-search delay after a large merge.  </p>
 589    *
 590    * <p>If an addIndexes* call is running in another thread,
 591    * then this reader will only search those segments from
 592    * the foreign index that have been successfully copied
 593    * over, so far.</p>
 594    *
 595    * <p><b>NOTE</b>: Once the writer is closed, any
 596    * outstanding readers may continue to be used.  However,
 597    * if you attempt to reopen any of those readers, you'll
 598    * hit an {@link AlreadyClosedException}.</p>
 599    *
 600    * @return IndexReader that covers entire index plus all
 601    * changes made so far by this IndexWriter instance, or
 602    * null if there are no new changes
 603    *
 604    * @param writer The IndexWriter to open from
 605    *
 606    * @param applyAllDeletes If true, all buffered deletes will
 607    * be applied (made visible) in the returned reader.  If
 608    * false, the deletes are not applied but remain buffered
 609    * (in IndexWriter) so that they will be applied in the
 610    * future.  Applying deletes can be costly, so if your app
 611    * can tolerate deleted documents being returned you might
 612    * gain some performance by passing false.
 613    *
 614    * @throws IOException
 615    *
 616    * @lucene.experimental
 617    */
 618   public static IndexReader openIfChanged(IndexReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException {
 619     if (oldReader.hasNewReopenAPI4) {
 620       final IndexReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes);
 621       assert newReader != oldReader;
 622       return newReader;
 623     } else {
 624       final IndexReader newReader = oldReader.reopen(writer, applyAllDeletes);
 625       if (newReader == oldReader) {
 626         return null;
 627       } else {
 628         return newReader;
 629       }
 630     }
 631   }
 632
 633   /**
 634    * Refreshes an IndexReader if the index has changed since this instance
 635    * was (re)opened.
 636    * <p>
 637    * Opening an IndexReader is an expensive operation. This method can be used
 638    * to refresh an existing IndexReader to reduce these costs. This method
 639    * tries to only load segments that have changed or were created after the
 640    * IndexReader was (re)opened.
 641    * <p>
 642    * If the index has not changed since this instance was (re)opened, then this
 643    * call is a NOOP and returns this instance. Otherwise, a new instance is
 644    * returned. The old instance is <b>not</b> closed and remains usable.<br>
 645    * <p>
 646    * If the reader is reopened, even though they share
 647    * resources internally, it's safe to make changes
 648    * (deletions, norms) with the new reader.  All shared
 649    * mutable state obeys "copy on write" semantics to ensure
 650    * the changes are not seen by other readers.
 651    * <p>
 652    * You can determine whether a reader was actually reopened by comparing the
 653    * old instance with the instance returned by this method:
 654    * <pre>
 655    * IndexReader reader = ...
 656    * ...
  657    * IndexReader newReader = reader.reopen();
 658    * if (newReader != reader) {
 659    * ...     // reader was reopened
 660    *   reader.close();
 661    * }
 662    * reader = newReader;
 663    * ...
 664    * </pre>
 665    *
 666    * Be sure to synchronize that code so that other threads,
 667    * if present, can never use reader after it has been
 668    * closed and before it's switched to newReader.
 669    *
 670    * <p><b>NOTE</b>: If this reader is a near real-time
  671    * reader (obtained from {@link IndexWriter#getReader()}),
 672    * reopen() will simply call writer.getReader() again for
 673    * you, though this may change in the future.
 674    *
 675    * @throws CorruptIndexException if the index is corrupt
 676    * @throws IOException if there is a low-level IO error
 677    * @deprecated Use IndexReader#openIfChanged(IndexReader) instead
 678    */
 679   @Deprecated
 680   public IndexReader reopen() throws CorruptIndexException, IOException {
 681     final IndexReader newReader = IndexReader.openIfChanged(this);
 682     if (newReader == null) {
 683       return this;
 684     } else {
 685       return newReader;
 686     }
 687   }
 688
 689   /** Just like {@link #reopen()}, except you can change the
 690    *  readOnly of the original reader.  If the index is
 691    *  unchanged but readOnly is different then a new reader
 692    *  will be returned.
 693    * @deprecated Use
 694    * IndexReader#openIfChanged(IndexReader,boolean) instead */
 695   @Deprecated
 696   public IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
 697     final IndexReader newReader = IndexReader.openIfChanged(this, openReadOnly);
 698     if (newReader == null) {
 699       return this;
 700     } else {
 701       return newReader;
 702     }
 703   }
 704
 705   /** Expert: reopen this reader on a specific commit point.
 706    *  This always returns a readOnly reader.  If the
 707    *  specified commit point matches what this reader is
 708    *  already on, and this reader is already readOnly, then
 709    *  this same instance is returned; if it is not already
 710    *  readOnly, a readOnly clone is returned.
 711    * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexCommit) instead
 712    */
 713   @Deprecated
 714   public IndexReader reopen(IndexCommit commit) throws CorruptIndexException, IOException {
 715     final IndexReader newReader = IndexReader.openIfChanged(this, commit);
 716     if (newReader == null) {
 717       return this;
 718     } else {
 719       return newReader;
 720     }
 721   }
 722
 723   /**
 724    * Expert: returns a readonly reader, covering all
 725    * committed as well as un-committed changes to the index.
 726    * This provides "near real-time" searching, in that
 727    * changes made during an IndexWriter session can be
 728    * quickly made available for searching without closing
 729    * the writer nor calling {@link #commit}.
 730    *
 731    * <p>Note that this is functionally equivalent to calling
 732    * {#flush} (an internal IndexWriter operation) and then using {@link IndexReader#open} to
 733    * open a new reader.  But the turnaround time of this
 734    * method should be faster since it avoids the potentially
 735    * costly {@link #commit}.</p>
 736    *
 737    * <p>You must close the {@link IndexReader} returned by
 738    * this method once you are done using it.</p>
 739    *
 740    * <p>It's <i>near</i> real-time because there is no hard
 741    * guarantee on how quickly you can get a new reader after
 742    * making changes with IndexWriter.  You'll have to
 743    * experiment in your situation to determine if it's
 744    * fast enough.  As this is a new and experimental
 745    * feature, please report back on your findings so we can
 746    * learn, improve and iterate.</p>
 747    *
 748    * <p>The resulting reader supports {@link
 749    * IndexReader#reopen}, but that call will simply forward
 750    * back to this method (though this may change in the
 751    * future).</p>
 752    *
 753    * <p>The very first time this method is called, this
 754    * writer instance will make every effort to pool the
 755    * readers that it opens for doing merges, applying
 756    * deletes, etc.  This means additional resources (RAM,
 757    * file descriptors, CPU time) will be consumed.</p>
 758    *
 759    * <p>For lower latency on reopening a reader, you should
 760    * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
 761    * pre-warm a newly merged segment before it's committed
 762    * to the index.  This is important for minimizing
 763    * index-to-search delay after a large merge.  </p>
 764    *
 765    * <p>If an addIndexes* call is running in another thread,
 766    * then this reader will only search those segments from
 767    * the foreign index that have been successfully copied
 768    * over, so far</p>.
 769    *
 770    * <p><b>NOTE</b>: Once the writer is closed, any
 771    * outstanding readers may continue to be used.  However,
 772    * if you attempt to reopen any of those readers, you'll
 773    * hit an {@link AlreadyClosedException}.</p>
 774    *
 775    * @return IndexReader that covers entire index plus all
 776    * changes made so far by this IndexWriter instance
 777    *
 778    * @param writer The IndexWriter to open from
 779    * @param applyAllDeletes If true, all buffered deletes will
 780    * be applied (made visible) in the returned reader.  If
 781    * false, the deletes are not applied but remain buffered
 782    * (in IndexWriter) so that they will be applied in the
 783    * future.  Applying deletes can be costly, so if your app
 784    * can tolerate deleted documents being returned you might
 785    * gain some performance by passing false.
 786    *
 787    * @throws IOException
 788    *
 789    * @lucene.experimental
 790    * @deprecated Use IndexReader#openIfChanged(IndexReader,IndexReader,boolean) instead
 791    */
 792   @Deprecated
 793   public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 794     final IndexReader newReader = IndexReader.openIfChanged(this, writer, applyAllDeletes);
 795     if (newReader == null) {
 796       return this;
 797     } else {
 798       return newReader;
 799     }
 800   }
 801
 802   /**
 803    * If the index has changed since it was opened, open and return a new reader;
 804    * else, return {@code null}.
 805    *
 806    * @see #openIfChanged(IndexReader)
 807    */
 808   protected IndexReader doOpenIfChanged() throws CorruptIndexException, IOException {
 809     throw new UnsupportedOperationException("This reader does not support reopen().");
 810   }
 811
 812   /**
 813    * If the index has changed since it was opened, open and return a new reader;
 814    * else, return {@code null}.
 815    *
 816    * @see #openIfChanged(IndexReader, boolean)
 817    */
 818   protected IndexReader doOpenIfChanged(boolean openReadOnly) throws CorruptIndexException, IOException {
 819     throw new UnsupportedOperationException("This reader does not support reopen().");
 820   }
 821
 822   /**
 823    * If the index has changed since it was opened, open and return a new reader;
 824    * else, return {@code null}.
 825    *
 826    * @see #openIfChanged(IndexReader, IndexCommit)
 827    */
 828   protected IndexReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
 829     throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit).");
 830   }
 831
 832   /**
 833    * If the index has changed since it was opened, open and return a new reader;
 834    * else, return {@code null}.
 835    *
 836    * @see #openIfChanged(IndexReader, IndexWriter, boolean)
 837    */
 838   protected IndexReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 839     return writer.getReader(applyAllDeletes);
 840   }
 841
 842   /**
 843    * Efficiently clones the IndexReader (sharing most
 844    * internal state).
 845    * <p>
 846    * On cloning a reader with pending changes (deletions,
 847    * norms), the original reader transfers its write lock to
 848    * the cloned reader.  This means only the cloned reader
 849    * may make further changes to the index, and commit the
 850    * changes to the index on close, but the old reader still
 851    * reflects all changes made up until it was cloned.
 852    * <p>
 853    * Like {@link #openIfChanged(IndexReader)}, it's safe to make changes to
 854    * either the original or the cloned reader: all shared
 855    * mutable state obeys "copy on write" semantics to ensure
 856    * the changes are not seen by other readers.
 857    * <p>
 858    */
 859   @Override
 860   public synchronized Object clone() {
 861     throw new UnsupportedOperationException("This reader does not implement clone()");
 862   }
 863
 864   /**
 865    * Clones the IndexReader and optionally changes readOnly.  A readOnly
 866    * reader cannot open a writeable reader.
 867    * @throws CorruptIndexException if the index is corrupt
 868    * @throws IOException if there is a low-level IO error
 869    */
 870   public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
 871     throw new UnsupportedOperationException("This reader does not implement clone()");
 872   }
 873
 874   /**
 875    * Returns the directory associated with this index.  The Default
 876    * implementation returns the directory specified by subclasses when
 877    * delegating to the IndexReader(Directory) constructor, or throws an
 878    * UnsupportedOperationException if one was not specified.
 879    * @throws UnsupportedOperationException if no directory
 880    */
 881   public Directory directory() {
 882     ensureOpen();
 883     throw new UnsupportedOperationException("This reader does not support this method.");
 884   }
 885
 886   /**
 887    * Returns the time the index in the named directory was last modified.
 888    * Do not use this to check whether the reader is still up-to-date, use
 889    * {@link #isCurrent()} instead.
 890    * @throws CorruptIndexException if the index is corrupt
 891    * @throws IOException if there is a low-level IO error
 892    */
 893   public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
 894     return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
 895         @Override
 896         public Object doBody(String segmentFileName) throws IOException {
 897           return Long.valueOf(directory2.fileModified(segmentFileName));
 898         }
 899       }.run()).longValue();
 900   }
 901
 902   /**
 903    * Reads version number from segments files. The version number is
 904    * initialized with a timestamp and then increased by one for each change of
 905    * the index.
 906    *
 907    * @param directory where the index resides.
 908    * @return version number.
 909    * @throws CorruptIndexException if the index is corrupt
 910    * @throws IOException if there is a low-level IO error
 911    */
 912   public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
 913     return SegmentInfos.readCurrentVersion(directory);
 914   }
 915
 916   /**
 917    * Reads commitUserData, previously passed to {@link
 918    * IndexWriter#commit(Map)}, from current index
 919    * segments file.  This will return null if {@link
 920    * IndexWriter#commit(Map)} has never been called for
 921    * this index.
 922    *
 923    * @param directory where the index resides.
 924    * @return commit userData.
 925    * @throws CorruptIndexException if the index is corrupt
 926    * @throws IOException if there is a low-level IO error
 927    *
 928    * @see #getCommitUserData()
 929    */
 930   public static Map<String,String> getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
 931     return SegmentInfos.readCurrentUserData(directory);
 932   }
 933
 934   /**
 935    * Version number when this IndexReader was opened. Not
 936    * implemented in the IndexReader base class.
 937    *
 938    * <p>If this reader is based on a Directory (ie, was
 939    * created by calling {@link #open}, or {@link #openIfChanged} on
 940    * a reader based on a Directory), then this method
 941    * returns the version recorded in the commit that the
 942    * reader opened.  This version is advanced every time
 943    * {@link IndexWriter#commit} is called.</p>
 944    *
 945    * <p>If instead this reader is a near real-time reader
 946    * (ie, obtained by a call to {@link
 947    * IndexWriter#getReader}, or by calling {@link #openIfChanged}
 948    * on a near real-time reader), then this method returns
 949    * the version of the last commit done by the writer.
 950    * Note that even as further changes are made with the
 951    * writer, the version will not changed until a commit is
 952    * completed.  Thus, you should not rely on this method to
 953    * determine when a near real-time reader should be
 954    * opened.  Use {@link #isCurrent} instead.</p>
 955    *
 956    * @throws UnsupportedOperationException unless overridden in subclass
 957    */
 958   public long getVersion() {
 959     throw new UnsupportedOperationException("This reader does not support this method.");
 960   }
 961
 962   /**
 963    * Retrieve the String userData optionally passed to
 964    * IndexWriter#commit.  This will return null if {@link
 965    * IndexWriter#commit(Map)} has never been called for
 966    * this index.
 967    *
 968    * @see #getCommitUserData(Directory)
 969    */
 970   public Map<String,String> getCommitUserData() {
 971     throw new UnsupportedOperationException("This reader does not support this method.");
 972   }
 973
 974
 975   /**
 976    * Check whether any new changes have occurred to the
 977    * index since this reader was opened.
 978    *
 979    * <p>If this reader is based on a Directory (ie, was
 980    * created by calling {@link #open}, or {@link #openIfChanged} on
 981    * a reader based on a Directory), then this method checks
 982    * if any further commits (see {@link IndexWriter#commit}
 983    * have occurred in that directory).</p>
 984    *
 985    * <p>If instead this reader is a near real-time reader
 986    * (ie, obtained by a call to {@link
 987    * IndexWriter#getReader}, or by calling {@link #openIfChanged}
 988    * on a near real-time reader), then this method checks if
 989    * either a new commmit has occurred, or any new
 990    * uncommitted changes have taken place via the writer.
 991    * Note that even if the writer has only performed
 992    * merging, this method will still return false.</p>
 993    *
 994    * <p>In any event, if this returns false, you should call
 995    * {@link #openIfChanged} to get a new reader that sees the
 996    * changes.</p>
 997    *
 998    * @throws CorruptIndexException if the index is corrupt
 999    * @throws IOException           if there is a low-level IO error
1000    * @throws UnsupportedOperationException unless overridden in subclass
1001    */
1002   public boolean isCurrent() throws CorruptIndexException, IOException {
1003     throw new UnsupportedOperationException("This reader does not support this method.");
1004   }
1005
1006   /** @deprecated Check segment count using {@link
1007    *  #getSequentialSubReaders} instead. */
1008   @Deprecated
1009   public boolean isOptimized() {
1010     throw new UnsupportedOperationException("This reader does not support this method.");
1011   }
1012
  /**
   * Returns an array of term frequency vectors for the specified document.
   * The array contains a vector for each vectorized field in the document.
   * Each vector contains terms and frequencies for all terms in a given
   * vectorized field.  The term vectors that are returned may either be of
   * type {@link TermFreqVector} or of type {@link TermPositionVector} if
   * positions or offsets have been stored.
   *
   * @param docNumber document for which term frequency vectors are returned
   * @return array of term frequency vectors, or <code>null</code> if no term
   *  vectors have been stored for the specified document.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException;
1030
1031
  /**
   * Returns a term frequency vector for the specified document and field. The
   * returned vector contains terms and frequencies for the terms in
   * the specified field of this document, if the field had the storeTermVector
   * flag set. If term vectors had been stored with positions or offsets, a
   * {@link TermPositionVector} is returned.
   *
   * @param docNumber document for which the term frequency vector is returned
   * @param field field for which the term frequency vector is returned.
   * @return term frequency vector; may be <code>null</code> if the field does
   * not exist in the specified document or the term vector was not stored.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException;
1048
  /**
   * Loads the term vector of one field of one document into a user-defined
   * data structure, instead of relying on the parallel arrays of the
   * {@link TermFreqVector}.
   *
   * @param docNumber The number of the document to load the vector for
   * @param field The name of the field to load
   * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
   * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
   */
  abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
1059
  /**
   * Maps all the term vectors for all fields in a document through the given
   * {@link TermVectorMapper}.
   *
   * @param docNumber The number of the document to load the vector for
   * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
   * @throws IOException if term vectors cannot be accessed or if they do not exist for the specified document.
   */
  abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
1067
1068   /**
1069    * Returns <code>true</code> if an index exists at the specified directory.
1070    * @param  directory the directory to check for an index
1071    * @return <code>true</code> if an index exists; <code>false</code> otherwise
1072    * @throws IOException if there is a problem with accessing the index
1073    */
1074   public static boolean indexExists(Directory directory) throws IOException {
1075     try {
1076       new SegmentInfos().read(directory);
1077       return true;
1078     } catch (IOException ioe) {
1079       return false;
1080     }
1081   }
1082
  /**
   * Returns the number of documents in this index.
   *
   * @see #maxDoc()
   * @see #numDeletedDocs()
   */
  public abstract int numDocs();
1085
  /**
   * Returns one greater than the largest possible document number.
   * This may be used to, e.g., determine how big to allocate an array which
   * will have an element for every document number in an index.
   *
   * @see #numDocs()
   */
  public abstract int maxDoc();
1091
1092   /** Returns the number of deleted documents. */
1093   public int numDeletedDocs() {
1094     return maxDoc() - numDocs();
1095   }
1096
1097   /**
1098    * Returns the stored fields of the <code>n</code><sup>th</sup>
1099    * <code>Document</code> in this index.
1100    * <p>
1101    * <b>NOTE:</b> for performance reasons, this method does not check if the
1102    * requested document is deleted, and therefore asking for a deleted document
1103    * may yield unspecified results. Usually this is not required, however you
1104    * can call {@link #isDeleted(int)} with the requested document ID to verify
1105    * the document is not deleted.
1106    *
1107    * @throws CorruptIndexException if the index is corrupt
1108    * @throws IOException if there is a low-level IO error
1109    */
1110   public Document document(int n) throws CorruptIndexException, IOException {
1111     ensureOpen();
1112     if (n < 0 || n >= maxDoc()) {
1113       throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")");
1114     }
1115     return document(n, null);
1116   }
1117
  /**
   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
   * <sup>th</sup> position. The {@link FieldSelector} may be used to determine
   * what {@link org.apache.lucene.document.Field}s to load and how they should
   * be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
   * <code>FieldsReader</code>) is closed before the lazy
   * {@link org.apache.lucene.document.Field} is loaded, an exception may be
   * thrown. If you want the value of a lazy
   * {@link org.apache.lucene.document.Field} to be available after closing, you
   * must explicitly load it or fetch the Document again with a new loader.
   * <p>
   * <b>NOTE:</b> for performance reasons, this method does not check if the
   * requested document is deleted, and therefore asking for a deleted document
   * may yield unspecified results. Usually this is not required, however you
   * can call {@link #isDeleted(int)} with the requested document ID to verify
   * the document is not deleted.
   *
   * @param n Get the document at the <code>n</code><sup>th</sup> position
   * @param fieldSelector The {@link FieldSelector} to use to determine what
   *        Fields should be loaded on the Document. May be null, in which case
   *        all Fields will be loaded.
   * @return The stored fields of the
   *         {@link org.apache.lucene.document.Document} at the nth position
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  // TODO (1.5): When we convert to JDK 1.5 make this Set<String>
  public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
1150
  /**
   * Returns true if document <i>n</i> has been deleted.
   *
   * @param n the document number to check
   */
  public abstract boolean isDeleted(int n);
1153
  /**
   * Returns true if any documents have been deleted.
   *
   * @see #isDeleted(int)
   */
  public abstract boolean hasDeletions();
1156
1157   /** Returns true if there are norms stored for this field. */
1158   public boolean hasNorms(String field) throws IOException {
1159     // backward compatible implementation.
1160     // SegmentReader has an efficient implementation.
1161     ensureOpen();
1162     return norms(field) != null;
1163   }
1164
  /** Returns the byte-encoded normalization factor for the named field of
   *  every document.  This is used by the search code to score documents.
   *  Returns null if norms were not indexed for this field.
   *
   * @param field the name of the field whose norms are requested
   * @return the norms of every document for the field, or null if norms
   *  were not indexed for this field
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract byte[] norms(String field) throws IOException;
1172
  /** Reads the byte-encoded normalization factor for the named field of every
   *  document.  This is used by the search code to score documents.
   *
   * @param field the name of the field whose norms are read
   * @param bytes the array that receives the norms
   * @param offset presumably the index in <code>bytes</code> at which
   *  writing starts (not established by this declaration; confirm against
   *  implementations)
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract void norms(String field, byte[] bytes, int offset)
    throws IOException;
1180
  /** Expert: Resets the normalization factor for the named field of the named
   * document.  The norm represents the product of the field's {@link
   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
   * int) length normalization}.  Thus, to preserve the length normalization
   * values when resetting this, one should base the new value upon the old.
   *
   * <b>NOTE:</b> If this field does not index norms, then
   * this method throws {@link IllegalStateException}.
   *
   * <p>The actual update is delegated to
   * {@link #doSetNorm(int, String, byte)}.
   *
   * @param doc the number of the document whose norm is reset
   * @param field the name of the field whose norm is reset
   * @param value the new byte-encoded norm
   * @see #norms(String)
   * @see Similarity#decodeNormValue(byte)
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   * @throws IllegalStateException if the field does not index norms
   */
  public synchronized  void setNorm(int doc, String field, byte value)
          throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    // A no-op in this base class; subclasses that require a write lock
    // for index modifications override it.
    acquireWriteLock();
    // The flag is raised before delegating, so it stays set even if
    // doSetNorm throws.
    hasChanges = true;
    doSetNorm(doc, field, value);
  }
1208
  /**
   * Implements setNorm in subclass.  Called by
   * {@link #setNorm(int, String, byte)} after the write lock has been
   * requested via {@link #acquireWriteLock()}.
   *
   * @param doc the number of the document whose norm is reset
   * @param field the name of the field whose norm is reset
   * @param value the new byte-encoded norm
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  protected abstract void doSetNorm(int doc, String field, byte value)
          throws CorruptIndexException, IOException;
1212
1213   /** Expert: Resets the normalization factor for the named field of the named
1214    * document.
1215    *
1216    * @see #norms(String)
1217    * @see Similarity#decodeNormValue(byte)
1218    *
1219    * @throws StaleReaderException if the index has changed
1220    *  since this reader was opened
1221    * @throws CorruptIndexException if the index is corrupt
1222    * @throws LockObtainFailedException if another writer
1223    *  has this index open (<code>write.lock</code> could not
1224    *  be obtained)
1225    * @throws IOException if there is a low-level IO error
1226    * @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the
1227    * float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}.
1228    * This method will be removed in Lucene 4.0
1229    */
1230   @Deprecated
1231   public void setNorm(int doc, String field, float value)
1232           throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1233     ensureOpen();
1234     setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
1235   }
1236
  /** Returns an enumeration of all the terms in the index. The
   * enumeration is ordered by Term.compareTo(). Each term is greater
   * than all that precede it in the enumeration. Note that after
   * calling terms(), {@link TermEnum#next()} must be called
   * on the resulting enumeration before calling other methods such as
   * {@link TermEnum#term()}.
   * @return an enumeration over all terms, initially unpositioned (see above)
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms() throws IOException;
1246
  /** Returns an enumeration of all terms starting at a given term. If
   * the given term does not exist, the enumeration is positioned at the
   * first term greater than the supplied term. The enumeration is
   * ordered by Term.compareTo(). Each term is greater than all that
   * precede it in the enumeration.
   * @param t the term at which the enumeration starts
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms(Term t) throws IOException;
1255
  /** Returns the number of documents containing the term <code>t</code>.
   * @param t the term to look up
   * @throws IOException if there is a low-level IO error
   */
  public abstract int docFreq(Term t) throws IOException;
1260
1261   /** Returns an enumeration of all the documents which contain
1262    * <code>term</code>. For each document, the document number, the frequency of
1263    * the term in that document is also provided, for use in
1264    * search scoring.  If term is null, then all non-deleted
1265    * docs are returned with freq=1.
1266    * Thus, this method implements the mapping:
1267    * <p><ul>
1268    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
1269    * </ul>
1270    * <p>The enumeration is ordered by document number.  Each document number
1271    * is greater than all that precede it in the enumeration.
1272    * @throws IOException if there is a low-level IO error
1273    */
1274   public TermDocs termDocs(Term term) throws IOException {
1275     ensureOpen();
1276     TermDocs termDocs = termDocs();
1277     termDocs.seek(term);
1278     return termDocs;
1279   }
1280
  /** Returns an unpositioned {@link TermDocs} enumerator.
   * <p>
   * Note: the TermDocs returned is unpositioned. Before using it, ensure
   * that you first position it with {@link TermDocs#seek(Term)} or
   * {@link TermDocs#seek(TermEnum)}.
   *
   * @return a {@link TermDocs} that has not yet been positioned on any term
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermDocs termDocs() throws IOException;
1290
1291   /** Returns an enumeration of all the documents which contain
1292    * <code>term</code>.  For each document, in addition to the document number
1293    * and frequency of the term in that document, a list of all of the ordinal
1294    * positions of the term in the document is available.  Thus, this method
1295    * implements the mapping:
1296    *
1297    * <p><ul>
1298    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
1299    * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
1300    * pos<sub>freq-1</sub>&gt;
1301    * &gt;<sup>*</sup>
1302    * </ul>
1303    * <p> This positional information facilitates phrase and proximity searching.
1304    * <p>The enumeration is ordered by document number.  Each document number is
1305    * greater than all that precede it in the enumeration.
1306    * @throws IOException if there is a low-level IO error
1307    */
1308   public TermPositions termPositions(Term term) throws IOException {
1309     ensureOpen();
1310     TermPositions termPositions = termPositions();
1311     termPositions.seek(term);
1312     return termPositions;
1313   }
1314
  /** Returns an unpositioned {@link TermPositions} enumerator.
   * @return a {@link TermPositions} that has not yet been positioned on any term
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermPositions termPositions() throws IOException;
1319
1320
1321
  /** Deletes the document numbered <code>docNum</code>.  Once a document is
   * deleted it will not appear in TermDocs or TermPositions enumerations.
   * Attempts to read its field with the {@link #document}
   * method will result in an error.  The presence of this document may still be
   * reflected in the {@link #docFreq} statistic, though
   * this will be corrected eventually as the index is further modified.
   *
   * <p>The actual deletion is delegated to {@link #doDelete(int)}.
   *
   * @param docNum the number of the document to delete
   * @throws StaleReaderException if the index has changed
   * since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    // A no-op in this base class; subclasses that require a write lock
    // for index modifications override it.
    acquireWriteLock();
    // The flag is raised before delegating, so it stays set even if
    // doDelete throws.
    hasChanges = true;
    doDelete(docNum);
  }
1343
1344
  /** Implements deletion of the document numbered <code>docNum</code>.
   * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
   *
   * @param docNum the number of the document to delete
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException;
1349
1350
1351   /** Deletes all documents that have a given <code>term</code> indexed.
1352    * This is useful if one uses a document field to hold a unique ID string for
1353    * the document.  Then to delete such a document, one merely constructs a
1354    * term with the appropriate field and the unique ID string as its text and
1355    * passes it to this method.
1356    * See {@link #deleteDocument(int)} for information about when this deletion will
1357    * become effective.
1358    *
1359    * @return the number of documents deleted
1360    * @throws StaleReaderException if the index has changed
1361    *  since this reader was opened
1362    * @throws CorruptIndexException if the index is corrupt
1363    * @throws LockObtainFailedException if another writer
1364    *  has this index open (<code>write.lock</code> could not
1365    *  be obtained)
1366    * @throws IOException if there is a low-level IO error
1367    */
1368   public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1369     ensureOpen();
1370     TermDocs docs = termDocs(term);
1371     if (docs == null) return 0;
1372     int n = 0;
1373     try {
1374       while (docs.next()) {
1375         deleteDocument(docs.doc());
1376         n++;
1377       }
1378     } finally {
1379       docs.close();
1380     }
1381     return n;
1382   }
1383
  /** Undeletes all documents currently marked as deleted in
   * this index.
   *
   * <p>NOTE: this method can only recover documents marked
   * for deletion but not yet removed from the index; when
   * and how Lucene removes deleted documents is an
   * implementation detail, subject to change from release
   * to release.  However, you can use {@link
   * #numDeletedDocs} on the current IndexReader instance to
   * see how many documents will be un-deleted.
   *
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    // Check the reader's open state before touching anything.
    ensureOpen();
    // Hook for subclasses that need the write lock; the default implementation is a no-op.
    acquireWriteLock();
    // Mark the reader as modified before the subclass performs the actual un-delete.
    hasChanges = true;
    doUndeleteAll();
  }
1409
  /** Implements the actual work of {@link #undeleteAll()} in the subclass.
   * {@link #undeleteAll()} invokes this hook after {@link #acquireWriteLock()}
   * and after flagging the reader as changed.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  protected abstract void doUndeleteAll() throws CorruptIndexException, IOException;
1412
  /** Does nothing by default. Subclasses that require a write lock for
   *  index modifications must override this method.  It is invoked by
   *  {@link #deleteDocument(int)} and {@link #undeleteAll()} before the
   *  modification is applied.
   *
   *  @throws IOException declared for overriding implementations; this
   *   default implementation never throws
   */
  protected synchronized void acquireWriteLock() throws IOException {
    /* NOOP */
  }
1418
  /**
   * Verifies that this reader is still open and then commits any pending
   * changes by calling {@link #commit()}.
   *
   * @throws IOException if there is a low-level IO error
   */
  public final synchronized void flush() throws IOException {
    ensureOpen();
    commit();
  }
1427
  /**
   * Verifies that this reader is still open and then commits any pending
   * changes, together with the given user data, by calling
   * {@link #commit(Map)}.
   *
   * @param commitUserData Opaque Map (String -> String)
   *  that's recorded into the segments file in the index,
   *  and retrievable by {@link
   *  IndexReader#getCommitUserData}.
   * @throws IOException if there is a low-level IO error
   */
  public final synchronized void flush(Map<String, String> commitUserData) throws IOException {
    ensureOpen();
    commit(commitUserData);
  }
1439
  /**
   * Commit changes resulting from delete, undeleteAll, or
   * setNorm operations.  Equivalent to calling
   * {@link #commit(Map)} with <code>null</code> user data.
   *
   * If an exception is hit, then either no changes or all
   * changes will have been committed to the index
   * (transactional semantics).
   * @throws IOException if there is a low-level IO error
   */
  protected final synchronized void commit() throws IOException {
    commit(null);
  }
1452
  /**
   * Commit changes resulting from delete, undeleteAll, or
   * setNorm operations.
   *
   * If an exception is hit, then either no changes or all
   * changes will have been committed to the index
   * (transactional semantics).
   * @param commitUserData user data handed unchanged to
   *  {@link #doCommit(Map)}; {@link #commit()} passes <code>null</code>
   * @throws IOException if there is a low-level IO error
   */
  public final synchronized void commit(Map<String, String> commitUserData) throws IOException {
    // Don't call ensureOpen since we commit() on close
    doCommit(commitUserData);
    // Only reached when doCommit returned normally; on an exception the flag stays set.
    hasChanges = false;
  }
1467
  /** Implements commit.  Invoked by {@link #commit(Map)}, which clears the
   *  reader's pending-changes flag once this method returns normally.
   *
   *  @param commitUserData user data supplied by the caller of
   *   {@link #commit(Map)}; may be <code>null</code>
   *  @throws IOException if there is a low-level IO error
   */
  protected abstract void doCommit(Map<String, String> commitUserData) throws IOException;
1470
1471   /**
1472    * Closes files associated with this index.
1473    * Also saves any new deletions to disk.
1474    * No other methods should be called after this has been called.
1475    * @throws IOException if there is a low-level IO error
1476    */
1477   public final synchronized void close() throws IOException {
1478     if (!closed) {
1479       decRef();
1480       closed = true;
1481     }
1482   }
1483
  /** Implements close.  Subclasses release their resources here.
   *
   *  @throws IOException if there is a low-level IO error
   */
  protected abstract void doClose() throws IOException;
1486
1487
  /**
   * Gets the unique field names that exist in this index and have the specified
   * field option information.
   * @param fldOption specifies which field option should be available for the returned fields
   * @return Collection of Strings indicating the names of the fields.
   * @see IndexReader.FieldOption
   */
  public abstract Collection<String> getFieldNames(FieldOption fldOption);
1496
  /**
   * Expert: return the IndexCommit that this reader has
   * opened.  This method is only implemented by those
   * readers that correspond to a Directory with its own
   * segments_N file.
   *
   * @throws UnsupportedOperationException always, in this base
   *  implementation
   * @lucene.experimental
   */
  public IndexCommit getIndexCommit() throws IOException {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
1508
1509   /**
1510    * Prints the filename and size of each file within a given compound file.
1511    * Add the -extract flag to extract files to the current working directory.
1512    * In order to make the extracted version of the index work, you have to copy
1513    * the segments file from the compound index into the directory where the extracted files are stored.
1514    * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
1515    */
1516   public static void main(String [] args) {
1517     String filename = null;
1518     boolean extract = false;
1519
1520     for (int i = 0; i < args.length; ++i) {
1521       if (args[i].equals("-extract")) {
1522         extract = true;
1523       } else if (filename == null) {
1524         filename = args[i];
1525       }
1526     }
1527
1528     if (filename == null) {
1529       System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
1530       return;
1531     }
1532
1533     Directory dir = null;
1534     CompoundFileReader cfr = null;
1535
1536     try {
1537       File file = new File(filename);
1538       String dirname = file.getAbsoluteFile().getParent();
1539       filename = file.getName();
1540       dir = FSDirectory.open(new File(dirname));
1541       cfr = new CompoundFileReader(dir, filename);
1542
1543       String [] files = cfr.listAll();
1544       ArrayUtil.mergeSort(files);   // sort the array of filename so that the output is more readable
1545
1546       for (int i = 0; i < files.length; ++i) {
1547         long len = cfr.fileLength(files[i]);
1548
1549         if (extract) {
1550           System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
1551           IndexInput ii = cfr.openInput(files[i]);
1552
1553           FileOutputStream f = new FileOutputStream(files[i]);
1554
1555           // read and write with a small buffer, which is more effective than reading byte by byte
1556           byte[] buffer = new byte[1024];
1557           int chunk = buffer.length;
1558           while(len > 0) {
1559             final int bufLen = (int) Math.min(chunk, len);
1560             ii.readBytes(buffer, 0, bufLen);
1561             f.write(buffer, 0, bufLen);
1562             len -= bufLen;
1563           }
1564
1565           f.close();
1566           ii.close();
1567         }
1568         else
1569           System.out.println(files[i] + ": " + len + " bytes");
1570       }
1571     } catch (IOException ioe) {
1572       ioe.printStackTrace();
1573     }
1574     finally {
1575       try {
1576         if (dir != null)
1577           dir.close();
1578         if (cfr != null)
1579           cfr.close();
1580       }
1581       catch (IOException ioe) {
1582         ioe.printStackTrace();
1583       }
1584     }
1585   }
1586
  /** Returns all commit points that exist in the Directory.
   *  Normally, because the default is {@link
   *  KeepOnlyLastCommitDeletionPolicy}, there would be only
   *  one commit point.  But if you're using a custom {@link
   *  IndexDeletionPolicy} then there could be many commits.
   *  Once you have a given commit, you can open a reader on
   *  it by calling {@link IndexReader#open(IndexCommit,boolean)}.
   *  There must be at least one commit in
   *  the Directory, else this method throws {@link
   *  IndexNotFoundException}.  Note that if a commit is in
   *  progress while this method is running, that commit
   *  may or may not be returned.
   *
   *  <p>This method delegates to <code>DirectoryReader.listCommits(dir)</code>.
   *
   *  @param dir the Directory whose commit points are listed
   *  @return a sorted list of {@link IndexCommit}s, from oldest
   *  to latest.
   *  @throws IOException if there is a low-level IO error */
  public static Collection<IndexCommit> listCommits(Directory dir) throws IOException {
    return DirectoryReader.listCommits(dir);
  }
1605
  /** Expert: returns the sequential sub readers that this
   *  reader is logically composed of.  For example,
   *  IndexSearcher uses this API to drive searching by one
   *  sub reader at a time.  If this reader is not composed
   *  of sequential child readers, it should return null.
   *  If this method returns an empty array, that means this
   *  reader is a null reader (for example a MultiReader
   *  that has no sub readers).
   *  <p>
   *  This base implementation checks that the reader is open
   *  and returns null.
   *  <p>
   *  NOTE: You should not try using sub-readers returned by
   *  this method to make any changes (setNorm, deleteDocument,
   *  etc.). While this might succeed for one composite reader
   *  (like MultiReader), it will most likely lead to index
   *  corruption for other readers (like DirectoryReader obtained
   *  through {@link #open}). Use the parent reader directly. */
  public IndexReader[] getSequentialSubReaders() {
    ensureOpen();
    return null;
  }
1625
  /** Expert: returns the key object for this reader's core cache
   *  entries.  This base implementation returns the reader itself. */
  public Object getCoreCacheKey() {
    // Don't call ensureOpen since FC calls this (to evict)
    // on close
    return this;
  }
1632
  /** Expert.  Warning: this returns null if the reader has
   *  no deletions.
   *  <p>
   *  NOTE(review): this base implementation always returns the reader
   *  itself; the null case presumably applies to overriding
   *  subclasses -- confirm against them. */
  public Object getDeletesCacheKey() {
    return this;
  }
1638
  /** Returns the number of unique terms (across all fields)
   *  in this reader.
   *
   *  This method returns long, even though internally
   *  Lucene cannot handle more than 2^31 unique terms, for
   *  a possible future when this limitation is removed.
   *
   *  This base implementation always throws
   *  UnsupportedOperationException.
   *
   *  @return the number of unique terms in this reader
   *  @throws UnsupportedOperationException if this count
   *  cannot be easily determined (eg Multi*Readers).
   *  Instead, you should call {@link
   *  #getSequentialSubReaders} and ask each sub reader for
   *  its unique term count. */
  public long getUniqueTermCount() throws IOException {
    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
  }
1654
  // The four groups below pair each deprecated reopen(...) variant with its
  // doOpenIfChanged(...) counterpart.  Every hasNewReopenAPIn flag is computed
  // once per instance from the runtime class (getClass()).
  // NOTE(review): a result >= 0 from compareImplementationDistance presumably
  // means the subclass overrides doOpenIfChanged at least as specifically as
  // the matching reopen variant -- confirm against VirtualMethod's documentation.

  // Back compat for reopen()
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod1 =
    new VirtualMethod<IndexReader>(IndexReader.class, "reopen");
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod1 =
    new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged");
  @Deprecated
  private final boolean hasNewReopenAPI1 =
    VirtualMethod.compareImplementationDistance(getClass(),
        doOpenIfChangedMethod1, reopenMethod1) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(boolean openReadOnly)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod2 =
    new VirtualMethod<IndexReader>(IndexReader.class, "reopen", boolean.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod2 =
    new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", boolean.class);
  @Deprecated
  private final boolean hasNewReopenAPI2 =
    VirtualMethod.compareImplementationDistance(getClass(),
        doOpenIfChangedMethod2, reopenMethod2) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(IndexCommit commit)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod3 =
    new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexCommit.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod3 =
    new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexCommit.class);
  @Deprecated
  private final boolean hasNewReopenAPI3 =
    VirtualMethod.compareImplementationDistance(getClass(),
        doOpenIfChangedMethod3, reopenMethod3) >= 0; // it's ok for both to be overridden

  // Back compat for reopen(IndexWriter writer, boolean applyDeletes)
  @Deprecated
  private static final VirtualMethod<IndexReader> reopenMethod4 =
    new VirtualMethod<IndexReader>(IndexReader.class, "reopen", IndexWriter.class, boolean.class);
  @Deprecated
  private static final VirtualMethod<IndexReader> doOpenIfChangedMethod4 =
    new VirtualMethod<IndexReader>(IndexReader.class, "doOpenIfChanged", IndexWriter.class, boolean.class);
  @Deprecated
  private final boolean hasNewReopenAPI4 =
    VirtualMethod.compareImplementationDistance(getClass(),
        doOpenIfChangedMethod4, reopenMethod4) >= 0; // it's ok for both to be overridden
1702
  /** For IndexReader implementations that use
   *  TermInfosReader to read terms, this returns the
   *  current indexDivisor as specified when the reader was
   *  opened.
   *
   *  @return the indexDivisor in effect for this reader
   *  @throws UnsupportedOperationException always, in this base
   *   implementation
   */
  public int getTermInfosIndexDivisor() {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
1711 }