lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/IndexReader.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.document.Document;
  21 import org.apache.lucene.document.FieldSelector;
  22 import org.apache.lucene.search.FieldCache; // javadocs
  23 import org.apache.lucene.search.Similarity;
  24 import org.apache.lucene.store.*;
  25 import org.apache.lucene.util.ArrayUtil;
  26
  27 import java.io.File;
  28 import java.io.FileOutputStream;
  29 import java.io.IOException;
  30 import java.io.Closeable;
  31 import java.util.Collection;
  32 import java.util.Map;
  33 import java.util.concurrent.atomic.AtomicInteger;
  34
  35 /** IndexReader is an abstract class, providing an interface for accessing an
  36  index.  Search of an index is done entirely through this abstract interface,
  37  so that any subclass which implements it is searchable.
  38
  39  <p> Concrete subclasses of IndexReader are usually constructed with a call to
  40  one of the static <code>open()</code> methods, e.g. {@link
  41  #open(Directory, boolean)}.
  42
  43  <p> For efficiency, in this API documents are often referred to via
  44  <i>document numbers</i>, non-negative integers which each name a unique
  45  document in the index.  These document numbers are ephemeral--they may change
  46  as documents are added to and deleted from an index.  Clients should thus not
  47  rely on a given document having the same number between sessions.
  48
  49  <p> An IndexReader can be opened on a directory for which an IndexWriter is
  50  opened already, but it cannot be used to delete documents from the index then.
  51
  52  <p>
  53  <b>NOTE</b>: for backwards API compatibility, several methods are not listed
  54  as abstract, but have no useful implementations in this base class and
  55  instead always throw UnsupportedOperationException.  Subclasses are
  56  strongly encouraged to override these methods, but in many cases may not
  57  need to.
  58  </p>
  59
  60  <p>
  61
  62  <b>NOTE</b>: as of 2.4, it's possible to open a read-only
  63  IndexReader using the static open methods that accept the
  64  boolean readOnly parameter.  Such a reader has better
  65  concurrency as it's not necessary to synchronize on the
  66  isDeleted method.  You must specify false if you want to
  67  make changes with the resulting IndexReader.
  68  </p>
  69
  70  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  71  IndexReader} instances are completely thread
  72  safe, meaning multiple threads can call any of its methods,
  73  concurrently.  If your application requires external
  74  synchronization, you should <b>not</b> synchronize on the
  75  <code>IndexReader</code> instance; use your own
  76  (non-Lucene) objects instead.
  77 */
  78 public abstract class IndexReader implements Cloneable,Closeable {
  79
  80   /**
  81    * A custom listener that's invoked when the IndexReader
  82    * is finished.
  83    *
  84    * <p>For a SegmentReader, this listener is called only
  85    * once all SegmentReaders sharing the same core are
  86    * closed.  At this point it is safe for apps to evict
  87    * this reader from any caches keyed on {@link
  88    * #getCoreCacheKey}.  This is the same interface that
  89    * {@link FieldCache} uses, internally, to evict
  90    * entries.</p>
  91    *
  92    * <p>For other readers, this listener is called when they
  93    * are closed.</p>
  94    *
  95    * @lucene.experimental
  96    */
  97   public static interface ReaderFinishedListener {
  98     public void finished(IndexReader reader);
  99   }
 100
  // Listeners invoked by notifyReaderFinishedListeners().
  // Impls must set this if they may call add/removeReaderFinishedListener:
  // both of those methods dereference the field without a null check.
  protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
 103
 104   /** Expert: adds a {@link ReaderFinishedListener}.  The
 105    * provided listener is also added to any sub-readers, if
 106    * this is a composite reader.  Also, any reader reopened
 107    * or cloned from this one will also copy the listeners at
 108    * the time of reopen.
 109    *
 110    * @lucene.experimental */
 111   public void addReaderFinishedListener(ReaderFinishedListener listener) {
 112     readerFinishedListeners.add(listener);
 113   }
 114
 115   /** Expert: remove a previously added {@link ReaderFinishedListener}.
 116    *
 117    * @lucene.experimental */
 118   public void removeReaderFinishedListener(ReaderFinishedListener listener) {
 119     readerFinishedListeners.remove(listener);
 120   }
 121
 122   protected void notifyReaderFinishedListeners() {
 123     // Defensive (should never be null -- all impls must set
 124     // this):
 125     if (readerFinishedListeners != null) {
 126       for(ReaderFinishedListener listener : readerFinishedListeners) {
 127         listener.finished(this);
 128       }
 129     }
 130   }
 131
 132   protected void readerFinished() {
 133     notifyReaderFinishedListeners();
 134   }
 135
 136   /**
 137    * Constants describing field properties, for example used for
 138    * {@link IndexReader#getFieldNames(FieldOption)}.
 139    */
 140   public static enum FieldOption {
 141     /** All fields */
 142     ALL,
 143     /** All indexed fields */
 144     INDEXED,
 145     /** All fields that store payloads */
 146     STORES_PAYLOADS,
 147     /** All fields that omit tf */
 148     OMIT_TERM_FREQ_AND_POSITIONS,
 149     /** All fields that omit positions */
 150     OMIT_POSITIONS,
 151     /** All fields which are not indexed */
 152     UNINDEXED,
 153     /** All fields which are indexed with termvectors enabled */
 154     INDEXED_WITH_TERMVECTOR,
 155     /** All fields which are indexed but don't have termvectors enabled */
 156     INDEXED_NO_TERMVECTOR,
 157     /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
 158     TERMVECTOR,
 159     /** All fields with termvectors with position values enabled */
 160     TERMVECTOR_WITH_POSITION,
 161     /** All fields with termvectors with offset values enabled */
 162     TERMVECTOR_WITH_OFFSET,
 163     /** All fields with termvectors with offset values and position values enabled */
 164     TERMVECTOR_WITH_POSITION_OFFSET,
 165   }
 166
  // Not referenced in the visible part of this class.
  // NOTE(review): presumably maintained by close() -- confirm against the rest of the file.
  private boolean closed;
  // When true, toString() prefixes its output with '*'.
  protected boolean hasChanges;

  // Reference count: set to 1 by the constructor, adjusted by incRef()/decRef(),
  // and checked by ensureOpen() (a value <= 0 means the reader is closed).
  private final AtomicInteger refCount = new AtomicInteger();

  // termInfosIndexDivisor passed by the open() overloads that do not take one explicitly.
  static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
 173
 174   /** Expert: returns the current refCount for this reader */
 175   public int getRefCount() {
 176     return refCount.get();
 177   }
 178
 179   /**
 180    * Expert: increments the refCount of this IndexReader
 181    * instance.  RefCounts are used to determine when a
 182    * reader can be closed safely, i.e. as soon as there are
 183    * no more references.  Be sure to always call a
 184    * corresponding {@link #decRef}, in a finally clause;
 185    * otherwise the reader may never be closed.  Note that
 186    * {@link #close} simply calls decRef(), which means that
 187    * the IndexReader will not really be closed until {@link
 188    * #decRef} has been called for all outstanding
 189    * references.
 190    *
 191    * @see #decRef
 192    */
 193   public void incRef() {
 194     ensureOpen();
 195     refCount.incrementAndGet();
 196   }
 197
 198   /** {@inheritDoc} */
 199   @Override
 200   public String toString() {
 201     final StringBuilder buffer = new StringBuilder();
 202     if (hasChanges) {
 203       buffer.append('*');
 204     }
 205     buffer.append(getClass().getSimpleName());
 206     buffer.append('(');
 207     final IndexReader[] subReaders = getSequentialSubReaders();
 208     if ((subReaders != null) && (subReaders.length > 0)) {
 209       buffer.append(subReaders[0]);
 210       for (int i = 1; i < subReaders.length; ++i) {
 211         buffer.append(" ").append(subReaders[i]);
 212       }
 213     }
 214     buffer.append(')');
 215     return buffer.toString();
 216   }
 217
 218   /**
 219    * Expert: decreases the refCount of this IndexReader
 220    * instance.  If the refCount drops to 0, then pending
 221    * changes (if any) are committed to the index and this
 222    * reader is closed.  If an exception is hit, the refCount
 223    * is unchanged.
 224    *
 225    * @throws IOException in case an IOException occurs in commit() or doClose()
 226    *
 227    * @see #incRef
 228    */
 229   public void decRef() throws IOException {
 230     ensureOpen();
 231     if (refCount.getAndDecrement() == 1) {
 232       boolean success = false;
 233       try {
 234         commit();
 235         doClose();
 236         success = true;
 237       } finally {
 238         if (!success) {
 239           // Put reference back on failure
 240           refCount.incrementAndGet();
 241         }
 242       }
 243       readerFinished();
 244     }
 245   }
 246
 247   protected IndexReader() {
 248     refCount.set(1);
 249   }
 250
 251   /**
 252    * @throws AlreadyClosedException if this IndexReader is closed
 253    */
 254   protected final void ensureOpen() throws AlreadyClosedException {
 255     if (refCount.get() <= 0) {
 256       throw new AlreadyClosedException("this IndexReader is closed");
 257     }
 258   }
 259
  260   /** Returns an IndexReader reading the index in the given
 261    *  Directory, with readOnly=true.
 262    * @param directory the index directory
 263    * @throws CorruptIndexException if the index is corrupt
 264    * @throws IOException if there is a low-level IO error
 265    */
 266   public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
 267     return open(directory, null, null, true, DEFAULT_TERMS_INDEX_DIVISOR);
 268   }
 269
 270   /** Returns an IndexReader reading the index in the given
 271    *  Directory.  You should pass readOnly=true, since it
 272    *  gives much better concurrent performance, unless you
 273    *  intend to do write operations (delete documents or
 274    *  change norms) with the reader.
 275    * @param directory the index directory
 276    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 277    * @throws CorruptIndexException if the index is corrupt
 278    * @throws IOException if there is a low-level IO error
 279    */
 280   public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
 281     return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 282   }
 283
 284   /**
 285    * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
 286    *
 287    * @param writer The IndexWriter to open from
 288    * @param applyAllDeletes If true, all buffered deletes will
 289    * be applied (made visible) in the returned reader.  If
 290    * false, the deletes are not applied but remain buffered
 291    * (in IndexWriter) so that they will be applied in the
 292    * future.  Applying deletes can be costly, so if your app
 293    * can tolerate deleted documents being returned you might
 294    * gain some performance by passing false.
 295    * @return The new IndexReader
  296    * @throws CorruptIndexException if the index is corrupt
 297    * @throws IOException if there is a low-level IO error
 298    *
 299    * @see #reopen(IndexWriter,boolean)
 300    *
 301    * @lucene.experimental
 302    */
 303   public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 304     return writer.getReader(applyAllDeletes);
 305   }
 306
 307   /** Expert: returns an IndexReader reading the index in the given
 308    *  {@link IndexCommit}.  You should pass readOnly=true, since it
 309    *  gives much better concurrent performance, unless you
 310    *  intend to do write operations (delete documents or
 311    *  change norms) with the reader.
 312    * @param commit the commit point to open
 313    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 314    * @throws CorruptIndexException if the index is corrupt
 315    * @throws IOException if there is a low-level IO error
 316    */
 317   public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException {
 318     return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 319   }
 320
 321   /** Expert: returns an IndexReader reading the index in
 322    *  the given Directory, with a custom {@link
 323    *  IndexDeletionPolicy}.  You should pass readOnly=true,
 324    *  since it gives much better concurrent performance,
 325    *  unless you intend to do write operations (delete
 326    *  documents or change norms) with the reader.
 327    * @param directory the index directory
 328    * @param deletionPolicy a custom deletion policy (only used
 329    *  if you use this reader to perform deletes or to set
 330    *  norms); see {@link IndexWriter} for details.
 331    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 332    * @throws CorruptIndexException if the index is corrupt
 333    * @throws IOException if there is a low-level IO error
 334    */
 335   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
 336     return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 337   }
 338
 339   /** Expert: returns an IndexReader reading the index in
 340    *  the given Directory, with a custom {@link
 341    *  IndexDeletionPolicy}.  You should pass readOnly=true,
 342    *  since it gives much better concurrent performance,
 343    *  unless you intend to do write operations (delete
 344    *  documents or change norms) with the reader.
 345    * @param directory the index directory
 346    * @param deletionPolicy a custom deletion policy (only used
 347    *  if you use this reader to perform deletes or to set
 348    *  norms); see {@link IndexWriter} for details.
 349    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 350    * @param termInfosIndexDivisor Subsamples which indexed
 351    *  terms are loaded into RAM. This has the same effect as {@link
 352    *  IndexWriter#setTermIndexInterval} except that setting
 353    *  must be done at indexing time while this setting can be
 354    *  set per reader.  When set to N, then one in every
 355    *  N*termIndexInterval terms in the index is loaded into
 356    *  memory.  By setting this to a value > 1 you can reduce
 357    *  memory usage, at the expense of higher latency when
 358    *  loading a TermInfo.  The default value is 1.  Set this
 359    *  to -1 to skip loading the terms index entirely.
 360    * @throws CorruptIndexException if the index is corrupt
 361    * @throws IOException if there is a low-level IO error
 362    */
 363   public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 364     return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
 365   }
 366
 367   /** Expert: returns an IndexReader reading the index in
 368    *  the given Directory, using a specific commit and with
 369    *  a custom {@link IndexDeletionPolicy}.  You should pass
 370    *  readOnly=true, since it gives much better concurrent
 371    *  performance, unless you intend to do write operations
 372    *  (delete documents or change norms) with the reader.
 373    * @param commit the specific {@link IndexCommit} to open;
 374    * see {@link IndexReader#listCommits} to list all commits
 375    * in a directory
 376    * @param deletionPolicy a custom deletion policy (only used
 377    *  if you use this reader to perform deletes or to set
 378    *  norms); see {@link IndexWriter} for details.
 379    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 380    * @throws CorruptIndexException if the index is corrupt
 381    * @throws IOException if there is a low-level IO error
 382    */
 383   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
 384     return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
 385   }
 386
 387   /** Expert: returns an IndexReader reading the index in
 388    *  the given Directory, using a specific commit and with
 389    *  a custom {@link IndexDeletionPolicy}.  You should pass
 390    *  readOnly=true, since it gives much better concurrent
 391    *  performance, unless you intend to do write operations
 392    *  (delete documents or change norms) with the reader.
 393    * @param commit the specific {@link IndexCommit} to open;
 394    * see {@link IndexReader#listCommits} to list all commits
 395    * in a directory
 396    * @param deletionPolicy a custom deletion policy (only used
 397    *  if you use this reader to perform deletes or to set
 398    *  norms); see {@link IndexWriter} for details.
 399    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
 400    * @param termInfosIndexDivisor Subsamples which indexed
 401    *  terms are loaded into RAM. This has the same effect as {@link
 402    *  IndexWriter#setTermIndexInterval} except that setting
 403    *  must be done at indexing time while this setting can be
 404    *  set per reader.  When set to N, then one in every
 405    *  N*termIndexInterval terms in the index is loaded into
 406    *  memory.  By setting this to a value > 1 you can reduce
 407    *  memory usage, at the expense of higher latency when
 408    *  loading a TermInfo.  The default value is 1.  Set this
 409    *  to -1 to skip loading the terms index entirely. This is only useful in
 410    *  advanced situations when you will only .next() through all terms;
 411    *  attempts to seek will hit an exception.
 412    *
 413    * @throws CorruptIndexException if the index is corrupt
 414    * @throws IOException if there is a low-level IO error
 415    */
 416   public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 417     return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
 418   }
 419
 420   private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
 421     return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
 422   }
 423
 424   /**
 425    * Refreshes an IndexReader if the index has changed since this instance
 426    * was (re)opened.
 427    * <p>
 428    * Opening an IndexReader is an expensive operation. This method can be used
 429    * to refresh an existing IndexReader to reduce these costs. This method
 430    * tries to only load segments that have changed or were created after the
 431    * IndexReader was (re)opened.
 432    * <p>
 433    * If the index has not changed since this instance was (re)opened, then this
 434    * call is a NOOP and returns this instance. Otherwise, a new instance is
 435    * returned. The old instance is <b>not</b> closed and remains usable.<br>
 436    * <p>
 437    * If the reader is reopened, even though they share
 438    * resources internally, it's safe to make changes
 439    * (deletions, norms) with the new reader.  All shared
 440    * mutable state obeys "copy on write" semantics to ensure
 441    * the changes are not seen by other readers.
 442    * <p>
 443    * You can determine whether a reader was actually reopened by comparing the
 444    * old instance with the instance returned by this method:
 445    * <pre>
 446    * IndexReader reader = ...
 447    * ...
  448    * IndexReader newReader = reader.reopen();
 449    * if (newReader != reader) {
 450    * ...     // reader was reopened
 451    *   reader.close();
 452    * }
 453    * reader = newReader;
 454    * ...
 455    * </pre>
 456    *
 457    * Be sure to synchronize that code so that other threads,
 458    * if present, can never use reader after it has been
 459    * closed and before it's switched to newReader.
 460    *
 461    * <p><b>NOTE</b>: If this reader is a near real-time
  462    * reader (obtained from {@link IndexWriter#getReader()}),
 463    * reopen() will simply call writer.getReader() again for
 464    * you, though this may change in the future.
 465    *
 466    * @throws CorruptIndexException if the index is corrupt
 467    * @throws IOException if there is a low-level IO error
 468    */
 469   public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
 470     throw new UnsupportedOperationException("This reader does not support reopen().");
 471   }
 472
 473
 474   /** Just like {@link #reopen()}, except you can change the
 475    *  readOnly of the original reader.  If the index is
 476    *  unchanged but readOnly is different then a new reader
 477    *  will be returned. */
 478   public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
 479     throw new UnsupportedOperationException("This reader does not support reopen().");
 480   }
 481
 482   /** Expert: reopen this reader on a specific commit point.
 483    *  This always returns a readOnly reader.  If the
 484    *  specified commit point matches what this reader is
 485    *  already on, and this reader is already readOnly, then
 486    *  this same instance is returned; if it is not already
 487    *  readOnly, a readOnly clone is returned. */
 488   public synchronized IndexReader reopen(final IndexCommit commit) throws CorruptIndexException, IOException {
 489     throw new UnsupportedOperationException("This reader does not support reopen(IndexCommit).");
 490   }
 491
 492   /**
 493    * Expert: returns a readonly reader, covering all
 494    * committed as well as un-committed changes to the index.
 495    * This provides "near real-time" searching, in that
 496    * changes made during an IndexWriter session can be
 497    * quickly made available for searching without closing
 498    * the writer nor calling {@link #commit}.
 499    *
 500    * <p>Note that this is functionally equivalent to calling
  501    * <code>flush</code> (an internal IndexWriter operation) and then using {@link IndexReader#open} to
 502    * open a new reader.  But the turnaround time of this
 503    * method should be faster since it avoids the potentially
 504    * costly {@link #commit}.</p>
 505    *
 506    * <p>You must close the {@link IndexReader} returned by
 507    * this method once you are done using it.</p>
 508    *
 509    * <p>It's <i>near</i> real-time because there is no hard
 510    * guarantee on how quickly you can get a new reader after
 511    * making changes with IndexWriter.  You'll have to
 512    * experiment in your situation to determine if it's
 513    * fast enough.  As this is a new and experimental
 514    * feature, please report back on your findings so we can
 515    * learn, improve and iterate.</p>
 516    *
 517    * <p>The resulting reader supports {@link
 518    * IndexReader#reopen}, but that call will simply forward
 519    * back to this method (though this may change in the
 520    * future).</p>
 521    *
 522    * <p>The very first time this method is called, this
 523    * writer instance will make every effort to pool the
 524    * readers that it opens for doing merges, applying
 525    * deletes, etc.  This means additional resources (RAM,
 526    * file descriptors, CPU time) will be consumed.</p>
 527    *
 528    * <p>For lower latency on reopening a reader, you should
 529    * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
 530    * pre-warm a newly merged segment before it's committed
 531    * to the index.  This is important for minimizing
 532    * index-to-search delay after a large merge.  </p>
 533    *
 534    * <p>If an addIndexes* call is running in another thread,
 535    * then this reader will only search those segments from
 536    * the foreign index that have been successfully copied
  537    * over, so far.</p>
 538    *
 539    * <p><b>NOTE</b>: Once the writer is closed, any
 540    * outstanding readers may continue to be used.  However,
 541    * if you attempt to reopen any of those readers, you'll
 542    * hit an {@link AlreadyClosedException}.</p>
 543    *
 544    * @return IndexReader that covers entire index plus all
 545    * changes made so far by this IndexWriter instance
 546    *
 547    * @param writer The IndexWriter to open from
 548    * @param applyAllDeletes If true, all buffered deletes will
 549    * be applied (made visible) in the returned reader.  If
 550    * false, the deletes are not applied but remain buffered
 551    * (in IndexWriter) so that they will be applied in the
 552    * future.  Applying deletes can be costly, so if your app
 553    * can tolerate deleted documents being returned you might
 554    * gain some performance by passing false.
 555    *
  556    * @throws IOException if there is a low-level IO error
 557    *
 558    * @lucene.experimental
 559    */
 560   public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 561     return writer.getReader(applyAllDeletes);
 562   }
 563
 564   /**
 565    * Efficiently clones the IndexReader (sharing most
 566    * internal state).
 567    * <p>
 568    * On cloning a reader with pending changes (deletions,
 569    * norms), the original reader transfers its write lock to
 570    * the cloned reader.  This means only the cloned reader
 571    * may make further changes to the index, and commit the
 572    * changes to the index on close, but the old reader still
 573    * reflects all changes made up until it was cloned.
 574    * <p>
 575    * Like {@link #reopen()}, it's safe to make changes to
 576    * either the original or the cloned reader: all shared
 577    * mutable state obeys "copy on write" semantics to ensure
 578    * the changes are not seen by other readers.
 579    * <p>
 580    */
 581   @Override
 582   public synchronized Object clone() {
 583     throw new UnsupportedOperationException("This reader does not implement clone()");
 584   }
 585
 586   /**
 587    * Clones the IndexReader and optionally changes readOnly.  A readOnly
 588    * reader cannot open a writeable reader.
 589    * @throws CorruptIndexException if the index is corrupt
 590    * @throws IOException if there is a low-level IO error
 591    */
 592   public synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
 593     throw new UnsupportedOperationException("This reader does not implement clone()");
 594   }
 595
 596   /**
 597    * Returns the directory associated with this index.  The Default
 598    * implementation returns the directory specified by subclasses when
 599    * delegating to the IndexReader(Directory) constructor, or throws an
 600    * UnsupportedOperationException if one was not specified.
 601    * @throws UnsupportedOperationException if no directory
 602    */
 603   public Directory directory() {
 604     ensureOpen();
 605     throw new UnsupportedOperationException("This reader does not support this method.");
 606   }
 607
 608   /**
 609    * Returns the time the index in the named directory was last modified.
 610    * Do not use this to check whether the reader is still up-to-date, use
 611    * {@link #isCurrent()} instead.
 612    * @throws CorruptIndexException if the index is corrupt
 613    * @throws IOException if there is a low-level IO error
 614    */
 615   public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException {
 616     return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
 617         @Override
 618         public Object doBody(String segmentFileName) throws IOException {
 619           return Long.valueOf(directory2.fileModified(segmentFileName));
 620         }
 621       }.run()).longValue();
 622   }
 623
 624   /**
 625    * Reads version number from segments files. The version number is
 626    * initialized with a timestamp and then increased by one for each change of
 627    * the index.
 628    *
 629    * @param directory where the index resides.
 630    * @return version number.
 631    * @throws CorruptIndexException if the index is corrupt
 632    * @throws IOException if there is a low-level IO error
 633    */
 634   public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException {
 635     return SegmentInfos.readCurrentVersion(directory);
 636   }
 637
 638   /**
 639    * Reads commitUserData, previously passed to {@link
 640    * IndexWriter#commit(Map)}, from current index
 641    * segments file.  This will return null if {@link
 642    * IndexWriter#commit(Map)} has never been called for
 643    * this index.
 644    *
 645    * @param directory where the index resides.
 646    * @return commit userData.
 647    * @throws CorruptIndexException if the index is corrupt
 648    * @throws IOException if there is a low-level IO error
 649    *
 650    * @see #getCommitUserData()
 651    */
 652   public static Map<String,String> getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
 653     return SegmentInfos.readCurrentUserData(directory);
 654   }
 655
 656   /**
 657    * Version number when this IndexReader was opened. Not
 658    * implemented in the IndexReader base class.
 659    *
 660    * <p>If this reader is based on a Directory (ie, was
 661    * created by calling {@link #open}, or {@link #reopen} on
 662    * a reader based on a Directory), then this method
 663    * returns the version recorded in the commit that the
 664    * reader opened.  This version is advanced every time
 665    * {@link IndexWriter#commit} is called.</p>
 666    *
 667    * <p>If instead this reader is a near real-time reader
 668    * (ie, obtained by a call to {@link
 669    * IndexWriter#getReader}, or by calling {@link #reopen}
 670    * on a near real-time reader), then this method returns
 671    * the version of the last commit done by the writer.
 672    * Note that even as further changes are made with the
  673    * writer, the version will not change until a commit is
 674    * completed.  Thus, you should not rely on this method to
 675    * determine when a near real-time reader should be
 676    * opened.  Use {@link #isCurrent} instead.</p>
 677    *
 678    * @throws UnsupportedOperationException unless overridden in subclass
 679    */
 680   public long getVersion() {
 681     throw new UnsupportedOperationException("This reader does not support this method.");
 682   }
 683
  /**
   * Retrieve the userData map optionally passed to
   * {@link IndexWriter#commit(Map)}.  This will return null if
   * {@link IndexWriter#commit(Map)} has never been called for
   * this index.  Not implemented in the IndexReader base class:
   * this base implementation always throws
   * {@link UnsupportedOperationException}.
   *
   * @return the commit userData, when implemented by a subclass
   * @throws UnsupportedOperationException unless overridden in subclass
   *
   * @see #getCommitUserData(Directory)
   */
  public Map<String,String> getCommitUserData() {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
 695
 696
  /**
   * Check whether any new changes have occurred to the
   * index since this reader was opened.  Not implemented in
   * the IndexReader base class: this base implementation
   * always throws {@link UnsupportedOperationException}.
   *
   * <p>If this reader is based on a Directory (ie, was
   * created by calling {@link #open}, or {@link #reopen} on
   * a reader based on a Directory), then this method checks
   * if any further commits (see {@link IndexWriter#commit})
   * have occurred in that directory.</p>
   *
   * <p>If instead this reader is a near real-time reader
   * (ie, obtained by a call to {@link
   * IndexWriter#getReader}, or by calling {@link #reopen}
   * on a near real-time reader), then this method checks if
   * either a new commit has occurred, or any new
   * uncommitted changes have taken place via the writer.
   * Note that even if the writer has only performed
   * merging, this method will still return false.</p>
   *
   * <p>In any event, if this returns false, you should call
   * {@link #reopen} to get a new reader that sees the
   * changes.</p>
   *
   * @return <code>false</code> if changes have occurred since this
   *         reader was opened (see above), when implemented by a subclass
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException           if there is a low-level IO error
   * @throws UnsupportedOperationException unless overridden in subclass
   */
  public boolean isCurrent() throws CorruptIndexException, IOException {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
 727
  /**
   * Checks if the index is optimized (if it has a single segment and
   * no deletions).  Not implemented in the IndexReader base class: this
   * base implementation always throws
   * {@link UnsupportedOperationException}.
   * @return <code>true</code> if the index is optimized; <code>false</code> otherwise
   * @throws UnsupportedOperationException unless overridden in subclass
   */
  public boolean isOptimized() {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
 737
  /**
   * Return an array of term frequency vectors for the specified document.
   * The array contains a vector for each vectorized field in the document.
   * Each vector contains terms and frequencies for all terms in a given
   * vectorized field.  If no such fields existed, the method returns null.
   * The term vectors that are returned may either be of type
   * {@link TermFreqVector} or of type {@link TermPositionVector} if
   * positions or offsets have been stored.
   *
   * <p>Abstract: the lookup itself is supplied by each subclass.
   *
   * @param docNumber document for which term frequency vectors are returned
   * @return array of term frequency vectors. May be null if no term vectors have been
   *  stored for the specified document.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException;
 755
 756
  /**
   * Return a term frequency vector for the specified document and field. The
   * returned vector contains terms and frequencies for the terms in
   * the specified field of this document, if the field had the storeTermVector
   * flag set. If term vectors had been stored with positions or offsets, a
   * {@link TermPositionVector} is returned.
   *
   * <p>Abstract: the lookup itself is supplied by each subclass.
   *
   * @param docNumber document for which the term frequency vector is returned
   * @param field field for which the term frequency vector is returned.
   * @return term frequency vector. May be null if the field does not exist in
   * the specified document or the term vector was not stored.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException;
 773
  /**
   * Load the Term Vector of a single field into a user-defined data
   * structure instead of relying on the parallel arrays of
   * the {@link TermFreqVector}.
   *
   * <p>Abstract: the loading itself is supplied by each subclass.
   *
   * @param docNumber The number of the document to load the vector for
   * @param field The name of the field to load
   * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
   * @throws IOException if term vectors cannot be accessed or if they do not
   *         exist on the field and doc. specified.
   */
  abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
 784
  /**
   * Map all the term vectors for all fields in a Document through the
   * supplied {@link TermVectorMapper}.
   *
   * <p>Abstract: the mapping itself is supplied by each subclass.
   *
   * @param docNumber The number of the document to load the vector for
   * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
   * @throws IOException if term vectors cannot be accessed or if they do not
   *         exist on the field and doc. specified.
   */
  abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
 792
 793   /**
 794    * Returns <code>true</code> if an index exists at the specified directory.
 795    * @param  directory the directory to check for an index
 796    * @return <code>true</code> if an index exists; <code>false</code> otherwise
 797    * @throws IOException if there is a problem with accessing the index
 798    */
 799   public static boolean indexExists(Directory directory) throws IOException {
 800     try {
 801       new SegmentInfos().read(directory);
 802       return true;
 803     } catch (IOException ioe) {
 804       return false;
 805     }
 806   }
 807
  /**
   * Returns the number of documents in this index.
   * {@link #numDeletedDocs()} is computed as {@link #maxDoc()} minus this
   * value.
   *
   * @return the number of documents in this index
   */
  public abstract int numDocs();
 810
  /** Returns one greater than the largest possible document number.
   * This may be used to, e.g., determine how big to allocate an array which
   * will have an element for every document number in an index.
   * {@link #document(int)} rejects any document number that is not below
   * this value.
   *
   * @return one greater than the largest possible document number
   */
  public abstract int maxDoc();
 816
 817   /** Returns the number of deleted documents. */
 818   public int numDeletedDocs() {
 819     return maxDoc() - numDocs();
 820   }
 821
 822   /**
 823    * Returns the stored fields of the <code>n</code><sup>th</sup>
 824    * <code>Document</code> in this index.
 825    * <p>
 826    * <b>NOTE:</b> for performance reasons, this method does not check if the
 827    * requested document is deleted, and therefore asking for a deleted document
 828    * may yield unspecified results. Usually this is not required, however you
 829    * can call {@link #isDeleted(int)} with the requested document ID to verify
 830    * the document is not deleted.
 831    *
 832    * @throws CorruptIndexException if the index is corrupt
 833    * @throws IOException if there is a low-level IO error
 834    */
 835   public Document document(int n) throws CorruptIndexException, IOException {
 836     ensureOpen();
 837     if (n < 0 || n >= maxDoc()) {
 838       throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + n + ")");
 839     }
 840     return document(n, null);
 841   }
 842
  /**
   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
   * <sup>th</sup> position. The {@link FieldSelector} may be used to determine
   * what {@link org.apache.lucene.document.Field}s to load and how they should
   * be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
   * <code>FieldsReader</code>) is closed before the lazy
   * {@link org.apache.lucene.document.Field} is loaded an exception may be
   * thrown. If you want the value of a lazy
   * {@link org.apache.lucene.document.Field} to be available after closing you
   * must explicitly load it or fetch the Document again with a new loader.
   * <p>
   * <b>NOTE:</b> for performance reasons, this method does not check if the
   * requested document is deleted, and therefore asking for a deleted document
   * may yield unspecified results. Usually this is not required, however you
   * can call {@link #isDeleted(int)} with the requested document ID to verify
   * the document is not deleted.
   * <p>
   * {@link #document(int)} delegates to this method with a null selector.
   *
   * @param n Get the document at the <code>n</code><sup>th</sup> position
   * @param fieldSelector The {@link FieldSelector} to use to determine what
   *        Fields should be loaded on the Document. May be null, in which case
   *        all Fields will be loaded.
   * @return The stored fields of the
   *         {@link org.apache.lucene.document.Document} at the nth position
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  // TODO (1.5): When we convert to JDK 1.5 make this Set<String>
  public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
 875
  /**
   * Returns true if document <i>n</i> has been deleted.
   *
   * @param n the document number to test
   * @return true if document <i>n</i> has been deleted
   */
  public abstract boolean isDeleted(int n);
 878
  /**
   * Returns true if any documents have been deleted.
   *
   * @return true if any documents have been deleted
   */
  public abstract boolean hasDeletions();
 881
 882   /** Returns true if there are norms stored for this field. */
 883   public boolean hasNorms(String field) throws IOException {
 884     // backward compatible implementation.
 885     // SegmentReader has an efficient implementation.
 886     ensureOpen();
 887     return norms(field) != null;
 888   }
 889
  /** Returns the byte-encoded normalization factor for the named field of
   *  every document.  This is used by the search code to score documents.
   *  Returns null if norms were not indexed for this field.
   *
   * @param field the name of the field whose norms are requested
   * @return the byte-encoded norms, or null if norms were not indexed for
   *         this field
   * @throws IOException declared for implementations
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract byte[] norms(String field) throws IOException;
 897
  /** Reads the byte-encoded normalization factor for the named field of every
   *  document.  This is used by the search code to score documents.
   *
   * @param field the name of the field whose norms are read
   * @param bytes the array that receives the norms
   * @param offset NOTE(review): presumably the position in <code>bytes</code>
   *        at which writing starts; confirm against implementations
   * @throws IOException declared for implementations
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract void norms(String field, byte[] bytes, int offset)
    throws IOException;
 905
  /** Expert: Resets the normalization factor for the named field of the named
   * document.  The norm represents the product of the field's {@link
   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
   * int) length normalization}.  Thus, to preserve the length normalization
   * values when resetting this, one should base the new value upon the old.
   *
   * <p>The method checks that the reader is open, calls
   * {@link #acquireWriteLock()}, marks the reader as having pending changes
   * and then delegates to {@link #doSetNorm(int, String, byte)}.
   *
   * <b>NOTE:</b> If this field does not index norms, then
   * this method throws {@link IllegalStateException}.
   *
   * @param doc the document number
   * @param field the field name
   * @param value the new byte-encoded norm
   * @see #norms(String)
   * @see Similarity#decodeNormValue(byte)
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   * @throws IllegalStateException if the field does not index norms
   */
  public synchronized  void setNorm(int doc, String field, byte value)
          throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    acquireWriteLock();
    // Flag the change before delegating; commit(Map) only calls doCommit when this is set.
    hasChanges = true;
    doSetNorm(doc, field, value);
  }
 933
  /**
   * Implements setNorm in subclass.  Called by
   * {@link #setNorm(int, String, byte)} after the write lock has been
   * requested and the reader has been marked as changed.
   *
   * @param doc the document number
   * @param field the field name
   * @param value the new byte-encoded norm
   * @throws CorruptIndexException declared for implementations
   * @throws IOException declared for implementations
   */
  protected abstract void doSetNorm(int doc, String field, byte value)
          throws CorruptIndexException, IOException;
 937
 938   /** Expert: Resets the normalization factor for the named field of the named
 939    * document.
 940    *
 941    * @see #norms(String)
 942    * @see Similarity#decodeNormValue(byte)
 943    *
 944    * @throws StaleReaderException if the index has changed
 945    *  since this reader was opened
 946    * @throws CorruptIndexException if the index is corrupt
 947    * @throws LockObtainFailedException if another writer
 948    *  has this index open (<code>write.lock</code> could not
 949    *  be obtained)
 950    * @throws IOException if there is a low-level IO error
 951    * @deprecated Use {@link #setNorm(int, String, byte)} instead, encoding the
 952    * float to byte with your Similarity's {@link Similarity#encodeNormValue(float)}.
 953    * This method will be removed in Lucene 4.0
 954    */
 955   @Deprecated
 956   public void setNorm(int doc, String field, float value)
 957           throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
 958     ensureOpen();
 959     setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
 960   }
 961
  /** Returns an enumeration of all the terms in the index. The
   * enumeration is ordered by Term.compareTo(). Each term is greater
   * than all that precede it in the enumeration. Note that after
   * calling terms(), {@link TermEnum#next()} must be called
   * on the resulting enumeration before calling other methods such as
   * {@link TermEnum#term()}.
   *
   * @return an enumeration over all terms in the index
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms() throws IOException;
 971
  /** Returns an enumeration of all terms starting at a given term. If
   * the given term does not exist, the enumeration is positioned at the
   * first term greater than the supplied term. The enumeration is
   * ordered by Term.compareTo(). Each term is greater than all that
   * precede it in the enumeration.
   *
   * @param t the term at which the enumeration starts
   * @return an enumeration positioned as described above
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermEnum terms(Term t) throws IOException;
 980
  /** Returns the number of documents containing the term <code>t</code>.
   *
   * @param t the term to look up
   * @return the number of documents containing <code>t</code>
   * @throws IOException if there is a low-level IO error
   */
  public abstract int docFreq(Term t) throws IOException;
 985
 986   /** Returns an enumeration of all the documents which contain
 987    * <code>term</code>. For each document, the document number, the frequency of
 988    * the term in that document is also provided, for use in
 989    * search scoring.  If term is null, then all non-deleted
 990    * docs are returned with freq=1.
 991    * Thus, this method implements the mapping:
 992    * <p><ul>
 993    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
 994    * </ul>
 995    * <p>The enumeration is ordered by document number.  Each document number
 996    * is greater than all that precede it in the enumeration.
 997    * @throws IOException if there is a low-level IO error
 998    */
 999   public TermDocs termDocs(Term term) throws IOException {
1000     ensureOpen();
1001     TermDocs termDocs = termDocs();
1002     termDocs.seek(term);
1003     return termDocs;
1004   }
1005
  /** Returns an unpositioned {@link TermDocs} enumerator.
   * <p>
   * Note: the TermDocs returned is unpositioned. Before using it, ensure
   * that you first position it with {@link TermDocs#seek(Term)} or
   * {@link TermDocs#seek(TermEnum)}.
   * <p>
   * {@link #termDocs(Term)} is built on this method: it obtains an
   * enumerator here and then seeks it.
   *
   * @return an unpositioned enumerator
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermDocs termDocs() throws IOException;
1015
1016   /** Returns an enumeration of all the documents which contain
1017    * <code>term</code>.  For each document, in addition to the document number
1018    * and frequency of the term in that document, a list of all of the ordinal
1019    * positions of the term in the document is available.  Thus, this method
1020    * implements the mapping:
1021    *
1022    * <p><ul>
1023    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
1024    * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
1025    * pos<sub>freq-1</sub>&gt;
1026    * &gt;<sup>*</sup>
1027    * </ul>
1028    * <p> This positional information facilitates phrase and proximity searching.
1029    * <p>The enumeration is ordered by document number.  Each document number is
1030    * greater than all that precede it in the enumeration.
1031    * @throws IOException if there is a low-level IO error
1032    */
1033   public TermPositions termPositions(Term term) throws IOException {
1034     ensureOpen();
1035     TermPositions termPositions = termPositions();
1036     termPositions.seek(term);
1037     return termPositions;
1038   }
1039
  /** Returns an unpositioned {@link TermPositions} enumerator.
   * {@link #termPositions(Term)} is built on this method: it obtains an
   * enumerator here and then seeks it.
   *
   * @return an unpositioned enumerator
   * @throws IOException if there is a low-level IO error
   */
  public abstract TermPositions termPositions() throws IOException;
1044
1045
1046
  /** Deletes the document numbered <code>docNum</code>.  Once a document is
   * deleted it will not appear in TermDocs or TermPositions enumerations.
   * Attempts to read its field with the {@link #document}
   * method will result in an error.  The presence of this document may still be
   * reflected in the {@link #docFreq} statistic, though
   * this will be corrected eventually as the index is further modified.
   *
   * <p>The method checks that the reader is open, calls
   * {@link #acquireWriteLock()}, marks the reader as having pending changes
   * and then delegates to {@link #doDelete(int)}.
   *
   * @param docNum the number of the document to delete
   * @throws StaleReaderException if the index has changed
   * since this reader was opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    acquireWriteLock();
    // Flag the change before delegating; commit(Map) only calls doCommit when this is set.
    hasChanges = true;
    doDelete(docNum);
  }
1068
1069
  /** Implements deletion of the document numbered <code>docNum</code>.
   * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
   * Called by {@link #deleteDocument(int)} after the write lock has been
   * requested and the reader has been marked as changed.
   *
   * @param docNum the number of the document to delete
   * @throws CorruptIndexException declared for implementations
   * @throws IOException declared for implementations
   */
  protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException;
1074
1075
1076   /** Deletes all documents that have a given <code>term</code> indexed.
1077    * This is useful if one uses a document field to hold a unique ID string for
1078    * the document.  Then to delete such a document, one merely constructs a
1079    * term with the appropriate field and the unique ID string as its text and
1080    * passes it to this method.
1081    * See {@link #deleteDocument(int)} for information about when this deletion will
1082    * become effective.
1083    *
1084    * @return the number of documents deleted
1085    * @throws StaleReaderException if the index has changed
1086    *  since this reader was opened
1087    * @throws CorruptIndexException if the index is corrupt
1088    * @throws LockObtainFailedException if another writer
1089    *  has this index open (<code>write.lock</code> could not
1090    *  be obtained)
1091    * @throws IOException if there is a low-level IO error
1092    */
1093   public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
1094     ensureOpen();
1095     TermDocs docs = termDocs(term);
1096     if (docs == null) return 0;
1097     int n = 0;
1098     try {
1099       while (docs.next()) {
1100         deleteDocument(docs.doc());
1101         n++;
1102       }
1103     } finally {
1104       docs.close();
1105     }
1106     return n;
1107   }
1108
  /** Undeletes all documents currently marked as deleted in
   * this index.
   *
   * <p>NOTE: this method can only recover documents marked
   * for deletion but not yet removed from the index; when
   * and how Lucene removes deleted documents is an
   * implementation detail, subject to change from release
   * to release.  However, you can use {@link
   * #numDeletedDocs} on the current IndexReader instance to
   * see how many documents will be un-deleted.
   *
   * <p>The method checks that the reader is open, calls
   * {@link #acquireWriteLock()}, marks the reader as having pending changes
   * and then delegates to {@link #doUndeleteAll()}.
   *
   * @throws StaleReaderException if the index has changed
   *  since this reader was opened
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
    ensureOpen();
    acquireWriteLock();
    // Flag the change before delegating; commit(Map) only calls doCommit when this is set.
    hasChanges = true;
    doUndeleteAll();
  }
1134
  /**
   * Implements actual undeleteAll() in subclass.  Called by
   * {@link #undeleteAll()} after the write lock has been requested and the
   * reader has been marked as changed.
   *
   * @throws CorruptIndexException declared for implementations
   * @throws IOException declared for implementations
   */
  protected abstract void doUndeleteAll() throws CorruptIndexException, IOException;
1137
  /** Does nothing by default. Subclasses that require a write lock for
   *  index modifications must implement this method.
   *
   *  <p>It is invoked by {@link #setNorm(int, String, byte)},
   *  {@link #deleteDocument(int)} and {@link #undeleteAll()} before the
   *  modification is delegated to the subclass.
   *
   *  @throws IOException declared for overriding implementations; this
   *          default implementation never throws
   */
  protected synchronized void acquireWriteLock() throws IOException {
    /* NOOP */
  }
1143
  /**
   * Checks that this reader is open and then commits any pending changes
   * by calling {@link #commit()} (i.e. without commit user data).
   *
   * @throws IOException if the commit fails
   */
  public final synchronized void flush() throws IOException {
    ensureOpen();
    commit();
  }
1152
  /**
   * Checks that this reader is open and then commits any pending changes
   * by calling {@link #commit(Map)} with the supplied user data.
   *
   * @param commitUserData Opaque Map (String -> String)
   *  that's recorded into the segments file in the index,
   *  and retrievable by {@link
   *  IndexReader#getCommitUserData}.
   * @throws IOException if the commit fails
   */
  public final synchronized void flush(Map<String, String> commitUserData) throws IOException {
    ensureOpen();
    commit(commitUserData);
  }
1164
  /**
   * Commit changes resulting from delete, undeleteAll, or
   * setNorm operations.  Delegates to {@link #commit(Map)} with
   * null commit user data.
   *
   * If an exception is hit, then either no changes or all
   * changes will have been committed to the index
   * (transactional semantics).
   * @throws IOException if there is a low-level IO error
   */
  protected final synchronized void commit() throws IOException {
    commit(null);
  }
1177
1178   /**
1179    * Commit changes resulting from delete, undeleteAll, or
1180    * setNorm operations
1181    *
1182    * If an exception is hit, then either no changes or all
1183    * changes will have been committed to the index
1184    * (transactional semantics).
1185    * @throws IOException if there is a low-level IO error
1186    */
1187   public final synchronized void commit(Map<String, String> commitUserData) throws IOException {
1188     if (hasChanges) {
1189       doCommit(commitUserData);
1190     }
1191     hasChanges = false;
1192   }
1193
  /**
   * Implements commit.  Called by {@link #commit(Map)} only when the
   * reader has pending changes.
   *
   * @param commitUserData the user data passed to {@link #commit(Map)};
   *        null when the commit came through {@link #commit()}
   * @throws IOException declared for implementations
   */
  protected abstract void doCommit(Map<String, String> commitUserData) throws IOException;
1196
1197   /**
1198    * Closes files associated with this index.
1199    * Also saves any new deletions to disk.
1200    * No other methods should be called after this has been called.
1201    * @throws IOException if there is a low-level IO error
1202    */
1203   public final synchronized void close() throws IOException {
1204     if (!closed) {
1205       decRef();
1206       closed = true;
1207     }
1208   }
1209
  /**
   * Implements close.  Subclasses provide the actual release of resources.
   *
   * @throws IOException declared for implementations
   */
  protected abstract void doClose() throws IOException;
1212
1213
  /**
   * Get a list of unique field names that exist in this index and have the specified
   * field option information.
   *
   * @param fldOption specifies which field option should be available for the returned fields
   * @return Collection of Strings indicating the names of the fields.
   * @see IndexReader.FieldOption
   */
  public abstract Collection<String> getFieldNames(FieldOption fldOption);
1222
  /**
   * Expert: return the IndexCommit that this reader has
   * opened.  This method is only implemented by those
   * readers that correspond to a Directory with its own
   * segments_N file.  This base implementation always throws
   * {@link UnsupportedOperationException}.
   *
   * @return the opened IndexCommit, when implemented by a subclass
   * @throws IOException declared for overriding implementations
   * @throws UnsupportedOperationException unless overridden in subclass
   *
   * @lucene.experimental
   */
  public IndexCommit getIndexCommit() throws IOException {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
1234
1235   /**
1236    * Prints the filename and size of each file within a given compound file.
1237    * Add the -extract flag to extract files to the current working directory.
1238    * In order to make the extracted version of the index work, you have to copy
1239    * the segments file from the compound index into the directory where the extracted files are stored.
1240    * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
1241    */
1242   public static void main(String [] args) {
1243     String filename = null;
1244     boolean extract = false;
1245
1246     for (int i = 0; i < args.length; ++i) {
1247       if (args[i].equals("-extract")) {
1248         extract = true;
1249       } else if (filename == null) {
1250         filename = args[i];
1251       }
1252     }
1253
1254     if (filename == null) {
1255       System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
1256       return;
1257     }
1258
1259     Directory dir = null;
1260     CompoundFileReader cfr = null;
1261
1262     try {
1263       File file = new File(filename);
1264       String dirname = file.getAbsoluteFile().getParent();
1265       filename = file.getName();
1266       dir = FSDirectory.open(new File(dirname));
1267       cfr = new CompoundFileReader(dir, filename);
1268
1269       String [] files = cfr.listAll();
1270       ArrayUtil.mergeSort(files);   // sort the array of filename so that the output is more readable
1271
1272       for (int i = 0; i < files.length; ++i) {
1273         long len = cfr.fileLength(files[i]);
1274
1275         if (extract) {
1276           System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
1277           IndexInput ii = cfr.openInput(files[i]);
1278
1279           FileOutputStream f = new FileOutputStream(files[i]);
1280
1281           // read and write with a small buffer, which is more effective than reading byte by byte
1282           byte[] buffer = new byte[1024];
1283           int chunk = buffer.length;
1284           while(len > 0) {
1285             final int bufLen = (int) Math.min(chunk, len);
1286             ii.readBytes(buffer, 0, bufLen);
1287             f.write(buffer, 0, bufLen);
1288             len -= bufLen;
1289           }
1290
1291           f.close();
1292           ii.close();
1293         }
1294         else
1295           System.out.println(files[i] + ": " + len + " bytes");
1296       }
1297     } catch (IOException ioe) {
1298       ioe.printStackTrace();
1299     }
1300     finally {
1301       try {
1302         if (dir != null)
1303           dir.close();
1304         if (cfr != null)
1305           cfr.close();
1306       }
1307       catch (IOException ioe) {
1308         ioe.printStackTrace();
1309       }
1310     }
1311   }
1312
1313   /** Returns all commit points that exist in the Directory.
1314    *  Normally, because the default is {@link
1315    *  KeepOnlyLastCommitDeletionPolicy}, there would be only
1316    *  one commit point.  But if you're using a custom {@link
1317    *  IndexDeletionPolicy} then there could be many commits.
1318    *  Once you have a given commit, you can open a reader on
1319    *  it by calling {@link IndexReader#open(IndexCommit,boolean)}
1320    *  There must be at least one commit in
1321    *  the Directory, else this method throws {@link
1322    *  IndexNotFoundException}.  Note that if a commit is in
1323    *  progress while this method is running, that commit
1324    *  may or may not be returned.
1325    *
1326    *  @return a sorted list of {@link IndexCommit}s, from oldest
1327    *  to latest. */
1328   public static Collection<IndexCommit> listCommits(Directory dir) throws IOException {
1329     return DirectoryReader.listCommits(dir);
1330   }
1331
  /** Expert: returns the sequential sub readers that this
   *  reader is logically composed of.  For example,
   *  IndexSearcher uses this API to drive searching by one
   *  sub reader at a time.  If this reader is not composed
   *  of sequential child readers, it should return null.
   *  If this method returns an empty array, that means this
   *  reader is a null reader (for example a MultiReader
   *  that has no sub readers).
   *  <p>
   *  This base implementation returns null.
   *  <p>
   *  NOTE: You should not try using sub-readers returned by
   *  this method to make any changes (setNorm, deleteDocument,
   *  etc.). While this might succeed for one composite reader
   *  (like MultiReader), it will most likely lead to index
   *  corruption for other readers (like DirectoryReader obtained
   *  through {@link #open}). Use the parent reader directly.
   *
   *  @return the sub readers, or null if this reader is not
   *  composed of sequential child readers */
  public IndexReader[] getSequentialSubReaders() {
    return null;
  }
1350
  /**
   * Expert: returns the core cache key of this reader.  This base
   * implementation returns the reader itself.
   * NOTE(review): presumably callers use the key to cache data tied to the
   * reader's core; confirm against the callers.
   *
   * @return this reader, in the base implementation
   */
  public Object getCoreCacheKey() {
    return this;
  }
1355
  /**
   * Expert: returns the deletes cache key of this reader.  This base
   * implementation returns the reader itself and therefore never returns
   * null.
   *
   * <p>Warning: the original documentation states that this returns null
   * if the reader has no deletions.  NOTE(review): that can only apply to
   * overriding subclasses; confirm against them.
   *
   * @return this reader, in the base implementation
   */
  public Object getDeletesCacheKey() {
    return this;
  }
1361
  /** Returns the number of unique terms (across all fields)
   *  in this reader.  This base implementation always throws
   *  {@link UnsupportedOperationException}.
   *
   *  This method returns long, even though internally
   *  Lucene cannot handle more than 2^31 unique terms, for
   *  a possible future when this limitation is removed.
   *
   *  @return the number of unique terms, when implemented by a subclass
   *  @throws IOException declared for overriding implementations
   *  @throws UnsupportedOperationException if this count
   *  cannot be easily determined (eg Multi*Readers).
   *  Instead, you should call {@link
   *  #getSequentialSubReaders} and ask each sub reader for
   *  its unique term count. */
  public long getUniqueTermCount() throws IOException {
    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
  }
1377
  /** For IndexReader implementations that use
   *  TermInfosReader to read terms, this returns the
   *  current indexDivisor as specified when the reader was
   *  opened.  This base implementation always throws
   *  {@link UnsupportedOperationException}.
   *
   *  @return the index divisor, when implemented by a subclass
   *  @throws UnsupportedOperationException unless overridden in subclass
   */
  public int getTermInfosIndexDivisor() {
    throw new UnsupportedOperationException("This reader does not support this method.");
  }
1386 }