package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.TwoPhaseCommit;
import org.apache.lucene.util.Version;
/**
  An <code>IndexWriter</code> creates and maintains an index.

  <p>The <code>create</code> argument to the {@link
  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
  whether a new index is created, or whether an existing index is
  opened.  Note that you can open an index with <code>create=true</code>
  even while readers are using the index.  The old readers will
  continue to search the "point in time" snapshot they had opened,
  and won't see the newly created index until they re-open.  There are
  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
  with no <code>create</code> argument which will create a new index
  if there is not already an index at the provided path and otherwise
  open the existing index.</p>
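
  <p>For example, a minimal sketch of opening a writer (the path,
  analyzer and version here are illustrative assumptions, not
  requirements):</p>

  <pre>
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31,
        new StandardAnalyzer(Version.LUCENE_31));
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, conf);
  </pre>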
  <p>In either case, documents are added with {@link #addDocument(Document)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}.  A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  and then adds the entire document).  When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>
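
  <p>Continuing the sketch above, a hedged example (the field names
  are made up for illustration):</p>

  <pre>
    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    // Replace the document whose "id" field is "1", then persist and close:
    writer.updateDocument(new Term("id", "1"), doc);
    writer.close();
  </pre>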
  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls).  A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner.  For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB.  For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer.  Note that flushing just moves the
  internal buffered state in IndexWriter into the index, but
  these changes are not visible to IndexReader until either
  {@link #commit()} or {@link #close} is called.  A flush may
  also trigger one or more segment merges which by default
  run with a background thread so as not to block the
  addDocument calls (see <a href="#mergePolicy">below</a>
  for changing the {@link MergeScheduler}).</p>
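
  <p>For instance, a sketch of favoring flushing by RAM with a larger
  buffer (48 is an arbitrary illustrative value):</p>

  <pre>
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setRAMBufferSizeMB(48.0);
    // Disable the doc-count trigger so only RAM usage drives flushes:
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  </pre>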
  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
  another <code>IndexWriter</code> on the same directory will lead to a
  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
  is also thrown if an IndexReader on the same directory is used to delete documents
  from the index.</p>
  <a name="deletionPolicy"></a>
  <p>Expert: <code>IndexWriter</code> allows an optional
  {@link IndexDeletionPolicy} implementation to be
  specified.  You can use this to control when prior commits
  are deleted from the index.  The default policy is {@link
  KeepOnlyLastCommitDeletionPolicy} which removes all prior
  commits as soon as a new commit is done (this matches
  behavior before 2.2).  Creating your own policy can allow
  you to explicitly keep previous "point in time" commits
  alive in the index for some time, to allow readers to
  refresh to the new commit without having the old commit
  deleted out from under them.  This is necessary on
  filesystems like NFS that do not support "delete on last
  close" semantics, which Lucene's "point in time" search
  normally relies on. </p>
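
  <p>A hedged sketch of installing a different policy, using {@link
  SnapshotDeletionPolicy} (which wraps another policy and can hold a
  commit open, e.g. while you back it up; the analyzer is assumed):</p>

  <pre>
    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(
        new KeepOnlyLastCommitDeletionPolicy());
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setIndexDeletionPolicy(sdp);
  </pre>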
  <a name="mergePolicy"></a> <p>Expert:
  <code>IndexWriter</code> allows you to separately change
  the {@link MergePolicy} and the {@link MergeScheduler}.
  The {@link MergePolicy} is invoked whenever there are
  changes to the segments in the index.  Its role is to
  select which merges to do, if any, and return a {@link
  MergePolicy.MergeSpecification} describing the merges.
  The default is {@link LogByteSizeMergePolicy}.  Then, the {@link
  MergeScheduler} is invoked with the requested merges and
  it decides when and how to run the merges.  The default is
  {@link ConcurrentMergeScheduler}. </p>
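
  <p>For example, a sketch that tunes the default policy and swaps in
  a serial scheduler (the values are illustrative only):</p>

  <pre>
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(20);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setMergePolicy(mp);
    // Run merges in the calling thread instead of a background thread:
    conf.setMergeScheduler(new SerialMergeScheduler());
  </pre>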
  <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  OutOfMemoryError then IndexWriter will quietly record this
  fact and block all future segment commits.  This is a
  defensive measure in case any internal state (buffered
  documents and deletions) were corrupted.  Any subsequent
  calls to {@link #commit()} will throw an
  IllegalStateException.  The only course of action is to
  call {@link #close()}, which internally will call {@link
  #rollback()}, to undo any changes to the index since the
  last commit.  You can also just call {@link #rollback()}
  directly.</p>
  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  IndexWriter} instances are completely thread
  safe, meaning multiple threads can call any of its
  methods, concurrently.  If your application requires
  external synchronization, you should <b>not</b>
  synchronize on the <code>IndexWriter</code> instance as
  this may cause deadlock; use your own (non-Lucene) objects
  instead. </p>
  <p><b>NOTE</b>: If you call
  <code>Thread.interrupt()</code> on a thread that's within
  IndexWriter, IndexWriter will try to catch this (eg, if
  it's in a wait() or Thread.sleep()), and will then throw
  the unchecked exception {@link ThreadInterruptedException}
  and <b>clear</b> the interrupt status on the thread.</p>
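
  <p>A hedged sketch of restoring the interrupt flag after catching
  this exception (the surrounding call is illustrative):</p>

  <pre>
    try {
      writer.close();
    } catch (ThreadInterruptedException tie) {
      // IndexWriter cleared the flag; restore it for callers upstream:
      Thread.currentThread().interrupt();
      throw tie;
    }
  </pre>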
*/

/*
 * Clarification: Check Points (and commits)
 * IndexWriter writes new index files to the directory without writing a new segments_N
 * file which references these new files. It also means that the state of
 * the in memory SegmentInfos object is different than the most recent
 * segments_N file written to the directory.
 *
 * Each time the SegmentInfos is changed, and matches the (possibly
 * modified) directory files, we have a new "check point".
 * If the modified/new SegmentInfos is written to disk - as a new
 * (generation of) segments_N file - this check point is also an
 * IndexCommit.
 *
 * A new checkpoint always replaces the previous checkpoint and
 * becomes the new "front" of the index. This allows the IndexFileDeleter
 * to delete files that are referenced only by stale checkpoints
 * (files that were created since the last commit, but are no longer
 * referenced by the "front" of the index). For this, IndexFileDeleter
 * keeps track of the last non-commit checkpoint.
 */
public class IndexWriter implements Closeable, TwoPhaseCommit {

  /**
   * Default value for the write lock timeout (1,000).
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
   */
  @Deprecated
  public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;

  private long writeLockTimeout;

  /**
   * Name of the write lock in the index.
   */
  public static final String WRITE_LOCK_NAME = "write.lock";

  /**
   * Value to denote a flush trigger is disabled.
   * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
   */
  @Deprecated
  public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDocs(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;

  /**
   * Default value is 16 MB (which means flush when buffered
   * docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
   */
  @Deprecated
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  /**
   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
   *
   * @deprecated see {@link IndexWriterConfig}
   */
  @Deprecated
  public final static int DEFAULT_MAX_FIELD_LENGTH = MaxFieldLength.UNLIMITED.getLimit();

  /**
   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
   */
  @Deprecated
  public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /**
   * Absolute hard maximum length for a term.  If a term
   * arrives from the analyzer longer than this length, it
   * is skipped and a message is printed to infoStream, if
   * set (see {@link #setInfoStream}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
  // The normal read buffer size defaults to 1024, but
  // increasing this during merging seems to yield
  // performance gains.  However we don't want to increase
  // it too much because there are quite a few
  // BufferedIndexInputs created during merging.  See
  // LUCENE-888 for details.
  private final static int MERGE_READ_BUFFER_SIZE = 4096;

  // Used for printing messages
  private static final AtomicInteger MESSAGE_ID = new AtomicInteger();
  private int messageID = MESSAGE_ID.getAndIncrement();
  volatile private boolean hitOOM;

  private final Directory directory;           // where this index resides
  private final Analyzer analyzer;             // how to analyze text

  // TODO 4.0: this should be made final once the setter is out
  private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize

  private volatile long changeCount; // increments every time a change is completed
  private long lastCommitChangeCount; // last changeCount that was committed

  private List<SegmentInfo> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails

  volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
  volatile long pendingCommitChangeCount;

  final SegmentInfos segmentInfos = new SegmentInfos();       // the segments

  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  // used by forceMerge to note those needing merging
  private Map<SegmentInfo,Boolean> segmentsToMerge = new HashMap<SegmentInfo,Boolean>();
  private int mergeMaxNumSegments;

  private Lock writeLock;

  private volatile boolean closed;
  private volatile boolean closing;

  // Holds all SegmentInfo instances currently involved in
  // merges
  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

  private MergePolicy mergePolicy;
  // TODO 4.0: this should be made final once the setter is removed
  private /*final*/MergeScheduler mergeScheduler;
  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  private long mergeGen;
  private boolean stopMerges;

  private final AtomicInteger flushCount = new AtomicInteger();
  private final AtomicInteger flushDeletesCount = new AtomicInteger();

  final ReaderPool readerPool = new ReaderPool();
  final BufferedDeletesStream bufferedDeletesStream;

  // This is a "write once" variable (like the organic dye
  // on a DVD-R that may or may not be heated by a laser and
  // then cooled to permanently record the event): it's
  // false, until getReader() is called for the first time,
  // at which point it's switched to true and never changes
  // back to false.  Once this is true, we hold open and
  // reuse SegmentReader instances internally for applying
  // deletes, doing merges, and reopening near real-time
  // readers.
  private volatile boolean poolReaders;

  // The instance that was passed to the constructor. It is saved only in order
  // to allow users to query an IndexWriter settings.
  private final IndexWriterConfig config;

  // The PayloadProcessorProvider to use when segments are merged
  private PayloadProcessorProvider payloadProcessorProvider;

  // for testing
  boolean anyNonBulkMerges;
  /**
   * Expert: returns a readonly reader, covering all
   * committed as well as un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * <code>flush</code> and then using {@link IndexReader#open} to
   * open a new reader.  But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
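   *
   * <p>A hedged usage sketch (the search logic is an illustrative
   * assumption):</p>
   *
   * <pre>
   * IndexReader reader = writer.getReader(); // NRT reader
   * try {
   *   IndexSearcher searcher = new IndexSearcher(reader);
   *   // ... run searches; meanwhile the writer may keep indexing ...
   * } finally {
   *   reader.close(); // the caller owns the returned reader
   * }
   * </pre>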
   *
   * <p>It's <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter.  You'll have to
   * experiment in your situation to determine if it's
   * fast enough.  As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * IndexReader#reopen}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc.  This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link #setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it's committed
   * to the index.  This is important for minimizing
   * index-to-search delay after a large merge. </p>
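   *
   * <p>As a hedged sketch (the warming logic is an illustrative
   * assumption; any per-segment work can go there):</p>
   *
   * <pre>
   * writer.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
   *   public void warm(IndexReader reader) throws IOException {
   *     // e.g. run a representative query against the newly merged segment
   *   }
   * });
   * </pre>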
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used.  However,
   * if you attempt to reopen any of those readers, you'll
   * hit an {@link AlreadyClosedException}.</p>
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @deprecated Please use {@link
   * IndexReader#open(IndexWriter,boolean)} instead.
   *
   * @throws IOException
   */
  @Deprecated
  public IndexReader getReader() throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), true);
  }

  IndexReader getReader(boolean applyAllDeletes) throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
  }
  /** Expert: like {@link #getReader}, except you can
   *  specify which termInfosIndexDivisor should be used for
   *  any newly opened readers.
   *  @param termInfosIndexDivisor Subsamples which indexed
   *  terms are loaded into RAM. This has the same effect as {@link
   *  IndexWriter#setTermIndexInterval} except that setting
   *  must be done at indexing time while this setting can be
   *  set per reader.  When set to N, then one in every
   *  N*termIndexInterval terms in the index is loaded into
   *  memory.  By setting this to a value > 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo.  The default value is 1.  Set this
   *  to -1 to skip loading the terms index entirely.
   *
   *  @deprecated Please use {@link
   *  IndexReader#open(IndexWriter,boolean)} instead.  Furthermore,
   *  this method cannot guarantee the reader (and its
   *  sub-readers) will be opened with the
   *  termInfosIndexDivisor setting because some of them may
   *  have already been opened according to {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor}. You
   *  should set the requested termInfosIndexDivisor through
   *  {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
   *  {@link #getReader()}. */
  @Deprecated
  public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
    return getReader(termInfosIndexDivisor, true);
  }

  IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
    ensureOpen();

    final long tStart = System.currentTimeMillis();

    if (infoStream != null) {
      message("flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    poolReaders = true;

    // Prevent segmentInfos from changing while opening the
    // reader; in theory we could do similar retry logic,
    // just like we do when loading segments_N
    IndexReader r;
    synchronized(this) {
      flush(false, applyAllDeletes);
      r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
      if (infoStream != null) {
        message("return reader version=" + r.getVersion() + " reader=" + r);
      }
    }

    maybeMerge();

    if (infoStream != null) {
      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
    }
    return r;
  }
  // Used for all SegmentReaders we open
  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());

  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
    return readerFinishedListeners;
  }
  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader.  This pool
   *  reuses instances of the SegmentReaders in all these
   *  places if it is in "near real-time mode" (getReader()
   *  has been called on this instance). */
  class ReaderPool {

    private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
    /** Forcefully clear changes for the specified segments. This is called on successful merge. */
    synchronized void clear(List<SegmentInfo> infos) throws IOException {
      if (infos == null) {
        for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
          ent.getValue().hasChanges = false;
        }
      } else {
        for (final SegmentInfo info: infos) {
          final SegmentReader r = readerMap.get(info);
          if (r != null) {
            r.hasChanges = false;
          }
        }
      }
    }

    // used only by asserts
    public synchronized boolean infoIsLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      assert idx != -1: "info=" + info + " isn't in pool";
      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
      return true;
    }

    public synchronized SegmentInfo mapToLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      if (idx != -1) {
        info = segmentInfos.info(idx);
      }
      return info;
    }
    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     *
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr) throws IOException {
      return release(sr, false);
    }
    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     *
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr, boolean drop) throws IOException {

      final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());

      assert !pooled || readerMap.get(sr.getSegmentInfo()) == sr;

      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      sr.decRef();

      if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {

        // We invoke deleter.checkpoint below, so we must be
        // sync'd on IW if there are changes:
        assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge.
        sr.hasChanges &= !drop;

        final boolean hasChanges = sr.hasChanges;

        // Drop our ref -- this will commit any pending
        // changes to the dir
        sr.close();

        // We are the last ref to this reader; since we're
        // not pooling readers, we release it:
        readerMap.remove(sr.getSegmentInfo());

        return hasChanges;
      }

      return false;
    }
    public synchronized void drop(List<SegmentInfo> infos) throws IOException {
      for(SegmentInfo info : infos) {
        drop(info);
      }
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.hasChanges = false;
        readerMap.remove(info);
        sr.close();
      }
    }

    public synchronized void dropAll() throws IOException {
      for(SegmentReader reader : readerMap.values()) {
        reader.hasChanges = false;

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader using this SR is still open
        reader.decRef();
      }
      readerMap.clear();
    }
    /** Remove all our references to readers, and commits
     *  any pending changes. */
    synchronized void close() throws IOException {
      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for(Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {

        SegmentReader sr = ent.getValue();
        if (sr.hasChanges) {
          assert infoIsLive(sr.getSegmentInfo());
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader is kept open after the
        // IndexWriter instance is closed
        sr.decRef();
      }

      readerMap.clear();
    }
    /**
     * Commit all segment readers in the pool.
     * @throws IOException
     */
    synchronized void commit(SegmentInfos infos) throws IOException {

      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for (SegmentInfo info : infos) {

        final SegmentReader sr = readerMap.get(info);
        if (sr != null && sr.hasChanges) {
          assert infoIsLive(info);
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }
      }
    }
    /**
     * Returns a ref to a clone.  NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
      SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
      try {
        return (SegmentReader) sr.clone(true);
      } finally {
        sr.decRef();
      }
    }
    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
      return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, config.getReaderTermsIndexDivisor());
    }
    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     *
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @param readBufferSize
     * @param termsIndexDivisor
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {

      if (poolReaders) {
        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
      }

      SegmentReader sr = readerMap.get(info);
      if (sr == null) {
        // TODO: we may want to avoid doing this while
        // loading the segment
        // Returns a ref, which we xfer to readerMap:
        sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
        sr.readerFinishedListeners = readerFinishedListeners;

        if (info.dir == directory) {
          // Only pool if reader is not external
          readerMap.put(info, sr);
        }
      } else {
        if (doOpenStores) {
          sr.openDocStores();
        }
        if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
          // If this reader was originally opened because we
          // needed to merge it, we didn't load the terms
          // index.  But now, if the caller wants the terms
          // index (eg because it's doing deletes, or an NRT
          // reader is being opened) we ask the reader to
          // load its terms index.
          sr.loadTermsIndex(termsIndexDivisor);
        }
      }

      // Return a ref to our caller
      if (info.dir == directory) {
        // Only incRef if we pooled (reader is not external)
        sr.incRef();
      }
      return sr;
    }
    public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
      SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.incRef();
      }
      return sr;
    }
  }
  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's
   * delCount is returned.
   */
  public int numDeletedDocs(SegmentInfo info) throws IOException {
    ensureOpen(false);
    SegmentReader reader = readerPool.getIfExists(info);
    try {
      if (reader != null) {
        return reader.numDeletedDocs();
      } else {
        return info.getDelCount();
      }
    } finally {
      if (reader != null) {
        readerPool.release(reader);
      }
    }
  }
  /**
   * Used internally to throw an {@link
   * AlreadyClosedException} if this IndexWriter has been
   * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed
   */
  protected final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
    if (closed || (includePendingClose && closing)) {
      throw new AlreadyClosedException("this IndexWriter is closed");
    }
  }

  protected final void ensureOpen() throws AlreadyClosedException {
    ensureOpen(true);
  }
  /**
   * Prints a message to the infoStream (if non-null),
   * prefixed with the identifying information for this
   * writer and the thread that's calling it.
   */
  public void message(String message) {
    if (infoStream != null)
      infoStream.println("IW " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
  }

  /**
   * Casts current mergePolicy to LogMergePolicy, and throws
   * an exception if the mergePolicy is not a LogMergePolicy.
   */
  private LogMergePolicy getLogMergePolicy() {
    if (mergePolicy instanceof LogMergePolicy)
      return (LogMergePolicy) mergePolicy;
    else
      throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  }
  /** <p>Get the current setting of whether newly flushed
   *  segments will use the compound file format.  Note that
   *  this just returns the value previously set with
   *  setUseCompoundFile(boolean), or the default value
   *  (true).  You cannot use this to query the status of
   *  previously flushed segments.</p>
   *
   *  <p>Note that this method is a convenience method: it
   *  just calls mergePolicy.getUseCompoundFile as long as
   *  mergePolicy is an instance of {@link LogMergePolicy}.
   *  Otherwise an IllegalArgumentException is thrown.</p>
   *
   *  @see #setUseCompoundFile(boolean)
   *  @deprecated use {@link LogMergePolicy#getUseCompoundFile()}
   */
  @Deprecated
  public boolean getUseCompoundFile() {
    return getLogMergePolicy().getUseCompoundFile();
  }

  /**
   * <p>Setting to turn on usage of a compound file. When on, multiple files for
   * each segment are merged into a single file when a new segment is flushed.</p>
   *
   * <p>Note that this method is a convenience method: it just calls
   * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
   * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}.
   */
  @Deprecated
  public void setUseCompoundFile(boolean value) {
    getLogMergePolicy().setUseCompoundFile(value);
  }
  /** Expert: Set the Similarity implementation used by this IndexWriter.
   *
   * @see Similarity#setDefault(Similarity)
   * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
   */
  @Deprecated
  public void setSimilarity(Similarity similarity) {
    ensureOpen();
    this.similarity = similarity;
    docWriter.setSimilarity(similarity);
    // Required so config.getSimilarity returns the right value. But this will
    // go away together with the method in 4.0.
    config.setSimilarity(similarity);
  }

  /** Expert: Return the Similarity implementation used by this IndexWriter.
   *
   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
   * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
   */
  @Deprecated
  public Similarity getSimilarity() {
    ensureOpen();
    return similarity;
  }
  /** Expert: Set the interval between indexed terms.  Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms.  Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term.  In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed.  In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data.  In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
   *
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
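   *
   * For example, with 10,000,000 unique terms and the default interval
   * of 128, an IndexReader loads roughly 10,000,000/128 = 78,125 terms
   * into memory, and a random term lookup scans about 64 terms on
   * average.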
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
   */
  @Deprecated
  public void setTermIndexInterval(int interval) {
    ensureOpen();
    config.setTermIndexInterval(interval);
  }

  /** Expert: Return the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
   */
  @Deprecated
  public int getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return config.getTermIndexInterval();
  }
  /**
   * Constructs an IndexWriter for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If <code>create</code>
   * is true, then a new, empty index will be created in
   * <code>d</code>, replacing the index already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *   via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Constructs an IndexWriter for the index in
   * <code>d</code>, first creating it if it does not
   * already exist.  Text will be analyzed with
   * <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *   via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>,
   * first creating it if it does not already exist.  Text
   * will be analyzed with <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths.  Value is in number of terms/tokens
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter on specific commit
   * point, with a custom {@link IndexDeletionPolicy}, for
   * the index in <code>d</code>.  Text will be analyzed
   * with <code>a</code>.
   *
   * <p>This is only meaningful if you've used a {@link
   * IndexDeletionPolicy} in the past that keeps more than
   * just the last commit.
   *
   * <p>This operation is similar to {@link #rollback()},
   * except that method can only rollback what's been done
   * with the current instance of IndexWriter since its last
   * commit, whereas this method can rollback to an
   * arbitrary commit point from the past, assuming the
   * {@link IndexDeletionPolicy} has preserved past
   * commits.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths, value is in number of terms/tokens.  See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
   * @param commit which commit to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a)
        .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Constructs a new IndexWriter per the settings given in <code>conf</code>.
   * Note that the passed in {@link IndexWriterConfig} is
   * privately cloned; if you need to make subsequent "live"
   * changes to the configuration use {@link #getConfig}.
   * <p>
   *
   * @param d
   *          the index directory. The index is either created or appended
   *          according to <code>conf.getOpenMode()</code>.
   * @param conf
   *          the configuration settings according to which IndexWriter should
   *          be initialized.
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws LockObtainFailedException
   *           if another writer has this index open (<code>write.lock</code>
   *           could not be obtained)
   * @throws IOException
   *           if the directory cannot be read/written to, or if it does not
   *           exist and <code>conf.getOpenMode()</code> is
   *           <code>OpenMode.APPEND</code> or if there is any other low-level
   *           IO error
   */
  public IndexWriter(Directory d, IndexWriterConfig conf)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    config = (IndexWriterConfig) conf.clone();
    directory = d;
    analyzer = conf.getAnalyzer();
    infoStream = defaultInfoStream;
    writeLockTimeout = conf.getWriteLockTimeout();
    similarity = conf.getSimilarity();
    mergePolicy = conf.getMergePolicy();
    mergePolicy.setIndexWriter(this);
    mergeScheduler = conf.getMergeScheduler();
    bufferedDeletesStream = new BufferedDeletesStream(messageID);
    bufferedDeletesStream.setInfoStream(infoStream);
    poolReaders = conf.getReaderPooling();

    writeLock = directory.makeLock(WRITE_LOCK_NAME);

    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);

    OpenMode mode = conf.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !IndexReader.indexExists(directory);
    }

    boolean success = false;

    // TODO: we should check whether this index is too old,
    // and throw an IndexFormatTooOldExc up front, here,
    // instead of later when merge, applyDeletes, getReader
    // is attempted.  I think to do this we should store the
    // oldest segment's version in segments_N.
    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }

        // Record that we have a change (zero out all
        // segments) pending:
        changeCount++;
        segmentInfos.changed();
      } else {
        segmentInfos.read(directory);

        IndexCommit commit = conf.getIndexCommit();
        if (commit != null) {
          // Swap out all segments, but, keep metadata in
          // SegmentInfos, like version & generation, to
          // preserve write-once.  This is important if
          // readers are open against the future commit
          // points.
          if (commit.getDirectory() != directory)
            throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
          SegmentInfos oldInfos = new SegmentInfos();
          oldInfos.read(directory, commit.getSegmentsFileName());
          segmentInfos.replace(oldInfos);
          changeCount++;
          segmentInfos.changed();
          if (infoStream != null)
            message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

      docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      synchronized(this) {
        deleter = new IndexFileDeleter(directory,
                                       conf.getIndexDeletionPolicy(),
                                       segmentInfos, infoStream,
                                       this);
      }

      if (deleter.startingCommitDeleted) {
        // Deletion policy deleted the "head" commit point.
        // We have to mark ourself as changed so that if we
        // are closed w/o any further changes we write a new
        // segments_N file.
        changeCount++;
        segmentInfos.changed();
      }

      if (infoStream != null) {
        messageState();
      }

      success = true;

    } finally {
      if (!success) {
        if (infoStream != null) {
          message("init: hit exception on init; releasing write lock");
        }
        try {
          writeLock.release();
        } catch (Throwable t) {
          // don't mask the original exception
        }
        writeLock = null;
      }
    }
  }
  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
    Directory cfsDir = null;
    try {
      if (info.getUseCompoundFile()) {
        cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
      } else {
        cfsDir = directory;
      }
      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
    } finally {
      if (info.getUseCompoundFile() && cfsDir != null) {
        cfsDir.close();
      }
    }
  }

  private FieldInfos getCurrentFieldInfos() throws IOException {
    final FieldInfos fieldInfos;
    if (segmentInfos.size() > 0) {
      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
        // Pre-3.1 index.  In this case we sweep all
        // segments, merging their FieldInfos:
        fieldInfos = new FieldInfos();
        for(SegmentInfo info : segmentInfos) {
          final FieldInfos segFieldInfos = getFieldInfos(info);
          final int fieldCount = segFieldInfos.size();
          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
          }
        }
      } else {
        // Already a 3.1 index; just seed the FieldInfos
        // from the last segment
        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
      }
    } else {
      fieldInfos = new FieldInfos();
    }
    return fieldInfos;
  }
  /**
   * Returns the private {@link IndexWriterConfig}, cloned
   * from the {@link IndexWriterConfig} passed to
   * {@link #IndexWriter(Directory, IndexWriterConfig)}.
   * <p>
   * <b>NOTE:</b> some settings may be changed on the
   * returned {@link IndexWriterConfig}, and will take
   * effect in the current IndexWriter instance. See the
   * javadocs for the specific setters in {@link
   * IndexWriterConfig} for details.
   */
  public IndexWriterConfig getConfig() {
    ensureOpen(false);
    return config;
  }
  /**
   * Expert: set the merge policy used by this writer.
   *
   * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
   */
  @Deprecated
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    mergePolicy.setIndexWriter(this);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
    // Required so config.getMergePolicy returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergePolicy(mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   *
   * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
   */
  @Deprecated
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }
  /**
   * Expert: set the merge scheduler used by this writer.
   * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
   */
  @Deprecated
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
    // Required so config.getMergeScheduler returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMergeScheduler(mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler(MergeScheduler)
   * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
   */
  @Deprecated
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }
  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds.  Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
   */
  @Deprecated
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
   */
  @Deprecated
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }
  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.  This setting refers to the number of running terms,
   * not to the number of different terms.
   *
   * <strong>Note:</strong> this silently truncates large documents, excluding
   * from the index all terms that occur further in the document.  If you know
   * your source documents are large, be sure to set this value high enough to
   * accommodate the expected size.  If you set it to Integer.MAX_VALUE, then the
   * only limit is your memory, but you should anticipate an OutOfMemoryError.
   *
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be
   * indexed for a field.
   *
   * @deprecated use {@link LimitTokenCountAnalyzer} instead. Note that the
   *             behavior slightly changed - the analyzer limits the number of
   *             tokens per token stream created, while this setting limits the
   *             total number of tokens to index. This only matters if you index
   *             many multi-valued fields though.
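   *
   * <p>For example, a hedged sketch of the suggested replacement (the
   * wrapped analyzer and the limit of 10000 are illustrative choices):</p>
   *
   * <pre>
   * Analyzer base = new StandardAnalyzer(Version.LUCENE_31);
   * // Cap each token stream at 10000 tokens:
   * Analyzer limited = new LimitTokenCountAnalyzer(base, 10000);
   * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, limited);
   * </pre>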
   */
  @Deprecated
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   * @deprecated use {@link LimitTokenCountAnalyzer} to limit number of tokens.
   */
  @Deprecated
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }
  /**
   * @deprecated use {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public void setReaderTermsIndexDivisor(int divisor) {
    ensureOpen();
    config.setReaderTermsIndexDivisor(divisor);
    if (infoStream != null) {
      message("setReaderTermsIndexDivisor " + divisor);
    }
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#getReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public int getReaderTermsIndexDivisor() {
    ensureOpen();
    return config.getReaderTermsIndexDivisor();
  }
  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment.  Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents.  Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    pushMaxBufferedDocs();
    if (infoStream != null) {
      message("setMaxBufferedDocs " + maxBufferedDocs);
    }
    // Required so config.getMaxBufferedDocs returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMaxBufferedDocs(maxBufferedDocs);
  }
  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (config.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = config.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
   */
  @Deprecated
  public int getMaxBufferedDocs() {
    ensureOpen();
    return config.getMaxBufferedDocs();
  }
  /** Determines the amount of RAM that may be used for
   * buffering added documents and deletions before they are
   * flushed to the Directory.  Generally for faster
   * indexing performance it's best to flush by RAM usage
   * instead of document count and use as large a RAM buffer
   * as you can.</p>
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents and deletions use this much RAM.
   * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to RAM usage.  Note that if
   * flushing by document count is also enabled, then the
   * flush will be triggered by whichever comes first.</p>
   *
   * <p><b>NOTE</b>: the accounting of RAM usage for pending
   * deletions is only approximate.  Specifically, if you
   * delete by Query, Lucene currently has no way to measure
   * the RAM usage of individual Queries so the accounting
   * will under-estimate and you should compensate by either
   * calling commit() periodically yourself, or by using
   * {@link #setMaxBufferedDeleteTerms} to flush by count
   * instead of RAM usage (each buffered delete Query counts
   * as one).</p>
   *
   * <p><b>NOTE</b>: because IndexWriter uses
   * <code>int</code>s when managing its internal storage,
   * the absolute maximum value for this setting is somewhat
   * less than 2048 MB.  The precise limit depends on
   * various factors, such as how large your documents are,
   * how many fields have norms, etc., so it's best to set
   * this value comfortably under 2048.</p>
   *
   * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
   */
  @Deprecated
  public void setRAMBufferSizeMB(double mb) {
    if (infoStream != null) {
      message("setRAMBufferSizeMB " + mb);
    }
    // Required so config.getRAMBufferSizeMB returns the right value. But this
    // will go away together with the method in 4.0.
    config.setRAMBufferSizeMB(mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
   */
  @Deprecated
  public double getRAMBufferSizeMB() {
    return config.getRAMBufferSizeMB();
  }
  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDeleteTerms
   * is enabled but smaller than 1
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    ensureOpen();
    if (infoStream != null)
      message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
    // Required so config.getMaxBufferedDeleteTerms returns the right value. But
    // this will go away together with the method in 4.0.
    config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
  }

  /**
   * Returns the number of buffered deleted terms that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDeleteTerms
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
   */
  @Deprecated
  public int getMaxBufferedDeleteTerms() {
    ensureOpen();
    return config.getMaxBufferedDeleteTerms();
  }
1606 /** Determines how often segment indices are merged by addDocument(). With
1607 * smaller values, less RAM is used while indexing, and searches on
1608 * unoptimized indices are faster, but indexing speed is slower. With larger
1609 * values, more RAM is used during indexing, and while searches on unoptimized
1610 * indices are slower, indexing is faster. Thus larger values (> 10) are best
1611 * for batch index creation, and smaller values (< 10) for indices that are
1612 * interactively maintained.
1614 * <p>Note that this method is a convenience method: it
1615 * just calls mergePolicy.setMergeFactor as long as
1616 * mergePolicy is an instance of {@link LogMergePolicy}.
1617 * Otherwise an IllegalArgumentException is thrown.</p>
1619 * <p>This must never be less than 2. The default value is 10.
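*
* <p>A sketch of the direct route, assuming the writer was configured
* with a {@link LogMergePolicy}:</p>
*
* <pre>
* LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
* lmp.setMergeFactor(20); // favor batch-indexing throughput over search speed
* </pre>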
1620 * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
1623 public void setMergeFactor(int mergeFactor) {
1624 getLogMergePolicy().setMergeFactor(mergeFactor);
1628 * <p>Returns the number of segments that are merged at
1629 * once and also controls the total number of segments
1630 * allowed to accumulate in the index.</p>
1632 * <p>Note that this method is a convenience method: it
1633 * just calls mergePolicy.getMergeFactor as long as
1634 * mergePolicy is an instance of {@link LogMergePolicy}.
1635 * Otherwise an IllegalArgumentException is thrown.</p>
1637 * @see #setMergeFactor
1638 * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
1641 public int getMergeFactor() {
1642 return getLogMergePolicy().getMergeFactor();
1645 /** If non-null, this will be the default infoStream used
1646 * by a newly instantiated IndexWriter.
1647 * @see #setInfoStream
1649 public static void setDefaultInfoStream(PrintStream infoStream) {
1650 IndexWriter.defaultInfoStream = infoStream;
1654 * Returns the current default infoStream for newly
1655 * instantiated IndexWriters.
1656 * @see #setDefaultInfoStream
1658 public static PrintStream getDefaultInfoStream() {
1659 return IndexWriter.defaultInfoStream;
1662 /** If non-null, information about merges, deletes and a
1663 * message when maxFieldLength is reached will be printed to this.
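*
* <p>For example, to echo diagnostics to stdout while debugging (pass
* <code>null</code> to turn verbosity off again):</p>
*
* <pre>
* writer.setInfoStream(System.out);
* </pre>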
1666 public void setInfoStream(PrintStream infoStream) throws IOException {
1668 this.infoStream = infoStream;
1669 docWriter.setInfoStream(infoStream);
1670 deleter.setInfoStream(infoStream);
1671 bufferedDeletesStream.setInfoStream(infoStream);
1672 if (infoStream != null)
1676 private void messageState() throws IOException {
1677 message("\ndir=" + directory + "\n" +
1678 "index=" + segString() + "\n" +
1679 "version=" + Constants.LUCENE_VERSION + "\n" +
1684 * Returns the current infoStream in use by this writer.
1685 * @see #setInfoStream
1687 public PrintStream getInfoStream() {
 return infoStream;
1692 /** Returns true if verbose output is enabled (i.e., infoStream != null). */
1693 public boolean verbose() {
1694 return infoStream != null;
1698 * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
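*
* <p>A minimal sketch of the replacement API, waiting up to two
* seconds for the lock (the timeout is arbitrary):</p>
*
* <pre>
* IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
* conf.setWriteLockTimeout(2000L);
* </pre>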
1699 * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1700 * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
1703 public void setWriteLockTimeout(long writeLockTimeout) {
1705 this.writeLockTimeout = writeLockTimeout;
1706 // Required so config.getWriteLockTimeout returns the right value. But this
1707 // will go away together with the method in 4.0.
1708 config.setWriteLockTimeout(writeLockTimeout);
1712 * Returns allowed timeout when acquiring the write lock.
1713 * @see #setWriteLockTimeout
1714 * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
1717 public long getWriteLockTimeout() {
1719 return writeLockTimeout;
1723 * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in milliseconds).
1725 * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
1728 public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1729 IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
1733 * Returns default write lock timeout for newly
1734 * instantiated IndexWriters.
1735 * @see #setDefaultWriteLockTimeout
1736 * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
1739 public static long getDefaultWriteLockTimeout() {
1740 return IndexWriterConfig.getDefaultWriteLockTimeout();
1744 * Commits all changes to an index and closes all
1745 * associated files. Note that this may be a costly
1746 * operation, so, try to re-use a single writer instead of
1747 * closing and opening a new one. See {@link #commit()} for
1748 * caveats about write caching done by some IO devices.
1750 * <p> If an Exception is hit during close, eg due to disk
1751 * full or some other reason, then both the on-disk index
1752 * and the internal state of the IndexWriter instance will
1753 * be consistent. However, the close will not be complete
1754 * even though part of it (flushing buffered documents)
1755 * may have succeeded, so the write lock will still be held.
1758 * <p> If you can correct the underlying cause (eg free up
1759 * some disk space) then you can call close() again.
1760 * Failing that, if you want to force the write lock to be
1761 * released (dangerous, because you may then lose buffered
1762 * docs in the IndexWriter instance) then you can do
1763 * something like this:</p>
*
* <pre>
* try {
*   writer.close();
* } finally {
*   if (IndexWriter.isLocked(directory)) {
*     IndexWriter.unlock(directory);
*   }
* }
* </pre>
1775 * after which, you must be certain not to use the writer
1776 * instance anymore.</p>
1778 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1779 * you should immediately close the writer, again. See <a
1780 * href="#OOME">above</a> for details.</p>
1782 * @throws CorruptIndexException if the index is corrupt
1783 * @throws IOException if there is a low-level IO error
1785 public void close() throws CorruptIndexException, IOException {
1790 * Closes the index with or without waiting for currently
1791 * running merges to finish. This is only meaningful when
1792 * using a MergeScheduler that runs merges in background
1795 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1796 * you should immediately close the writer, again. See <a
1797 * href="#OOME">above</a> for details.</p>
1799 * <p><b>NOTE</b>: it is dangerous to always call
1800 * close(false), especially when IndexWriter is not open
1801 * for very long, because this can result in "merge
1802 * starvation" whereby long merges will never have a
1803 * chance to finish. This will cause too many segments in
1804 * your index over time.</p>
1806 * @param waitForMerges if true, this call will block
1807 * until all merges complete; else, it will ask all
1808 * running merges to abort, wait until those merges have
1809 * finished (which should be at most a few seconds), and then return.
1812 public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
1814 // Ensure that only one thread actually gets to do the closing:
1815 if (shouldClose()) {
1816 // If any methods have hit OutOfMemoryError, then abort
1817 // on close, in case the internal state of IndexWriter
1818 // or DocumentsWriter is corrupt
1822 closeInternal(waitForMerges);
1826 // Returns true if this thread should attempt to close, or
1827 // false if IndexWriter is now closed; else, waits until
1828 // another thread finishes closing
1829 synchronized private boolean shouldClose() {
1836 // Another thread is presently trying to close;
1837 // wait until it finishes one way (closes
1838 // successfully) or another (fails to close)
1846 private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
1849 if (infoStream != null) {
1850 message("now flush at close waitForMerges=" + waitForMerges);
1855 // Only allow a new merge to be triggered if we are
1856 // going to wait for merges:
1858 flush(waitForMerges, true);
1862 // Give merge scheduler last chance to run, in case
1863 // any pending merges are waiting:
1864 mergeScheduler.merge(this);
1866 mergePolicy.close();
1868 synchronized(this) {
1869 finishMerges(waitForMerges);
1873 mergeScheduler.close();
1875 if (infoStream != null)
1876 message("now call final commit()");
1879 commitInternal(null);
1882 if (infoStream != null)
1883 message("at close: " + segString());
1885 synchronized(this) {
1891 if (writeLock != null) {
1892 writeLock.release(); // release write lock
1895 synchronized(this) {
1898 } catch (OutOfMemoryError oom) {
1899 handleOOM(oom, "closeInternal");
1901 synchronized(this) {
1905 if (infoStream != null)
1906 message("hit exception while closing");
1912 /** Returns the Directory used by this index. */
1913 public Directory getDirectory() {
1914 // Pass false because the flush during closing calls getDirectory
 ensureOpen(false);
 return directory;
1919 /** Returns the analyzer used by this index. */
1920 public Analyzer getAnalyzer() {
1925 /** Returns total number of docs in this index, including
1926 * docs not yet flushed (still in the RAM buffer),
1927 * not counting deletions.
1929 public synchronized int maxDoc() {
1932 if (docWriter != null)
1933 count = docWriter.getNumDocs();
1937 count += segmentInfos.totalDocCount();
1941 /** Returns total number of docs in this index, including
1942 * docs not yet flushed (still in the RAM buffer), and
1943 * including deletions. <b>NOTE:</b> buffered deletions
1944 * are not counted. If you really need these to be
1945 * counted you should call {@link #commit()} first.
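*
* <p>A sketch of the distinction, assuming <code>writer</code> holds
* ten committed documents:</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "7")); // buffered, not yet flushed
* int max = writer.maxDoc();  // 10: deletions are never subtracted
* int num = writer.numDocs(); // still 10: the deletion is only buffered
* writer.commit();            // flushes and applies the deletion
* num = writer.numDocs();     // now 9
* </pre>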
1947 public synchronized int numDocs() throws IOException {
1950 if (docWriter != null)
1951 count = docWriter.getNumDocs();
1955 for (final SegmentInfo info : segmentInfos) {
1956 count += info.docCount - numDeletedDocs(info);
1961 public synchronized boolean hasDeletions() throws IOException {
1963 if (bufferedDeletesStream.any()) {
1966 if (docWriter.anyDeletions()) {
1969 for (final SegmentInfo info : segmentInfos) {
1970 if (info.hasDeletions()) {
1978 * The maximum number of terms that will be indexed for a single field in a
1979 * document. This limits the amount of memory required for indexing, so that
1980 * collections with very large files will not crash the indexing process by
1981 * running out of memory.<p/>
1982 * Note that this effectively truncates large documents, excluding from the
1983 * index terms that occur further in the document. If you know your source
1984 * documents are large, be sure to set this value high enough to accommodate
1985 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1986 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1987 * By default, no more than 10,000 terms will be indexed for a field.
1989 * @see MaxFieldLength
1990 * @deprecated remove in 4.0
1993 private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
1996 * Adds a document to this index. If the document contains more than
1997 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2000 * <p> Note that if an Exception is hit (for example disk full)
2001 * then the index will be consistent, but this document
2002 * may not have been added. Furthermore, it's possible
2003 * the index will have one segment in non-compound format
2004 * even when using compound files (when a merge has
2005 * partially succeeded).</p>
2007 * <p> This method periodically flushes pending documents
2008 * to the Directory (see <a href="#flush">above</a>), and
2009 * also periodically triggers segment merges in the index
2010 * according to the {@link MergePolicy} in use.</p>
2012 * <p>Merges temporarily consume space in the
2013 * directory. The amount of space required is up to 1X the
2014 * size of all segments being merged, when no
2015 * readers/searchers are open against the index, and up to
2016 * 2X the size of all segments being merged when
2017 * readers/searchers are open against the index (see
2018 * {@link #forceMerge(int)} for details). The sequence of
2019 * primitive merge operations performed is governed by the merge policy.
2022 * <p>Note that each term in the document can be no longer
2023 * than 16383 characters, otherwise an
2024 * IllegalArgumentException will be thrown.</p>
2026 * <p>Note that it's possible to create an invalid Unicode
2027 * string in java if a UTF16 surrogate pair is malformed.
2028 * In this case, the invalid characters are silently
2029 * replaced with the Unicode replacement character U+FFFD.</p>
2032 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2033 * you should immediately close the writer. See <a
2034 * href="#OOME">above</a> for details.</p>
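*
* <p>A minimal sketch (field names and flags are illustrative):</p>
*
* <pre>
* Document doc = new Document();
* doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
* doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
* writer.addDocument(doc);
* </pre>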
2036 * @throws CorruptIndexException if the index is corrupt
2037 * @throws IOException if there is a low-level IO error
2039 public void addDocument(Document doc) throws CorruptIndexException, IOException {
2040 addDocument(doc, analyzer);
2044 * Adds a document to this index, using the provided analyzer instead of the
2045 * value of {@link #getAnalyzer()}. If the document contains more than
2046 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2049 * <p>See {@link #addDocument(Document)} for details on
2050 * index and IndexWriter state after an Exception, and
2051 * flushing/merging temporary free space requirements.</p>
2053 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2054 * you should immediately close the writer. See <a
2055 * href="#OOME">above</a> for details.</p>
2057 * @throws CorruptIndexException if the index is corrupt
2058 * @throws IOException if there is a low-level IO error
2060 public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
2062 boolean doFlush = false;
2063 boolean success = false;
2066 doFlush = docWriter.updateDocument(doc, analyzer, null);
2069 if (!success && infoStream != null)
2070 message("hit exception adding document");
2074 } catch (OutOfMemoryError oom) {
2075 handleOOM(oom, "addDocument");
2080 * Atomically adds a block of documents with sequentially
2081 * assigned document IDs, such that an external reader
2082 * will see all or none of the documents.
2084 * <p><b>WARNING</b>: the index does not currently record
2085 * which documents were added as a block. Today this is
2086 * fine, because merging will preserve the block (as long
2087 * as none of them were deleted). But it's possible in the
2088 * future that Lucene may more aggressively re-order
2089 * documents (for example, perhaps to obtain better index
2090 * compression), in which case you may need to fully
2091 * re-index your documents at that time.
2093 * <p>See {@link #addDocument(Document)} for details on
2094 * index and IndexWriter state after an Exception, and
2095 * flushing/merging temporary free space requirements.</p>
2097 * <p><b>NOTE</b>: tools that do offline splitting of an index
2098 * (for example, IndexSplitter in contrib) or
2099 * re-sorting of documents (for example, IndexSorter in
2100 * contrib) are not aware of these atomically added documents
2101 * and will likely break them up. Use such tools at your own risk!
2104 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2105 * you should immediately close the writer. See <a
2106 * href="#OOME">above</a> for details.</p>
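*
* <p>A sketch of adding a parent document and its children as one
* block (<code>childDoc1</code>, <code>childDoc2</code> and
* <code>parentDoc</code> are Documents built elsewhere; putting the
* parent last is the convention block-join search expects):</p>
*
* <pre>
* List<Document> block = new ArrayList<Document>();
* block.add(childDoc1);
* block.add(childDoc2);
* block.add(parentDoc); // parent last
* writer.addDocuments(block);
* </pre>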
2108 * @throws CorruptIndexException if the index is corrupt
2109 * @throws IOException if there is a low-level IO error
2111 * @lucene.experimental
2113 public void addDocuments(Collection<Document> docs) throws CorruptIndexException, IOException {
2114 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2115 addDocuments(docs, analyzer);
2119 * Atomically adds a block of documents, analyzed using the
2120 * provided analyzer, with sequentially assigned document
2121 * IDs, such that an external reader will see all or none of the documents.
2124 * @throws CorruptIndexException if the index is corrupt
2125 * @throws IOException if there is a low-level IO error
2127 * @lucene.experimental
2129 public void addDocuments(Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2130 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2131 updateDocuments(null, docs, analyzer);
2135 * Atomically deletes documents matching the provided
2136 * delTerm and adds a block of documents with sequentially
2137 * assigned document IDs, such that an external reader
2138 * will see all or none of the documents.
2140 * See {@link #addDocuments(Collection)}.
2142 * @throws CorruptIndexException if the index is corrupt
2143 * @throws IOException if there is a low-level IO error
2145 * @lucene.experimental
2147 public void updateDocuments(Term delTerm, Collection<Document> docs) throws CorruptIndexException, IOException {
2148 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2149 updateDocuments(delTerm, docs, analyzer);
2153 * Atomically deletes documents matching the provided
2154 * delTerm and adds a block of documents, analyzed using
2155 * the provided analyzer, with sequentially
2156 * assigned document IDs, such that an external reader
2157 * will see all or none of the documents.
2159 * See {@link #addDocuments(Collection)}.
2161 * @throws CorruptIndexException if the index is corrupt
2162 * @throws IOException if there is a low-level IO error
2164 * @lucene.experimental
2166 public void updateDocuments(Term delTerm, Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2167 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2170 boolean success = false;
2171 boolean doFlush = false;
2173 doFlush = docWriter.updateDocuments(docs, analyzer, delTerm);
2176 if (!success && infoStream != null) {
2177 message("hit exception updating document");
2183 } catch (OutOfMemoryError oom) {
2184 handleOOM(oom, "updateDocuments");
2189 * Deletes the document(s) containing <code>term</code>.
2191 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2192 * you should immediately close the writer. See <a
2193 * href="#OOME">above</a> for details.</p>
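*
* <p>For example (a sketch, keyed on a unique id field):</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "42"));
* </pre>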
2195 * @param term the term to identify the documents to be deleted
2196 * @throws CorruptIndexException if the index is corrupt
2197 * @throws IOException if there is a low-level IO error
2199 public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
2202 if (docWriter.deleteTerm(term, false)) {
2205 } catch (OutOfMemoryError oom) {
2206 handleOOM(oom, "deleteDocuments(Term)");
2211 * Deletes the document(s) containing any of the
2212 * terms. All deletes are flushed at the same time.
2214 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2215 * you should immediately close the writer. See <a
2216 * href="#OOME">above</a> for details.</p>
2218 * @param terms array of terms to identify the documents
2220 * @throws CorruptIndexException if the index is corrupt
2221 * @throws IOException if there is a low-level IO error
2223 public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
2226 if (docWriter.deleteTerms(terms)) {
2229 } catch (OutOfMemoryError oom) {
2230 handleOOM(oom, "deleteDocuments(Term..)");
2235 * Deletes the document(s) matching the provided query.
2237 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2238 * you should immediately close the writer. See <a
2239 * href="#OOME">above</a> for details.</p>
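*
* <p>For example (a sketch), to delete every document tagged "old":</p>
*
* <pre>
* writer.deleteDocuments(new TermQuery(new Term("tag", "old")));
* </pre>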
2241 * @param query the query to identify the documents to be deleted
2242 * @throws CorruptIndexException if the index is corrupt
2243 * @throws IOException if there is a low-level IO error
2245 public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
2248 if (docWriter.deleteQuery(query)) {
2251 } catch (OutOfMemoryError oom) {
2252 handleOOM(oom, "deleteDocuments(Query)");
2257 * Deletes the document(s) matching any of the provided queries.
2258 * All deletes are flushed at the same time.
2260 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2261 * you should immediately close the writer. See <a
2262 * href="#OOME">above</a> for details.</p>
2264 * @param queries array of queries to identify the documents
2266 * @throws CorruptIndexException if the index is corrupt
2267 * @throws IOException if there is a low-level IO error
2269 public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
2272 if (docWriter.deleteQueries(queries)) {
2275 } catch (OutOfMemoryError oom) {
2276 handleOOM(oom, "deleteDocuments(Query..)");
2281 * Updates a document by first deleting the document(s)
2282 * containing <code>term</code> and then adding the new
2283 * document. The delete and then add are atomic as seen
2284 * by a reader on the same index (flush may happen only after the add).
2287 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2288 * you should immediately close the writer. See <a
2289 * href="#OOME">above</a> for details.</p>
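*
* <p>A minimal sketch, keyed on a unique id field:</p>
*
* <pre>
* Document doc = new Document();
* doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
* // ... add the remaining fields ...
* writer.updateDocument(new Term("id", "42"), doc);
* </pre>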
2291 * @param term the term to identify the document(s) to be deleted
2293 * @param doc the document to be added
2294 * @throws CorruptIndexException if the index is corrupt
2295 * @throws IOException if there is a low-level IO error
2297 public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
2299 updateDocument(term, doc, getAnalyzer());
2303 * Updates a document by first deleting the document(s)
2304 * containing <code>term</code> and then adding the new
2305 * document. The delete and then add are atomic as seen
2306 * by a reader on the same index (flush may happen only after the add).
2309 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2310 * you should immediately close the writer. See <a
2311 * href="#OOME">above</a> for details.</p>
2313 * @param term the term to identify the document(s) to be deleted
2315 * @param doc the document to be added
2316 * @param analyzer the analyzer to use when analyzing the document
2317 * @throws CorruptIndexException if the index is corrupt
2318 * @throws IOException if there is a low-level IO error
2320 public void updateDocument(Term term, Document doc, Analyzer analyzer)
2321 throws CorruptIndexException, IOException {
2324 boolean doFlush = false;
2325 boolean success = false;
2327 doFlush = docWriter.updateDocument(doc, analyzer, term);
2330 if (!success && infoStream != null)
2331 message("hit exception updating document");
2336 } catch (OutOfMemoryError oom) {
2337 handleOOM(oom, "updateDocument");
2342 final synchronized int getSegmentCount(){
2343 return segmentInfos.size();
2347 final synchronized int getNumBufferedDocuments(){
2348 return docWriter.getNumDocs();
2352 final synchronized int getDocCount(int i) {
2353 if (i >= 0 && i < segmentInfos.size()) {
2354 return segmentInfos.info(i).docCount;
2361 final int getFlushCount() {
2362 return flushCount.get();
2366 final int getFlushDeletesCount() {
2367 return flushDeletesCount.get();
2370 final String newSegmentName() {
2371 // Cannot synchronize on IndexWriter because that causes deadlock
2373 synchronized(segmentInfos) {
2374 // Important to increment changeCount so that the
2375 // segmentInfos is written on close. Otherwise we
2376 // could close, re-open and re-return the same segment
2377 // name that was previously returned which can cause
2378 // problems at least with ConcurrentMergeScheduler.
2380 segmentInfos.changed();
2381 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
2385 /** If non-null, information about merges will be printed to this.
2387 private PrintStream infoStream;
2388 private static PrintStream defaultInfoStream;
2390 /** This method has been deprecated, as it is horribly
2391 * inefficient and very rarely justified. Lucene's
2392 * multi-segment search performance has improved over
2393 * time, and the default TieredMergePolicy now targets
2394 * segments with deletions.
2398 public void optimize() throws CorruptIndexException, IOException {
2399 forceMerge(1, true);
2402 /** This method has been deprecated, as it is horribly
2403 * inefficient and very rarely justified. Lucene's
2404 * multi-segment search performance has improved over
2405 * time, and the default TieredMergePolicy now targets
2406 * segments with deletions.
2410 public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
2411 forceMerge(maxNumSegments, true);
2414 /** This method has been deprecated, as it is horribly
2415 * inefficient and very rarely justified. Lucene's
2416 * multi-segment search performance has improved over
2417 * time, and the default TieredMergePolicy now targets
2418 * segments with deletions.
2422 public void optimize(boolean doWait) throws CorruptIndexException, IOException {
2423 forceMerge(1, doWait);
2427 * Forces merge policy to merge segments until there are <=
2428 * maxNumSegments. The actual merges to be
2429 * executed are determined by the {@link MergePolicy}.
2431 * <p>This is a horribly costly operation, especially when
2432 * you pass a small {@code maxNumSegments}; usually you
2433 * should only call this if the index is static (will no
2434 * longer be changed).</p>
2436 * <p>Note that this requires up to 2X the index size free
2437 * space in your Directory (3X if you're using compound
2438 * file format). For example, if your index size is 10 MB
2439 * then you need up to 20 MB free for this to complete (30
2440 * MB if you're using compound file format). Also,
2441 * it's best to call {@link #commit()} afterwards,
2442 * to allow IndexWriter to free up disk space.</p>
2444 * <p>If some but not all readers re-open while merging
2445 * is underway, this will cause > 2X temporary
2446 * space to be consumed as those new readers will then
2447 * hold open the temporary segments at that time. It is
2448 * best not to re-open readers while merging is running.</p>
2450 * <p>The actual temporary usage could be much less than
2451 * these figures (it depends on many factors).</p>
2453 * <p>In general, once this completes, the total size of the
2454 * index will be less than the size of the starting index.
2455 * It could be quite a bit smaller (if there were many
2456 * pending deletes) or just slightly smaller.</p>
2458 * <p>If an Exception is hit, for example
2459 * due to disk full, the index will not be corrupt and no
2460 * documents will have been lost. However, it may have
2461 * been partially merged (some segments were merged but
2462 * not all), and it's possible that one of the segments in
2463 * the index will be in non-compound format even when
2464 * using compound file format. This will occur when the
2465 * Exception is hit during conversion of the segment into
2466 * compound format.</p>
2468 * <p>This call will merge those segments present in
2469 * the index when the call started. If other threads are
2470 * still adding documents and flushing segments, those
2471 * newly created segments will not be merged unless you
2472 * call forceMerge again.</p>
2474 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2475 * you should immediately close the writer. See <a
2476 * href="#OOME">above</a> for details.</p>
2478 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2479 * with <tt>false</tt>, which aborts all running merges,
2480 * then any thread still running this method might hit a
2481 * {@link MergePolicy.MergeAbortedException}.
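*
* <p>Typical usage on an index that will no longer change, as a
* sketch:</p>
*
* <pre>
* writer.forceMerge(1); // merge down to a single segment
* writer.commit();      // let IndexWriter free the merged-away files
* </pre>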
2483 * @throws CorruptIndexException if the index is corrupt
2484 * @throws IOException if there is a low-level IO error
2485 * @see MergePolicy#findMerges
2487 * @param maxNumSegments maximum number of segments left
2488 * in the index after merging finishes
2490 public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException {
2491 forceMerge(maxNumSegments, true);
2494 /** Just like {@link #forceMerge(int)}, except you can
2495 * specify whether the call should block until
2496 * all merging completes. This is only meaningful with a
2497 * {@link MergeScheduler} that is able to run merges in
2498 * background threads.
2500 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2501 * you should immediately close the writer. See <a
2502 * href="#OOME">above</a> for details.</p>
2504 public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
2507 if (maxNumSegments < 1)
2508 throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2510 if (infoStream != null) {
2511 message("forceMerge: index now " + segString());
2512 message("now flush at forceMerge");
2517 synchronized(this) {
2518 resetMergeExceptions();
2519 segmentsToMerge.clear();
2520 for(SegmentInfo info : segmentInfos) {
2521 segmentsToMerge.put(info, Boolean.TRUE);
2523 mergeMaxNumSegments = maxNumSegments;
2525 // Now mark all pending & running merges as isMaxNumSegments:
2526 for(final MergePolicy.OneMerge merge : pendingMerges) {
2527 merge.maxNumSegments = maxNumSegments;
2528 segmentsToMerge.put(merge.info, Boolean.TRUE);
2531 for (final MergePolicy.OneMerge merge : runningMerges) {
2532 merge.maxNumSegments = maxNumSegments;
2533 segmentsToMerge.put(merge.info, Boolean.TRUE);
2537 maybeMerge(maxNumSegments);
2540 synchronized(this) {
2544 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
2547 if (mergeExceptions.size() > 0) {
2548 // Forward any exceptions in background merge
2549 // threads to the current thread:
2550 final int size = mergeExceptions.size();
2551 for(int i=0;i<size;i++) {
2552 final MergePolicy.OneMerge merge = mergeExceptions.get(i);
2553 if (merge.maxNumSegments != -1) {
2554 IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
2555 final Throwable t = merge.getException();
2563 if (maxNumSegmentsMergesPending())
2570 // If close is called while we are still
2571 // running, throw an exception so the calling
2572 // thread will know merging did not complete
2577 // NOTE: in the ConcurrentMergeScheduler case, when
2578 // doWait is false, we can return immediately while
2579 // background threads accomplish the merging
2582 /** Returns true if any merges in pendingMerges or
2583 * runningMerges are maxNumSegments merges. */
2584 private synchronized boolean maxNumSegmentsMergesPending() {
2585 for (final MergePolicy.OneMerge merge : pendingMerges) {
2586 if (merge.maxNumSegments != -1)
2590 for (final MergePolicy.OneMerge merge : runningMerges) {
2591 if (merge.maxNumSegments != -1)
2598 /** This method has been deprecated, as it is horribly
2599 * inefficient and very rarely justified. Lucene's
2600 * multi-segment search performance has improved over
2601 * time, and the default TieredMergePolicy now targets
2602 * segments with deletions.
2606 public void expungeDeletes(boolean doWait) throws CorruptIndexException, IOException {
2607 forceMergeDeletes(doWait);
2610 /** Just like {@link #forceMergeDeletes()}, except you can
2611 * specify whether the call should block until the
2612 * operation completes. This is only meaningful with a
2613 * {@link MergeScheduler} that is able to run merges in
2614 * background threads.
2616 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2617 * you should immediately close the writer. See <a
2618 * href="#OOME">above</a> for details.</p>
2620 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2621 * with <tt>false</tt>, which aborts all running merges,
2622 * then any thread still running this method might hit a
2623 * {@link MergePolicy.MergeAbortedException}.
2625 public void forceMergeDeletes(boolean doWait)
2626 throws CorruptIndexException, IOException {
2631 if (infoStream != null)
2632 message("forceMergeDeletes: index now " + segString());
2634 MergePolicy.MergeSpecification spec;
2636 synchronized(this) {
2637 spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
2639 final int numMerges = spec.merges.size();
2640 for(int i=0;i<numMerges;i++)
2641 registerMerge(spec.merges.get(i));
2645 mergeScheduler.merge(this);
2647 if (spec != null && doWait) {
2648 final int numMerges = spec.merges.size();
2649 synchronized(this) {
2650 boolean running = true;
2654 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
2657 // Check each merge that MergePolicy asked us to
2658 // do, to see if any of them are still running and
2659 // if any of them have hit an exception.
2661 for(int i=0;i<numMerges;i++) {
2662 final MergePolicy.OneMerge merge = spec.merges.get(i);
2663 if (pendingMerges.contains(merge) || runningMerges.contains(merge))
2665 Throwable t = merge.getException();
2667 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
2673 // If any of our merges are still running, wait:
2680 // NOTE: in the ConcurrentMergeScheduler case, when
2681 // doWait is false, we can return immediately while
2682 // background threads accomplish the merging
2686 /** This method has been deprecated, as it is horribly
2687 * inefficient and very rarely justified. Lucene's
2688 * multi-segment search performance has improved over
2689 * time, and the default TieredMergePolicy now targets
2690 * segments with deletions.
2694 public void expungeDeletes() throws CorruptIndexException, IOException {
2695 forceMergeDeletes();
2699 * Forces merging of all segments that have deleted
2700 * documents. The actual merges to be executed are
2701 * determined by the {@link MergePolicy}. For example,
2702 * the default {@link TieredMergePolicy} will only
2703 * pick a segment if the percentage of
2704 * deleted docs is over 10%.
2706 * <p>This is often a horribly costly operation; rarely
2707 * is it warranted.</p>
2710 * <p>To see how many deletions you have pending in your index, call
2711 * {@link IndexReader#numDeletedDocs}.</p>
2713 * <p><b>NOTE</b>: this method first flushes a new
2714 * segment (if there are indexed documents), and applies
2715 * all buffered deletes.
2717 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2718 * you should immediately close the writer. See <a
2719 * href="#OOME">above</a> for details.</p>
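*
* <p>For example (a sketch), to reclaim space after bulk deletions:</p>
*
* <pre>
* writer.deleteDocuments(new Term("status", "expired")); // illustrative term
* writer.forceMergeDeletes(); // blocks until the merges finish
* </pre>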
2721 public void forceMergeDeletes() throws CorruptIndexException, IOException {
2722 forceMergeDeletes(true);
2726 * Expert: asks the mergePolicy whether any merges are
2727 * necessary now and if so, runs the requested merges and
2728 * then iterate (test again if merges are needed) until no
2729 * more merges are returned by the mergePolicy.
2731 * Explicit calls to maybeMerge() are usually not
2732 * necessary. The most common case is when merge policy
2733 * parameters have changed.
2735 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2736 * you should immediately close the writer. See <a
2737 * href="#OOME">above</a> for details.</p>
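*
* <p>For example (a sketch), after changing merge policy parameters
* (assumes a {@link LogMergePolicy} is in use):</p>
*
* <pre>
* ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(5);
* writer.maybeMerge(); // pick up the new parameters right away
* </pre>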
2739 public final void maybeMerge() throws CorruptIndexException, IOException {
2743 private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
2745 updatePendingMerges(maxNumSegments);
2746 mergeScheduler.merge(this);
2749 private synchronized void updatePendingMerges(int maxNumSegments)
2750 throws CorruptIndexException, IOException {
2751 assert maxNumSegments == -1 || maxNumSegments > 0;
2757 // Do not start new merges if we've hit OOME
2762 final MergePolicy.MergeSpecification spec;
2763 if (maxNumSegments != -1) {
2764 spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
2766 final int numMerges = spec.merges.size();
2767 for(int i=0;i<numMerges;i++) {
2768 final MergePolicy.OneMerge merge = spec.merges.get(i);
2769 merge.maxNumSegments = maxNumSegments;
2774 spec = mergePolicy.findMerges(segmentInfos);
2778 final int numMerges = spec.merges.size();
2779 for(int i=0;i<numMerges;i++) {
2780 registerMerge(spec.merges.get(i));
2785 /** Expert: to be used by a {@link MergePolicy} to avoid
2786 * selecting merges for segments already being merged.
2787 * The returned collection is not cloned, and thus is
2788 * only safe to access if you hold IndexWriter's lock
2789 * (which you do when IndexWriter invokes the
2792 * <p>Do not alter the returned collection! */
2793 public synchronized Collection<SegmentInfo> getMergingSegments() {
2794 return mergingSegments;
2797 /** Expert: the {@link MergeScheduler} calls this method
2798 * to retrieve the next merge requested by the MergePolicy.
2801 * @lucene.experimental
2803 public synchronized MergePolicy.OneMerge getNextMerge() {
2804 if (pendingMerges.size() == 0)
 return null;
2807 // Advance the merge from pending to running
2808 MergePolicy.OneMerge merge = pendingMerges.removeFirst();
2809 runningMerges.add(merge);
2815 * Close the <code>IndexWriter</code> without committing
2816 * any changes that have occurred since the last commit
2817 * (or since it was opened, if commit hasn't been called).
2818 * This removes any temporary files that had been created,
2819 * after which the state of the index will be the same as
2820 * it was when commit() was last called or when this
2821 * writer was first opened. This also clears a previous
2822 * call to {@link #prepareCommit}.
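*
* <p>A minimal sketch; note that the writer is closed once this
* returns:</p>
*
* <pre>
* writer.rollback(); // discard everything since the last commit
* </pre>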
2823 * @throws IOException if there is a low-level IO error
2825 public void rollback() throws IOException {
2828 // Ensure that only one thread actually gets to do the closing:
2833 private void rollbackInternal() throws IOException {
2835 boolean success = false;
2837 if (infoStream != null ) {
2838 message("rollback");
2842 synchronized(this) {
2843 finishMerges(false);
2847 if (infoStream != null ) {
2848 message("rollback: done finish merges");
2851 // Must pre-close these two, in case they increment
2852 // changeCount so that we can then set it to false
2853 // before calling closeInternal
2854 mergePolicy.close();
2855 mergeScheduler.close();
2857 bufferedDeletesStream.clear();
2859 synchronized(this) {
2861 if (pendingCommit != null) {
2862 pendingCommit.rollbackCommit(directory);
2863 deleter.decRef(pendingCommit);
2864 pendingCommit = null;
2868 // Keep the same segmentInfos instance but replace all
2869 // of its SegmentInfo instances. This is so the next
2870 // attempt to commit using this instance of IndexWriter
2871 // will always write to a new generation ("write once").
2873 segmentInfos.rollbackSegmentInfos(rollbackSegments);
2874 if (infoStream != null ) {
2875 message("rollback: infos=" + segString(segmentInfos));
2880 assert testPoint("rollback before checkpoint");
2882 // Ask deleter to locate unreferenced files & remove
2884 deleter.checkpoint(segmentInfos, false);
2888 // Don't bother saving any changes in our segmentInfos
2889 readerPool.clear(null);
2891 lastCommitChangeCount = changeCount;
2894 } catch (OutOfMemoryError oom) {
2895 handleOOM(oom, "rollbackInternal");
2897 synchronized(this) {
2901 if (infoStream != null)
2902 message("hit exception during rollback");
2907 closeInternal(false);
2911 * Delete all documents in the index.
2913 * <p>This method will drop all buffered documents and will
2914 * remove all segments from the index. This change will not be
2915 * visible until a {@link #commit()} has been called. This method
2916 * can be rolled back using {@link #rollback()}.</p>
2918 * <p>NOTE: this method is much faster than using deleteDocuments(new MatchAllDocsQuery()).</p>
2920 * <p>NOTE: this method will forcefully abort all merges
2921 * in progress. If other threads are running {@link
2922 * #forceMerge}, {@link #addIndexes(IndexReader[])} or
2923 * {@link #forceMergeDeletes} methods, they may receive
2924 * {@link MergePolicy.MergeAbortedException}s.
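*
* <p>A sketch of rebuilding an index in place:</p>
*
* <pre>
* writer.deleteAll(); // drops all docs; not visible to readers yet
* // ... re-add the documents ...
* writer.commit();    // make the rebuilt index visible
* </pre>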
2926 public synchronized void deleteAll() throws IOException {
2930 // Abort any running merges
2931 finishMerges(false);
2933 // Remove any buffered docs
2936 // Remove all segments
2937 segmentInfos.clear();
2939 // Ask deleter to locate unreferenced files & remove them:
2940 deleter.checkpoint(segmentInfos, false);
2943 // Don't bother saving any changes in our segmentInfos
2944 readerPool.dropAll();
2946 // Mark that the index has changed
2948 segmentInfos.changed();
2949 } catch (OutOfMemoryError oom) {
2950 handleOOM(oom, "deleteAll");
2952 if (infoStream != null) {
2953 message("hit exception during deleteAll");
2958 private synchronized void finishMerges(boolean waitForMerges) throws IOException {
2959 if (!waitForMerges) {
2963 // Abort all pending & running merges:
2964 for (final MergePolicy.OneMerge merge : pendingMerges) {
2965 if (infoStream != null)
2966 message("now abort pending merge " + merge.segString(directory));
2970 pendingMerges.clear();
2972 for (final MergePolicy.OneMerge merge : runningMerges) {
2973 if (infoStream != null)
2974 message("now abort running merge " + merge.segString(directory));
2978 // These merges periodically check whether they have
2979 // been aborted, and stop if so. We wait here to make
2980 // sure they all stop. It should not take very long
2981 // because the merge threads periodically check if
2982 // they are aborted.
2983 while(runningMerges.size() > 0) {
2984 if (infoStream != null)
2985 message("now wait for " + runningMerges.size() + " running merge to abort");
2992 assert 0 == mergingSegments.size();
2994 if (infoStream != null)
2995 message("all running merges have aborted");
2998 // waitForMerges() will ensure any running addIndexes finishes.
2999 // It's fine if a new one attempts to start, because our
3000 // caller above will see that we are in the
3001 // process of closing, and will throw an
3002 // AlreadyClosedException.
3008 * Wait for any currently outstanding merges to finish.
3010 * <p>It is guaranteed that any merges started prior to calling this method
3011 * will have completed once this method completes.</p>
3013 public synchronized void waitForMerges() {
3015 if (infoStream != null) {
3016 message("waitForMerges");
3018 while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
3023 assert 0 == mergingSegments.size();
3025 if (infoStream != null) {
3026 message("waitForMerges done");
3031 * Called whenever the SegmentInfos has been updated and
3032 * the index files referenced exist (correctly) in the
3035 synchronized void checkpoint() throws IOException {
3037 segmentInfos.changed();
3038 deleter.checkpoint(segmentInfos, false);
3041 private synchronized void resetMergeExceptions() {
3042 mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
3046 private void noDupDirs(Directory... dirs) {
3047 HashSet<Directory> dups = new HashSet<Directory>();
3048 for (Directory dir : dirs) {
3049 if (dups.contains(dir))
3050 throw new IllegalArgumentException("Directory " + dir + " appears more than once");
3051 if (dir == directory)
3052 throw new IllegalArgumentException("Cannot add directory to itself");
3058 * @deprecated use {@link #addIndexes(Directory...)} instead
3061 public void addIndexesNoOptimize(Directory... dirs)
3062 throws CorruptIndexException, IOException {
3067 * Adds all segments from an array of indexes into this index.
3069 * <p>This may be used to parallelize batch indexing. A large document
3070 * collection can be broken into sub-collections. Each sub-collection can be
3071 * indexed in parallel, on a different thread, process or machine. The
3072 * complete index can then be created by merging sub-collection indexes
3076 * <b>NOTE:</b> the index in each {@link Directory} must not be
3077 * changed (opened by a writer) while this method is
3078 * running. This method does not acquire a write lock in
3079 * each input Directory, so it is up to the caller to enforce this.
3082 * <p>This method is transactional in how Exceptions are
3083 * handled: it does not commit a new segments_N file until
3084 * all indexes are added. This means if an Exception
3085 * occurs (for example disk full), then either no indexes
3086 * will have been added or they all will have been.
3088 * <p>Note that this requires temporary free space in the
3089 * {@link Directory} up to 2X the sum of all input indexes
3090 * (including the starting index). If readers/searchers
3091 * are open against the starting index, then temporary
3092 * free space required will be higher by the size of the
3093 * starting index (see {@link #forceMerge(int)} for details).
3096 * <b>NOTE:</b> this method only copies the segments of the incoming indexes
3097 * and does not merge them. Therefore deleted documents are not removed and
3098 * the new segments are not merged with the existing ones. Also, if the merge
3099 * policy allows compound files, then any segment that is not compound is
3100 * converted to compound format. However, if the segment is already compound, it is copied as-is
3101 * even if the merge policy does not allow compound files.
3104 * <p>This requires this index not be among those to be added.
3107 * <b>NOTE</b>: if this method hits an OutOfMemoryError
3108 * you should immediately close the writer. See <a
3109 * href="#OOME">above</a> for details.
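*
* <p>A sketch of merging two sub-indexes built elsewhere (the paths
* are illustrative):</p>
*
* <pre>
* writer.addIndexes(FSDirectory.open(new File("/indexes/part1")),
*                   FSDirectory.open(new File("/indexes/part2")));
* writer.commit();
* </pre>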
3111 * @throws CorruptIndexException if the index is corrupt
3112 * @throws IOException if there is a low-level IO error
3114 public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
3120 if (infoStream != null)
3121 message("flush at addIndexes(Directory...)");
3125 List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
3126 Comparator<String> versionComparator = StringHelper.getVersionComparator();
3127 for (Directory dir : dirs) {
3128 if (infoStream != null) {
3129 message("addIndexes: process directory " + dir);
3131 SegmentInfos sis = new SegmentInfos(); // read infos from dir
 sis.read(dir);
3133 final Set<String> dsFilesCopied = new HashSet<String>();
3134 final Map<String, String> dsNames = new HashMap<String, String>();
3135 for (SegmentInfo info : sis) {
3136 assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
3138 docCount += info.docCount;
3139 String newSegName = newSegmentName();
3140 String dsName = info.getDocStoreSegment();
3142 if (infoStream != null) {
3143 message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
3146 // create CFS only if the source segment is not CFS, and MP agrees it
3149 synchronized (this) { // Guard segmentInfos
3150 createCFS = !info.getUseCompoundFile()
3151 && mergePolicy.useCompoundFile(segmentInfos, info)
3152 // optimize case only for segments that don't share doc stores
3153 && versionComparator.compare(info.getVersion(), "3.1") >= 0;
3157 copySegmentIntoCFS(info, newSegName);
3159 copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
3165 synchronized (this) {
3167 segmentInfos.addAll(infos);
3171 } catch (OutOfMemoryError oom) {
3172 handleOOM(oom, "addIndexes(Directory...)");
3177 * Merges the provided indexes into this index. This method is useful
3178 * if you use extensions of {@link IndexReader}. Otherwise, using
3179 * {@link #addIndexes(Directory...)} is highly recommended for performance
3180 * reasons. It uses the {@link MergeScheduler} and {@link MergePolicy} set
3181 * on this writer, which may perform merges in parallel.
3183 * <p>The provided IndexReaders are not closed.
3185 * <p><b>NOTE:</b> this method does not merge the current segments,
3186 * only the incoming ones.
3188 * <p>See {@link #addIndexes(Directory...)} for details on transactional
3189 * semantics, temporary free space required in the Directory,
3190 * and non-CFS segments on an Exception.
3192 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3193 * you should immediately close the writer. See <a
3194 * href="#OOME">above</a> for details.
3196 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
3197 * with <tt>false</tt>, which aborts all running merges,
3198 * then any thread still running this method might hit a
3199 * {@link MergePolicy.MergeAbortedException}.
3201 * @throws CorruptIndexException if the index is corrupt
3202 * @throws IOException if there is a low-level IO error
3204 public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
3209 if (infoStream != null)
3210 message("flush at addIndexes(IndexReader...)");
3213 String mergedName = newSegmentName();
3214 // TODO: somehow we should fix this merge so it's
3215 // abortable so that IW.close(false) is able to stop it
3216 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
3217 mergedName, null, payloadProcessorProvider,
3218 ((FieldInfos) docWriter.getFieldInfos().clone()));
3220 for (IndexReader reader : readers) // add new indexes
 merger.add(reader);
3223 int docCount = merger.merge(); // merge 'em
3225 SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
3227 merger.fieldInfos().hasProx(),
3228 merger.fieldInfos().hasVectors());
3229 setDiagnostics(info, "addIndexes(IndexReader...)");
3231 boolean useCompoundFile;
3232 synchronized(this) { // Guard segmentInfos
3234 deleter.deleteNewFiles(info.files());
3238 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
3241 // Now create the compound file if needed
3242 if (useCompoundFile) {
3243 merger.createCompoundFile(mergedName + ".cfs", info);
3245 // delete new non cfs files directly: they were never
3246 // registered with IFD
3247 synchronized(this) {
3248 deleter.deleteNewFiles(info.files());
3250 info.setUseCompoundFile(true);
3253 // Register the new segment
3254 synchronized(this) {
3256 deleter.deleteNewFiles(info.files());
3260 segmentInfos.add(info);
3264 } catch (OutOfMemoryError oom) {
3265 handleOOM(oom, "addIndexes(IndexReader...)");
3269 /** Copies the segment into the IndexWriter's directory, as a compound segment. */
3270 private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
3271 String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
3272 Collection<String> files = info.files();
3273 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
3274 for (String file : files) {
3275 String newFileName = segName + IndexFileNames.stripSegmentName(file);
3276 if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
3277 && !IndexFileNames.isSeparateNormsFile(file)) {
3278 cfsWriter.addFile(file, info.dir);
3280 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3281 info.dir.copy(directory, file, newFileName);
3288 info.dir = directory;
3289 info.name = segName;
3290 info.setUseCompoundFile(true);
3293 /** Copies the segment files as-is into the IndexWriter's directory. */
3294 private void copySegmentAsIs(SegmentInfo info, String segName,
3295 Map<String, String> dsNames, Set<String> dsFilesCopied)
3296 throws IOException {
3297 // Determine if the doc store of this segment needs to be copied. It's
3298 // only relevant for segments that share doc store with others,
3299 // because the DS might have been copied already, in which case we
3300 // just want to update the DS name of this SegmentInfo.
3301 // NOTE: pre-3x segments include a null DSName if they don't share doc
3302 // store. The following code ensures we don't accidentally insert
3303 // 'null' to the map.
3304 String dsName = info.getDocStoreSegment();
3305 final String newDsName;
3306 if (dsName != null) {
3307 if (dsNames.containsKey(dsName)) {
3308 newDsName = dsNames.get(dsName);
3310 dsNames.put(dsName, segName);
3311 newDsName = segName;
3314 newDsName = segName;
3317 // Copy the segment files
3318 for (String file: info.files()) {
3319 final String newFileName;
3320 if (IndexFileNames.isDocStoreFile(file)) {
3321 newFileName = newDsName + IndexFileNames.stripSegmentName(file);
3322 if (dsFilesCopied.contains(newFileName)) {
3325 dsFilesCopied.add(newFileName);
3327 newFileName = segName + IndexFileNames.stripSegmentName(file);
3330 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3331 info.dir.copy(directory, file, newFileName);
3334 info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
3335 info.dir = directory;
3336 info.name = segName;
3340 * A hook for extending classes to execute operations after pending added and
3341 * deleted documents have been flushed to the Directory but before the change
3342 * is committed (new segments_N file written).
3344 protected void doAfterFlush() throws IOException {}
3347 * A hook for extending classes to execute operations before pending added and
3348 * deleted documents are flushed to the Directory.
3350 protected void doBeforeFlush() throws IOException {}
3352 /** Expert: prepare for commit.
3354 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3355 * you should immediately close the writer. See <a
3356 * href="#OOME">above</a> for details.</p>
3358 * @see #prepareCommit(Map) */
3359 public final void prepareCommit() throws CorruptIndexException, IOException {
3361 prepareCommit(null);
3364 /** <p>Expert: prepare for commit, specifying
3365 * commitUserData Map (String -> String). This does the
3366 * first phase of 2-phase commit. This method does all
3367 * steps necessary to commit changes since this writer
3368 * was opened: flushes pending added and deleted docs,
3369 * syncs the index files, writes most of the next segments_N
3370 * file. After calling this you must call either {@link
3371 * #commit()} to finish the commit, or {@link
3372 * #rollback()} to revert the commit and undo all changes
3373 * done since the writer was opened.</p>
3375 * You can also just call {@link #commit(Map)} directly
3376 * without prepareCommit first in which case that method
3377 * will internally call prepareCommit.
3379 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3380 * you should immediately close the writer. See <a
3381 * href="#OOME">above</a> for details.</p>
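*
* <p>A sketch of coordinating the index commit with a second
* participant (<code>otherResource</code> is hypothetical, not a
* Lucene API):</p>
*
* <pre>
* writer.prepareCommit();
* try {
*   otherResource.prepare();
*   writer.commit();       // finish the Lucene side
*   otherResource.commit();
* } catch (Exception e) {
*   writer.rollback();     // also discards the prepared commit
* }
* </pre>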
3383 * @param commitUserData Opaque Map (String->String)
3384 * that's recorded into the segments file in the index,
3385 * and retrievable by {@link
3386 * IndexReader#getCommitUserData}. Note that when
3387 * IndexWriter commits itself during {@link #close}, the
3388 * commitUserData is unchanged (just carried over from
3389 * the prior commit). If this is null then the previous
3390 * commitUserData is kept. Also, the commitUserData will
3391 * only "stick" if there are actually changes in the index to commit.
3394 public final void prepareCommit(Map<String, String> commitUserData)
3395 throws CorruptIndexException, IOException {
3399 throw new IllegalStateException(
3400 "this writer hit an OutOfMemoryError; cannot commit");
3403 if (pendingCommit != null)
3404 throw new IllegalStateException(
3405 "prepareCommit was already called with no corresponding call to commit");
3407 if (infoStream != null)
3408 message("prepareCommit: flush");
3411 boolean anySegmentsFlushed = false;
3412 SegmentInfos toCommit = null;
3413 boolean success = false;
3416 synchronized (this) {
3417 anySegmentsFlushed = doFlush(true);
3418 readerPool.commit(segmentInfos);
3419 toCommit = (SegmentInfos) segmentInfos.clone();
3420 pendingCommitChangeCount = changeCount;
3421 // This protects the segmentInfos we are now going
3422 // to commit. This is important in case, eg, while
3423 // we are trying to sync all referenced files, a
3424 // merge completes which would otherwise have
3425 // removed the files we are now syncing.
3426 deleter.incRef(toCommit, false);
3430 if (!success && infoStream != null) {
3431 message("hit exception during prepareCommit");
3435 } catch (OutOfMemoryError oom) {
3436 handleOOM(oom, "prepareCommit");
3441 if (anySegmentsFlushed) {
3447 synchronized (this) {
3448 deleter.decRef(toCommit);
3453 startCommit(toCommit, commitUserData);
3456 // Used only by commit, below; lock order is commitLock -> IW
3457 private final Object commitLock = new Object();
3460 * <p>Commits all pending changes (added & deleted
3461 * documents, segment merges, added
3462 * indexes, etc.) to the index, and syncs all referenced
3463 * index files, such that a reader will see the changes
3464 * and the index updates will survive an OS or machine
3465 * crash or power loss. Note that this does not wait for
3466 * any running background merges to finish. This may be a
3467 * costly operation, so you should test the cost in your
3468 * application and do it only when really necessary.</p>
3470 * <p> Note that this operation calls Directory.sync on
3471 * the index files. That call should not return until the
3472 * file contents & metadata are on stable storage. For
3473 * FSDirectory, this calls the OS's fsync. But, beware:
3474 * some hardware devices may in fact cache writes even
3475 * during fsync, and return before the bits are actually
3476 * on stable storage, to give the appearance of faster
3477 * performance. If you have such a device, and it does
3478 * not have a battery backup (for example) then on power
3479 * loss it may still lose data. Lucene cannot guarantee
3480 * consistency on such devices. </p>
3482 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3483 * you should immediately close the writer. See <a
3484 * href="#OOME">above</a> for details.</p>
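 *
 * <p>Typical usage sketch (assumes an open <code>IndexWriter writer</code>
 * and a <code>Document doc</code>):</p>
 * <pre>
 * writer.addDocument(doc);  // buffered in RAM; not yet visible to new readers
 * writer.commit();          // flushed and fsync'd; new readers will see it
 * </pre>
 *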
3486 * @see #prepareCommit
3488 */
3489 public final void commit() throws CorruptIndexException, IOException {
3490 ensureOpen();
3491 commitInternal(null);
3492 }
3493 /** Commits all changes to the index, specifying a
3494 * commitUserData Map (String -> String). This just
3495 * calls {@link #prepareCommit(Map)} (if you didn't
3496 * already call it) and then {@link #finishCommit}.
3498 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3499 * you should immediately close the writer. See <a
3500 * href="#OOME">above</a> for details.</p>
3501 */
3502 public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3503 ensureOpen();
3506 commitInternal(commitUserData);
3507 }
3509 private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3511 if (infoStream != null) {
3512 message("commit: start");
3515 synchronized(commitLock) {
3516 if (infoStream != null) {
3517 message("commit: enter lock");
3520 if (pendingCommit == null) {
3521 if (infoStream != null) {
3522 message("commit: now prepare");
3524 prepareCommit(commitUserData);
3525 } else if (infoStream != null) {
3526 message("commit: already prepared");
3527 }
3529 finishCommit();
3530 }
3531 }
3533 private synchronized final void finishCommit() throws CorruptIndexException, IOException {
3535 if (pendingCommit != null) {
3536 try {
3537 if (infoStream != null)
3538 message("commit: pendingCommit != null");
3539 pendingCommit.finishCommit(directory);
3540 if (infoStream != null)
3541 message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
3542 lastCommitChangeCount = pendingCommitChangeCount;
3543 segmentInfos.updateGeneration(pendingCommit);
3544 segmentInfos.setUserData(pendingCommit.getUserData());
3545 rollbackSegments = pendingCommit.createBackupSegmentInfos(true);
3546 deleter.checkpoint(pendingCommit, true);
3547 } finally {
3548 // Matches the incRef done in startCommit:
3549 deleter.decRef(pendingCommit);
3550 pendingCommit = null;
3551 notifyAll();
3552 }
3554 } else if (infoStream != null) {
3555 message("commit: pendingCommit == null; skip");
3558 if (infoStream != null) {
3559 message("commit: done");
3563 /** NOTE: flushDocStores is ignored now (hardwired to
3564 * true); this method is only here for backwards
3565 * compatibility. */
3566 protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3567 flush(triggerMerge, flushDeletes);
3571 * Flush all in-memory buffered updates (adds and deletes)
3572 * to the Directory.
3573 * @param triggerMerge if true, we may merge segments (if
3574 * deletes or docs were flushed) if necessary
3575 * @param applyAllDeletes whether pending deletes should also
3576 * be applied. */
3577 protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
3579 // NOTE: this method cannot be sync'd because
3580 // maybeMerge() in turn calls mergeScheduler.merge which
3581 // in turn can take a long time to run and we don't want
3582 // to hold the lock for that. In the case of
3583 // ConcurrentMergeScheduler this can lead to deadlock
3584 // when it stalls due to too many running merges.
3586 // We can be called during close, when closing==true, so we must pass false to ensureOpen:
3587 ensureOpen(false);
3588 if (doFlush(applyAllDeletes) && triggerMerge) {
3589 maybeMerge();
3590 }
3593 // TODO: this method should not have to be entirely
3594 // synchronized, ie, merges should be allowed to commit
3595 // even while a flush is happening
3596 private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
3598 if (hitOOM) {
3599 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
3600 }
3604 assert testPoint("startDoFlush");
3606 // We may be flushing because it was triggered by doc
3607 // count, del count, ram usage (in which case flush
3608 // pending is already set), or we may be flushing
3609 // due to external event eg getReader or commit is
3610 // called (in which case we now set it, and this will
3611 // pause all threads):
3612 flushControl.setFlushPendingNoWait("explicit flush");
3614 boolean success = false;
3615 try {
3618 if (infoStream != null) {
3619 message(" start flush: applyAllDeletes=" + applyAllDeletes);
3620 message(" index before flush " + segString());
3623 final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
3624 if (newSegment != null) {
3625 setDiagnostics(newSegment, "flush");
3626 segmentInfos.add(newSegment);
3630 if (!applyAllDeletes) {
3631 // If deletes alone are consuming > 1/2 our RAM
3632 // buffer, force them all to apply now. This is to
3633 // prevent too-frequent flushing of a long tail of
3634 // tiny segments:
3635 if (flushControl.getFlushDeletes() ||
3636 (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
3637 bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
3638 applyAllDeletes = true;
3639 if (infoStream != null) {
3640 message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
3645 if (applyAllDeletes) {
3646 if (infoStream != null) {
3647 message("apply all deletes during flush");
3650 flushDeletesCount.incrementAndGet();
3651 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
3652 .applyDeletes(readerPool, segmentInfos.asList());
3653 if (result.anyDeletes) {
3654 checkpoint();
3655 }
3656 if (!keepFullyDeletedSegments && result.allDeleted != null) {
3657 if (infoStream != null) {
3658 message("drop 100% deleted segments: " + result.allDeleted);
3660 for (SegmentInfo info : result.allDeleted) {
3661 // If a merge has already registered for this
3662 // segment, we leave it in the readerPool; the
3663 // merge will skip merging it and will then drop
3664 // it once it's done:
3665 if (!mergingSegments.contains(info)) {
3666 segmentInfos.remove(info);
3667 if (readerPool != null) {
3668 readerPool.drop(info);
3674 bufferedDeletesStream.prune(segmentInfos);
3676 assert !bufferedDeletesStream.any();
3677 flushControl.clearDeletes();
3678 } else if (infoStream != null) {
3679 message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
3684 flushCount.incrementAndGet();
3686 success = true;
3688 return newSegment != null;
3690 } catch (OutOfMemoryError oom) {
3691 handleOOM(oom, "doFlush");
3692 // never hit
3693 return false;
3694 } finally {
3695 flushControl.clearFlushPending();
3696 if (!success && infoStream != null)
3697 message("hit exception during flush");
3701 /** Expert: Return the total RAM (in bytes) currently consumed by
3702 * buffered documents and buffered deletes. Useful for monitoring RAM
3703 * usage against {@link IndexWriterConfig#setRAMBufferSizeMB}. */
3704 public final long ramSizeInBytes() {
3706 return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
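}

// Illustrative application-side sketch (not part of this class; the writer
// variable and the 64 MB budget are hypothetical):
//
//   if (writer.ramSizeInBytes() > 64L * 1024 * 1024) {
//     writer.commit(); // flushes buffered docs and applies buffered deletes
//   }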
3709 /** Expert: Return the number of documents currently
3710 * buffered in RAM. */
3711 public final synchronized int numRamDocs() {
3713 return docWriter.getNumDocs();
3716 private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
3717 for(SegmentInfo info : merge.segments) {
3718 if (!segmentInfos.contains(info)) {
3719 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
3724 /** Carefully merges deletes for the segments we just
3725 * merged. This is tricky because, although merging will
3726 * clear all deletes (compacts the documents), new
3727 * deletes may have been flushed to the segments since
3728 * the merge was started. This method "carries over"
3729 * such new deletes onto the newly merged segment, and
3730 * saves the resulting deletes file (incrementing the
3731 * delete generation for merge.info). If no deletes were
3732 * flushed, no new deletes file is saved.
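 *
 * <p>Worked example (hypothetical numbers): a source segment has 10 docs,
 * 2 of which were already deleted when the merge started; the merge
 * compacts those away. If doc 7 is deleted while the merge runs, the live
 * reader shows 3 deletions vs. the clone's 2, so the delete of doc 7 is
 * re-applied at its remapped position in the merged segment. */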
3733 synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3735 assert testPoint("startCommitMergeDeletes");
3737 final List<SegmentInfo> sourceSegments = merge.segments;
3739 if (infoStream != null)
3740 message("commitMergeDeletes " + merge.segString(directory));
3742 // Carefully merge deletes that occurred after we
3743 // started merging:
3744 int docUpto = 0;
3745 int delCount = 0;
3746 long minGen = Long.MAX_VALUE;
3748 for(int i=0; i < sourceSegments.size(); i++) {
3749 SegmentInfo info = sourceSegments.get(i);
3750 minGen = Math.min(info.getBufferedDeletesGen(), minGen);
3751 int docCount = info.docCount;
3752 final SegmentReader previousReader = merge.readerClones.get(i);
3753 if (previousReader == null) {
3754 // Reader was skipped because it was 100% deletions
3755 continue;
3756 }
3757 final SegmentReader currentReader = merge.readers.get(i);
3758 if (previousReader.hasDeletions()) {
3760 // There were deletes on this segment when the merge
3761 // started. The merge has collapsed away those
3762 // deletes, but, if new deletes were flushed since
3763 // the merge started, we must now carefully carry any
3764 // newly flushed deletes over, mapping them to the new
3765 // docIDs:
3767 if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
3768 // This means this segment has had new deletes
3769 // committed since we started the merge, so we
3770 // must merge them:
3771 for(int j=0;j<docCount;j++) {
3772 if (previousReader.isDeleted(j))
3773 assert currentReader.isDeleted(j);
3775 if (currentReader.isDeleted(j)) {
3776 mergedReader.doDelete(docUpto);
3783 docUpto += docCount - previousReader.numDeletedDocs();
3785 } else if (currentReader.hasDeletions()) {
3786 // This segment had no deletes before but now it
3787 // does:
3788 for(int j=0; j<docCount; j++) {
3789 if (currentReader.isDeleted(j)) {
3790 mergedReader.doDelete(docUpto);
3796 // No deletes before or after
3797 docUpto += info.docCount;
3800 assert mergedReader.numDeletedDocs() == delCount;
3802 mergedReader.hasChanges = delCount > 0;
3804 // If new deletes were applied while we were merging
3805 // (which happens if eg commit() or getReader() is
3806 // called during our merge), then it better be the case
3807 // that the delGen has increased for all our merged
3809 assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
3811 mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
3814 synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3816 assert testPoint("startCommitMerge");
3818 if (hitOOM) {
3819 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
3820 }
3822 if (infoStream != null)
3823 message("commitMerge: " + merge.segString(directory) + " index=" + segString());
3825 assert merge.registerDone;
3827 // If merge was explicitly aborted, or, if rollback() or
3828 // rollbackTransaction() had been called since our merge
3829 // started (which results in an unqualified
3830 // deleter.refresh() call that will remove any index
3831 // file that current segments does not reference), we
3833 if (merge.isAborted()) {
3834 if (infoStream != null)
3835 message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
3836 return false;
3837 }
3839 commitMergedDeletes(merge, mergedReader);
3841 // If the doc store we are using has been closed and
3842 // is now in compound format (but wasn't when we
3843 // started), then we will switch to the compound
3844 // format as well:
3846 assert !segmentInfos.contains(merge.info);
3848 final boolean allDeleted = mergedReader.numDocs() == 0;
3850 if (infoStream != null && allDeleted) {
3851 message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
3854 final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
3855 segmentInfos.applyMergeChanges(merge, dropSegment);
3857 if (dropSegment) {
3858 readerPool.drop(merge.info);
3859 }
3861 if (infoStream != null) {
3862 message("after commit: " + segString());
3865 closeMergeReaders(merge, false);
3867 // Must note the change to segmentInfos so any commits
3868 // in-flight don't lose it:
3869 checkpoint();
3871 // If the merged segments had pending changes, clear
3872 // them so that they don't bother writing them to
3873 // disk, updating SegmentInfo, etc.:
3874 readerPool.clear(merge.segments);
3876 if (merge.maxNumSegments != -1) {
3877 // cascade the forceMerge:
3878 if (!segmentsToMerge.containsKey(merge.info)) {
3879 segmentsToMerge.put(merge.info, Boolean.FALSE);
3880 }
3881 }
3883 return true;
3884 }
3886 final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
3888 if (infoStream != null) {
3889 message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
3892 // Set the exception on the merge, so if
3893 // forceMerge is waiting on us it sees the root
3895 merge.setException(t);
3896 addMergeException(merge);
3898 if (t instanceof MergePolicy.MergeAbortedException) {
3899 // We can ignore this exception (it happens when
3900 // close(false) or rollback is called), unless the
3901 // merge involves segments from external directories,
3902 // in which case we must throw it so, for example, the
3903 // rollbackTransaction code in addIndexes* is
3905 if (merge.isExternal)
3906 throw (MergePolicy.MergeAbortedException) t;
3907 } else if (t instanceof IOException)
3908 throw (IOException) t;
3909 else if (t instanceof RuntimeException)
3910 throw (RuntimeException) t;
3911 else if (t instanceof Error)
3912 throw (Error) t;
3913 else
3914 // Should not get here
3915 throw new RuntimeException(t);
3919 * Merges the indicated segments, replacing them in the stack with a
3920 * new segment.
3922 * @lucene.experimental
3924 public void merge(MergePolicy.OneMerge merge)
3925 throws CorruptIndexException, IOException {
3927 boolean success = false;
3929 final long t0 = System.currentTimeMillis();
3930 //System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());
3937 if (infoStream != null)
3938 message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());
3940 mergeMiddle(merge);
3941 mergeSuccess(merge);
3942 success = true;
3943 } catch (Throwable t) {
3944 handleMergeException(t, merge);
3947 synchronized(this) {
3948 mergeFinish(merge);
3950 if (!success) {
3951 if (infoStream != null)
3952 message("hit exception during merge");
3953 if (merge.info != null && !segmentInfos.contains(merge.info))
3954 deleter.refresh(merge.info.name);
3957 // This merge (and, generally, any change to the
3958 // segments) may now enable new merges, so we call
3959 // merge policy & update pending merges.
3960 if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
3961 updatePendingMerges(merge.maxNumSegments);
3965 } catch (OutOfMemoryError oom) {
3966 handleOOM(oom, "merge");
3968 if (infoStream != null && merge.info != null) {
3969 message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
3971 //System.out.println(Thread.currentThread().getName() + ": merge end");
3974 /** Hook that's called when the specified merge is complete. */
3975 void mergeSuccess(MergePolicy.OneMerge merge) {
3978 /** Checks whether this merge involves any segments
3979 * already participating in a merge. If not, this merge
3980 * is "registered", meaning we record that its segments
3981 * are now participating in a merge, and true is
3982 * returned. Else (the merge conflicts) false is
3983 * returned. */
3984 final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
3986 if (merge.registerDone)
3987 return true;
3989 if (stopMerges) {
3990 merge.abort();
3991 throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
3992 }
3994 boolean isExternal = false;
3995 for(SegmentInfo info : merge.segments) {
3996 if (mergingSegments.contains(info)) {
3997 return false;
3998 }
3999 if (!segmentInfos.contains(info)) {
4000 return false;
4001 }
4002 if (info.dir != directory) {
4003 isExternal = true;
4004 }
4005 if (segmentsToMerge.containsKey(info)) {
4006 merge.maxNumSegments = mergeMaxNumSegments;
4007 }
4010 ensureValidMerge(merge);
4012 pendingMerges.add(merge);
4014 if (infoStream != null)
4015 message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
4017 merge.mergeGen = mergeGen;
4018 merge.isExternal = isExternal;
4020 // OK it does not conflict; now record that this merge
4021 // is running (while synchronized) to avoid race
4022 // condition where two conflicting merges from different
4023 // threads both start:
4024 message("registerMerge merging=" + mergingSegments);
4025 for(SegmentInfo info : merge.segments) {
4026 message("registerMerge info=" + info);
4027 mergingSegments.add(info);
4030 // Merge is now registered
4031 merge.registerDone = true;
4035 /** Does initial setup for a merge, which is fast but holds
4036 * the synchronized lock on the IndexWriter instance. */
4037 final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
4038 boolean success = false;
4039 try {
4040 _mergeInit(merge);
4041 success = true;
4042 } finally {
4043 if (!success) {
4044 if (infoStream != null) {
4045 message("hit exception in mergeInit");
4046 }
4047 mergeFinish(merge);
4048 }
4049 }
4050 }
4052 synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
4054 assert testPoint("startMergeInit");
4056 assert merge.registerDone;
4057 assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;
4059 if (hitOOM) {
4060 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
4061 }
4063 // TODO: is there any perf benefit to sorting
4064 // merged segments? eg biggest to smallest?
4066 if (merge.info != null)
4067 // mergeInit already done
4068 return;
4070 if (merge.isAborted())
4071 return;
4073 boolean hasVectors = false;
4074 for (SegmentInfo sourceSegment : merge.segments) {
4075 if (sourceSegment.getHasVectors()) {
4076 hasVectors = true;
4077 }
4078 }
4080 // Bind a new segment name here so even with
4081 // ConcurrentMergeScheduler we keep deterministic segment
4082 // names:
4083 merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, true, false, hasVectors);
4085 // Lock order: IW -> BD
4086 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
4088 if (result.anyDeletes) {
4089 checkpoint();
4090 }
4092 if (!keepFullyDeletedSegments && result.allDeleted != null) {
4093 if (infoStream != null) {
4094 message("drop 100% deleted segments: " + result.allDeleted);
4096 for(SegmentInfo info : result.allDeleted) {
4097 segmentInfos.remove(info);
4098 if (merge.segments.contains(info)) {
4099 mergingSegments.remove(info);
4100 merge.segments.remove(info);
4103 if (readerPool != null) {
4104 readerPool.drop(result.allDeleted);
4109 merge.info.setBufferedDeletesGen(result.gen);
4111 // Lock order: IW -> BD
4112 bufferedDeletesStream.prune(segmentInfos);
4114 Map<String,String> details = new HashMap<String,String>();
4115 details.put("mergeMaxNumSegments", ""+merge.maxNumSegments);
4116 details.put("mergeFactor", Integer.toString(merge.segments.size()));
4117 setDiagnostics(merge.info, "merge", details);
4119 if (infoStream != null) {
4120 message("merge seg=" + merge.info.name);
4123 assert merge.estimatedMergeBytes == 0;
4124 for(SegmentInfo info : merge.segments) {
4125 if (info.docCount > 0) {
4126 final int delCount = numDeletedDocs(info);
4127 assert delCount <= info.docCount;
4128 final double delRatio = ((double) delCount)/info.docCount;
4129 merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
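// e.g. (hypothetical numbers): a 100 MB segment with 20% of its docs
// deleted contributes roughly 80 MB to estimatedMergeBytes.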
4133 // TODO: I think this should no longer be needed (we
4134 // now build CFS before adding segment to the infos);
4135 // however, on removing it, tests fail for some reason!
4137 // Also enroll the merged segment into mergingSegments;
4138 // this prevents it from getting selected for a merge
4139 // after our merge is done but while we are building the
4140 // CFS:
4141 mergingSegments.add(merge.info);
4144 private void setDiagnostics(SegmentInfo info, String source) {
4145 setDiagnostics(info, source, null);
4148 private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
4149 Map<String,String> diagnostics = new HashMap<String,String>();
4150 diagnostics.put("source", source);
4151 diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
4152 diagnostics.put("os", Constants.OS_NAME);
4153 diagnostics.put("os.arch", Constants.OS_ARCH);
4154 diagnostics.put("os.version", Constants.OS_VERSION);
4155 diagnostics.put("java.version", Constants.JAVA_VERSION);
4156 diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
4157 if (details != null) {
4158 diagnostics.putAll(details);
4160 info.setDiagnostics(diagnostics);
4163 /** Does the finishing work for a merge, which is fast but
4164 * holds the synchronized lock on the IndexWriter instance. */
4165 final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
4167 // forceMerge, addIndexes or finishMerges may be waiting
4168 // on merges to finish.
4169 notifyAll();
4171 // It's possible we are called twice, eg if there was an
4172 // exception inside mergeInit
4173 if (merge.registerDone) {
4174 final List<SegmentInfo> sourceSegments = merge.segments;
4175 for(SegmentInfo info : sourceSegments) {
4176 mergingSegments.remove(info);
4178 // TODO: if we remove the add in _mergeInit, we should
4179 // also remove this:
4180 mergingSegments.remove(merge.info);
4181 merge.registerDone = false;
4184 runningMerges.remove(merge);
4187 private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
4188 final int numSegments = merge.readers.size();
4189 Throwable th = null;
4191 boolean anyChanges = false;
4192 boolean drop = !suppressExceptions;
4193 for (int i = 0; i < numSegments; i++) {
4194 if (merge.readers.get(i) != null) {
4196 anyChanges |= readerPool.release(merge.readers.get(i), drop);
4197 } catch (Throwable t) {
4202 merge.readers.set(i, null);
4205 if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
4207 merge.readerClones.get(i).close();
4208 } catch (Throwable t) {
4213 // This was a private clone and we had the
4214 // only reference to it:
4215 assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
4216 merge.readerClones.set(i, null);
4220 if (suppressExceptions && anyChanges) {
4221 checkpoint();
4222 }
4224 // If any error occurred, throw it.
4225 if (!suppressExceptions && th != null) {
4226 if (th instanceof IOException) throw (IOException) th;
4227 if (th instanceof RuntimeException) throw (RuntimeException) th;
4228 if (th instanceof Error) throw (Error) th;
4229 throw new RuntimeException(th);
4233 /** Does the actual (time-consuming) work of the merge,
4234 * but without holding synchronized lock on IndexWriter
4236 final private int mergeMiddle(MergePolicy.OneMerge merge)
4237 throws CorruptIndexException, IOException {
4239 merge.checkAborted(directory);
4241 final String mergedName = merge.info.name;
4243 int mergedDocCount = 0;
4245 List<SegmentInfo> sourceSegments = merge.segments;
4247 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
4248 payloadProcessorProvider,
4249 ((FieldInfos) docWriter.getFieldInfos().clone()));
4251 if (infoStream != null) {
4252 message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getHasVectors());
4255 merge.readers = new ArrayList<SegmentReader>();
4256 merge.readerClones = new ArrayList<SegmentReader>();
4258 // This is try/finally to make sure merger's readers are
4259 // closed:
4260 boolean success = false;
4261 try {
4262 int totDocCount = 0;
4263 int segUpto = 0;
4264 while(segUpto < sourceSegments.size()) {
4266 final SegmentInfo info = sourceSegments.get(segUpto);
4268 // Hold onto the "live" reader; we will use this to
4269 // commit merged deletes
4270 final SegmentReader reader = readerPool.get(info, true,
4271 MERGE_READ_BUFFER_SIZE,
4273 merge.readers.add(reader);
4275 // We clone the segment readers because other
4276 // deletes may come in while we're merging so we
4277 // need readers that will not change
4278 final SegmentReader clone = (SegmentReader) reader.clone(true);
4279 merge.readerClones.add(clone);
4281 if (clone.numDocs() > 0) {
4282 merger.add(clone);
4283 totDocCount += clone.numDocs();
4284 }
4285 segUpto++;
4286 }
4288 if (infoStream != null) {
4289 message("merge: total " + totDocCount + " docs");
4292 merge.checkAborted(directory);
4294 // This is where all the work happens:
4295 mergedDocCount = merge.info.docCount = merger.merge();
4297 // LUCENE-3403: set hasVectors after merge(), so that it is properly set.
4298 merge.info.setHasVectors(merger.fieldInfos().hasVectors());
4300 assert mergedDocCount == totDocCount;
4302 if (infoStream != null) {
4303 message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
4306 anyNonBulkMerges |= merger.getAnyNonBulkMerges();
4308 assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
4310 // Very important to do this before opening the reader
4311 // because SegmentReader must know if prox was written for
4313 merge.info.setHasProx(merger.fieldInfos().hasProx());
4315 boolean useCompoundFile;
4316 synchronized (this) { // Guard segmentInfos
4317 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
4320 if (useCompoundFile) {
4323 final String compoundFileName = IndexFileNames.segmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
4326 if (infoStream != null) {
4327 message("create compound file " + compoundFileName);
4329 merger.createCompoundFile(compoundFileName, merge.info);
4331 } catch (IOException ioe) {
4332 synchronized(this) {
4333 if (merge.isAborted()) {
4334 // This can happen if rollback or close(false)
4335 // is called -- fall through to logic below to
4336 // remove the partially created CFS:
4338 handleMergeException(ioe, merge);
4341 } catch (Throwable t) {
4342 handleMergeException(t, merge);
4345 if (infoStream != null) {
4346 message("hit exception creating compound file during merge");
4349 synchronized(this) {
4350 deleter.deleteFile(compoundFileName);
4351 deleter.deleteNewFiles(merge.info.files());
4358 synchronized(this) {
4360 // delete new non cfs files directly: they were never
4361 // registered with IFD
4362 deleter.deleteNewFiles(merge.info.files());
4364 if (merge.isAborted()) {
4365 if (infoStream != null) {
4366 message("abort merge after building CFS");
4368 deleter.deleteFile(compoundFileName);
4369 return 0;
4370 }
4371 }
4373 merge.info.setUseCompoundFile(true);
4376 if (infoStream != null) {
4377 message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
4380 final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
4382 final int termsIndexDivisor;
4383 final boolean loadDocStores;
4385 if (mergedSegmentWarmer != null) {
4386 // Load terms index & doc stores so the segment
4387 // warmer can run searches, load documents/term
4388 // vectors:
4389 termsIndexDivisor = config.getReaderTermsIndexDivisor();
4390 loadDocStores = true;
4391 } else {
4392 termsIndexDivisor = -1;
4393 loadDocStores = false;
4396 // TODO: in the non-realtime case, we may want to only
4397 // keep deletes (it's costly to open entire reader
4398 // when we just need deletes)
4400 final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
4402 if (poolReaders && mergedSegmentWarmer != null) {
4403 mergedSegmentWarmer.warm(mergedReader);
4406 if (!commitMerge(merge, mergedReader)) {
4407 // commitMerge will return false if this merge was aborted
4408 return 0;
4409 }
4411 synchronized(this) {
4412 if (readerPool.release(mergedReader)) {
4413 // Must checkpoint after releasing the
4414 // mergedReader since it may have written a new
4415 // deletes file:
4416 checkpoint();
4417 }
4418 }
4420 success = true;
4421 } finally {
4424 // Readers are already closed in commitMerge if we didn't hit
4425 // an exception:
4426 if (!success) {
4427 closeMergeReaders(merge, true);
4431 return mergedDocCount;
4434 synchronized void addMergeException(MergePolicy.OneMerge merge) {
4435 assert merge.getException() != null;
4436 if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
4437 mergeExceptions.add(merge);
4440 // For test purposes.
4441 final int getBufferedDeleteTermsSize() {
4442 return docWriter.getPendingDeletes().terms.size();
4445 // For test purposes.
4446 final int getNumBufferedDeleteTerms() {
4447 return docWriter.getPendingDeletes().numTermDeletes.get();
4450 // utility routines for tests
4451 synchronized SegmentInfo newestSegment() {
4452 return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
4455 /** @lucene.internal */
4456 public synchronized String segString() throws IOException {
4457 return segString(segmentInfos);
4460 /** @lucene.internal */
4461 public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
4462 final StringBuilder buffer = new StringBuilder();
4463 for(final SegmentInfo s : infos) {
4464 if (buffer.length() > 0) {
4465 buffer.append(' ');
4466 }
4467 buffer.append(segString(s));
4469 return buffer.toString();
4472 /** @lucene.internal */
4473 public synchronized String segString(SegmentInfo info) throws IOException {
4474 StringBuilder buffer = new StringBuilder();
4475 SegmentReader reader = readerPool.getIfExists(info);
4477 if (reader != null) {
4478 buffer.append(reader.toString());
4479 } else {
4480 buffer.append(info.toString(directory, 0));
4481 if (info.dir != directory) {
4482 buffer.append("**");
4486 if (reader != null) {
4487 readerPool.release(reader);
4490 return buffer.toString();
4493 private synchronized void doWait() {
4494 // NOTE: the callers of this method should in theory
4495 // be able to do simply wait(), but, as a defense
4496 // against thread timing hazards where notifyAll()
4497 // fails to be called, we wait for at most 1 second
4498 // and then return so caller can check if wait
4499 // conditions are satisfied:
4500 try {
4501 wait(1000);
4502 } catch (InterruptedException ie) {
4503 throw new ThreadInterruptedException(ie);
4507 private boolean keepFullyDeletedSegments;
4509 /** Only for testing.
4511 * @lucene.internal */
4512 void keepFullyDeletedSegments() {
4513 keepFullyDeletedSegments = true;
4516 boolean getKeepFullyDeletedSegments() {
4517 return keepFullyDeletedSegments;
4520 // called only from assert
4521 private boolean filesExist(SegmentInfos toSync) throws IOException {
4522 Collection<String> files = toSync.files(directory, false);
4523 for(final String fileName: files) {
4524 assert directory.fileExists(fileName): "file " + fileName + " does not exist";
4525 // If this trips it means we are missing a call to
4526 // .checkpoint somewhere, because by the time we
4527 // are called, deleter should know about every
4528 // file referenced by the current head
4530 assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
4531 }
4533 return true;
4534 }
4535 /** Walk through all files referenced by the current
4536 * segmentInfos and ask the Directory to sync each file,
4537 * if it wasn't already. If that succeeds, then we
4538 * prepare a new segments_N file but do not fully commit
4539 * it. */
4540 private void startCommit(SegmentInfos toSync, Map<String,String> commitUserData) throws IOException {
4542 assert testPoint("startStartCommit");
4543 assert pendingCommit == null;
4546 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
4551 if (infoStream != null)
4552 message("startCommit(): start");
4555 synchronized(this) {
4557 assert lastCommitChangeCount <= changeCount;
4559 if (pendingCommitChangeCount == lastCommitChangeCount) {
4560 if (infoStream != null) {
4561 message(" skip startCommit(): no changes pending");
4563 deleter.decRef(toSync);
4564 return;
4565 }
4567 // First, we clone & incref the segmentInfos we intend
4568 // to sync, then, without locking, we sync() all files
4569 // referenced by toSync, in the background.
4571 if (infoStream != null)
4572 message("startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
4574 assert filesExist(toSync);
4576 if (commitUserData != null) {
4577 toSync.setUserData(commitUserData);
4581 assert testPoint("midStartCommit");
4583 boolean pendingCommitSet = false;
4586 // This call can take a long time -- 10s of seconds
4587 // or more. We do it without sync:
4588 directory.sync(toSync.files(directory, false));
4590 assert testPoint("midStartCommit2");
4592 synchronized(this) {
4594 assert pendingCommit == null;
4596 assert segmentInfos.getGeneration() == toSync.getGeneration();
4598 // Exception here means nothing is prepared
4599 // (this method unwinds everything it did on
4600 // an exception):
4601 toSync.prepareCommit(directory);
4602 pendingCommitSet = true;
4603 pendingCommit = toSync;
4606 if (infoStream != null) {
4607 message("done all syncs");
4610 assert testPoint("midStartCommitSuccess");
4613 synchronized(this) {
4615 // Have our master segmentInfos record the
4616 // generations we just prepared. We do this
4617 // on error or success so we don't
4618 // double-write a segments_N file.
4619 segmentInfos.updateGeneration(toSync);
4621 if (!pendingCommitSet) {
4622 if (infoStream != null) {
4623 message("hit exception committing segments file");
4626 deleter.decRef(toSync);
4630 } catch (OutOfMemoryError oom) {
4631 handleOOM(oom, "startCommit");
4633 assert testPoint("finishStartCommit");
4637 * Returns <code>true</code> iff the index in the named directory is
4638 * currently locked.
4639 * @param directory the directory to check for a lock
4640 * @throws IOException if there is a low-level IO error
4642 public static boolean isLocked(Directory directory) throws IOException {
4643 return directory.makeLock(WRITE_LOCK_NAME).isLocked();
4647 * Forcibly unlocks the index in the named directory.
4649 * Caution: this should only be used by failure recovery code,
4650 * when it is known that no other process nor thread is in fact
4651 * currently accessing this index.
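 *
 * <p>Example sketch (<code>dir</code> is assumed; only safe when no other
 * process or thread can be writing to the index, e.g. after a crashed
 * process left the lock behind):</p>
 * <pre>
 * if (IndexWriter.isLocked(dir)) {
 *   IndexWriter.unlock(dir);
 * }
 * </pre>
 */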
4653 public static void unlock(Directory directory) throws IOException {
4654 directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
4658 * Specifies maximum field length (in number of tokens/terms) in
4659 * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
4660 * the value set by the constructor.
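 *
 * <p>Example sketch (<code>dir</code> and <code>analyzer</code> are assumed):
 * <code>new IndexWriter(dir, analyzer, new IndexWriter.MaxFieldLength(10000))</code>
 * indexes at most the first 10,000 tokens of each field.</p>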
4662 * @deprecated use {@link LimitTokenCountAnalyzer} instead.
4665 public static final class MaxFieldLength {
4667 private int limit;
4668 private String name;
4671 * Private type-safe-enum-pattern constructor.
4673 * @param name instance name
4674 * @param limit maximum field length
4676 private MaxFieldLength(String name, int limit) {
4677 this.name = name;
4678 this.limit = limit;
4679 }
4682 * Public constructor to allow users to specify the maximum field size limit.
4684 * @param limit The maximum field length
4686 public MaxFieldLength(int limit) {
4687 this("User-specified", limit);
4690 public int getLimit() {
4691 return limit;
4692 }
4695 public String toString() {
4697 return name + ":" + limit;
4698 }
4700 /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
4701 public static final MaxFieldLength UNLIMITED
4702 = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
4705 * Sets the maximum field length to
4706 * {@link #DEFAULT_MAX_FIELD_LENGTH}
4708 public static final MaxFieldLength LIMITED
4709 = new MaxFieldLength("LIMITED", 10000);
4712 /** If {@link #getReader} has been called (ie, this writer
4713 * is in near real-time mode), then after a merge
4714 * completes, this class can be invoked to warm the
4715 * reader on the newly merged segment, before the merge
4716 * commits. This is not required for near real-time
4717 * search, but will reduce search latency on opening a
4718 * new near real-time reader after a merge completes.
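 *
 * <p>Sketch of a warmer that runs a cheap query so the first near
 * real-time search after a merge is fast (the field and term here are
 * hypothetical):</p>
 * <pre>
 * writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
 *   public void warm(IndexReader reader) throws IOException {
 *     IndexSearcher searcher = new IndexSearcher(reader);
 *     try {
 *       searcher.search(new TermQuery(new Term("id", "0")), 10);
 *     } finally {
 *       searcher.close();
 *     }
 *   }
 * });
 * </pre>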
4720 * @lucene.experimental
4722 * <p><b>NOTE</b>: warm is called before any deletes have
4723 * been carried over to the merged segment. */
4724 public static abstract class IndexReaderWarmer {
4725 public abstract void warm(IndexReader reader) throws IOException;
4729 * Set the merged segment warmer. See {@link IndexReaderWarmer}.
4731 * @deprecated use
4732 * {@link IndexWriterConfig#setMergedSegmentWarmer}
4733 * instead. */
4736 public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
4737 config.setMergedSegmentWarmer(warmer);
4741 * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
4743 * @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
4746 public IndexReaderWarmer getMergedSegmentWarmer() {
4747 return config.getMergedSegmentWarmer();
4750 private void handleOOM(OutOfMemoryError oom, String location) {
4751 if (infoStream != null) {
4752 message("hit OutOfMemoryError inside " + location);
4753 }
4754 hitOOM = true;
4755 throw oom;
4756 }
4758 // Used only by assert for testing. Current points:
4764 // midStartCommitSuccess
4765 // finishStartCommit
4766 // startCommitMergeDeletes
4768 // DocumentsWriter.ThreadState.init start
4769 boolean testPoint(String name) {
4770 return true;
4771 }
4773 synchronized boolean nrtIsCurrent(SegmentInfos infos) {
4774 //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
4776 return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
4779 synchronized boolean isClosed() {
4780 return closed;
4781 }
4783 /** Expert: remove any index files that are no longer
4784 * used.
4786 * <p> IndexWriter normally deletes unused files itself,
4787 * during indexing. However, on Windows, which disallows
4788 * deletion of open files, if there is a reader open on
4789 * the index then those files cannot be deleted. This is
4790 * fine, because IndexWriter will periodically retry
4791 * the deletion.</p>
4793 * <p> However, IndexWriter doesn't try that often: only
4794 * on open, close, flushing a new segment, and finishing
4795 * a merge. If you don't do any of these actions with your
4796 * IndexWriter, you'll see the unused files linger. If
4797 * that's a problem, call this method to delete them
4798 * (once you've closed the open readers that were
4799 * preventing their deletion).
4801 * <p> In addition, you can call this method to delete
4802 * unreferenced index commits. This might be useful if you
4803 * are using an {@link IndexDeletionPolicy} which holds
4804 * onto index commits until some criteria are met, but those
4805 * commits are no longer needed. Otherwise, those commits will
4806 * be deleted the next time commit() is called.
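 *
 * <p>Typical use (sketch; <code>reader</code> is an open IndexReader that
 * was pinning now-unreferenced files):</p>
 * <pre>
 * reader.close();             // release the files the reader was holding open
 * writer.deleteUnusedFiles(); // now the writer can delete them
 * </pre>
 */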
4808 public synchronized void deleteUnusedFiles() throws IOException {
4810 deleter.deletePendingFiles();
4811 deleter.revisitPolicy();
4814 // Called by DirectoryReader.doClose
4815 synchronized void deletePendingFiles() throws IOException {
4816 deleter.deletePendingFiles();
4820 * Sets the {@link PayloadProcessorProvider} to use when merging payloads.
4821 * Note that the given <code>pcp</code> will be invoked for every segment that
4822 * is merged, not only external ones that are given through
4823 * {@link #addIndexes}. If you want only the payloads of the external segments
4824 * to be processed, you can return <code>null</code> whenever a
4825 * {@link DirPayloadProcessor} is requested for the {@link Directory} of the
4826 * {@link IndexWriter}.
4828 * The default is <code>null</code>, which means payloads are processed
4829 * normally (copied as-is) during segment merges; passing <code>null</code>
4830 * again later unsets any previously set provider.
4832 * <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
4833 * immediately, potentially for already running merges too. If you want to be
4834 * sure it is used for further operations only, such as {@link #addIndexes} or
4835 * {@link #forceMerge}, you can call {@link #waitForMerges()} before.
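 *
 * <p>Sketch: process payloads only for segments coming from other
 * directories; <code>myProvider</code> is a hypothetical
 * {@link PayloadProcessorProvider}:</p>
 * <pre>
 * writer.setPayloadProcessorProvider(new PayloadProcessorProvider() {
 *   public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
 *     return dir == writer.getDirectory() ? null : myProvider.getDirProcessor(dir);
 *   }
 * });
 * </pre>
 */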
4837 public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
4839 payloadProcessorProvider = pcp;
4843 * Returns the {@link PayloadProcessorProvider} that is used during segment
4844 * merges to process payloads.
4846 public PayloadProcessorProvider getPayloadProcessorProvider() {
4848 return payloadProcessorProvider;
4851 // decides when flushes happen
4852 final class FlushControl {
4854 private boolean flushPending;
4855 private boolean flushDeletes;
4856 private int delCount;
4857 private int docCount;
4858 private boolean flushing;
4860 private synchronized boolean setFlushPending(String reason, boolean doWait) {
4861 if (flushPending || flushing) {
4862 if (doWait) {
4863 while(flushPending || flushing) {
4866 } catch (InterruptedException ie) {
4867 throw new ThreadInterruptedException(ie);
4868 }
4869 }
4870 }
4871 return false;
4872 } else {
4873 if (infoStream != null) {
4874 message("now trigger flush reason=" + reason);
4876 flushPending = true;
4877 return flushPending;
4881 public synchronized void setFlushPendingNoWait(String reason) {
4882 setFlushPending(reason, false);
4885 public synchronized boolean getFlushPending() {
4886 return flushPending;
4889 public synchronized boolean getFlushDeletes() {
4890 return flushDeletes;
4893 public synchronized void clearFlushPending() {
4894 if (infoStream != null) {
4895 message("clearFlushPending");
4897 flushPending = false;
4898 flushDeletes = false;
4903 public synchronized void clearDeletes() {
4904 delCount = 0;
4905 }
4907 public synchronized boolean waitUpdate(int docInc, int delInc) {
4908 return waitUpdate(docInc, delInc, false);
4911 public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
4912 while(flushPending) {
4913 try {
4914 wait();
4915 } catch (InterruptedException ie) {
4916 throw new ThreadInterruptedException(ie);
4923 // skipWait is only used when a thread is BOTH adding
4924 // a doc and buffering a del term, and, the adding of
4925 // the doc already triggered a flush
4930 final int maxBufferedDocs = config.getMaxBufferedDocs();
4931 if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4932 docCount >= maxBufferedDocs) {
4933 return setFlushPending("maxBufferedDocs", true);
4936 final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
4937 if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4938 delCount >= maxBufferedDeleteTerms) {
4939 flushDeletes = true;
4940 return setFlushPending("maxBufferedDeleteTerms", true);
4943 return flushByRAMUsage("add delete/doc");
4946 public synchronized boolean flushByRAMUsage(String reason) {
4947 final double ramBufferSizeMB = config.getRAMBufferSizeMB();
4948 if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
4949 final long limit = (long) (ramBufferSizeMB*1024*1024);
4950 long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4951 if (used >= limit) {
4953 // DocumentsWriter may be able to free up some
4955 // Lock order: FC -> DW
4956 docWriter.balanceRAM();
4958 used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4959 if (used >= limit) {
4960 return setFlushPending("ram full: " + reason, false);
4961 }
4962 }
4963 }
4964 return false;
4965 }
4966 }
4968 final FlushControl flushControl = new FlushControl();