package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.TwoPhaseCommit;
/**
  An <code>IndexWriter</code> creates and maintains an index.

  <p>The <code>create</code> argument to the {@link
  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
  whether a new index is created, or whether an existing index is
  opened.  Note that you can open an index with <code>create=true</code>
  even while readers are using the index.  The old readers will
  continue to search the "point in time" snapshot they had opened,
  and won't see the newly created index until they re-open.  There are
  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
  with no <code>create</code> argument which will create a new index
  if there is not already an index at the provided path and otherwise
  open the existing index.</p>

  <p>In either case, documents are added with {@link #addDocument(Document)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}.  A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  and then adds the entire document).  When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>
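
  <p>For example (an illustrative sketch, not from the Lucene
  distribution; <code>dir</code> and <code>analyzer</code> are assumed
  to already exist):</p>

  <pre>
  IndexWriter writer = new IndexWriter(dir,
      new IndexWriterConfig(Version.LUCENE_31, analyzer));
  Document doc = new Document();
  doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);                         // add
  writer.updateDocument(new Term("id", "1"), doc); // atomic delete-then-add
  writer.deleteDocuments(new Term("id", "1"));     // delete by term
  writer.close();                                  // commits and releases the write lock
  </pre>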
  <a name="flush"></a>
  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls).  A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner.  For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB.  For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer.  Note that flushing just moves the
  internal buffered state in IndexWriter into the index, but
  these changes are not visible to IndexReader until either
  {@link #commit()} or {@link #close} is called.  A flush may
  also trigger one or more segment merges which by default
  run with a background thread so as not to block the
  addDocument calls (see <a href="#mergePolicy">below</a>
  for changing the {@link MergeScheduler}).</p>
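
  <p>A hedged sketch of the flush/commit distinction (names as in the
  example above):</p>

  <pre>
  IndexWriter writer = new IndexWriter(dir,
      new IndexWriterConfig(Version.LUCENE_31, analyzer)
          .setRAMBufferSizeMB(64.0)); // flush by RAM usage; larger is usually faster
  writer.addDocument(doc);
  // the document may already be flushed to the Directory, but a newly
  // opened IndexReader cannot see it until:
  writer.commit();
  </pre>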
  <p>If an index will not have more documents added for a while and optimal search
  performance is desired, then either the full {@link #optimize() optimize}
  method or partial {@link #optimize(int)} method should be
  called before the index is closed.</p>

  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
  another <code>IndexWriter</code> on the same directory will lead to a
  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
  is also thrown if an IndexReader on the same directory is used to delete documents
  from the index.</p>

  <a name="deletionPolicy"></a>
  <p>Expert: <code>IndexWriter</code> allows an optional
  {@link IndexDeletionPolicy} implementation to be
  specified.  You can use this to control when prior commits
  are deleted from the index.  The default policy is {@link
  KeepOnlyLastCommitDeletionPolicy} which removes all prior
  commits as soon as a new commit is done (this matches
  behavior before 2.2).  Creating your own policy can allow
  you to explicitly keep previous "point in time" commits
  alive in the index for some time, to allow readers to
  refresh to the new commit without having the old commit
  deleted out from under them.  This is necessary on
  filesystems like NFS that do not support "delete on last
  close" semantics, which Lucene's "point in time" search
  normally relies on. </p>
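
  <p>A minimal sketch of a custom policy (the class name and the
  "keep two commits" rule are invented for illustration):</p>

  <pre>
  class KeepLastTwoCommitsPolicy implements IndexDeletionPolicy {
    public void onInit(List&lt;? extends IndexCommit&gt; commits) {
      onCommit(commits);
    }
    public void onCommit(List&lt;? extends IndexCommit&gt; commits) {
      // commits are ordered oldest to newest; keep only the last two
      for (int i = 0; i &lt; commits.size() - 2; i++) {
        commits.get(i).delete();
      }
    }
  }
  </pre>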
  <a name="mergePolicy"></a> <p>Expert:
  <code>IndexWriter</code> allows you to separately change
  the {@link MergePolicy} and the {@link MergeScheduler}.
  The {@link MergePolicy} is invoked whenever there are
  changes to the segments in the index.  Its role is to
  select which merges to do, if any, and return a {@link
  MergePolicy.MergeSpecification} describing the merges.  It
  also selects merges to do for optimize().  (The default is
  {@link LogByteSizeMergePolicy}.)  Then, the {@link
  MergeScheduler} is invoked with the requested merges and
  it decides when and how to run the merges.  The default is
  {@link ConcurrentMergeScheduler}. </p>
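
  <p>A hedged configuration sketch (values are illustrative, not
  recommendations):</p>

  <pre>
  LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
  mp.setMergeFactor(20);   // merge more segments at once
  mp.setMaxMergeMB(512.0); // never merge segments larger than this
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer)
      .setMergePolicy(mp)
      .setMergeScheduler(new ConcurrentMergeScheduler());
  </pre>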
  <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  OutOfMemoryError then IndexWriter will quietly record this
  fact and block all future segment commits.  This is a
  defensive measure in case any internal state (buffered
  documents and deletions) were corrupted.  Any subsequent
  calls to {@link #commit()} will throw an
  IllegalStateException.  The only course of action is to
  call {@link #close()}, which internally will call {@link
  #rollback()}, to undo any changes to the index since the
  last commit.  You can also just call {@link #rollback()}
  directly.</p>
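
  <p>A hedged sketch of the recovery path (<code>writer</code> and
  <code>doc</code> are assumed):</p>

  <pre>
  try {
    writer.addDocument(doc);
    writer.commit();
  } catch (OutOfMemoryError oom) {
    // subsequent commits would throw IllegalStateException; instead
    // discard everything buffered since the last commit:
    writer.close(); // internally calls rollback() after an OOME
  }
  </pre>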
  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  IndexWriter} instances are completely thread
  safe, meaning multiple threads can call any of its
  methods, concurrently.  If your application requires
  external synchronization, you should <b>not</b>
  synchronize on the <code>IndexWriter</code> instance as
  this may cause deadlock; use your own (non-Lucene) objects
  instead. </p>

  <p><b>NOTE</b>: If you call
  <code>Thread.interrupt()</code> on a thread that's within
  IndexWriter, IndexWriter will try to catch this (eg, if
  it's in a wait() or Thread.sleep()), and will then throw
  the unchecked exception {@link ThreadInterruptedException}
  and <b>clear</b> the interrupt status on the thread.</p>
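
  <p>A hedged sketch of propagating the interrupt (<code>writer</code> is
  assumed):</p>

  <pre>
  try {
    writer.optimize();
  } catch (ThreadInterruptedException tie) {
    // IndexWriter cleared the thread's interrupt status before throwing;
    // restore it so callers further up the stack still see it:
    Thread.currentThread().interrupt();
  }
  </pre>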
*/

/*
 * Clarification: Check Points (and commits)
 * IndexWriter writes new index files to the directory without writing a new segments_N
 * file which references these new files. It also means that the state of
 * the in memory SegmentInfos object is different than the most recent
 * segments_N file written to the directory.
 *
 * Each time the SegmentInfos is changed, and matches the (possibly
 * modified) directory files, we have a new "check point".
 * If the modified/new SegmentInfos is written to disk - as a new
 * (generation of) segments_N file - this check point is also an
 * IndexCommit.
 *
 * A new checkpoint always replaces the previous checkpoint and
 * becomes the new "front" of the index. This allows the IndexFileDeleter
 * to delete files that are referenced only by stale checkpoints
 * (files that were created since the last commit, but are no longer
 * referenced by the "front" of the index). For this, IndexFileDeleter
 * keeps track of the last non-commit checkpoint.
 */
public class IndexWriter implements Closeable, TwoPhaseCommit {
  /**
   * Default value for the write lock timeout (1,000 ms).
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
   */
  @Deprecated
  public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;

  private long writeLockTimeout;

  /**
   * Name of the write lock in the index.
   */
  public static final String WRITE_LOCK_NAME = "write.lock";

  /**
   * Value to denote a flush trigger is disabled.
   * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
   */
  @Deprecated
  public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDocs(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;

  /**
   * Default value is 16 MB (which means flush when buffered
   * docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
   */
  @Deprecated
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  /**
   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
   *
   * @deprecated see {@link IndexWriterConfig}
   */
  @Deprecated
  public final static int DEFAULT_MAX_FIELD_LENGTH = MaxFieldLength.UNLIMITED.getLimit();

  /**
   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
   */
  @Deprecated
  public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /**
   * Absolute hard maximum length for a term.  If a term
   * arrives from the analyzer longer than this length, it
   * is skipped and a message is printed to infoStream, if
   * set (see {@link #setInfoStream}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;

  // The normal read buffer size defaults to 1024, but
  // increasing this during merging seems to yield
  // performance gains.  However we don't want to increase
  // it too much because there are quite a few
  // BufferedIndexInputs created during merging.  See
  // LUCENE-888 for details.
  private final static int MERGE_READ_BUFFER_SIZE = 4096;

  // Used for printing messages
  private static final AtomicInteger MESSAGE_ID = new AtomicInteger();
  private int messageID = MESSAGE_ID.getAndIncrement();
  volatile private boolean hitOOM;

  private final Directory directory;  // where this index resides
  private final Analyzer analyzer;    // how to analyze text

  // TODO 4.0: this should be made final once the setter is out
  private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize

  private volatile long changeCount; // increments every time a change is completed
  private long lastCommitChangeCount; // last changeCount that was committed

  private List<SegmentInfo> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails

  volatile SegmentInfos pendingCommit;             // set when a commit is pending (after prepareCommit() & before commit())
  volatile long pendingCommitChangeCount;

  final SegmentInfos segmentInfos = new SegmentInfos();       // the segments

  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  private Map<SegmentInfo,Boolean> segmentsToOptimize = new HashMap<SegmentInfo,Boolean>();           // used by optimize to note those needing optimization
  private int optimizeMaxNumSegments;

  private Lock writeLock;

  private boolean closed;
  private boolean closing;

  // Holds all SegmentInfo instances currently involved in
  // merges
  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

  private MergePolicy mergePolicy;
  // TODO 4.0: this should be made final once the setter is removed
  private /*final*/MergeScheduler mergeScheduler;
  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  private long mergeGen;
  private boolean stopMerges;

  private final AtomicInteger flushCount = new AtomicInteger();
  private final AtomicInteger flushDeletesCount = new AtomicInteger();

  final ReaderPool readerPool = new ReaderPool();
  final BufferedDeletesStream bufferedDeletesStream;

  // This is a "write once" variable (like the organic dye
  // on a DVD-R that may or may not be heated by a laser and
  // then cooled to permanently record the event): it's
  // false, until getReader() is called for the first time,
  // at which point it's switched to true and never changes
  // back to false.  Once this is true, we hold open and
  // reuse SegmentReader instances internally for applying
  // deletes, doing merges, and reopening near real-time
  // readers.
  private volatile boolean poolReaders;

  // The instance that was passed to the constructor. It is saved only in order
  // to allow users to query an IndexWriter's settings.
  private final IndexWriterConfig config;

  // The PayloadProcessorProvider to use when segments are merged
  private PayloadProcessorProvider payloadProcessorProvider;

  // for use only by tests
  boolean anyNonBulkMerges;
  /**
   * Expert: returns a readonly reader, covering all
   * committed as well as un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * {@link #flush} and then using {@link IndexReader#open} to
   * open a new reader.  But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
   *
   * <p>It's <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter.  You'll have to
   * experiment in your situation to determine if it's
   * fast enough.  As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * IndexReader#reopen}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc.  This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link #setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it's committed
   * to the index.  This is important for minimizing
   * index-to-search delay after a large merge.</p>
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used.  However,
   * if you attempt to reopen any of those readers, you'll
   * hit an {@link AlreadyClosedException}.</p>
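   *
   * <p>A hedged usage sketch (illustrative only; <code>writer</code> is an
   * assumed, already-open IndexWriter):</p>
   *
   * <pre>
   * IndexReader r = writer.getReader(); // sees uncommitted changes
   * try {
   *   // ... run searches against r ...
   * } finally {
   *   r.close(); // the caller must close the returned reader
   * }
   * </pre>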
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @deprecated Please use {@link
   * IndexReader#open(IndexWriter,boolean)} instead.
   *
   * @throws IOException
   */
  @Deprecated
  public IndexReader getReader() throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), true);
  }
  IndexReader getReader(boolean applyAllDeletes) throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
  }

  /** Expert: like {@link #getReader}, except you can
   *  specify which termInfosIndexDivisor should be used for
   *  any newly opened readers.
   *  @param termInfosIndexDivisor Subsamples which indexed
   *  terms are loaded into RAM. This has the same effect as {@link
   *  IndexWriter#setTermIndexInterval} except that setting
   *  must be done at indexing time while this setting can be
   *  set per reader.  When set to N, then one in every
   *  N*termIndexInterval terms in the index is loaded into
   *  memory.  By setting this to a value &gt; 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo.  The default value is 1.  Set this
   *  to -1 to skip loading the terms index entirely.
   *
   *  @deprecated Please use {@link
   *  IndexReader#open(IndexWriter,boolean)} instead.  Furthermore,
   *  this method cannot guarantee the reader (and its
   *  sub-readers) will be opened with the
   *  termInfosIndexDivisor setting because some of them may
   *  have already been opened according to {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor}. You
   *  should set the requested termInfosIndexDivisor through
   *  {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
   *  {@link #getReader()}. */
  @Deprecated
  public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
    return getReader(termInfosIndexDivisor, true);
  }

  IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
    ensureOpen();

    final long tStart = System.currentTimeMillis();

    if (infoStream != null) {
      message("flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    poolReaders = true;

    // Prevent segmentInfos from changing while opening the
    // reader; in theory we could do similar retry logic,
    // just like we do when loading segments_N
    IndexReader r;
    synchronized(this) {
      flush(false, applyAllDeletes);
      r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
      if (infoStream != null) {
        message("return reader version=" + r.getVersion() + " reader=" + r);
      }
    }

    maybeMerge();

    if (infoStream != null) {
      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
    }
    return r;
  }
  // Used for all SegmentReaders we open
  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());

  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
    return readerFinishedListeners;
  }

  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader.  This pool
   *  reuses instances of the SegmentReaders in all these
   *  places if it is in "near real-time mode" (getReader()
   *  has been called on this instance). */
  class ReaderPool {
    private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();

    /** Forcefully clear changes for the specified segments.  This is called on successful merge. */
    synchronized void clear(List<SegmentInfo> infos) throws IOException {
      if (infos == null) {
        for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
          ent.getValue().hasChanges = false;
        }
      } else {
        for (final SegmentInfo info: infos) {
          final SegmentReader r = readerMap.get(info);
          if (r != null) {
            r.hasChanges = false;
          }
        }
      }
    }

    // used only by asserts
    public synchronized boolean infoIsLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      assert idx != -1: "info=" + info + " isn't in pool";
      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
      return true;
    }

    public synchronized SegmentInfo mapToLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      if (idx != -1) {
        info = segmentInfos.info(idx);
      }
      return info;
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr) throws IOException {
      return release(sr, false);
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr, boolean drop) throws IOException {

      final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());

      assert !pooled || readerMap.get(sr.getSegmentInfo()) == sr;

      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      sr.decRef();

      if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {

        // We invoke deleter.checkpoint below, so we must be
        // sync'd on IW if there are changes:
        assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge.
        sr.hasChanges &= !drop;

        final boolean hasChanges = sr.hasChanges;

        // Drop our ref -- this will commit any pending
        // changes to the dir
        sr.close();

        // We are the last ref to this reader; since we're
        // not pooling readers, we release it:
        readerMap.remove(sr.getSegmentInfo());

        return hasChanges;
      }

      return false;
    }

    public synchronized void drop(List<SegmentInfo> infos) throws IOException {
      for(SegmentInfo info : infos) {
        drop(info);
      }
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.hasChanges = false;
        readerMap.remove(info);
        sr.decRef();
      }
    }

    public synchronized void dropAll() throws IOException {
      for(SegmentReader reader : readerMap.values()) {
        reader.hasChanges = false;

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader using this SR is still open
        reader.decRef();
      }
      readerMap.clear();
    }

    /** Remove all our references to readers, and commits
     *  any pending changes. */
    synchronized void close() throws IOException {
      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for(Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {

        SegmentReader sr = ent.getValue();
        if (sr.hasChanges) {
          assert infoIsLive(sr.getSegmentInfo());
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader is kept open after the
        // IndexWriter instance is closed
        sr.decRef();
      }

      readerMap.clear();
    }

    /**
     * Commit all segment readers in the pool.
     * @throws IOException
     */
    synchronized void commit(SegmentInfos infos) throws IOException {

      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for (SegmentInfo info : infos) {

        final SegmentReader sr = readerMap.get(info);
        if (sr != null && sr.hasChanges) {
          assert infoIsLive(info);
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }
      }
    }

    /**
     * Returns a ref to a clone.  NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
      SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
      try {
        return (SegmentReader) sr.clone(true);
      } finally {
        sr.decRef();
      }
    }

    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
      return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, config.getReaderTermsIndexDivisor());
    }

    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     *
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @param readBufferSize
     * @param termsIndexDivisor
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {

      if (poolReaders) {
        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
      }

      SegmentReader sr = readerMap.get(info);
      if (sr == null) {
        // TODO: we may want to avoid doing this while
        // Returns a ref, which we xfer to readerMap:
        sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
        sr.readerFinishedListeners = readerFinishedListeners;

        if (info.dir == directory) {
          // Only pool if reader is not external
          readerMap.put(info, sr);
        }
      } else {
        if (doOpenStores) {
          sr.openDocStores();
        }
        if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
          // If this reader was originally opened because we
          // needed to merge it, we didn't load the terms
          // index.  But now, if the caller wants the terms
          // index (eg because it's doing deletes, or an NRT
          // reader is being opened) we ask the reader to
          // load its terms index.
          sr.loadTermsIndex(termsIndexDivisor);
        }
      }

      // Return a ref to our caller
      if (info.dir == directory) {
        // Only incRef if we pooled (reader is not external)
        sr.incRef();
      }
      return sr;
    }

    // Returns a ref
    public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
      SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.incRef();
      }
      return sr;
    }
  }
  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's
   * delCount is returned.
   */
  public int numDeletedDocs(SegmentInfo info) throws IOException {
    SegmentReader reader = readerPool.getIfExists(info);
    try {
      if (reader != null) {
        return reader.numDeletedDocs();
      } else {
        return info.getDelCount();
      }
    } finally {
      if (reader != null) {
        readerPool.release(reader);
      }
    }
  }

  /**
   * Used internally to throw an {@link
   * AlreadyClosedException} if this IndexWriter has been
   * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed
   */
  protected final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
    if (closed || (includePendingClose && closing)) {
      throw new AlreadyClosedException("this IndexWriter is closed");
    }
  }

  protected final void ensureOpen() throws AlreadyClosedException {
    ensureOpen(true);
  }

  /**
   * Prints a message to the infoStream (if non-null),
   * prefixed with the identifying information for this
   * writer and the thread that's calling it.
   */
  public void message(String message) {
    if (infoStream != null)
      infoStream.println("IW " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
  }

  /**
   * Casts current mergePolicy to LogMergePolicy, and throws
   * an exception if the mergePolicy is not a LogMergePolicy.
   */
  private LogMergePolicy getLogMergePolicy() {
    if (mergePolicy instanceof LogMergePolicy)
      return (LogMergePolicy) mergePolicy;
    else
      throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  }

  /** <p>Get the current setting of whether newly flushed
   *  segments will use the compound file format.  Note that
   *  this just returns the value previously set with
   *  setUseCompoundFile(boolean), or the default value
   *  (true).  You cannot use this to query the status of
   *  previously flushed segments.</p>
   *
   *  <p>Note that this method is a convenience method: it
   *  just calls mergePolicy.getUseCompoundFile as long as
   *  mergePolicy is an instance of {@link LogMergePolicy}.
   *  Otherwise an IllegalArgumentException is thrown.</p>
   *
   *  @see #setUseCompoundFile(boolean)
   *  @deprecated use {@link LogMergePolicy#getUseCompoundFile()}
   */
  @Deprecated
  public boolean getUseCompoundFile() {
    return getLogMergePolicy().getUseCompoundFile();
  }

  /**
   * <p>Setting to turn on usage of a compound file. When on, multiple files for
   * each segment are merged into a single file when a new segment is flushed.</p>
   *
   * <p>Note that this method is a convenience method: it just calls
   * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
   * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}.
   */
  @Deprecated
  public void setUseCompoundFile(boolean value) {
    getLogMergePolicy().setUseCompoundFile(value);
  }

  /** Expert: Set the Similarity implementation used by this IndexWriter.
   *
   * @see Similarity#setDefault(Similarity)
   * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
   */
  @Deprecated
  public void setSimilarity(Similarity similarity) {
    ensureOpen();
    this.similarity = similarity;
    docWriter.setSimilarity(similarity);
    // Required so config.getSimilarity returns the right value. But this will
    // go away together with the method in 4.0.
    config.setSimilarity(similarity);
  }

  /** Expert: Return the Similarity implementation used by this IndexWriter.
   *
   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
   * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
   */
  @Deprecated
  public Similarity getSimilarity() {
    ensureOpen();
    return similarity;
  }

  /** Expert: Set the interval between indexed terms.  Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms.  Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term.  In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed.  In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data.  In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
   *
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
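   *
   * <p>A worked example of the arithmetic above (the numbers are
   * illustrative assumptions, not measurements): with 10,000,000 unique
   * terms and <code>interval=128</code>, an IndexReader loads about
   * 10,000,000/128 &asymp; 78,125 terms into memory, and a random term
   * lookup scans on average 128/2 = 64 terms.</p>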
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
   */
  @Deprecated
  public void setTermIndexInterval(int interval) {
    ensureOpen();
    config.setTermIndexInterval(interval);
  }

  /** Expert: Return the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
   */
  @Deprecated
  public int getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return config.getTermIndexInterval();
  }
  /**
   * Constructs an IndexWriter for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If <code>create</code>
   * is true, then a new, empty index will be created in
   * <code>d</code>, replacing the index already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Constructs an IndexWriter for the index in
   * <code>d</code>, first creating it if it does not
   * already exist.  Text will be analyzed with
   * <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *  via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>,
   * first creating it if it does not already exist.  Text
   * will be analyzed with <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter on specific commit
   * point, with a custom {@link IndexDeletionPolicy}, for
   * the index in <code>d</code>.  Text will be analyzed
   * with <code>a</code>.
   *
   * <p> This is only meaningful if you've used a {@link
   * IndexDeletionPolicy} in the past that keeps more than
   * just the last commit.
   *
   * <p>This operation is similar to {@link #rollback()},
   * except that method can only rollback what's been done
   * with the current instance of IndexWriter since its last
   * commit, whereas this method can rollback to an
   * arbitrary commit point from the past, assuming the
   * {@link IndexDeletionPolicy} has preserved past
   * commits.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths, value is in number of terms/tokens.  See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
   * @param commit which commit to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
       throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a)
        .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Constructs a new IndexWriter per the settings given in <code>conf</code>.
   * Note that the passed in {@link IndexWriterConfig} is
   * privately cloned; if you need to make subsequent "live"
   * changes to the configuration use {@link #getConfig}.
   *
   * @param d
   *          the index directory. The index is either created or appended
   *          according to <code>conf.getOpenMode()</code>.
   * @param conf
   *          the configuration settings according to which IndexWriter should
   *          be initialized.
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws LockObtainFailedException
   *           if another writer has this index open (<code>write.lock</code>
   *           could not be obtained)
   * @throws IOException
   *           if the directory cannot be read/written to, or if it does not
   *           exist and <code>conf.getOpenMode()</code> is
   *           <code>OpenMode.APPEND</code> or if there is any other low-level
   *           IO error
   */
  public IndexWriter(Directory d, IndexWriterConfig conf)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    config = (IndexWriterConfig) conf.clone();
    directory = d;
    analyzer = conf.getAnalyzer();
    infoStream = defaultInfoStream;
    writeLockTimeout = conf.getWriteLockTimeout();
    similarity = conf.getSimilarity();
    mergePolicy = conf.getMergePolicy();
    mergePolicy.setIndexWriter(this);
    mergeScheduler = conf.getMergeScheduler();
    bufferedDeletesStream = new BufferedDeletesStream(messageID);
    bufferedDeletesStream.setInfoStream(infoStream);
    poolReaders = conf.getReaderPooling();

    writeLock = directory.makeLock(WRITE_LOCK_NAME);

    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);

    OpenMode mode = conf.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !IndexReader.indexExists(directory);
    }

    boolean success = false;

    // TODO: we should check whether this index is too old,
    // and throw an IndexFormatTooOldExc up front, here,
    // instead of later when merge, applyDeletes, getReader
    // is attempted.  I think to do this we should store the
    // oldest segment's version in segments_N.
    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }

        // Record that we have a change (zero out all
        // segments) pending:
        changeCount++;
        segmentInfos.changed();
      } else {
        segmentInfos.read(directory);

        IndexCommit commit = conf.getIndexCommit();
        if (commit != null) {
          // Swap out all segments, but, keep metadata in
          // SegmentInfos, like version & generation, to
          // preserve write-once.  This is important if
          // readers are open against the future commit
          // points.
          if (commit.getDirectory() != directory)
            throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
          SegmentInfos oldInfos = new SegmentInfos();
          oldInfos.read(directory, commit.getSegmentsFileName());
          segmentInfos.replace(oldInfos);
          changeCount++;
          segmentInfos.changed();
          if (infoStream != null)
            message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

      docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      synchronized(this) {
        deleter = new IndexFileDeleter(directory,
                                       conf.getIndexDeletionPolicy(),
                                       segmentInfos, infoStream,
                                       this);
      }

      if (deleter.startingCommitDeleted) {
        // Deletion policy deleted the "head" commit point.
        // We have to mark ourself as changed so that if we
        // are closed w/o any further changes we write a new
        // segments_N file.
        changeCount++;
        segmentInfos.changed();
      }

      if (infoStream != null) {
        messageState();
      }

      success = true;

    } finally {
      if (!success) {
        if (infoStream != null) {
          message("init: hit exception on init; releasing write lock");
        }
        try {
          writeLock.release();
        } catch (Throwable t) {
          // don't mask the original exception
        }
        writeLock = null;
      }
    }
  }
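
  // An illustrative construction sketch (not part of this class; `dir` and
  // `analyzer` are assumed to exist).  CREATE_OR_APPEND creates a new index
  // only when the directory does not already contain one:
  //
  //   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer)
  //       .setOpenMode(OpenMode.CREATE_OR_APPEND);
  //   IndexWriter writer = new IndexWriter(dir, conf);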
  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
    Directory cfsDir = null;
    try {
      if (info.getUseCompoundFile()) {
        cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
      } else {
        cfsDir = directory;
      }
      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
    } finally {
      if (info.getUseCompoundFile() && cfsDir != null) {
        ((CompoundFileReader) cfsDir).close();
      }
    }
  }

  private FieldInfos getCurrentFieldInfos() throws IOException {
    final FieldInfos fieldInfos;
    if (segmentInfos.size() > 0) {
      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
        // Pre-3.1 index.  In this case we sweep all
        // segments, merging their FieldInfos:
        fieldInfos = new FieldInfos();
        for(SegmentInfo info : segmentInfos) {
          final FieldInfos segFieldInfos = getFieldInfos(info);
          final int fieldCount = segFieldInfos.size();
          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
          }
        }
      } else {
        // Already a 3.1 index; just seed the FieldInfos
        // from the last segment
        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
      }
    } else {
      fieldInfos = new FieldInfos();
    }
    return fieldInfos;
  }
  /**
   * Returns the private {@link IndexWriterConfig}, cloned
   * from the {@link IndexWriterConfig} passed to
   * {@link #IndexWriter(Directory, IndexWriterConfig)}.
   * <p>
   * <b>NOTE:</b> some settings may be changed on the
   * returned {@link IndexWriterConfig}, and will take
   * effect in the current IndexWriter instance. See the
   * javadocs for the specific setters in {@link
   * IndexWriterConfig} for details.
   */
  public IndexWriterConfig getConfig() {
    ensureOpen(false);
    return config;
  }

  /**
   * Expert: set the merge policy used by this writer.
   *
   * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
   */
  @Deprecated
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    mergePolicy.setIndexWriter(this);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
    // Required so config.getMergePolicy returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergePolicy(mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   *
   * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
   */
  @Deprecated
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }

  /**
   * Expert: set the merge scheduler used by this writer.
   * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
   */
  @Deprecated
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
    // Required so config.getMergeScheduler returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergeScheduler(mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler(MergeScheduler)
   * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
   */
  @Deprecated
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }

  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds.  Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
   */
  @Deprecated
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
   */
  @Deprecated
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.  This setting refers to the number of running terms,
   * not to the number of different terms.
   *
   * <strong>Note:</strong> this silently truncates large documents, excluding
   * from the index all terms that occur further in the document.  If you know
   * your source documents are large, be sure to set this value high enough to
   * accommodate the expected size.  If you set it to Integer.MAX_VALUE, then the
   * only limit is your memory, but you should anticipate an OutOfMemoryError.
   *
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be
   * indexed for a field.
   *
   * @deprecated use {@link LimitTokenCountAnalyzer} instead. Note that the
   *             behavior slightly changed - the analyzer limits the number of
   *             tokens per token stream created, while this setting limits the
   *             total number of tokens to index. This only matters if you index
   *             many multi-valued fields though.
   */
  @Deprecated
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   * @deprecated use {@link LimitTokenCountAnalyzer} to limit number of tokens.
   */
  @Deprecated
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public void setReaderTermsIndexDivisor(int divisor) {
    ensureOpen();
    config.setReaderTermsIndexDivisor(divisor);
    if (infoStream != null) {
      message("setReaderTermsIndexDivisor " + divisor);
    }
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#getReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public int getReaderTermsIndexDivisor() {
    ensureOpen();
    return config.getReaderTermsIndexDivisor();
  }

  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment.  Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents.  Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    pushMaxBufferedDocs();
    if (infoStream != null) {
      message("setMaxBufferedDocs " + maxBufferedDocs);
    }
    // Required so config.getMaxBufferedDocs returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMaxBufferedDocs(maxBufferedDocs);
  }

  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (config.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = config.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
   */
  @Deprecated
  public int getMaxBufferedDocs() {
    ensureOpen();
    return config.getMaxBufferedDocs();
  }
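
  // A hedged tuning sketch (values are illustrative): flush by document
  // count instead of RAM usage, using the deprecated setters above:
  //
  //   writer.setMaxBufferedDocs(1000);                           // flush every 1000 docs
  //   writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // disable the RAM trigger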
1516 /** Determines the amount of RAM that may be used for
1517 * buffering added documents and deletions before they are
1518 * flushed to the Directory. Generally for faster
1519 * indexing performance it's best to flush by RAM usage
1520 * instead of document count and use as large a RAM buffer
1523 * <p>When this is set, the writer will flush whenever
1524 * buffered documents and deletions use this much RAM.
1525 * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
1526 * triggering a flush due to RAM usage. Note that if
1527 * flushing by document count is also enabled, then the
1528 * flush will be triggered by whichever comes first.</p>
1530 * <p> <b>NOTE</b>: the account of RAM usage for pending
1531 * deletions is only approximate. Specifically, if you
1532 * delete by Query, Lucene currently has no way to measure
1533 * the RAM usage if individual Queries so the accounting
1534 * will under-estimate and you should compensate by either
1535 * calling commit() periodically yourself, or by using
1536 * {@link #setMaxBufferedDeleteTerms} to flush by count
1537 * instead of RAM usage (each buffered delete Query counts
1540 * <p> <b>NOTE</b>: because IndexWriter uses
1541 * <code>int</code>s when managing its internal storage,
1542 * the absolute maximum value for this setting is somewhat
1543 * less than 2048 MB. The precise limit depends on
1544 * various factors, such as how large your documents are,
1545 * how many fields have norms, etc., so it's best to set
1546 * this value comfortably under 2048.</p>
1548 * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
1550 * @throws IllegalArgumentException if ramBufferSize is
1551 * enabled but non-positive, or it disables ramBufferSize
1552 * when maxBufferedDocs is already disabled
1553 * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
1556 public void setRAMBufferSizeMB(double mb) {
1557 if (infoStream != null) {
1558 message("setRAMBufferSizeMB " + mb);
1560 // Required so config.getRAMBufferSizeMB returns the right value. But this
1561 // will go away together with the method in 4.0.
1562 config.setRAMBufferSizeMB(mb);
1566 * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
1567 * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
1570 public double getRAMBufferSizeMB() {
1571 return config.getRAMBufferSizeMB();
1575 * <p>Determines the minimal number of delete terms required before the buffered
1576 * in-memory delete terms are applied and flushed. If there are documents
1577 * buffered in memory at the time, they are merged and a new segment is
1580 * <p>Disabled by default (writer flushes by RAM usage).</p>
1582 * @throws IllegalArgumentException if maxBufferedDeleteTerms
1583 * is enabled but smaller than 1
1584 * @see #setRAMBufferSizeMB
1585 * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
1588 public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
1590 if (infoStream != null)
1591 message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
1592 // Required so config.getMaxBufferedDeleteTerms returns the right value. But
1593 // this will go away together with the method in 4.0.
1594 config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
1598 * Returns the number of buffered deleted terms that will
1599 * trigger a flush if enabled.
1600 * @see #setMaxBufferedDeleteTerms
1601 * @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
1604 public int getMaxBufferedDeleteTerms() {
1606 return config.getMaxBufferedDeleteTerms();
1609 /** Determines how often segment indices are merged by addDocument(). With
1610 * smaller values, less RAM is used while indexing, and searches on
1611 * unoptimized indices are faster, but indexing speed is slower. With larger
1612 * values, more RAM is used during indexing, and while searches on unoptimized
1613 * indices are slower, indexing is faster. Thus larger values (> 10) are best
1614 * for batch index creation, and smaller values (< 10) for indices that are
1615 * interactively maintained.
1617 * <p>Note that this method is a convenience method: it
1618 * just calls mergePolicy.setMergeFactor as long as
1619 * mergePolicy is an instance of {@link LogMergePolicy}.
1620 * Otherwise an IllegalArgumentException is thrown.</p>
1622 * <p>This must never be less than 2. The default value is 10.
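 * <p>A sketch of the non-deprecated equivalent, assuming the writer is
 * configured with a {@link LogMergePolicy} (the factor of 30 is an
 * arbitrary batch-indexing example):</p>
 *
 * <pre>
 * LogMergePolicy lmp = new LogByteSizeMergePolicy();
 * lmp.setMergeFactor(30);   // large factor: faster batch indexing, more segments
 * conf.setMergePolicy(lmp); // conf is the IndexWriterConfig used to open the writer
 * </pre>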
1623 * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
1626 public void setMergeFactor(int mergeFactor) {
1627 getLogMergePolicy().setMergeFactor(mergeFactor);
1631 * <p>Returns the number of segments that are merged at
1632 * once and also controls the total number of segments
1633 * allowed to accumulate in the index.</p>
1635 * <p>Note that this method is a convenience method: it
1636 * just calls mergePolicy.getMergeFactor as long as
1637 * mergePolicy is an instance of {@link LogMergePolicy}.
1638 * Otherwise an IllegalArgumentException is thrown.</p>
1640 * @see #setMergeFactor
1641 * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
1644 public int getMergeFactor() {
1645 return getLogMergePolicy().getMergeFactor();
1648 /** If non-null, this will be the default infoStream used
1649 * by a newly instantiated IndexWriter.
1650 * @see #setInfoStream
1652 public static void setDefaultInfoStream(PrintStream infoStream) {
1653 IndexWriter.defaultInfoStream = infoStream;
1657 * Returns the current default infoStream for newly
1658 * instantiated IndexWriters.
1659 * @see #setDefaultInfoStream
1661 public static PrintStream getDefaultInfoStream() {
1662 return IndexWriter.defaultInfoStream;
1665 /** If non-null, information about merges, deletes and a
1666  * message when maxFieldLength is reached will be printed to this.
1669 public void setInfoStream(PrintStream infoStream) throws IOException {
1671 this.infoStream = infoStream;
1672 docWriter.setInfoStream(infoStream);
1673 deleter.setInfoStream(infoStream);
1674 bufferedDeletesStream.setInfoStream(infoStream);
1675 if (infoStream != null)
1679 private void messageState() throws IOException {
1680 message("\ndir=" + directory + "\n" +
1681 "index=" + segString() + "\n" +
1682 "version=" + Constants.LUCENE_VERSION + "\n" +
1687 * Returns the current infoStream in use by this writer.
1688 * @see #setInfoStream
1690 public PrintStream getInfoStream() {
1695  /** Returns true if verbose output is enabled (i.e., infoStream != null). */
1696 public boolean verbose() {
1697 return infoStream != null;
1701  * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
1702 * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1703 * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
1706 public void setWriteLockTimeout(long writeLockTimeout) {
1708 this.writeLockTimeout = writeLockTimeout;
1709 // Required so config.getWriteLockTimeout returns the right value. But this
1710 // will go away together with the method in 4.0.
1711 config.setWriteLockTimeout(writeLockTimeout);
1715 * Returns allowed timeout when acquiring the write lock.
1716 * @see #setWriteLockTimeout
1717 * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
1720 public long getWriteLockTimeout() {
1722 return writeLockTimeout;
1726  * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in milliseconds).
1728 * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
1731 public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1732 IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
1736 * Returns default write lock timeout for newly
1737 * instantiated IndexWriters.
1738 * @see #setDefaultWriteLockTimeout
1739 * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
1742 public static long getDefaultWriteLockTimeout() {
1743 return IndexWriterConfig.getDefaultWriteLockTimeout();
1747 * Commits all changes to an index and closes all
1748 * associated files. Note that this may be a costly
1749 * operation, so, try to re-use a single writer instead of
1750 * closing and opening a new one. See {@link #commit()} for
1751 * caveats about write caching done by some IO devices.
1753 * <p> If an Exception is hit during close, eg due to disk
1754 * full or some other reason, then both the on-disk index
1755 * and the internal state of the IndexWriter instance will
1756 * be consistent. However, the close will not be complete
1757 * even though part of it (flushing buffered documents)
1758  * may have succeeded, so the write lock will still be held.
1761 * <p> If you can correct the underlying cause (eg free up
1762 * some disk space) then you can call close() again.
1763 * Failing that, if you want to force the write lock to be
1764 * released (dangerous, because you may then lose buffered
1765 * docs in the IndexWriter instance) then you can do
1766 * something like this:</p>
1772 * if (IndexWriter.isLocked(directory)) {
1773 * IndexWriter.unlock(directory);
1778 * after which, you must be certain not to use the writer
1779 * instance anymore.</p>
1781 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1782 * you should immediately close the writer, again. See <a
1783 * href="#OOME">above</a> for details.</p>
1785 * @throws CorruptIndexException if the index is corrupt
1786 * @throws IOException if there is a low-level IO error
1788 public void close() throws CorruptIndexException, IOException {
1793 * Closes the index with or without waiting for currently
1794 * running merges to finish. This is only meaningful when
1795  * using a MergeScheduler that runs merges in background threads.
1798 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1799 * you should immediately close the writer, again. See <a
1800 * href="#OOME">above</a> for details.</p>
1802 * <p><b>NOTE</b>: it is dangerous to always call
1803 * close(false), especially when IndexWriter is not open
1804 * for very long, because this can result in "merge
1805 * starvation" whereby long merges will never have a
1806 * chance to finish. This will cause too many segments in
1807 * your index over time.</p>
1809 * @param waitForMerges if true, this call will block
1810 * until all merges complete; else, it will ask all
1811 * running merges to abort, wait until those merges have
1812  * finished (which should be at most a few seconds), and then return.
1815 public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
1817 // Ensure that only one thread actually gets to do the closing:
1818 if (shouldClose()) {
1819 // If any methods have hit OutOfMemoryError, then abort
1820 // on close, in case the internal state of IndexWriter
1821 // or DocumentsWriter is corrupt
1825 closeInternal(waitForMerges);
1829 // Returns true if this thread should attempt to close, or
1830 // false if IndexWriter is now closed; else, waits until
1831 // another thread finishes closing
1832 synchronized private boolean shouldClose() {
1839 // Another thread is presently trying to close;
1840 // wait until it finishes one way (closes
1841 // successfully) or another (fails to close)
1849 private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
1852 if (infoStream != null) {
1853 message("now flush at close waitForMerges=" + waitForMerges);
1858 // Only allow a new merge to be triggered if we are
1859 // going to wait for merges:
1861 flush(waitForMerges, true);
1865 // Give merge scheduler last chance to run, in case
1866 // any pending merges are waiting:
1867 mergeScheduler.merge(this);
1869 mergePolicy.close();
1871 synchronized(this) {
1872 finishMerges(waitForMerges);
1876 mergeScheduler.close();
1878 if (infoStream != null)
1879 message("now call final commit()");
1882 commitInternal(null);
1885 if (infoStream != null)
1886 message("at close: " + segString());
1888 synchronized(this) {
1894 if (writeLock != null) {
1895 writeLock.release(); // release write lock
1898 synchronized(this) {
1901 } catch (OutOfMemoryError oom) {
1902 handleOOM(oom, "closeInternal");
1904 synchronized(this) {
1908 if (infoStream != null)
1909 message("hit exception while closing");
1915 /** Returns the Directory used by this index. */
1916 public Directory getDirectory() {
1917 // Pass false because the flush during closing calls getDirectory
1922 /** Returns the analyzer used by this index. */
1923 public Analyzer getAnalyzer() {
1928 /** Returns total number of docs in this index, including
1929 * docs not yet flushed (still in the RAM buffer),
1930 * not counting deletions.
1932 public synchronized int maxDoc() {
1934 if (docWriter != null)
1935 count = docWriter.getNumDocs();
1939 count += segmentInfos.totalDocCount();
1943 /** Returns total number of docs in this index, including
1944 * docs not yet flushed (still in the RAM buffer), and
1945 * including deletions. <b>NOTE:</b> buffered deletions
1946 * are not counted. If you really need these to be
1947 * counted you should call {@link #commit()} first.
1949 public synchronized int numDocs() throws IOException {
1951 if (docWriter != null)
1952 count = docWriter.getNumDocs();
1956 for (final SegmentInfo info : segmentInfos) {
1957 count += info.docCount - numDeletedDocs(info);
1962 public synchronized boolean hasDeletions() throws IOException {
1964 if (bufferedDeletesStream.any()) {
1967 if (docWriter.anyDeletions()) {
1970 for (final SegmentInfo info : segmentInfos) {
1971 if (info.hasDeletions()) {
1979 * The maximum number of terms that will be indexed for a single field in a
1980 * document. This limits the amount of memory required for indexing, so that
1981 * collections with very large files will not crash the indexing process by
1982 * running out of memory.<p/>
1983 * Note that this effectively truncates large documents, excluding from the
1984 * index terms that occur further in the document. If you know your source
1985 * documents are large, be sure to set this value high enough to accommodate
1986 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1987 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1988 * By default, no more than 10,000 terms will be indexed for a field.
1990 * @see MaxFieldLength
1991 * @deprecated remove in 4.0
1994 private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
1997 * Adds a document to this index. If the document contains more than
1998  * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2001 * <p> Note that if an Exception is hit (for example disk full)
2002 * then the index will be consistent, but this document
2003 * may not have been added. Furthermore, it's possible
2004 * the index will have one segment in non-compound format
2005 * even when using compound files (when a merge has
2006 * partially succeeded).</p>
2008 * <p> This method periodically flushes pending documents
2009 * to the Directory (see <a href="#flush">above</a>), and
2010 * also periodically triggers segment merges in the index
2011 * according to the {@link MergePolicy} in use.</p>
2013 * <p>Merges temporarily consume space in the
2014 * directory. The amount of space required is up to 1X the
2015 * size of all segments being merged, when no
2016 * readers/searchers are open against the index, and up to
2017 * 2X the size of all segments being merged when
2018 * readers/searchers are open against the index (see
2019 * {@link #optimize()} for details). The sequence of
2020 * primitive merge operations performed is governed by the
2023 * <p>Note that each term in the document can be no longer
2024 * than 16383 characters, otherwise an
2025 * IllegalArgumentException will be thrown.</p>
2027 * <p>Note that it's possible to create an invalid Unicode
2028  * string in Java if a UTF16 surrogate pair is malformed.
2029 * In this case, the invalid characters are silently
2030  * replaced with the Unicode replacement character U+FFFD.</p>
2033 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2034 * you should immediately close the writer. See <a
2035 * href="#OOME">above</a> for details.</p>
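 * <p>A minimal usage sketch (the field name and value below are
 * hypothetical, not part of this API):</p>
 *
 * <pre>
 * Document doc = new Document();
 * doc.add(new Field("title", "an example document",
 *                   Field.Store.YES, Field.Index.ANALYZED));
 * writer.addDocument(doc);
 * </pre>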
2037 * @throws CorruptIndexException if the index is corrupt
2038 * @throws IOException if there is a low-level IO error
2040 public void addDocument(Document doc) throws CorruptIndexException, IOException {
2041 addDocument(doc, analyzer);
2045 * Adds a document to this index, using the provided analyzer instead of the
2046 * value of {@link #getAnalyzer()}. If the document contains more than
2047  * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2050 * <p>See {@link #addDocument(Document)} for details on
2051 * index and IndexWriter state after an Exception, and
2052 * flushing/merging temporary free space requirements.</p>
2054 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2055 * you should immediately close the writer. See <a
2056 * href="#OOME">above</a> for details.</p>
2058 * @throws CorruptIndexException if the index is corrupt
2059 * @throws IOException if there is a low-level IO error
2061 public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
2063 boolean doFlush = false;
2064 boolean success = false;
2067 doFlush = docWriter.updateDocument(doc, analyzer, null);
2070 if (!success && infoStream != null)
2071 message("hit exception adding document");
2075 } catch (OutOfMemoryError oom) {
2076 handleOOM(oom, "addDocument");
2081 * Atomically adds a block of documents with sequentially
2082 * assigned document IDs, such that an external reader
2083 * will see all or none of the documents.
2085 * <p><b>WARNING</b>: the index does not currently record
2086 * which documents were added as a block. Today this is
2087 * fine, because merging will preserve the block (as long
2088  * as none of them were deleted). But it's possible in the
2089 * future that Lucene may more aggressively re-order
2090 * documents (for example, perhaps to obtain better index
2091 * compression), in which case you may need to fully
2092 * re-index your documents at that time.
2094 * <p>See {@link #addDocument(Document)} for details on
2095 * index and IndexWriter state after an Exception, and
2096 * flushing/merging temporary free space requirements.</p>
2098 * <p><b>NOTE</b>: tools that do offline splitting of an index
2099 * (for example, IndexSplitter in contrib) or
2100 * re-sorting of documents (for example, IndexSorter in
2101 * contrib) are not aware of these atomically added documents
2102  * and will likely break them up. Use such tools at your own risk!
2105 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2106 * you should immediately close the writer. See <a
2107 * href="#OOME">above</a> for details.</p>
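 * <p>A sketch of adding one block (the parent/child split and the
 * ordering convention are application-level assumptions, not enforced
 * by this method):</p>
 *
 * <pre>
 * List&lt;Document&gt; block = new ArrayList&lt;Document&gt;();
 * block.add(childDoc1);
 * block.add(childDoc2);
 * block.add(parentDoc); // e.g., parent last, by application convention
 * writer.addDocuments(block); // readers see all three or none
 * </pre>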
2109 * @throws CorruptIndexException if the index is corrupt
2110 * @throws IOException if there is a low-level IO error
2112 * @lucene.experimental
2114 public void addDocuments(Collection<Document> docs) throws CorruptIndexException, IOException {
2115 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2116 addDocuments(docs, analyzer);
2120 * Atomically adds a block of documents, analyzed using the
2121 * provided analyzer, with sequentially assigned document
2122  * IDs, such that an external reader will see all or none of the documents.
2125 * @throws CorruptIndexException if the index is corrupt
2126 * @throws IOException if there is a low-level IO error
2128 * @lucene.experimental
2130 public void addDocuments(Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2131 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2132 updateDocuments(null, docs, analyzer);
2136 * Atomically deletes documents matching the provided
2137 * delTerm and adds a block of documents with sequentially
2138 * assigned document IDs, such that an external reader
2139 * will see all or none of the documents.
2141 * See {@link #addDocuments(Collection)}.
2143 * @throws CorruptIndexException if the index is corrupt
2144 * @throws IOException if there is a low-level IO error
2146 * @lucene.experimental
2148 public void updateDocuments(Term delTerm, Collection<Document> docs) throws CorruptIndexException, IOException {
2149 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2150 updateDocuments(delTerm, docs, analyzer);
2154 * Atomically deletes documents matching the provided
2155 * delTerm and adds a block of documents, analyzed using
2156 * the provided analyzer, with sequentially
2157 * assigned document IDs, such that an external reader
2158 * will see all or none of the documents.
2160 * See {@link #addDocuments(Collection)}.
2162 * @throws CorruptIndexException if the index is corrupt
2163 * @throws IOException if there is a low-level IO error
2165 * @lucene.experimental
2167 public void updateDocuments(Term delTerm, Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2168 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2171 boolean success = false;
2172 boolean doFlush = false;
2174 doFlush = docWriter.updateDocuments(docs, analyzer, delTerm);
2177 if (!success && infoStream != null) {
2178 message("hit exception updating document");
2184 } catch (OutOfMemoryError oom) {
2185 handleOOM(oom, "updateDocuments");
2190 * Deletes the document(s) containing <code>term</code>.
2192 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2193 * you should immediately close the writer. See <a
2194 * href="#OOME">above</a> for details.</p>
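 * <p>For example, assuming the application maintains a hypothetical
 * unique <code>"id"</code> field:</p>
 *
 * <pre>
 * writer.deleteDocuments(new Term("id", "doc-42"));
 * </pre>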
2196 * @param term the term to identify the documents to be deleted
2197 * @throws CorruptIndexException if the index is corrupt
2198 * @throws IOException if there is a low-level IO error
2200 public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
2203 if (docWriter.deleteTerm(term, false)) {
2206 } catch (OutOfMemoryError oom) {
2207 handleOOM(oom, "deleteDocuments(Term)");
2212 * Deletes the document(s) containing any of the
2213 * terms. All deletes are flushed at the same time.
2215 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2216 * you should immediately close the writer. See <a
2217 * href="#OOME">above</a> for details.</p>
2219  * @param terms array of terms to identify the documents to be deleted
2221 * @throws CorruptIndexException if the index is corrupt
2222 * @throws IOException if there is a low-level IO error
2224 public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
2227 if (docWriter.deleteTerms(terms)) {
2230 } catch (OutOfMemoryError oom) {
2231 handleOOM(oom, "deleteDocuments(Term..)");
2236 * Deletes the document(s) matching the provided query.
2238 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2239 * you should immediately close the writer. See <a
2240 * href="#OOME">above</a> for details.</p>
2242 * @param query the query to identify the documents to be deleted
2243 * @throws CorruptIndexException if the index is corrupt
2244 * @throws IOException if there is a low-level IO error
2246 public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
2249 if (docWriter.deleteQuery(query)) {
2252 } catch (OutOfMemoryError oom) {
2253 handleOOM(oom, "deleteDocuments(Query)");
2258 * Deletes the document(s) matching any of the provided queries.
2259 * All deletes are flushed at the same time.
2261 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2262 * you should immediately close the writer. See <a
2263 * href="#OOME">above</a> for details.</p>
2265  * @param queries array of queries to identify the documents to be deleted
2267 * @throws CorruptIndexException if the index is corrupt
2268 * @throws IOException if there is a low-level IO error
2270 public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
2273 if (docWriter.deleteQueries(queries)) {
2276 } catch (OutOfMemoryError oom) {
2277 handleOOM(oom, "deleteDocuments(Query..)");
2282 * Updates a document by first deleting the document(s)
2283 * containing <code>term</code> and then adding the new
2284 * document. The delete and then add are atomic as seen
2285  * by a reader on the same index (flush may happen only after the add).
2288 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2289 * you should immediately close the writer. See <a
2290 * href="#OOME">above</a> for details.</p>
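 * <p>A sketch, again assuming a hypothetical unique <code>"id"</code>
 * field per document:</p>
 *
 * <pre>
 * Document newDoc = new Document();
 * newDoc.add(new Field("id", "doc-42",
 *                      Field.Store.YES, Field.Index.NOT_ANALYZED));
 * // ... add the updated content fields ...
 * writer.updateDocument(new Term("id", "doc-42"), newDoc);
 * </pre>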
2292  * @param term the term to identify the document(s) to be deleted
2294 * @param doc the document to be added
2295 * @throws CorruptIndexException if the index is corrupt
2296 * @throws IOException if there is a low-level IO error
2298 public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
2300 updateDocument(term, doc, getAnalyzer());
2304 * Updates a document by first deleting the document(s)
2305 * containing <code>term</code> and then adding the new
2306 * document. The delete and then add are atomic as seen
2307  * by a reader on the same index (flush may happen only after the add).
2310 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2311 * you should immediately close the writer. See <a
2312 * href="#OOME">above</a> for details.</p>
2314  * @param term the term to identify the document(s) to be deleted
2316 * @param doc the document to be added
2317 * @param analyzer the analyzer to use when analyzing the document
2318 * @throws CorruptIndexException if the index is corrupt
2319 * @throws IOException if there is a low-level IO error
2321 public void updateDocument(Term term, Document doc, Analyzer analyzer)
2322 throws CorruptIndexException, IOException {
2325 boolean doFlush = false;
2326 boolean success = false;
2328 doFlush = docWriter.updateDocument(doc, analyzer, term);
2331 if (!success && infoStream != null)
2332 message("hit exception updating document");
2337 } catch (OutOfMemoryError oom) {
2338 handleOOM(oom, "updateDocument");
2343 final synchronized int getSegmentCount(){
2344 return segmentInfos.size();
2348 final synchronized int getNumBufferedDocuments(){
2349 return docWriter.getNumDocs();
2353 final synchronized int getDocCount(int i) {
2354 if (i >= 0 && i < segmentInfos.size()) {
2355 return segmentInfos.info(i).docCount;
2362 final int getFlushCount() {
2363 return flushCount.get();
2367 final int getFlushDeletesCount() {
2368 return flushDeletesCount.get();
2371 final String newSegmentName() {
2372    // Cannot synchronize on IndexWriter because that causes deadlock
2374 synchronized(segmentInfos) {
2375 // Important to increment changeCount so that the
2376 // segmentInfos is written on close. Otherwise we
2377 // could close, re-open and re-return the same segment
2378 // name that was previously returned which can cause
2379 // problems at least with ConcurrentMergeScheduler.
2381 segmentInfos.changed();
2382 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
2386 /** If non-null, information about merges will be printed to this.
2388 private PrintStream infoStream;
2389 private static PrintStream defaultInfoStream;
2392 * Requests an "optimize" operation on an index, priming the index
2393 * for the fastest available search. Traditionally this has meant
2394 * merging all segments into a single segment as is done in the
2395 * default merge policy, but individual merge policies may implement
2396 * optimize in different ways.
2398 * <p> Optimize is a fairly costly operation, so you
2399 * should only do it if your search performance really
2400 * requires it. Many search applications do fine never
2401 * calling optimize. </p>
2403 * <p>Note that optimize requires 2X the index size free
2404 * space in your Directory (3X if you're using compound
2405 * file format). For example, if your index size is 10 MB
2406 * then you need 20 MB free for optimize to complete (30
2407 * MB if you're using compound file format). Also,
2408 * it's best to call {@link #commit()} after the optimize
2409 * completes to allow IndexWriter to free up disk space.</p>
2411 * <p>If some but not all readers re-open while an
2412 * optimize is underway, this will cause > 2X temporary
2413 * space to be consumed as those new readers will then
2414 * hold open the partially optimized segments at that
2415  * time. It is best not to re-open readers while optimize is running.
2418 * <p>The actual temporary usage could be much less than
2419 * these figures (it depends on many factors).</p>
2421 * <p>In general, once the optimize completes, the total size of the
2422 * index will be less than the size of the starting index.
2423 * It could be quite a bit smaller (if there were many
2424 * pending deletes) or just slightly smaller.</p>
2426 * <p>If an Exception is hit during optimize(), for example
2427 * due to disk full, the index will not be corrupt and no
2428 * documents will have been lost. However, it may have
2429 * been partially optimized (some segments were merged but
2430 * not all), and it's possible that one of the segments in
2431 * the index will be in non-compound format even when
2432 * using compound file format. This will occur when the
2433 * Exception is hit during conversion of the segment into
2434 * compound format.</p>
2436 * <p>This call will optimize those segments present in
2437 * the index when the call started. If other threads are
2438 * still adding documents and flushing segments, those
2439 * newly created segments will not be optimized unless you
2440 * call optimize again.</p>
2442 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2443 * you should immediately close the writer. See <a
2444 * href="#OOME">above</a> for details.</p>
2446 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2447 * with <tt>false</tt>, which aborts all running merges,
2448 * then any thread still running this method might hit a
2449 * {@link MergePolicy.MergeAbortedException}.
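 * <p>A minimal sketch of a typical batch-indexing sequence, per the
 * advice above:</p>
 *
 * <pre>
 * // after all documents have been added:
 * writer.optimize(); // merge down to a single segment
 * writer.commit();   // lets IndexWriter free the now-unreferenced files
 * </pre>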
2451 * @throws CorruptIndexException if the index is corrupt
2452 * @throws IOException if there is a low-level IO error
2453 * @see MergePolicy#findMergesForOptimize
2455 public void optimize() throws CorruptIndexException, IOException {
2460 * Optimize the index down to <= maxNumSegments. If
2461  * maxNumSegments==1 then this is the same as {@link #optimize()}.
2464 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2465 * you should immediately close the writer. See <a
2466 * href="#OOME">above</a> for details.</p>
2468 * @param maxNumSegments maximum number of segments left
2469 * in the index after optimization finishes
2471 public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
2472 optimize(maxNumSegments, true);
2475 /** Just like {@link #optimize()}, except you can specify
2476 * whether the call should block until the optimize
2477 * completes. This is only meaningful with a
2478 * {@link MergeScheduler} that is able to run merges in
2479 * background threads.
2481 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2482 * you should immediately close the writer. See <a
2483 * href="#OOME">above</a> for details.</p>
2485 public void optimize(boolean doWait) throws CorruptIndexException, IOException {
2486 optimize(1, doWait);
2489 /** Just like {@link #optimize(int)}, except you can
2490 * specify whether the call should block until the
2491 * optimize completes. This is only meaningful with a
2492 * {@link MergeScheduler} that is able to run merges in
2493 * background threads.
2495 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2496 * you should immediately close the writer. See <a
2497 * href="#OOME">above</a> for details.</p>
2499 public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
2502 if (maxNumSegments < 1)
2503 throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2505 if (infoStream != null) {
2506 message("optimize: index now " + segString());
2507 message("now flush at optimize");
2512 synchronized(this) {
2513 resetMergeExceptions();
2514 segmentsToOptimize.clear();
2515 for(SegmentInfo info : segmentInfos) {
2516 segmentsToOptimize.put(info, Boolean.TRUE);
2518 optimizeMaxNumSegments = maxNumSegments;
2520 // Now mark all pending & running merges as optimize
2522 for(final MergePolicy.OneMerge merge : pendingMerges) {
2523 merge.optimize = true;
2524 merge.maxNumSegmentsOptimize = maxNumSegments;
2525 segmentsToOptimize.put(merge.info, Boolean.TRUE);
2528 for ( final MergePolicy.OneMerge merge: runningMerges ) {
2529 merge.optimize = true;
2530 merge.maxNumSegmentsOptimize = maxNumSegments;
2531 segmentsToOptimize.put(merge.info, Boolean.TRUE);
2535 maybeMerge(maxNumSegments, true);
2538 synchronized(this) {
2542 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete optimize");
2545 if (mergeExceptions.size() > 0) {
2546 // Forward any exceptions in background merge
2547 // threads to the current thread:
2548 final int size = mergeExceptions.size();
2549 for(int i=0;i<size;i++) {
2550 final MergePolicy.OneMerge merge = mergeExceptions.get(i);
2551 if (merge.optimize) {
2552 IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
2553 final Throwable t = merge.getException();
2561 if (optimizeMergesPending())
2568 // If close is called while we are still
2569 // running, throw an exception so the calling
2570      // thread will know the optimize did not complete
2575 // NOTE: in the ConcurrentMergeScheduler case, when
2576 // doWait is false, we can return immediately while
2577 // background threads accomplish the optimization
2580 /** Returns true if any merges in pendingMerges or
2581 * runningMerges are optimization merges. */
2582 private synchronized boolean optimizeMergesPending() {
2583 for (final MergePolicy.OneMerge merge : pendingMerges) {
2588 for (final MergePolicy.OneMerge merge : runningMerges) {
2596 /** Just like {@link #expungeDeletes()}, except you can
2597 * specify whether the call should block until the
2598 * operation completes. This is only meaningful with a
2599 * {@link MergeScheduler} that is able to run merges in
2600 * background threads.
2602 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2603 * you should immediately close the writer. See <a
2604 * href="#OOME">above</a> for details.</p>
2606 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2607 * with <tt>false</tt>, which aborts all running merges,
2608 * then any thread still running this method might hit a
2609 * {@link MergePolicy.MergeAbortedException}.
2611 public void expungeDeletes(boolean doWait)
2612 throws CorruptIndexException, IOException {
2617 if (infoStream != null)
2618 message("expungeDeletes: index now " + segString());
2620 MergePolicy.MergeSpecification spec;
2622 synchronized(this) {
2623 spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos);
2625 final int numMerges = spec.merges.size();
2626 for(int i=0;i<numMerges;i++)
2627 registerMerge(spec.merges.get(i));
2631 mergeScheduler.merge(this);
2633 if (spec != null && doWait) {
2634 final int numMerges = spec.merges.size();
2635 synchronized(this) {
2636 boolean running = true;
2640 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
2643 // Check each merge that MergePolicy asked us to
2644 // do, to see if any of them are still running and
2645 // if any of them have hit an exception.
2647 for(int i=0;i<numMerges;i++) {
2648 final MergePolicy.OneMerge merge = spec.merges.get(i);
2649 if (pendingMerges.contains(merge) || runningMerges.contains(merge))
2651 Throwable t = merge.getException();
2653 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
2659 // If any of our merges are still running, wait:
2666 // NOTE: in the ConcurrentMergeScheduler case, when
2667 // doWait is false, we can return immediately while
2668    // background threads accomplish the operation
2672 /** Expunges all deletes from the index. When an index
2673 * has many document deletions (or updates to existing
2674 * documents), it's best to either call optimize or
2675 * expungeDeletes to remove all unused data in the index
2676 * associated with the deleted documents. To see how
2677 * many deletions you have pending in your index, call
2678  * {@link IndexReader#numDeletedDocs}.
2679 * This saves disk space and memory usage while
2680 * searching. expungeDeletes should be somewhat faster
2681 * than optimize since it does not insist on reducing the
2682 * index to a single segment (though, this depends on the
2683 * {@link MergePolicy}; see {@link
2684 * MergePolicy#findMergesToExpungeDeletes}.). Note that
2685 * this call does not first commit any buffered
2686 * documents, so you must do so yourself if necessary.
2687 * See also {@link #expungeDeletes(boolean)}
2689 * <p><b>NOTE</b>: this method first flushes a new
2690 * segment (if there are indexed documents), and applies
2691 * all buffered deletes.
2693 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2694 * you should immediately close the writer. See <a
2695 * href="#OOME">above</a> for details.</p>
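 * <p>For example (the <code>"state"</code> field is hypothetical):</p>
 *
 * <pre>
 * writer.deleteDocuments(new Term("state", "obsolete"));
 * writer.commit();         // make the deletes part of the index
 * writer.expungeDeletes(); // merge away the deleted documents
 * </pre>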
2697 public void expungeDeletes() throws CorruptIndexException, IOException {
2698 expungeDeletes(true);
2702 * Expert: asks the mergePolicy whether any merges are
2703 * necessary now and if so, runs the requested merges and
2704  * then iterates (testing again whether merges are needed) until no
2705 * more merges are returned by the mergePolicy.
2707 * Explicit calls to maybeMerge() are usually not
2708 * necessary. The most common case is when merge policy
2709 * parameters have changed.
2711 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2712 * you should immediately close the writer. See <a
2713 * href="#OOME">above</a> for details.</p>
2715 public final void maybeMerge() throws CorruptIndexException, IOException {
2719 private final void maybeMerge(boolean optimize) throws CorruptIndexException, IOException {
2720 maybeMerge(1, optimize);
2723 private final void maybeMerge(int maxNumSegmentsOptimize, boolean optimize) throws CorruptIndexException, IOException {
2724 updatePendingMerges(maxNumSegmentsOptimize, optimize);
2725 mergeScheduler.merge(this);
2728 private synchronized void updatePendingMerges(int maxNumSegmentsOptimize, boolean optimize)
2729 throws CorruptIndexException, IOException {
2730 assert !optimize || maxNumSegmentsOptimize > 0;
2736 // Do not start new merges if we've hit OOME
2741 final MergePolicy.MergeSpecification spec;
2743 spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize));
2745 final int numMerges = spec.merges.size();
2746 for(int i=0;i<numMerges;i++) {
2747 final MergePolicy.OneMerge merge = spec.merges.get(i);
2748 merge.optimize = true;
2749 merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
2754 spec = mergePolicy.findMerges(segmentInfos);
2758 final int numMerges = spec.merges.size();
2759 for(int i=0;i<numMerges;i++) {
2760 registerMerge(spec.merges.get(i));
2765 /** Expert: to be used by a {@link MergePolicy} to avoid
2766 * selecting merges for segments already being merged.
2767 * The returned collection is not cloned, and thus is
2768 * only safe to access if you hold IndexWriter's lock
2769  * (which you do when IndexWriter invokes the MergePolicy).
2772 * <p>Do not alter the returned collection! */
2773 public synchronized Collection<SegmentInfo> getMergingSegments() {
2774 return mergingSegments;
2777 /** Expert: the {@link MergeScheduler} calls this method
2778 * to retrieve the next merge requested by the
2781 * @lucene.experimental
2783 public synchronized MergePolicy.OneMerge getNextMerge() {
2784 if (pendingMerges.size() == 0)
2787 // Advance the merge from pending to running
2788 MergePolicy.OneMerge merge = pendingMerges.removeFirst();
2789 runningMerges.add(merge);
2795 * Close the <code>IndexWriter</code> without committing
2796 * any changes that have occurred since the last commit
2797 * (or since it was opened, if commit hasn't been called).
2798 * This removes any temporary files that had been created,
2799 * after which the state of the index will be the same as
2800 * it was when commit() was last called or when this
2801 * writer was first opened. This also clears a previous
2802 * call to {@link #prepareCommit}.
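 * <p>A minimal sketch of the typical pattern:</p>
 *
 * <pre>
 * try {
 *   writer.addDocument(doc);
 *   writer.commit();   // make the change durable
 * } catch (IOException ioe) {
 *   writer.rollback(); // discard everything since the last commit; also closes the writer
 * }
 * </pre>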
2803 * @throws IOException if there is a low-level IO error
2805 public void rollback() throws IOException {
2808 // Ensure that only one thread actually gets to do the closing:
2813 private void rollbackInternal() throws IOException {
2815 boolean success = false;
2817 if (infoStream != null ) {
2818 message("rollback");
2822 synchronized(this) {
2823 finishMerges(false);
2827 if (infoStream != null ) {
2828 message("rollback: done finish merges");
2831 // Must pre-close these two, in case they increment
2832 // changeCount so that we can then set it to false
2833 // before calling closeInternal
2834 mergePolicy.close();
2835 mergeScheduler.close();
2837 bufferedDeletesStream.clear();
2839 synchronized(this) {
2841 if (pendingCommit != null) {
2842 pendingCommit.rollbackCommit(directory);
2843 deleter.decRef(pendingCommit);
2844 pendingCommit = null;
2848 // Keep the same segmentInfos instance but replace all
2849 // of its SegmentInfo instances. This is so the next
2850 // attempt to commit using this instance of IndexWriter
2851      // will always write to a new generation ("write once").
2853 segmentInfos.rollbackSegmentInfos(rollbackSegments);
2854 if (infoStream != null ) {
2855 message("rollback: infos=" + segString(segmentInfos));
2860 assert testPoint("rollback before checkpoint");
2862 // Ask deleter to locate unreferenced files & remove
2864 deleter.checkpoint(segmentInfos, false);
2868 // Don't bother saving any changes in our segmentInfos
2869 readerPool.clear(null);
2871 lastCommitChangeCount = changeCount;
2874 } catch (OutOfMemoryError oom) {
2875 handleOOM(oom, "rollbackInternal");
2877 synchronized(this) {
2881 if (infoStream != null)
2882 message("hit exception during rollback");
2887 closeInternal(false);
2891 * Delete all documents in the index.
2893 * <p>This method will drop all buffered documents and will
2894 * remove all segments from the index. This change will not be
2895 * visible until a {@link #commit()} has been called. This method
2896 * can be rolled back using {@link #rollback()}.</p>
2898 * <p>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).</p>
2900 * <p>NOTE: this method will forcefully abort all merges
2901 * in progress. If other threads are running {@link
2902 * #optimize()}, {@link #addIndexes(IndexReader[])} or
2903 * {@link #expungeDeletes} methods, they may receive
2904 * {@link MergePolicy.MergeAbortedException}s.
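 * <p>For example, to rebuild the index in place while readers keep
 * seeing the old contents until the rebuild is committed:</p>
 *
 * <pre>
 * writer.deleteAll(); // drops all docs; not yet visible to readers
 * // ... re-add every document ...
 * writer.commit();    // atomically exposes the rebuilt index
 * </pre>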
2906 public synchronized void deleteAll() throws IOException {
2909 // Abort any running merges
2910 finishMerges(false);
2912 // Remove any buffered docs
2915 // Remove all segments
2916 segmentInfos.clear();
2918 // Ask deleter to locate unreferenced files & remove them:
2919 deleter.checkpoint(segmentInfos, false);
2922 // Don't bother saving any changes in our segmentInfos
2923 readerPool.dropAll();
2925 // Mark that the index has changed
2927 segmentInfos.changed();
2928 } catch (OutOfMemoryError oom) {
2929 handleOOM(oom, "deleteAll");
2931 if (infoStream != null) {
2932 message("hit exception during deleteAll");
2937 private synchronized void finishMerges(boolean waitForMerges) throws IOException {
2938 if (!waitForMerges) {
2942 // Abort all pending & running merges:
2943 for (final MergePolicy.OneMerge merge : pendingMerges) {
2944 if (infoStream != null)
2945 message("now abort pending merge " + merge.segString(directory));
2949 pendingMerges.clear();
2951 for (final MergePolicy.OneMerge merge : runningMerges) {
2952 if (infoStream != null)
2953 message("now abort running merge " + merge.segString(directory));
2957 // These merges periodically check whether they have
2958 // been aborted, and stop if so. We wait here to make
2959 // sure they all stop. It should not take very long
2960 // because the merge threads periodically check if
2961 // they are aborted.
2962 while(runningMerges.size() > 0) {
2963 if (infoStream != null)
2964 message("now wait for " + runningMerges.size() + " running merge to abort");
2971 assert 0 == mergingSegments.size();
2973 if (infoStream != null)
2974 message("all running merges have aborted");
2977 // waitForMerges() will ensure any running addIndexes finishes.
2978 // It's fine if a new one attempts to start because from our
2979 // caller above the call will see that we are in the
2980 // process of closing, and will throw an
2981 // AlreadyClosedException.
2987 * Wait for any currently outstanding merges to finish.
2989 * <p>It is guaranteed that any merges started prior to calling this method
2990 * will have completed once this method completes.</p>
2992 public synchronized void waitForMerges() {
2993 if (infoStream != null) {
2994 message("waitForMerges");
2996 while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
3001 assert 0 == mergingSegments.size();
3003 if (infoStream != null) {
3004 message("waitForMerges done");
3009 * Called whenever the SegmentInfos has been updated and
3010 * the index files referenced exist (correctly) in the
3013 synchronized void checkpoint() throws IOException {
3015 segmentInfos.changed();
3016 deleter.checkpoint(segmentInfos, false);
3019 private synchronized void resetMergeExceptions() {
3020 mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
3024 private void noDupDirs(Directory... dirs) {
3025 HashSet<Directory> dups = new HashSet<Directory>();
3026 for (Directory dir : dirs) {
3027 if (dups.contains(dir))
3028 throw new IllegalArgumentException("Directory " + dir + " appears more than once");
3029 if (dir == directory)
3030 throw new IllegalArgumentException("Cannot add directory to itself");
3036 * @deprecated use {@link #addIndexes(Directory...)} instead
3039 public void addIndexesNoOptimize(Directory... dirs)
3040 throws CorruptIndexException, IOException {
3045 * Merges the provided indexes into this index. This method is useful
3046 * if you use extensions of {@link IndexReader}. Otherwise, using
3047 * {@link #addIndexes(Directory...)} is highly recommended for performance
3048 * reasons. It uses the {@link MergeScheduler} and {@link MergePolicy} set
3049 * on this writer, which may perform merges in parallel.
3051 * <p>The provided IndexReaders are not closed.
3053 * <p><b>NOTE:</b> this method does not merge the current segments,
3054 * only the incoming ones.
3056 * <p>See {@link #addIndexes(Directory...)} for details on transactional
3057 * semantics, temporary free space required in the Directory,
3058 * and non-CFS segments on an Exception.
3060 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3061 * you should immediately close the writer. See <a
3062 * href="#OOME">above</a> for details.
3064 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
3065 * with <tt>false</tt>, which aborts all running merges,
3066 * then any thread still running this method might hit a
3067 * {@link MergePolicy.MergeAbortedException}.
3069 * @throws CorruptIndexException if the index is corrupt
3070 * @throws IOException if there is a low-level IO error
3072 public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
3077 if (infoStream != null)
3078 message("flush at addIndexes(IndexReader...)");
3081 String mergedName = newSegmentName();
3082 // TODO: somehow we should fix this merge so it's
3083 // abortable so that IW.close(false) is able to stop it
3084 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
3085 mergedName, null, payloadProcessorProvider,
3086 ((FieldInfos) docWriter.getFieldInfos().clone()));
3088 for (IndexReader reader : readers) // add new indexes
3091 int docCount = merger.merge(); // merge 'em
3093 SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
3095 merger.fieldInfos().hasProx(),
3096 merger.fieldInfos().hasVectors());
3097 setDiagnostics(info, "addIndexes(IndexReader...)");
3099 boolean useCompoundFile;
3100 synchronized(this) { // Guard segmentInfos
3102 deleter.deleteNewFiles(info.files());
3106 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
3109 // Now create the compound file if needed
3110 if (useCompoundFile) {
3111 merger.createCompoundFile(mergedName + ".cfs", info);
3113 // delete new non cfs files directly: they were never
3114 // registered with IFD
3115 synchronized(this) {
3116 deleter.deleteNewFiles(info.files());
3118 info.setUseCompoundFile(true);
3121 // Register the new segment
3122 synchronized(this) {
3124 deleter.deleteNewFiles(info.files());
3128 segmentInfos.add(info);
3132 } catch (OutOfMemoryError oom) {
3133 handleOOM(oom, "addIndexes(IndexReader...)");
3138 * Adds all segments from an array of indexes into this index.
3140 * <p>This may be used to parallelize batch indexing. A large document
3141 * collection can be broken into sub-collections. Each sub-collection can be
3142 * indexed in parallel, on a different thread, process or machine. The
3143  * complete index can then be created by merging sub-collection indexes with this method.
3147 * <b>NOTE:</b> the index in each {@link Directory} must not be
3148 * changed (opened by a writer) while this method is
3149 * running. This method does not acquire a write lock in
3150  * each input Directory, so it is up to the caller to enforce this.
3153 * <p>This method is transactional in how Exceptions are
3154 * handled: it does not commit a new segments_N file until
3155 * all indexes are added. This means if an Exception
3156 * occurs (for example disk full), then either no indexes
3157 * will have been added or they all will have been.
3159 * <p>Note that this requires temporary free space in the
3160 * {@link Directory} up to 2X the sum of all input indexes
3161 * (including the starting index). If readers/searchers
3162 * are open against the starting index, then temporary
3163 * free space required will be higher by the size of the
3164 * starting index (see {@link #optimize()} for details).
3167  * <b>NOTE:</b> this method only copies the segments of the incoming indexes
3168 * and does not merge them. Therefore deleted documents are not removed and
3169 * the new segments are not merged with the existing ones. Also, if the merge
3170 * policy allows compound files, then any segment that is not compound is
3171 * converted to such. However, if the segment is compound, it is copied as-is
3172 * even if the merge policy does not allow compound files.
3175 * <p>This requires this index not be among those to be added.
3178 * <b>NOTE</b>: if this method hits an OutOfMemoryError
3179 * you should immediately close the writer. See <a
3180 * href="#OOME">above</a> for details.
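 * <p>A sketch of merging two sub-indexes built elsewhere
 * (<code>dir1</code> and <code>dir2</code> are assumed to hold
 * complete indexes not currently open by any writer):</p>
 *
 * <pre>
 * writer.addIndexes(dir1, dir2);
 * writer.commit();
 * </pre>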
3182 * @throws CorruptIndexException if the index is corrupt
3183 * @throws IOException if there is a low-level IO error
3185 public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
3191 if (infoStream != null)
3192 message("flush at addIndexes(Directory...)");
3196 List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
3197 Comparator<String> versionComparator = StringHelper.getVersionComparator();
3198 for (Directory dir : dirs) {
3199 if (infoStream != null) {
3200 message("addIndexes: process directory " + dir);
3202 SegmentInfos sis = new SegmentInfos(); // read infos from dir
3204 final Set<String> dsFilesCopied = new HashSet<String>();
3205 final Map<String, String> dsNames = new HashMap<String, String>();
3206 for (SegmentInfo info : sis) {
3207 assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
3209 docCount += info.docCount;
3210 String newSegName = newSegmentName();
3211 String dsName = info.getDocStoreSegment();
3213 if (infoStream != null) {
3214 message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
3217        // create CFS only if the source segment is not CFS, and MP agrees it should be
3220 synchronized (this) { // Guard segmentInfos
3221 createCFS = !info.getUseCompoundFile()
3222 && mergePolicy.useCompoundFile(segmentInfos, info)
3223 // optimize case only for segments that don't share doc stores
3224 && versionComparator.compare(info.getVersion(), "3.1") >= 0;
3228 copySegmentIntoCFS(info, newSegName);
3230 copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
3236 synchronized (this) {
3238 segmentInfos.addAll(infos);
3242 } catch (OutOfMemoryError oom) {
3243 handleOOM(oom, "addIndexes(Directory...)");
3247 /** Copies the segment into the IndexWriter's directory, as a compound segment. */
3248 private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
3249 String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
3250 Collection<String> files = info.files();
3251 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
3252 for (String file : files) {
3253 String newFileName = segName + IndexFileNames.stripSegmentName(file);
3254 if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
3255 && !IndexFileNames.isSeparateNormsFile(file)) {
3256 cfsWriter.addFile(file, info.dir);
3258 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3259 info.dir.copy(directory, file, newFileName);
3266 info.dir = directory;
3267 info.name = segName;
3268 info.setUseCompoundFile(true);
3271 /** Copies the segment files as-is into the IndexWriter's directory. */
3272 private void copySegmentAsIs(SegmentInfo info, String segName,
3273 Map<String, String> dsNames, Set<String> dsFilesCopied)
3274 throws IOException {
3275 // Determine if the doc store of this segment needs to be copied. It's
3276 // only relevant for segments that share doc store with others,
3277 // because the DS might have been copied already, in which case we
3278 // just want to update the DS name of this SegmentInfo.
3279 // NOTE: pre-3x segments include a null DSName if they don't share doc
3280 // store. The following code ensures we don't accidentally insert
3281    // 'null' into the map.
3282 String dsName = info.getDocStoreSegment();
3283 final String newDsName;
3284 if (dsName != null) {
3285 if (dsNames.containsKey(dsName)) {
3286 newDsName = dsNames.get(dsName);
3288 dsNames.put(dsName, segName);
3289 newDsName = segName;
3292 newDsName = segName;
3295 // Copy the segment files
3296 for (String file: info.files()) {
3297 final String newFileName;
3298 if (IndexFileNames.isDocStoreFile(file)) {
3299 newFileName = newDsName + IndexFileNames.stripSegmentName(file);
3300 if (dsFilesCopied.contains(newFileName)) {
3303 dsFilesCopied.add(newFileName);
3305 newFileName = segName + IndexFileNames.stripSegmentName(file);
3308 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3309 info.dir.copy(directory, file, newFileName);
3312 info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
3313 info.dir = directory;
3314 info.name = segName;
3318 * A hook for extending classes to execute operations after pending added and
3319 * deleted documents have been flushed to the Directory but before the change
3320 * is committed (new segments_N file written).
3322 protected void doAfterFlush() throws IOException {}
3325 * A hook for extending classes to execute operations before pending added and
3326 * deleted documents are flushed to the Directory.
3328 protected void doBeforeFlush() throws IOException {}
3330 /** Expert: prepare for commit.
3332 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3333 * you should immediately close the writer. See <a
3334 * href="#OOME">above</a> for details.</p>
3336 * @see #prepareCommit(Map) */
3337 public final void prepareCommit() throws CorruptIndexException, IOException {
3339 prepareCommit(null);
3342 /** <p>Expert: prepare for commit, specifying
3343 * commitUserData Map (String -> String). This does the
3344 * first phase of 2-phase commit. This method does all
3345 * steps necessary to commit changes since this writer
3346 * was opened: flushes pending added and deleted docs,
3347  * syncs the index files, writes most of the next segments_N
3348 * file. After calling this you must call either {@link
3349 * #commit()} to finish the commit, or {@link
3350 * #rollback()} to revert the commit and undo all changes
3351 * done since the writer was opened.</p>
3353 * You can also just call {@link #commit(Map)} directly
3354  * without prepareCommit first, in which case that method
3355 * will internally call prepareCommit.
3357 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3358 * you should immediately close the writer. See <a
3359 * href="#OOME">above</a> for details.</p>
3361 * @param commitUserData Opaque Map (String->String)
3362 * that's recorded into the segments file in the index,
3363 * and retrievable by {@link
3364 * IndexReader#getCommitUserData}. Note that when
3365 * IndexWriter commits itself during {@link #close}, the
3366 * commitUserData is unchanged (just carried over from
3367 * the prior commit). If this is null then the previous
3368 * commitUserData is kept. Also, the commitUserData will
3369  * only "stick" if there are actually changes in the index to commit.
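 * <p>A two-phase-commit sketch, coordinating with some other
 * transactional resource (<code>otherResource</code> is
 * hypothetical):</p>
 *
 * <pre>
 * writer.prepareCommit();
 * try {
 *   otherResource.prepareCommit();
 *   otherResource.commit();
 *   writer.commit();   // phase two: make the prepared commit visible
 * } catch (Exception e) {
 *   writer.rollback(); // aborts the prepared commit and closes the writer
 * }
 * </pre>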
3372 public final void prepareCommit(Map<String, String> commitUserData)
3373 throws CorruptIndexException, IOException {
3376 throw new IllegalStateException(
3377 "this writer hit an OutOfMemoryError; cannot commit");
3380 if (pendingCommit != null)
3381 throw new IllegalStateException(
3382 "prepareCommit was already called with no corresponding call to commit");
3384 if (infoStream != null)
3385 message("prepareCommit: flush");
3388 boolean anySegmentsFlushed = false;
3389 SegmentInfos toCommit = null;
3390 boolean success = false;
3393 synchronized (this) {
3394 anySegmentsFlushed = doFlush(true);
3395 readerPool.commit(segmentInfos);
3396 toCommit = (SegmentInfos) segmentInfos.clone();
3397 pendingCommitChangeCount = changeCount;
3398 // This protects the segmentInfos we are now going
3399 // to commit. This is important in case, eg, while
3400 // we are trying to sync all referenced files, a
3401 // merge completes which would otherwise have
3402 // removed the files we are now syncing.
3403 deleter.incRef(toCommit, false);
3407 if (!success && infoStream != null) {
3408 message("hit exception during prepareCommit");
3412 } catch (OutOfMemoryError oom) {
3413 handleOOM(oom, "prepareCommit");
3418 if (anySegmentsFlushed) {
3424 synchronized (this) {
3425 deleter.decRef(toCommit);
3430 startCommit(toCommit, commitUserData);
3433 // Used only by commit, below; lock order is commitLock -> IW
3434 private final Object commitLock = new Object();
3437 * <p>Commits all pending changes (added & deleted
3438 * documents, optimizations, segment merges, added
3439 * indexes, etc.) to the index, and syncs all referenced
3440 * index files, such that a reader will see the changes
3441 * and the index updates will survive an OS or machine
3442 * crash or power loss. Note that this does not wait for
3443 * any running background merges to finish. This may be a
3444 * costly operation, so you should test the cost in your
3445 * application and do it only when really necessary.</p>
3447 * <p> Note that this operation calls Directory.sync on
3448 * the index files. That call should not return until the
3449 * file contents & metadata are on stable storage. For
3450 * FSDirectory, this calls the OS's fsync. But, beware:
3451 * some hardware devices may in fact cache writes even
3452 * during fsync, and return before the bits are actually
3453 * on stable storage, to give the appearance of faster
3454 * performance. If you have such a device, and it does
3455 * not have a battery backup (for example) then on power
3456 * loss it may still lose data. Lucene cannot guarantee
3457 * consistency on such devices. </p>
3459 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3460 * you should immediately close the writer. See <a
3461 * href="#OOME">above</a> for details.</p>
3463 * @see #prepareCommit
3466 public final void commit() throws CorruptIndexException, IOException {
3467 commit(null);
3468 }
3469 
3470 /** Commits all changes to the index, specifying a
3471 * commitUserData Map (String -> String). This just
3472 * calls {@link #prepareCommit(Map)} (if you didn't
3473 * already call it) and then {@link #finishCommit}.
3475 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3476 * you should immediately close the writer. See <a
3477 * href="#OOME">above</a> for details.</p>
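* <p>A small sketch (illustrative only; <code>directory</code> is the
* Directory the writer was opened on):</p>
* <pre>
* Map&lt;String,String&gt; userData = new HashMap&lt;String,String&gt;();
* userData.put("myAppVersion", "42");
* writer.commit(userData);
* // later, eg from a fresh JVM:
* Map&lt;String,String&gt; prior = IndexReader.getCommitUserData(directory);
* </pre>
*/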
3479 public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3483 commitInternal(commitUserData);
3486 private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3488 if (infoStream != null) {
3489 message("commit: start");
3492 synchronized(commitLock) {
3493 if (infoStream != null) {
3494 message("commit: enter lock");
3497 if (pendingCommit == null) {
3498 if (infoStream != null) {
3499 message("commit: now prepare");
3501 prepareCommit(commitUserData);
3502 } else if (infoStream != null) {
3503 message("commit: already prepared");
3510 private synchronized final void finishCommit() throws CorruptIndexException, IOException {
3512 if (pendingCommit != null) {
3513 try {
3514 if (infoStream != null)
3515 message("commit: pendingCommit != null");
3516 pendingCommit.finishCommit(directory);
3517 if (infoStream != null)
3518 message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
3519 lastCommitChangeCount = pendingCommitChangeCount;
3520 segmentInfos.updateGeneration(pendingCommit);
3521 segmentInfos.setUserData(pendingCommit.getUserData());
3522 rollbackSegments = pendingCommit.createBackupSegmentInfos(true);
3523 deleter.checkpoint(pendingCommit, true);
3524 } finally {
3525 // Matches the incRef done in startCommit:
3526 deleter.decRef(pendingCommit);
3527 pendingCommit = null;
3528 notifyAll();
3529 }
3530 
3531 } else if (infoStream != null) {
3532 message("commit: pendingCommit == null; skip");
3535 if (infoStream != null) {
3536 message("commit: done");
3540 /** NOTE: flushDocStores is ignored now (hardwired to
3541 * true); this method is only here for backwards
3542 * compatibility. */
3543 protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3544 flush(triggerMerge, flushDeletes);
3548 * Flush all in-memory buffered updates (adds and deletes)
3549 * to the Directory.
3550 * @param triggerMerge if true, we may merge segments (if
3551 * deletes or docs were flushed) if necessary
3552 * @param applyAllDeletes whether pending deletes should also
3553 * be applied */
3554 protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
3556 // NOTE: this method cannot be sync'd because
3557 // maybeMerge() in turn calls mergeScheduler.merge which
3558 // in turn can take a long time to run and we don't want
3559 // to hold the lock for that. In the case of
3560 // ConcurrentMergeScheduler this can lead to deadlock
3561 // when it stalls due to too many running merges.
3563 // We can be called during close, when closing==true, so we must pass false to ensureOpen:
3564 ensureOpen(false);
3565 if (doFlush(applyAllDeletes) && triggerMerge) {
3566 maybeMerge();
3567 }
3568 }
3569 
3570 // TODO: this method should not have to be entirely
3571 // synchronized, ie, merges should be allowed to commit
3572 // even while a flush is happening
3573 private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
3574 
3575 if (hitOOM) {
3576 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
3577 }
3578 
3581 assert testPoint("startDoFlush");
3583 // We may be flushing because it was triggered by doc
3584 // count, del count, ram usage (in which case flush
3585 // pending is already set), or we may be flushing
3586 // due to external event eg getReader or commit is
3587 // called (in which case we now set it, and this will
3588 // pause all threads):
3589 flushControl.setFlushPendingNoWait("explicit flush");
3591 boolean success = false;
3592 try {
3595 if (infoStream != null) {
3596 message(" start flush: applyAllDeletes=" + applyAllDeletes);
3597 message(" index before flush " + segString());
3600 final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
3601 if (newSegment != null) {
3602 setDiagnostics(newSegment, "flush");
3603 segmentInfos.add(newSegment);
3604 checkpoint();
3605 }
3606 
3607 if (!applyAllDeletes) {
3608 // If deletes alone are consuming > 1/2 our RAM
3609 // buffer, force them all to apply now. This is to
3610 // prevent too-frequent flushing of a long tail of
3611 // tiny segments:
3612 if (flushControl.getFlushDeletes() ||
3613 (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
3614 bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
3615 applyAllDeletes = true;
3616 if (infoStream != null) {
3617 message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
3622 if (applyAllDeletes) {
3623 if (infoStream != null) {
3624 message("apply all deletes during flush");
3627 flushDeletesCount.incrementAndGet();
3628 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
3629 .applyDeletes(readerPool, segmentInfos.asList());
3630 if (result.anyDeletes) {
3631 checkpoint();
3632 }
3633 if (!keepFullyDeletedSegments && result.allDeleted != null) {
3634 if (infoStream != null) {
3635 message("drop 100% deleted segments: " + result.allDeleted);
3637 for (SegmentInfo info : result.allDeleted) {
3638 // If a merge has already registered for this
3639 // segment, we leave it in the readerPool; the
3640 // merge will skip merging it and will then drop
3641 // it once it's done:
3642 if (!mergingSegments.contains(info)) {
3643 segmentInfos.remove(info);
3644 if (readerPool != null) {
3645 readerPool.drop(info);
3651 bufferedDeletesStream.prune(segmentInfos);
3653 assert !bufferedDeletesStream.any();
3654 flushControl.clearDeletes();
3655 } else if (infoStream != null) {
3656 message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
3661 flushCount.incrementAndGet();
3662 
3663 success = true;
3664 
3665 return newSegment != null;
3667 } catch (OutOfMemoryError oom) {
3668 handleOOM(oom, "doFlush");
3672 flushControl.clearFlushPending();
3673 if (!success && infoStream != null)
3674 message("hit exception during flush");
3678 /** Expert: Return the total size of all index files currently cached in memory.
3679 * Useful for monitoring buffered RAM usage, eg to decide when to flush or commit.
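* <p>A sketch of such monitoring (hypothetical usage and threshold):</p>
* <pre>
* if (writer.ramSizeInBytes() > 256L * 1024 * 1024) {
*   writer.commit(); // or flush, depending on the application
* }
* </pre>
*/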
3681 public final long ramSizeInBytes() {
3683 return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
3686 /** Expert: Return the number of documents currently
3687 * buffered in RAM. */
3688 public final synchronized int numRamDocs() {
3690 return docWriter.getNumDocs();
3693 private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
3694 for(SegmentInfo info : merge.segments) {
3695 if (!segmentInfos.contains(info)) {
3696 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
3701 /** Carefully merges deletes for the segments we just
3702 * merged. This is tricky because, although merging will
3703 * clear all deletes (compacts the documents), new
3704 * deletes may have been flushed to the segments since
3705 * the merge was started. This method "carries over"
3706 * such new deletes onto the newly merged segment, and
3707 * saves the resulting deletes file (incrementing the
3708 * delete generation for merge.info). If no deletes were
3709 * flushed, no new deletes file is saved. */
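// Illustrative example (not from the original source): suppose the
// merge contained segment A with docs {0,1,2}, where doc 1 was already
// deleted when the merge started. The merged segment holds A's
// surviving docs {0,2} at new positions {docUpto, docUpto+1}. If a
// delete of A's doc 2 was flushed while the merge ran, it must be
// carried over below as a delete of merged doc docUpto+1.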
3710 synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3712 assert testPoint("startCommitMergeDeletes");
3714 final List<SegmentInfo> sourceSegments = merge.segments;
3716 if (infoStream != null)
3717 message("commitMergeDeletes " + merge.segString(directory));
3719 // Carefully merge deletes that occurred after we
3720 // started merging:
3721 int docUpto = 0;
3722 int delCount = 0;
3723 long minGen = Long.MAX_VALUE;
3725 for(int i=0; i < sourceSegments.size(); i++) {
3726 SegmentInfo info = sourceSegments.get(i);
3727 minGen = Math.min(info.getBufferedDeletesGen(), minGen);
3728 int docCount = info.docCount;
3729 final SegmentReader previousReader = merge.readerClones.get(i);
3730 if (previousReader == null) {
3731 // Reader was skipped because it was 100% deletions
3732 continue;
3733 }
3734 final SegmentReader currentReader = merge.readers.get(i);
3735 if (previousReader.hasDeletions()) {
3737 // There were deletes on this segment when the merge
3738 // started. The merge has collapsed away those
3739 // deletes, but, if new deletes were flushed since
3740 // the merge started, we must now carefully keep any
3741 // newly flushed deletes but mapping them to the new
3742 // docIDs.
3744 if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
3745 // This means this segment has had new deletes
3746 // committed since we started the merge, so we
3747 // must merge them:
3748 for(int j=0;j<docCount;j++) {
3749 if (previousReader.isDeleted(j))
3750 assert currentReader.isDeleted(j);
3751 else {
3752 if (currentReader.isDeleted(j)) {
3753 mergedReader.doDelete(docUpto);
3754 delCount++;
3755 }
3756 docUpto++;
3757 }
3758 }
3759 } else {
3760 docUpto += docCount - previousReader.numDeletedDocs();
3761 }
3762 } else if (currentReader.hasDeletions()) {
3763 // This segment had no deletes before but now it
3764 // does:
3765 for(int j=0; j<docCount; j++) {
3766 if (currentReader.isDeleted(j)) {
3767 mergedReader.doDelete(docUpto);
3768 delCount++;
3769 }
3770 docUpto++;
3771 }
3772 } else {
3773 // No deletes before or after
3774 docUpto += info.docCount;
3777 assert mergedReader.numDeletedDocs() == delCount;
3779 mergedReader.hasChanges = delCount > 0;
3781 // If new deletes were applied while we were merging
3782 // (which happens if eg commit() or getReader() is
3783 // called during our merge), then it better be the case
3784 // that the delGen has increased for all our merged
3786 assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
3788 mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
3791 synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3793 assert testPoint("startCommitMerge");
3796 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
3799 if (infoStream != null)
3800 message("commitMerge: " + merge.segString(directory) + " index=" + segString());
3802 assert merge.registerDone;
3804 // If merge was explicitly aborted, or, if rollback() or
3805 // rollbackTransaction() had been called since our merge
3806 // started (which results in an unqualified
3807 // deleter.refresh() call that will remove any index
3808 // file that current segments does not reference), we
3809 // abort this merge:
3810 if (merge.isAborted()) {
3811 if (infoStream != null)
3812 message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
3816 commitMergedDeletes(merge, mergedReader);
3818 // If the doc store we are using has been closed and
3819 // is now in compound format (but wasn't when we
3820 // started), then we will switch to the compound
3821 // format as well:
3823 assert !segmentInfos.contains(merge.info);
3825 final boolean allDeleted = mergedReader.numDocs() == 0;
3827 if (infoStream != null && allDeleted) {
3828 message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
3831 final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
3832 segmentInfos.applyMergeChanges(merge, dropSegment);
3833 
3834 if (dropSegment) {
3835 readerPool.drop(merge.info);
3836 }
3837 
3838 if (infoStream != null) {
3839 message("after commit: " + segString());
3842 closeMergeReaders(merge, false);
3844 // Must note the change to segmentInfos so any commits
3845 // in-flight don't lose it:
3846 checkpoint();
3847 
3848 // If the merged segments had pending changes, clear
3849 // them so that they don't bother writing them to
3850 // disk, updating SegmentInfo, etc.:
3851 readerPool.clear(merge.segments);
3853 if (merge.optimize) {
3854 // cascade the optimize:
3855 if (!segmentsToOptimize.containsKey(merge.info)) {
3856 segmentsToOptimize.put(merge.info, Boolean.FALSE);
3857 }
3858 }
3859 
3860 return true;
3861 }
3862 
3863 final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
3865 if (infoStream != null) {
3866 message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
3869 // Set the exception on the merge, so if
3870 // optimize() is waiting on us it sees the root
3872 merge.setException(t);
3873 addMergeException(merge);
3875 if (t instanceof MergePolicy.MergeAbortedException) {
3876 // We can ignore this exception (it happens when
3877 // close(false) or rollback is called), unless the
3878 // merge involves segments from external directories,
3879 // in which case we must throw it so, for example, the
3880 // rollbackTransaction code in addIndexes* is
3881 // executed.
3882 if (merge.isExternal)
3883 throw (MergePolicy.MergeAbortedException) t;
3884 } else if (t instanceof IOException)
3885 throw (IOException) t;
3886 else if (t instanceof RuntimeException)
3887 throw (RuntimeException) t;
3888 else if (t instanceof Error)
3889 throw (Error) t;
3890 
3891 // Should not get here
3892 throw new RuntimeException(t);
3896 * Merges the indicated segments, replacing them in the stack with a
3897 * new segment.
3898 *
3899 * @lucene.experimental
3901 public void merge(MergePolicy.OneMerge merge)
3902 throws CorruptIndexException, IOException {
3904 boolean success = false;
3906 final long t0 = System.currentTimeMillis();
3907 //System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());
3908 
3909 try {
3910 try {
3911 try {
3912 mergeInit(merge);
3913 
3914 if (infoStream != null)
3915 message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());
3916 
3917 mergeMiddle(merge);
3918 mergeSuccess(merge);
3919 success = true;
3920 } catch (Throwable t) {
3921 handleMergeException(t, merge);
3922 }
3923 } finally {
3924 synchronized(this) {
3925 mergeFinish(merge);
3926 
3927 if (!success) {
3928 if (infoStream != null)
3929 message("hit exception during merge");
3930 if (merge.info != null && !segmentInfos.contains(merge.info))
3931 deleter.refresh(merge.info.name);
3934 // This merge (and, generally, any change to the
3935 // segments) may now enable new merges, so we call
3936 // merge policy & update pending merges.
3937 if (success && !merge.isAborted() && (merge.optimize || (!closed && !closing))) {
3938 updatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
3939 }
3940 }
3941 }
3942 } catch (OutOfMemoryError oom) {
3943 handleOOM(oom, "merge");
3945 if (infoStream != null && merge.info != null) {
3946 message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
3948 //System.out.println(Thread.currentThread().getName() + ": merge end");
3951 /** Hook that's called when the specified merge is complete. */
3952 void mergeSuccess(MergePolicy.OneMerge merge) {
3955 /** Checks whether this merge involves any segments
3956 * already participating in a merge. If not, this merge
3957 * is "registered", meaning we record that its segments
3958 * are now participating in a merge, and true is
3959 * returned. Else (the merge conflicts) false is
3960 * returned. */
3961 final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
3963 if (merge.registerDone)
3964 return true;
3965 
3966 if (stopMerges) {
3967 merge.abort();
3968 throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
3969 }
3971 boolean isExternal = false;
3972 for(SegmentInfo info : merge.segments) {
3973 if (mergingSegments.contains(info)) {
3974 return false;
3975 }
3976 if (!segmentInfos.contains(info)) {
3977 return false;
3978 }
3979 if (info.dir != directory) {
3980 isExternal = true;
3981 }
3982 if (segmentsToOptimize.containsKey(info)) {
3983 merge.optimize = true;
3984 merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
3985 }
3986 }
3988 ensureValidMerge(merge);
3990 pendingMerges.add(merge);
3992 if (infoStream != null)
3993 message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
3995 merge.mergeGen = mergeGen;
3996 merge.isExternal = isExternal;
3998 // OK it does not conflict; now record that this merge
3999 // is running (while synchronized) to avoid race
4000 // condition where two conflicting merges from different
4001 // threads both start:
4002 message("registerMerge merging=" + mergingSegments);
4003 for(SegmentInfo info : merge.segments) {
4004 message("registerMerge info=" + info);
4005 mergingSegments.add(info);
4008 // Merge is now registered
4009 merge.registerDone = true;
4013 /** Does initial setup for a merge, which is fast but holds
4014 * the synchronized lock on the IndexWriter instance. */
4015 final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
4016 boolean success = false;
4017 try {
4018 _mergeInit(merge);
4019 success = true;
4020 } finally {
4021 if (!success) {
4022 if (infoStream != null) {
4023 message("hit exception in mergeInit");
4024 }
4025 mergeFinish(merge);
4026 }
4027 }
4028 }
4030 synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
4032 assert testPoint("startMergeInit");
4034 assert merge.registerDone;
4035 assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;
4036 
4037 if (hitOOM) {
4038 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
4039 }
4041 // TODO: is there any perf benefit to sorting
4042 // merged segments? eg biggest to smallest?
4044 if (merge.info != null)
4045 // mergeInit already done
4046 return;
4047 
4048 if (merge.isAborted())
4049 return;
4050 
4051 boolean hasVectors = false;
4052 for (SegmentInfo sourceSegment : merge.segments) {
4053 if (sourceSegment.getHasVectors()) {
4054 hasVectors = true;
4055 }
4056 }
4057 
4058 // Bind a new segment name here so even with
4059 // ConcurrentMergeScheduler we keep deterministic segment
4060 // names:
4061 merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, true, false, hasVectors);
4063 // Lock order: IW -> BD
4064 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
4066 if (result.anyDeletes) {
4067 checkpoint();
4068 }
4069 
4070 if (!keepFullyDeletedSegments && result.allDeleted != null) {
4071 if (infoStream != null) {
4072 message("drop 100% deleted segments: " + result.allDeleted);
4074 for(SegmentInfo info : result.allDeleted) {
4075 segmentInfos.remove(info);
4076 if (merge.segments.contains(info)) {
4077 mergingSegments.remove(info);
4078 merge.segments.remove(info);
4081 if (readerPool != null) {
4082 readerPool.drop(result.allDeleted);
4087 merge.info.setBufferedDeletesGen(result.gen);
4089 // Lock order: IW -> BD
4090 bufferedDeletesStream.prune(segmentInfos);
4092 Map<String,String> details = new HashMap<String,String>();
4093 details.put("optimize", Boolean.toString(merge.optimize));
4094 details.put("mergeFactor", Integer.toString(merge.segments.size()));
4095 setDiagnostics(merge.info, "merge", details);
4097 if (infoStream != null) {
4098 message("merge seg=" + merge.info.name);
4101 assert merge.estimatedMergeBytes == 0;
4102 for(SegmentInfo info : merge.segments) {
4103 if (info.docCount > 0) {
4104 final int delCount = numDeletedDocs(info);
4105 assert delCount <= info.docCount;
4106 final double delRatio = ((double) delCount)/info.docCount;
4107 merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
4111 // TODO: I think this should no longer be needed (we
4112 // now build CFS before adding segment to the infos);
4113 // however, on removing it, tests fail for some reason!
4115 // Also enroll the merged segment into mergingSegments;
4116 // this prevents it from getting selected for a merge
4117 // after our merge is done but while we are building the
4118 // CFS:
4119 mergingSegments.add(merge.info);
4122 private void setDiagnostics(SegmentInfo info, String source) {
4123 setDiagnostics(info, source, null);
4126 private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
4127 Map<String,String> diagnostics = new HashMap<String,String>();
4128 diagnostics.put("source", source);
4129 diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
4130 diagnostics.put("os", Constants.OS_NAME);
4131 diagnostics.put("os.arch", Constants.OS_ARCH);
4132 diagnostics.put("os.version", Constants.OS_VERSION);
4133 diagnostics.put("java.version", Constants.JAVA_VERSION);
4134 diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
4135 if (details != null) {
4136 diagnostics.putAll(details);
4138 info.setDiagnostics(diagnostics);
4139 }
4140 
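// Example (illustrative values, not from the original source) of the
// resulting diagnostics for a merged segment:
//   {source=merge, lucene.version=3.x, os=Linux, os.arch=amd64,
//    java.version=1.6.0, java.vendor=Sun, optimize=false, mergeFactor=10}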
4141 /** Does the finishing work for a merge, which is fast but holds
4142 * the synchronized lock on the IndexWriter instance. */
4143 final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
4145 // Optimize, addIndexes or finishMerges may be waiting
4146 // on merges to finish.
4147 notifyAll();
4148 
4149 // It's possible we are called twice, eg if there was an
4150 // exception inside mergeInit
4151 if (merge.registerDone) {
4152 final List<SegmentInfo> sourceSegments = merge.segments;
4153 for(SegmentInfo info : sourceSegments) {
4154 mergingSegments.remove(info);
4156 // TODO: if we remove the add in _mergeInit, we should
4157 // also remove this:
4158 mergingSegments.remove(merge.info);
4159 merge.registerDone = false;
4162 runningMerges.remove(merge);
4165 private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
4166 final int numSegments = merge.readers.size();
4167 Throwable th = null;
4169 boolean anyChanges = false;
4170 boolean drop = !suppressExceptions;
4171 for (int i = 0; i < numSegments; i++) {
4172 if (merge.readers.get(i) != null) {
4174 anyChanges |= readerPool.release(merge.readers.get(i), drop);
4175 } catch (Throwable t) {
4176 if (th == null) {
4177 th = t;
4178 }
4179 }
4180 merge.readers.set(i, null);
4183 if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
4185 merge.readerClones.get(i).close();
4186 } catch (Throwable t) {
4187 if (th == null) {
4188 th = t;
4189 }
4190 }
4191 // This was a private clone and we had the
4192 // only reference to it:
4193 assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
4194 merge.readerClones.set(i, null);
4198 if (suppressExceptions && anyChanges) {
4199 checkpoint();
4200 }
4201 
4202 // If any error occurred, throw it.
4203 if (!suppressExceptions && th != null) {
4204 if (th instanceof IOException) throw (IOException) th;
4205 if (th instanceof RuntimeException) throw (RuntimeException) th;
4206 if (th instanceof Error) throw (Error) th;
4207 throw new RuntimeException(th);
4211 /** Does the actual (time-consuming) work of the merge,
4212 * but without holding the synchronized lock on the IndexWriter
4213 * instance. */
4214 final private int mergeMiddle(MergePolicy.OneMerge merge)
4215 throws CorruptIndexException, IOException {
4217 merge.checkAborted(directory);
4219 final String mergedName = merge.info.name;
4221 int mergedDocCount = 0;
4223 List<SegmentInfo> sourceSegments = merge.segments;
4225 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
4226 payloadProcessorProvider,
4227 ((FieldInfos) docWriter.getFieldInfos().clone()));
4229 if (infoStream != null) {
4230 message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getHasVectors());
4233 merge.readers = new ArrayList<SegmentReader>();
4234 merge.readerClones = new ArrayList<SegmentReader>();
4236 // This is try/finally to make sure merger's readers are
4237 // closed:
4238 boolean success = false;
4239 try {
4240 int totDocCount = 0;
4241 int segUpto = 0;
4242 while(segUpto < sourceSegments.size()) {
4244 final SegmentInfo info = sourceSegments.get(segUpto);
4246 // Hold onto the "live" reader; we will use this to
4247 // commit merged deletes
4248 final SegmentReader reader = readerPool.get(info, true,
4249 MERGE_READ_BUFFER_SIZE,
4250 -config.getReaderTermsIndexDivisor());
4251 merge.readers.add(reader);
4253 // We clone the segment readers because other
4254 // deletes may come in while we're merging so we
4255 // need readers that will not change
4256 final SegmentReader clone = (SegmentReader) reader.clone(true);
4257 merge.readerClones.add(clone);
4259 if (clone.numDocs() > 0) {
4261 totDocCount += clone.numDocs();
4262 }
4263 segUpto++;
4264 }
4265 
4266 if (infoStream != null) {
4267 message("merge: total " + totDocCount + " docs");
4270 merge.checkAborted(directory);
4272 // This is where all the work happens:
4273 mergedDocCount = merge.info.docCount = merger.merge();
4275 // LUCENE-3403: set hasVectors after merge(), so that it is properly set.
4276 merge.info.setHasVectors(merger.fieldInfos().hasVectors());
4278 assert mergedDocCount == totDocCount;
4280 if (infoStream != null) {
4281 message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
4284 anyNonBulkMerges |= merger.getAnyNonBulkMerges();
4286 assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
4288 // Very important to do this before opening the reader
4289 // because SegmentReader must know if prox was written for
4291 merge.info.setHasProx(merger.fieldInfos().hasProx());
4293 boolean useCompoundFile;
4294 synchronized (this) { // Guard segmentInfos
4295 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
4298 if (useCompoundFile) {
4301 final String compoundFileName = IndexFileNames.segmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
4304 if (infoStream != null) {
4305 message("create compound file " + compoundFileName);
4307 merger.createCompoundFile(compoundFileName, merge.info);
4309 } catch (IOException ioe) {
4310 synchronized(this) {
4311 if (merge.isAborted()) {
4312 // This can happen if rollback or close(false)
4313 // is called -- fall through to logic below to
4314 // remove the partially created CFS:
4315 } else {
4316 handleMergeException(ioe, merge);
4319 } catch (Throwable t) {
4320 handleMergeException(t, merge);
4323 if (infoStream != null) {
4324 message("hit exception creating compound file during merge");
4327 synchronized(this) {
4328 deleter.deleteFile(compoundFileName);
4329 deleter.deleteNewFiles(merge.info.files());
4336 synchronized(this) {
4338 // delete new non cfs files directly: they were never
4339 // registered with IFD
4340 deleter.deleteNewFiles(merge.info.files());
4342 if (merge.isAborted()) {
4343 if (infoStream != null) {
4344 message("abort merge after building CFS");
4346 deleter.deleteFile(compoundFileName);
4347 return 0;
4348 }
4349 }
4350 
4351 merge.info.setUseCompoundFile(true);
4354 if (infoStream != null) {
4355 message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
4358 final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
4360 final int termsIndexDivisor;
4361 final boolean loadDocStores;
4363 if (mergedSegmentWarmer != null) {
4364 // Load terms index & doc stores so the segment
4365 // warmer can run searches, load documents/term
4366 // vectors:
4367 termsIndexDivisor = config.getReaderTermsIndexDivisor();
4368 loadDocStores = true;
4369 } else {
4370 termsIndexDivisor = -1;
4371 loadDocStores = false;
4374 // TODO: in the non-realtime case, we may want to only
4375 // keep deletes (it's costly to open entire reader
4376 // when we just need deletes)
4378 final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
4380 if (poolReaders && mergedSegmentWarmer != null) {
4381 mergedSegmentWarmer.warm(mergedReader);
4384 if (!commitMerge(merge, mergedReader)) {
4385 // commitMerge will return false if this merge was aborted
4386 return 0;
4387 }
4388 
4389 synchronized(this) {
4390 if (readerPool.release(mergedReader)) {
4391 // Must checkpoint after releasing the
4392 // mergedReader since it may have written a new
4393 // deletes file:
4394 checkpoint();
4395 }
4396 }
4397 
4398 success = true;
4399 } finally {
4402 // Readers are already closed in commitMerge if we didn't hit
4403 // an exception:
4404 if (!success) {
4405 closeMergeReaders(merge, true);
4406 }
4407 }
4408 
4409 return mergedDocCount;
4412 synchronized void addMergeException(MergePolicy.OneMerge merge) {
4413 assert merge.getException() != null;
4414 if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
4415 mergeExceptions.add(merge);
4418 // For test purposes.
4419 final int getBufferedDeleteTermsSize() {
4420 return docWriter.getPendingDeletes().terms.size();
4423 // For test purposes.
4424 final int getNumBufferedDeleteTerms() {
4425 return docWriter.getPendingDeletes().numTermDeletes.get();
4428 // utility routines for tests
4429 synchronized SegmentInfo newestSegment() {
4430 return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
4433 /** @lucene.internal */
4434 public synchronized String segString() throws IOException {
4435 return segString(segmentInfos);
4438 /** @lucene.internal */
4439 public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
4440 final StringBuilder buffer = new StringBuilder();
4441 for(final SegmentInfo s : infos) {
4442 if (buffer.length() > 0) {
4443 buffer.append(' ');
4444 }
4445 buffer.append(segString(s));
4447 return buffer.toString();
4450 /** @lucene.internal */
4451 public synchronized String segString(SegmentInfo info) throws IOException {
4452 StringBuilder buffer = new StringBuilder();
4453 SegmentReader reader = readerPool.getIfExists(info);
4455 if (reader != null) {
4456 buffer.append(reader.toString());
4457 } else {
4458 buffer.append(info.toString(directory, 0));
4459 if (info.dir != directory) {
4460 buffer.append("**");
4464 if (reader != null) {
4465 readerPool.release(reader);
4468 return buffer.toString();
4471 private synchronized void doWait() {
4472 // NOTE: the callers of this method should in theory
4473 // be able to do simply wait(), but, as a defense
4474 // against thread timing hazards where notifyAll()
4475 // fails to be called, we wait for at most 1 second
4476 // and then return so caller can check if wait
4477 // conditions are satisfied:
4478 try {
4479 wait(1000);
4480 } catch (InterruptedException ie) {
4481 throw new ThreadInterruptedException(ie);
4485 private boolean keepFullyDeletedSegments;
4487 /** Only for testing.
4489 * @lucene.internal */
4490 void keepFullyDeletedSegments() {
4491 keepFullyDeletedSegments = true;
4494 boolean getKeepFullyDeletedSegments() {
4495 return keepFullyDeletedSegments;
4498 // called only from assert
4499 private boolean filesExist(SegmentInfos toSync) throws IOException {
4500 Collection<String> files = toSync.files(directory, false);
4501 for(final String fileName: files) {
4502 assert directory.fileExists(fileName): "file " + fileName + " does not exist";
4503 // If this trips it means we are missing a call to
4504 // .checkpoint somewhere, because by the time we
4505 // are called, deleter should know about every
4506 // file referenced by the current head
4508 assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
4509 }
4510 
4511 return true;
4512 }
4513 /** Walk through all files referenced by the current
4514 * segmentInfos and ask the Directory to sync each file,
4515 * if it wasn't already. If that succeeds, then we
4516 * prepare a new segments_N file but do not fully commit
4517 * it. */
4518 private void startCommit(SegmentInfos toSync, Map<String,String> commitUserData) throws IOException {
4520 assert testPoint("startStartCommit");
4521 assert pendingCommit == null;
4522 
4523 if (hitOOM) {
4524 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
4525 }
4526 
4527 try {
4528 
4529 if (infoStream != null)
4530 message("startCommit(): start");
4533 synchronized(this) {
4535 assert lastCommitChangeCount <= changeCount;
4537 if (pendingCommitChangeCount == lastCommitChangeCount) {
4538 if (infoStream != null) {
4539 message(" skip startCommit(): no changes pending");
4541 deleter.decRef(toSync);
4542 return;
4543 }
4544 
4545 // First, we clone & incref the segmentInfos we intend
4546 // to sync, then, without locking, we sync() all files
4547 // referenced by toSync, in the background.
4549 if (infoStream != null)
4550 message("startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
4552 assert filesExist(toSync);
4554 if (commitUserData != null) {
4555 toSync.setUserData(commitUserData);
4559 assert testPoint("midStartCommit");
4561 boolean pendingCommitSet = false;
4562 
4563 try {
4564 // This call can take a long time -- 10s of seconds
4565 // or more. We do it without sync:
4566 directory.sync(toSync.files(directory, false));
4568 assert testPoint("midStartCommit2");
4570 synchronized(this) {
4572 assert pendingCommit == null;
4574 assert segmentInfos.getGeneration() == toSync.getGeneration();
4576 // Exception here means nothing is prepared
4577 // (this method unwinds everything it did on
4579 toSync.prepareCommit(directory);
4580 pendingCommitSet = true;
4581 pendingCommit = toSync;
4584 if (infoStream != null) {
4585 message("done all syncs");
4588 assert testPoint("midStartCommitSuccess");
4591 synchronized(this) {
4593 // Have our master segmentInfos record the
4594 // generations we just prepared. We do this
4595 // on error or success so we don't
4596 // double-write a segments_N file.
4597 segmentInfos.updateGeneration(toSync);
4599 if (!pendingCommitSet) {
4600 if (infoStream != null) {
4601 message("hit exception committing segments file");
4604 deleter.decRef(toSync);
4608 } catch (OutOfMemoryError oom) {
4609 handleOOM(oom, "startCommit");
4611 assert testPoint("finishStartCommit");
4615 * Returns <code>true</code> iff the index in the named directory is
4616 * currently locked.
4617 * @param directory the directory to check for a lock
4618 * @throws IOException if there is a low-level IO error
4620 public static boolean isLocked(Directory directory) throws IOException {
4621 return directory.makeLock(WRITE_LOCK_NAME).isLocked();
4625 * Forcibly unlocks the index in the named directory.
4627 * Caution: this should only be used by failure recovery code,
4628 * when it is known that no other process nor thread is in fact
4629 * currently accessing this index.
4631 public static void unlock(Directory directory) throws IOException {
4632 directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
4633 }
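// A sketch of failure-recovery usage (only safe when no other process
// or thread is writing to the index; "dir" is a hypothetical Directory):
//
//   if (IndexWriter.isLocked(dir)) {
//     IndexWriter.unlock(dir);
//   }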
4636 * Specifies maximum field length (in number of tokens/terms) in
4637 * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
4638 * the value set by the constructor.
4640 * @deprecated use {@link LimitTokenCountAnalyzer} instead.
4643 public static final class MaxFieldLength {
4644 
4645 private int limit;
4646 private String name;
4649 * Private type-safe-enum-pattern constructor.
4651 * @param name instance name
4652 * @param limit maximum field length
4654 private MaxFieldLength(String name, int limit) {
4655 this.name = name;
4656 this.limit = limit;
4657 }
4660 * Public constructor to allow users to specify the maximum field size limit.
4662 * @param limit The maximum field length
4664 public MaxFieldLength(int limit) {
4665 this("User-specified", limit);
4668 public int getLimit() {
4669 return limit;
4670 }
4671 
4673 public String toString()
4674 {
4675 return name + ":" + limit;
4676 }
4678 /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
4679 public static final MaxFieldLength UNLIMITED
4680 = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
4683 * Sets the maximum field length to
4684 * {@link #DEFAULT_MAX_FIELD_LENGTH}
4686 public static final MaxFieldLength LIMITED
4687 = new MaxFieldLength("LIMITED", 10000);
4690 /** If {@link #getReader} has been called (ie, this writer
4691 * is in near real-time mode), then after a merge
4692 * completes, this class can be invoked to warm the
4693 * reader on the newly merged segment, before the merge
4694 * commits. This is not required for near real-time
4695 * search, but will reduce search latency on opening a
4696 * new near real-time reader after a merge completes.
4698 * @lucene.experimental
4700 * <p><b>NOTE</b>: warm is called before any deletes have
4701 * been carried over to the merged segment. */
4702 public static abstract class IndexReaderWarmer {
4703 public abstract void warm(IndexReader reader) throws IOException;
4704 }
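// A sketch of a warmer (the query is hypothetical; registration can
// also go through IndexWriterConfig#setMergedSegmentWarmer):
//
//   writer.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
//     @Override
//     public void warm(IndexReader reader) throws IOException {
//       // Run a cheap search so caches are primed before the merged
//       // segment is exposed to a real near real-time reader:
//       new IndexSearcher(reader).search(new TermQuery(new Term("id", "0")), 1);
//     }
//   });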
4707 * Set the merged segment warmer. See {@link IndexReaderWarmer}.
4708 *
4709 * @deprecated use
4710 * {@link IndexWriterConfig#setMergedSegmentWarmer}
4711 * instead. */
4714 public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
4715 config.setMergedSegmentWarmer(warmer);
4719 * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
4721 * @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
4724 public IndexReaderWarmer getMergedSegmentWarmer() {
4725 return config.getMergedSegmentWarmer();
4728 private void handleOOM(OutOfMemoryError oom, String location) {
4729 if (infoStream != null) {
4730 message("hit OutOfMemoryError inside " + location);
4736 // Used only by assert for testing. Current points:
4737 // startDoFlush
4738 // startCommitMerge
4739 // startStartCommit
4740 // midStartCommit
4741 // midStartCommit2
4742 // midStartCommitSuccess
4743 // finishStartCommit
4744 // startCommitMergeDeletes
4745 // startMergeInit
4746 // DocumentsWriter.ThreadState.init start
4747 boolean testPoint(String name) {
4748 return true;
4749 }
4750 
4751 synchronized boolean nrtIsCurrent(SegmentInfos infos) {
4752 //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
4753 return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
4756 synchronized boolean isClosed() {
4757 return closed;
4758 }
4759 
4760 /** Expert: remove any index files that are no longer
4761 * used.
4762 *
4763 * <p> IndexWriter normally deletes unused files itself,
4764 * during indexing. However, on Windows, which disallows
4765 * deletion of open files, if there is a reader open on
4766 * the index then those files cannot be deleted. This is
4767 * fine, because IndexWriter will periodically retry
4768 * the deletion.</p>
4770 * <p> However, IndexWriter doesn't try that often: only
4771 * on open, close, flushing a new segment, and finishing
4772 * a merge. If you don't do any of these actions with your
4773 * IndexWriter, you'll see the unused files linger. If
4774 * that's a problem, call this method to delete them
4775 * (once you've closed the open readers that were
4776 * preventing their deletion).
4778 * <p> In addition, you can call this method to delete
4779 * unreferenced index commits. This might be useful if you
4780 * are using an {@link IndexDeletionPolicy} which holds
4781 * onto index commits until some criteria are met, but those
4782 * commits are no longer needed. Otherwise, those commits will
4783 * be deleted the next time commit() is called.
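* <p>A sketch (hypothetical usage):</p>
* <pre>
* oldReader.close();          // release files held open by the reader
* writer.deleteUnusedFiles(); // now the writer can delete them
* </pre>
*/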
4785 public synchronized void deleteUnusedFiles() throws IOException {
4786 deleter.deletePendingFiles();
4787 deleter.revisitPolicy();
4791 * Sets the {@link PayloadProcessorProvider} to use when merging payloads.
4792 * Note that the given <code>pcp</code> will be invoked for every segment that
4793 * is merged, not only external ones that are given through
4794 * {@link #addIndexes}. If you want only the payloads of the external segments
4795 * to be processed, you can return <code>null</code> whenever a
4796 * {@link DirPayloadProcessor} is requested for the {@link Directory} of the
4797 * {@link IndexWriter}.
4799 * The default is <code>null</code> which means payloads are processed
4800 * normally (copied) during segment merges. You can also unset it by passing
4801 * <code>null</code>.
4803 * <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
4804 * immediately, potentially for already running merges too. If you want to be
4805 * sure it is used for further operations only, such as {@link #addIndexes} or
4806 * {@link #optimize}, you can call {@link #waitForMerges()} before.
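* <p>For example (a sketch, with <code>pcp</code> and <code>dirs</code>
* supplied by the application):</p>
* <pre>
* writer.waitForMerges(); // ensure already-running merges don't see pcp
* writer.setPayloadProcessorProvider(pcp);
* writer.addIndexes(dirs);
* </pre>
*/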
4808 public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
4809 payloadProcessorProvider = pcp;
4813 * Returns the {@link PayloadProcessorProvider} that is used during segment
4814 * merges to process payloads.
4816 public PayloadProcessorProvider getPayloadProcessorProvider() {
4817 return payloadProcessorProvider;
4820 // decides when flushes happen
4821 final class FlushControl {
4823 private boolean flushPending;
4824 private boolean flushDeletes;
4825 private int delCount;
4826 private int docCount;
4827 private boolean flushing;
4829 private synchronized boolean setFlushPending(String reason, boolean doWait) {
4830 if (flushPending || flushing) {
4831 if (doWait) {
4832 while(flushPending || flushing) {
4833 try {
4834 wait();
4835 } catch (InterruptedException ie) {
4836 throw new ThreadInterruptedException(ie);
4837 }
4838 }
4839 }
4840 return false;
4841 } else {
4842 if (infoStream != null) {
4843 message("now trigger flush reason=" + reason);
4844 }
4845 flushPending = true;
4846 return flushPending;
4847 }
4848 }
4850 public synchronized void setFlushPendingNoWait(String reason) {
4851 setFlushPending(reason, false);
4854 public synchronized boolean getFlushPending() {
4855 return flushPending;
4858 public synchronized boolean getFlushDeletes() {
4859 return flushDeletes;
4862 public synchronized void clearFlushPending() {
4863 if (infoStream != null) {
4864 message("clearFlushPending");
4866 flushPending = false;
4867 flushDeletes = false;
4868 docCount = 0;
4869 notifyAll();
4870 }
4871 
4872 public synchronized void clearDeletes() {
4873 delCount = 0;
4874 }
4875 
4876 public synchronized boolean waitUpdate(int docInc, int delInc) {
4877 return waitUpdate(docInc, delInc, false);
4880 public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
4881 while(flushPending) {
4884 } catch (InterruptedException ie) {
4885 throw new ThreadInterruptedException(ie);
4892 // skipWait is only used when a thread is BOTH adding
4893 // a doc and buffering a del term, and, the adding of
4894 // the doc already triggered a flush
4895 docCount += docInc;
4896 delCount += delInc;
4897 if (skipWait)
4898 return false;
4899 final int maxBufferedDocs = config.getMaxBufferedDocs();
4900 if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4901 docCount >= maxBufferedDocs) {
4902 return setFlushPending("maxBufferedDocs", true);
4905 final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
4906 if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4907 delCount >= maxBufferedDeleteTerms) {
4908 flushDeletes = true;
4909 return setFlushPending("maxBufferedDeleteTerms", true);
4912 return flushByRAMUsage("add delete/doc");
4915 public synchronized boolean flushByRAMUsage(String reason) {
4916 final double ramBufferSizeMB = config.getRAMBufferSizeMB();
4917 if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
4918 final long limit = (long) (ramBufferSizeMB*1024*1024);
4919 long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4920 if (used >= limit) {
4922 // DocumentsWriter may be able to free up some
4924 // Lock order: FC -> DW
4925 docWriter.balanceRAM();
4927 used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4928 if (used >= limit) {
4929 return setFlushPending("ram full: " + reason, false);
4937 final FlushControl flushControl = new FlushControl();