package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.TwoPhaseCommit;
import org.apache.lucene.util.Version;
/**
  An <code>IndexWriter</code> creates and maintains an index.

  <p>The <code>create</code> argument to the {@link
  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
  whether a new index is created, or whether an existing index is
  opened.  Note that you can open an index with <code>create=true</code>
  even while readers are using the index.  The old readers will
  continue to search the "point in time" snapshot they had opened,
  and won't see the newly created index until they re-open.  There are
  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
  with no <code>create</code> argument which will create a new index
  if there is not already an index at the provided path and otherwise
  open the existing index.</p>
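
  <p>For example, a minimal sketch of opening a writer (the path,
  analyzer and version here are illustrative assumptions, not
  requirements):</p>

  <pre>
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31,
        new StandardAnalyzer(Version.LUCENE_31));
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, conf);
  </pre>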
  <p>In either case, documents are added with {@link #addDocument(Document)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}.  A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  and then adds the entire document).  When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>
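
  <p>Continuing the sketch above, a hedged example (the field names
  are made up for illustration):</p>

  <pre>
    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    // Replace the document whose "id" field is "1", then persist and close:
    writer.updateDocument(new Term("id", "1"), doc);
    writer.close();
  </pre>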
  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls).  A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner.  For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB.  For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer.  Note that flushing just moves the
  internal buffered state in IndexWriter into the index, but
  these changes are not visible to IndexReader until either
  {@link #commit()} or {@link #close} is called.  A flush may
  also trigger one or more segment merges which by default
  run with a background thread so as not to block the
  addDocument calls (see <a href="#mergePolicy">below</a>
  for changing the {@link MergeScheduler}).</p>
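
  <p>For instance, a sketch of favoring flushing by RAM with a larger
  buffer (48 is an arbitrary illustrative value):</p>

  <pre>
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setRAMBufferSizeMB(48.0);
    // Disable the doc-count trigger so only RAM usage drives flushes:
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  </pre>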
  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
  another <code>IndexWriter</code> on the same directory will lead to a
  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
  is also thrown if an IndexReader on the same directory is used to delete documents
  from the index.</p>
  <a name="deletionPolicy"></a>
  <p>Expert: <code>IndexWriter</code> allows an optional
  {@link IndexDeletionPolicy} implementation to be
  specified.  You can use this to control when prior commits
  are deleted from the index.  The default policy is {@link
  KeepOnlyLastCommitDeletionPolicy} which removes all prior
  commits as soon as a new commit is done (this matches
  behavior before 2.2).  Creating your own policy can allow
  you to explicitly keep previous "point in time" commits
  alive in the index for some time, to allow readers to
  refresh to the new commit without having the old commit
  deleted out from under them.  This is necessary on
  filesystems like NFS that do not support "delete on last
  close" semantics, which Lucene's "point in time" search
  normally relies on. </p>
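
  <p>A hedged sketch of installing a different policy, using {@link
  SnapshotDeletionPolicy} (which wraps another policy and can hold a
  commit open, e.g. while you back it up; the analyzer is assumed):</p>

  <pre>
    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(
        new KeepOnlyLastCommitDeletionPolicy());
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setIndexDeletionPolicy(sdp);
  </pre>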
  <a name="mergePolicy"></a> <p>Expert:
  <code>IndexWriter</code> allows you to separately change
  the {@link MergePolicy} and the {@link MergeScheduler}.
  The {@link MergePolicy} is invoked whenever there are
  changes to the segments in the index.  Its role is to
  select which merges to do, if any, and return a {@link
  MergePolicy.MergeSpecification} describing the merges.
  The default is {@link LogByteSizeMergePolicy}.  Then, the {@link
  MergeScheduler} is invoked with the requested merges and
  it decides when and how to run the merges.  The default is
  {@link ConcurrentMergeScheduler}. </p>
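
  <p>For example, a sketch that tunes the default policy and swaps in
  a serial scheduler (the values are illustrative only):</p>

  <pre>
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(20);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    conf.setMergePolicy(mp);
    // Run merges in the calling thread instead of a background thread:
    conf.setMergeScheduler(new SerialMergeScheduler());
  </pre>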
  <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  OutOfMemoryError then IndexWriter will quietly record this
  fact and block all future segment commits.  This is a
  defensive measure in case any internal state (buffered
  documents and deletions) were corrupted.  Any subsequent
  calls to {@link #commit()} will throw an
  IllegalStateException.  The only course of action is to
  call {@link #close()}, which internally will call {@link
  #rollback()}, to undo any changes to the index since the
  last commit.  You can also just call {@link #rollback()}
  directly.</p>
  <a name="thread-safety"></a><p><b>NOTE</b>: {@link
  IndexWriter} instances are completely thread
  safe, meaning multiple threads can call any of its
  methods, concurrently.  If your application requires
  external synchronization, you should <b>not</b>
  synchronize on the <code>IndexWriter</code> instance as
  this may cause deadlock; use your own (non-Lucene) objects
  instead. </p>
  <p><b>NOTE</b>: If you call
  <code>Thread.interrupt()</code> on a thread that's within
  IndexWriter, IndexWriter will try to catch this (eg, if
  it's in a wait() or Thread.sleep()), and will then throw
  the unchecked exception {@link ThreadInterruptedException}
  and <b>clear</b> the interrupt status on the thread.</p>
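
  <p>A hedged sketch of restoring the interrupt flag after catching
  this exception (the surrounding call is illustrative):</p>

  <pre>
    try {
      writer.close();
    } catch (ThreadInterruptedException tie) {
      // IndexWriter cleared the flag; restore it for callers upstream:
      Thread.currentThread().interrupt();
      throw tie;
    }
  </pre>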
*/

/*
 * Clarification: Check Points (and commits)
 * IndexWriter writes new index files to the directory without writing a new segments_N
 * file which references these new files. It also means that the state of
 * the in memory SegmentInfos object is different than the most recent
 * segments_N file written to the directory.
 *
 * Each time the SegmentInfos is changed, and matches the (possibly
 * modified) directory files, we have a new "check point".
 * If the modified/new SegmentInfos is written to disk - as a new
 * (generation of) segments_N file - this check point is also an
 * IndexCommit.
 *
 * A new checkpoint always replaces the previous checkpoint and
 * becomes the new "front" of the index. This allows the IndexFileDeleter
 * to delete files that are referenced only by stale checkpoints
 * (files that were created since the last commit, but are no longer
 * referenced by the "front" of the index). For this, IndexFileDeleter
 * keeps track of the last non-commit checkpoint.
 */
public class IndexWriter implements Closeable, TwoPhaseCommit {

  /**
   * Default value for the write lock timeout (1,000).
   * @see #setDefaultWriteLockTimeout
   * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
   */
  @Deprecated
  public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;

  private long writeLockTimeout;

  /**
   * Name of the write lock in the index.
   */
  public static final String WRITE_LOCK_NAME = "write.lock";

  /**
   * Value to denote a flush trigger is disabled.
   * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
   */
  @Deprecated
  public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDocs(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;

  /**
   * Default value is 16 MB (which means flush when buffered
   * docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
   */
  @Deprecated
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;

  /**
   * Disabled by default (because IndexWriter flushes by RAM usage
   * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
   */
  @Deprecated
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  /**
   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
   *
   * @deprecated see {@link IndexWriterConfig}
   */
  @Deprecated
  public final static int DEFAULT_MAX_FIELD_LENGTH = MaxFieldLength.UNLIMITED.getLimit();

  /**
   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
   * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
   */
  @Deprecated
  public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  /**
   * Absolute hard maximum length for a term.  If a term
   * arrives from the analyzer longer than this length, it
   * is skipped and a message is printed to infoStream, if
   * set (see {@link #setInfoStream}).
   */
  public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
  // The normal read buffer size defaults to 1024, but
  // increasing this during merging seems to yield
  // performance gains.  However we don't want to increase
  // it too much because there are quite a few
  // BufferedIndexInputs created during merging.  See
  // LUCENE-888 for details.
  private final static int MERGE_READ_BUFFER_SIZE = 4096;

  // Used for printing messages
  private static final AtomicInteger MESSAGE_ID = new AtomicInteger();
  private int messageID = MESSAGE_ID.getAndIncrement();
  volatile private boolean hitOOM;

  private final Directory directory;           // where this index resides
  private final Analyzer analyzer;             // how to analyze text

  // TODO 4.0: this should be made final once the setter is out
  private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize

  private volatile long changeCount; // increments every time a change is completed
  private long lastCommitChangeCount; // last changeCount that was committed

  private List<SegmentInfo> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails

  volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
  volatile long pendingCommitChangeCount;

  final SegmentInfos segmentInfos = new SegmentInfos();       // the segments

  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  // used by forceMerge to note those needing merging
  private Map<SegmentInfo,Boolean> segmentsToMerge = new HashMap<SegmentInfo,Boolean>();
  private int mergeMaxNumSegments;

  private Lock writeLock;

  private volatile boolean closed;
  private volatile boolean closing;

  // Holds all SegmentInfo instances currently involved in
  // merges
  private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();

  private MergePolicy mergePolicy;
  // TODO 4.0: this should be made final once the setter is removed
  private /*final*/MergeScheduler mergeScheduler;
  private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  private long mergeGen;
  private boolean stopMerges;

  private final AtomicInteger flushCount = new AtomicInteger();
  private final AtomicInteger flushDeletesCount = new AtomicInteger();

  final ReaderPool readerPool = new ReaderPool();
  final BufferedDeletesStream bufferedDeletesStream;

  // This is a "write once" variable (like the organic dye
  // on a DVD-R that may or may not be heated by a laser and
  // then cooled to permanently record the event): it's
  // false, until getReader() is called for the first time,
  // at which point it's switched to true and never changes
  // back to false.  Once this is true, we hold open and
  // reuse SegmentReader instances internally for applying
  // deletes, doing merges, and reopening near real-time
  // readers.
  private volatile boolean poolReaders;

  // The instance that was passed to the constructor. It is saved only in order
  // to allow users to query an IndexWriter settings.
  private final IndexWriterConfig config;

  // The PayloadProcessorProvider to use when segments are merged
  private PayloadProcessorProvider payloadProcessorProvider;

  // for testing
  boolean anyNonBulkMerges;
  /**
   * Expert: returns a readonly reader, covering all
   * committed as well as un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * <code>flush</code> and then using {@link IndexReader#open} to
   * open a new reader.  But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
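   *
   * <p>A hedged usage sketch (the search logic is an illustrative
   * assumption):</p>
   *
   * <pre>
   * IndexReader reader = writer.getReader(); // NRT reader
   * try {
   *   IndexSearcher searcher = new IndexSearcher(reader);
   *   // ... run searches; meanwhile the writer may keep indexing ...
   * } finally {
   *   reader.close(); // the caller owns the returned reader
   * }
   * </pre>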
   *
   * <p>It's <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter.  You'll have to
   * experiment in your situation to determine if it's
   * fast enough.  As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * IndexReader#reopen}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc.  This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link #setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it's committed
   * to the index.  This is important for minimizing
   * index-to-search delay after a large merge. </p>
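   *
   * <p>As a hedged sketch (the warming logic is an illustrative
   * assumption; any per-segment work can go there):</p>
   *
   * <pre>
   * writer.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
   *   public void warm(IndexReader reader) throws IOException {
   *     // e.g. run a representative query against the newly merged segment
   *   }
   * });
   * </pre>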
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used.  However,
   * if you attempt to reopen any of those readers, you'll
   * hit an {@link AlreadyClosedException}.</p>
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @deprecated Please use {@link
   * IndexReader#open(IndexWriter,boolean)} instead.
   *
   * @throws IOException
   */
  @Deprecated
  public IndexReader getReader() throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), true);
  }

  IndexReader getReader(boolean applyAllDeletes) throws IOException {
    return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
  }
  /** Expert: like {@link #getReader}, except you can
   *  specify which termInfosIndexDivisor should be used for
   *  any newly opened readers.
   *  @param termInfosIndexDivisor Subsamples which indexed
   *  terms are loaded into RAM. This has the same effect as {@link
   *  IndexWriter#setTermIndexInterval} except that setting
   *  must be done at indexing time while this setting can be
   *  set per reader.  When set to N, then one in every
   *  N*termIndexInterval terms in the index is loaded into
   *  memory.  By setting this to a value > 1 you can reduce
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo.  The default value is 1.  Set this
   *  to -1 to skip loading the terms index entirely.
   *
   *  @deprecated Please use {@link
   *  IndexReader#open(IndexWriter,boolean)} instead.  Furthermore,
   *  this method cannot guarantee the reader (and its
   *  sub-readers) will be opened with the
   *  termInfosIndexDivisor setting because some of them may
   *  have already been opened according to {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor}. You
   *  should set the requested termInfosIndexDivisor through
   *  {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
   *  {@link #getReader()}. */
  @Deprecated
  public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
    return getReader(termInfosIndexDivisor, true);
  }

  IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
    ensureOpen();

    final long tStart = System.currentTimeMillis();

    if (infoStream != null) {
      message("flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    poolReaders = true;

    // Prevent segmentInfos from changing while opening the
    // reader; in theory we could do similar retry logic,
    // just like we do when loading segments_N
    IndexReader r;
    synchronized(this) {
      flush(false, applyAllDeletes);
      r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
      if (infoStream != null) {
        message("return reader version=" + r.getVersion() + " reader=" + r);
      }
    }

    maybeMerge();

    if (infoStream != null) {
      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
    }
    return r;
  }
  // Used for all SegmentReaders we open
  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());

  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
    return readerFinishedListeners;
  }
  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader.  This pool
   *  reuses instances of the SegmentReaders in all these
   *  places if it is in "near real-time mode" (getReader()
   *  has been called on this instance). */
  class ReaderPool {

    private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
    /** Forcefully clear changes for the specified segments. This is called on successful merge. */
    synchronized void clear(List<SegmentInfo> infos) throws IOException {
      if (infos == null) {
        for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
          ent.getValue().hasChanges = false;
        }
      } else {
        for (final SegmentInfo info: infos) {
          final SegmentReader r = readerMap.get(info);
          if (r != null) {
            r.hasChanges = false;
          }
        }
      }
    }

    // used only by asserts
    public synchronized boolean infoIsLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      assert idx != -1: "info=" + info + " isn't in pool";
      assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
      return true;
    }

    public synchronized SegmentInfo mapToLive(SegmentInfo info) {
      int idx = segmentInfos.indexOf(info);
      if (idx != -1) {
        info = segmentInfos.info(idx);
      }
      return info;
    }
    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     *
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr) throws IOException {
      return release(sr, false);
    }
    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references).
     * @return true if this release altered the index (eg
     * the SegmentReader had pending changes to del docs and
     * was closed).  Caller must call checkpoint() if so.
     *
     * @throws IOException
     */
    public synchronized boolean release(SegmentReader sr, boolean drop) throws IOException {

      final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());

      assert !pooled || readerMap.get(sr.getSegmentInfo()) == sr;

      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      sr.decRef();

      if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {

        // We invoke deleter.checkpoint below, so we must be
        // sync'd on IW if there are changes:
        assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge.
        sr.hasChanges &= !drop;

        final boolean hasChanges = sr.hasChanges;

        // Drop our ref -- this will commit any pending
        // changes to the dir
        sr.close();

        // We are the last ref to this reader; since we're
        // not pooling readers, we release it:
        readerMap.remove(sr.getSegmentInfo());

        return hasChanges;
      }

      return false;
    }
    public synchronized void drop(List<SegmentInfo> infos) throws IOException {
      for(SegmentInfo info : infos) {
        drop(info);
      }
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.hasChanges = false;
        readerMap.remove(info);
        sr.close();
      }
    }

    public synchronized void dropAll() throws IOException {
      for(SegmentReader reader : readerMap.values()) {
        reader.hasChanges = false;

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader using this SR is still open
        reader.decRef();
      }
      readerMap.clear();
    }
    /** Remove all our references to readers, and commits
     *  any pending changes. */
    synchronized void close() throws IOException {
      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for(Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {

        SegmentReader sr = ent.getValue();
        if (sr.hasChanges) {
          assert infoIsLive(sr.getSegmentInfo());
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }

        // NOTE: it is allowed that this decRef does not
        // actually close the SR; this can happen when a
        // near real-time reader is kept open after the
        // IndexWriter instance is closed
        sr.decRef();
      }

      readerMap.clear();
    }
    /**
     * Commit all segment readers in the pool.
     * @throws IOException
     */
    synchronized void commit(SegmentInfos infos) throws IOException {

      // We invoke deleter.checkpoint below, so we must be
      // sync'd on IW:
      assert Thread.holdsLock(IndexWriter.this);

      for (SegmentInfo info : infos) {

        final SegmentReader sr = readerMap.get(info);
        if (sr != null && sr.hasChanges) {
          assert infoIsLive(info);
          sr.doCommit(null);

          // Must checkpoint w/ deleter, because this
          // segment reader will have created new _X_N.del
          // file.
          deleter.checkpoint(segmentInfos, false);
        }
      }
    }
    /**
     * Returns a ref to a clone.  NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
      SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
      try {
        return (SegmentReader) sr.clone(true);
      } finally {
        sr.decRef();
      }
    }
    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
      return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, config.getReaderTermsIndexDivisor());
    }
    /**
     * Obtain a SegmentReader from the readerPool.  The reader
     * must be returned by calling {@link #release(SegmentReader)}
     *
     * @see #release(SegmentReader)
     * @param info
     * @param doOpenStores
     * @param readBufferSize
     * @param termsIndexDivisor
     * @throws IOException
     */
    public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {

      if (poolReaders) {
        readBufferSize = BufferedIndexInput.BUFFER_SIZE;
      }

      SegmentReader sr = readerMap.get(info);
      if (sr == null) {
        // TODO: we may want to avoid doing this while
        // loading the segment
        // Returns a ref, which we xfer to readerMap:
        sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
        sr.readerFinishedListeners = readerFinishedListeners;

        if (info.dir == directory) {
          // Only pool if reader is not external
          readerMap.put(info, sr);
        }
      } else {
        if (doOpenStores) {
          sr.openDocStores();
        }
        if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
          // If this reader was originally opened because we
          // needed to merge it, we didn't load the terms
          // index.  But now, if the caller wants the terms
          // index (eg because it's doing deletes, or an NRT
          // reader is being opened) we ask the reader to
          // load its terms index.
          sr.loadTermsIndex(termsIndexDivisor);
        }
      }

      // Return a ref to our caller
      if (info.dir == directory) {
        // Only incRef if we pooled (reader is not external)
        sr.incRef();
      }
      return sr;
    }
    public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
      SegmentReader sr = readerMap.get(info);
      if (sr != null) {
        sr.incRef();
      }
      return sr;
    }
  }
  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's
   * delCount is returned.
   */
  public int numDeletedDocs(SegmentInfo info) throws IOException {
    ensureOpen(false);
    SegmentReader reader = readerPool.getIfExists(info);
    try {
      if (reader != null) {
        return reader.numDeletedDocs();
      } else {
        return info.getDelCount();
      }
    } finally {
      if (reader != null) {
        readerPool.release(reader);
      }
    }
  }
  /**
   * Used internally to throw an {@link
   * AlreadyClosedException} if this IndexWriter has been
   * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed
   */
  protected final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
    if (closed || (includePendingClose && closing)) {
      throw new AlreadyClosedException("this IndexWriter is closed");
    }
  }

  protected final void ensureOpen() throws AlreadyClosedException {
    ensureOpen(true);
  }
  /**
   * Prints a message to the infoStream (if non-null),
   * prefixed with the identifying information for this
   * writer and the thread that's calling it.
   */
  public void message(String message) {
    if (infoStream != null)
      infoStream.println("IW " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
  }

  /**
   * Casts current mergePolicy to LogMergePolicy, and throws
   * an exception if the mergePolicy is not a LogMergePolicy.
   */
  private LogMergePolicy getLogMergePolicy() {
    if (mergePolicy instanceof LogMergePolicy)
      return (LogMergePolicy) mergePolicy;
    else
      throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  }
  /** <p>Get the current setting of whether newly flushed
   *  segments will use the compound file format.  Note that
   *  this just returns the value previously set with
   *  setUseCompoundFile(boolean), or the default value
   *  (true).  You cannot use this to query the status of
   *  previously flushed segments.</p>
   *
   *  <p>Note that this method is a convenience method: it
   *  just calls mergePolicy.getUseCompoundFile as long as
   *  mergePolicy is an instance of {@link LogMergePolicy}.
   *  Otherwise an IllegalArgumentException is thrown.</p>
   *
   *  @see #setUseCompoundFile(boolean)
   *  @deprecated use {@link LogMergePolicy#getUseCompoundFile()}
   */
  @Deprecated
  public boolean getUseCompoundFile() {
    return getLogMergePolicy().getUseCompoundFile();
  }

  /**
   * <p>Setting to turn on usage of a compound file. When on, multiple files for
   * each segment are merged into a single file when a new segment is flushed.</p>
   *
   * <p>Note that this method is a convenience method: it just calls
   * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
   * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}.
   */
  @Deprecated
  public void setUseCompoundFile(boolean value) {
    getLogMergePolicy().setUseCompoundFile(value);
  }
  /** Expert: Set the Similarity implementation used by this IndexWriter.
   *
   * @see Similarity#setDefault(Similarity)
   * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
   */
  @Deprecated
  public void setSimilarity(Similarity similarity) {
    ensureOpen();
    this.similarity = similarity;
    docWriter.setSimilarity(similarity);
    // Required so config.getSimilarity returns the right value. But this will
    // go away together with the method in 4.0.
    config.setSimilarity(similarity);
  }

  /** Expert: Return the Similarity implementation used by this IndexWriter.
   *
   * <p>This defaults to the current value of {@link Similarity#getDefault()}.
   * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
   */
  @Deprecated
  public Similarity getSimilarity() {
    ensureOpen();
    return similarity;
  }
  /** Expert: Set the interval between indexed terms.  Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms.  Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term.  In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed.  In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data.  In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
   *
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
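   *
   * For example, with 10,000,000 unique terms and the default interval
   * of 128, an IndexReader loads roughly 10,000,000/128 = 78,125 terms
   * into memory, and a random term lookup scans about 64 terms on
   * average.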
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
   */
  @Deprecated
  public void setTermIndexInterval(int interval) {
    ensureOpen();
    config.setTermIndexInterval(interval);
  }

  /** Expert: Return the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
   */
  @Deprecated
  public int getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return config.getTermIndexInterval();
  }
  /**
   * Constructs an IndexWriter for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If <code>create</code>
   * is true, then a new, empty index will be created in
   * <code>d</code>, replacing the index already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *   via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Constructs an IndexWriter for the index in
   * <code>d</code>, first creating it if it does not
   * already exist.  Text will be analyzed with
   * <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
   *   via the MaxFieldLength constructor.
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>,
   * first creating it if it does not already exist.  Text
   * will be analyzed with <code>a</code>.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be
   *  read/written to or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>.  If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths.  Value is in number of terms/tokens
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
        create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Expert: constructs an IndexWriter on specific commit
   * point, with a custom {@link IndexDeletionPolicy}, for
   * the index in <code>d</code>.  Text will be analyzed
   * with <code>a</code>.
   *
   * <p>This is only meaningful if you've used a {@link
   * IndexDeletionPolicy} in the past that keeps more than
   * just the last commit.
   *
   * <p>This operation is similar to {@link #rollback()},
   * except that method can only rollback what's been done
   * with the current instance of IndexWriter since its last
   * commit, whereas this method can rollback to an
   * arbitrary commit point from the past, assuming the
   * {@link IndexDeletionPolicy} has preserved past
   * commits.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param mfl whether or not to limit field lengths, value is in number of terms/tokens.  See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
   * @param commit which commit to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
   */
  @Deprecated
  public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this(d, new IndexWriterConfig(Version.LUCENE_31, a)
        .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
    setMaxFieldLength(mfl.getLimit());
  }
  /**
   * Constructs a new IndexWriter per the settings given in <code>conf</code>.
   * Note that the passed in {@link IndexWriterConfig} is
   * privately cloned; if you need to make subsequent "live"
   * changes to the configuration use {@link #getConfig}.
   * <p>
   *
   * @param d
   *          the index directory. The index is either created or appended
   *          according to <code>conf.getOpenMode()</code>.
   * @param conf
   *          the configuration settings according to which IndexWriter should
   *          be initialized.
   * @throws CorruptIndexException
   *           if the index is corrupt
   * @throws LockObtainFailedException
   *           if another writer has this index open (<code>write.lock</code>
   *           could not be obtained)
   * @throws IOException
   *           if the directory cannot be read/written to, or if it does not
   *           exist and <code>conf.getOpenMode()</code> is
   *           <code>OpenMode.APPEND</code> or if there is any other low-level
   *           IO error
   */
  public IndexWriter(Directory d, IndexWriterConfig conf)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    config = (IndexWriterConfig) conf.clone();
    directory = d;
    analyzer = conf.getAnalyzer();
    infoStream = defaultInfoStream;
    writeLockTimeout = conf.getWriteLockTimeout();
    similarity = conf.getSimilarity();
    mergePolicy = conf.getMergePolicy();
    mergePolicy.setIndexWriter(this);
    mergeScheduler = conf.getMergeScheduler();
    bufferedDeletesStream = new BufferedDeletesStream(messageID);
    bufferedDeletesStream.setInfoStream(infoStream);
    poolReaders = conf.getReaderPooling();

    writeLock = directory.makeLock(WRITE_LOCK_NAME);

    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);

    OpenMode mode = conf.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !IndexReader.indexExists(directory);
    }

    boolean success = false;

    // TODO: we should check whether this index is too old,
    // and throw an IndexFormatTooOldExc up front, here,
    // instead of later when merge, applyDeletes, getReader
    // is attempted.  I think to do this we should store the
    // oldest segment's version in segments_N.
    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }

        // Record that we have a change (zero out all
        // segments) pending:
        changeCount++;
        segmentInfos.changed();
      } else {
        segmentInfos.read(directory);

        IndexCommit commit = conf.getIndexCommit();
        if (commit != null) {
          // Swap out all segments, but, keep metadata in
          // SegmentInfos, like version & generation, to
          // preserve write-once.  This is important if
          // readers are open against the future commit
          // points.
          if (commit.getDirectory() != directory)
            throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
          SegmentInfos oldInfos = new SegmentInfos();
          oldInfos.read(directory, commit.getSegmentsFileName());
          segmentInfos.replace(oldInfos);
          changeCount++;
          segmentInfos.changed();
          if (infoStream != null)
            message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

      docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      synchronized(this) {
        deleter = new IndexFileDeleter(directory,
                                       conf.getIndexDeletionPolicy(),
                                       segmentInfos, infoStream,
                                       this);
      }

      if (deleter.startingCommitDeleted) {
        // Deletion policy deleted the "head" commit point.
        // We have to mark ourself as changed so that if we
        // are closed w/o any further changes we write a new
        // segments_N file.
        changeCount++;
        segmentInfos.changed();
      }

      if (infoStream != null) {
        messageState();
      }

      success = true;

    } finally {
      if (!success) {
        if (infoStream != null) {
          message("init: hit exception on init; releasing write lock");
        }
        try {
          writeLock.release();
        } catch (Throwable t) {
          // don't mask the original exception
        }
        writeLock = null;
      }
    }
  }
  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
    Directory cfsDir = null;
    try {
      if (info.getUseCompoundFile()) {
        cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
      } else {
        cfsDir = directory;
      }
      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
    } finally {
      if (info.getUseCompoundFile() && cfsDir != null) {
        cfsDir.close();
      }
    }
  }

  private FieldInfos getCurrentFieldInfos() throws IOException {
    final FieldInfos fieldInfos;
    if (segmentInfos.size() > 0) {
      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
        // Pre-3.1 index.  In this case we sweep all
        // segments, merging their FieldInfos:
        fieldInfos = new FieldInfos();
        for(SegmentInfo info : segmentInfos) {
          final FieldInfos segFieldInfos = getFieldInfos(info);
          final int fieldCount = segFieldInfos.size();
          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
          }
        }
      } else {
        // Already a 3.1 index; just seed the FieldInfos
        // from the last segment
        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
      }
    } else {
      fieldInfos = new FieldInfos();
    }
    return fieldInfos;
  }
  /**
   * Returns the private {@link IndexWriterConfig}, cloned
   * from the {@link IndexWriterConfig} passed to
   * {@link #IndexWriter(Directory, IndexWriterConfig)}.
   * <p>
   * <b>NOTE:</b> some settings may be changed on the
   * returned {@link IndexWriterConfig}, and will take
   * effect in the current IndexWriter instance. See the
   * javadocs for the specific setters in {@link
   * IndexWriterConfig} for details.
   */
  public IndexWriterConfig getConfig() {
    ensureOpen(false);
    return config;
  }
  /**
   * Expert: set the merge policy used by this writer.
   *
   * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
   */
  @Deprecated
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    mergePolicy.setIndexWriter(this);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
    // Required so config.getMergePolicy returns the right value. But this will
    // go away together with the method in 4.0.
    config.setMergePolicy(mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   *
   * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
   */
  @Deprecated
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }
  /**
   * Expert: set the merge scheduler used by this writer.
   * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
   */
  @Deprecated
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
    // Required so config.getMergeScheduler returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMergeScheduler(mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler(MergeScheduler)
   * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
   */
  @Deprecated
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }
  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds.  Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
   */
  @Deprecated
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
   */
  @Deprecated
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }
  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.  This setting refers to the number of running terms,
   * not to the number of different terms.
   *
   * <strong>Note:</strong> this silently truncates large documents, excluding
   * from the index all terms that occur further in the document.  If you know
   * your source documents are large, be sure to set this value high enough to
   * accommodate the expected size.  If you set it to Integer.MAX_VALUE, then the
   * only limit is your memory, but you should anticipate an OutOfMemoryError.
   *
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be
   * indexed for a field.
   *
   * @deprecated use {@link LimitTokenCountAnalyzer} instead. Note that the
   *             behavior slightly changed - the analyzer limits the number of
   *             tokens per token stream created, while this setting limits the
   *             total number of tokens to index. This only matters if you index
   *             many multi-valued fields though.
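   *
   * <p>For example, a hedged sketch of the suggested replacement (the
   * wrapped analyzer and the limit of 10000 are illustrative choices):</p>
   *
   * <pre>
   * Analyzer base = new StandardAnalyzer(Version.LUCENE_31);
   * // Cap each token stream at 10000 tokens:
   * Analyzer limited = new LimitTokenCountAnalyzer(base, 10000);
   * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, limited);
   * </pre>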
   */
  @Deprecated
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   * @deprecated use {@link LimitTokenCountAnalyzer} to limit number of tokens.
   */
  @Deprecated
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }
  /**
   * @deprecated use {@link
   *  IndexWriterConfig#setReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public void setReaderTermsIndexDivisor(int divisor) {
    ensureOpen();
    config.setReaderTermsIndexDivisor(divisor);
    if (infoStream != null) {
      message("setReaderTermsIndexDivisor " + divisor);
    }
  }

  /**
   * @deprecated use {@link
   *  IndexWriterConfig#getReaderTermsIndexDivisor} instead.
   */
  @Deprecated
  public int getReaderTermsIndexDivisor() {
    ensureOpen();
    return config.getReaderTermsIndexDivisor();
  }
  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment.  Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents.  Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    pushMaxBufferedDocs();
    if (infoStream != null) {
      message("setMaxBufferedDocs " + maxBufferedDocs);
    }
    // Required so config.getMaxBufferedDocs returns the right value. But this
    // will go away together with the method in 4.0.
    config.setMaxBufferedDocs(maxBufferedDocs);
  }
  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (config.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = config.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
   */
  @Deprecated
  public int getMaxBufferedDocs() {
    ensureOpen();
    return config.getMaxBufferedDocs();
  }
  /** Determines the amount of RAM that may be used for
   * buffering added documents and deletions before they are
   * flushed to the Directory.  Generally for faster
   * indexing performance it's best to flush by RAM usage
   * instead of document count and use as large a RAM buffer
   * as you can.</p>
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents and deletions use this much RAM.
   * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to RAM usage.  Note that if
   * flushing by document count is also enabled, then the
   * flush will be triggered by whichever comes first.</p>
   *
   * <p><b>NOTE</b>: the accounting of RAM usage for pending
   * deletions is only approximate.  Specifically, if you
   * delete by Query, Lucene currently has no way to measure
   * the RAM usage of individual Queries so the accounting
   * will under-estimate and you should compensate by either
   * calling commit() periodically yourself, or by using
   * {@link #setMaxBufferedDeleteTerms} to flush by count
   * instead of RAM usage (each buffered delete Query counts
   * as one).</p>
   *
   * <p><b>NOTE</b>: because IndexWriter uses
   * <code>int</code>s when managing its internal storage,
   * the absolute maximum value for this setting is somewhat
   * less than 2048 MB.  The precise limit depends on
   * various factors, such as how large your documents are,
   * how many fields have norms, etc., so it's best to set
   * this value comfortably under 2048.</p>
   *
   * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
   */
  @Deprecated
  public void setRAMBufferSizeMB(double mb) {
    if (infoStream != null) {
      message("setRAMBufferSizeMB " + mb);
    }
    // Required so config.getRAMBufferSizeMB returns the right value. But this
    // will go away together with the method in 4.0.
    config.setRAMBufferSizeMB(mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
   */
  @Deprecated
  public double getRAMBufferSizeMB() {
    return config.getRAMBufferSizeMB();
  }
  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDeleteTerms
   * is enabled but smaller than 1
   * @see #setRAMBufferSizeMB
   * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
   */
  @Deprecated
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    ensureOpen();
    if (infoStream != null)
      message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
    // Required so config.getMaxBufferedDeleteTerms returns the right value. But
    // this will go away together with the method in 4.0.
    config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
  }

  /**
   * Returns the number of buffered deleted terms that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDeleteTerms
   * @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
   */
  @Deprecated
  public int getMaxBufferedDeleteTerms() {
    ensureOpen();
    return config.getMaxBufferedDeleteTerms();
  }
1606 /** Determines how often segment indices are merged by addDocument(). With
1607 * smaller values, less RAM is used while indexing, and searches on
1608 * unoptimized indices are faster, but indexing speed is slower. With larger
1609 * values, more RAM is used during indexing, and while searches on unoptimized
1610 * indices are slower, indexing is faster. Thus larger values (> 10) are best
1611 * for batch index creation, and smaller values (< 10) for indices that are
1612 * interactively maintained.
1614 * <p>Note that this method is a convenience method: it
1615 * just calls mergePolicy.setMergeFactor as long as
1616 * mergePolicy is an instance of {@link LogMergePolicy}.
1617 * Otherwise an IllegalArgumentException is thrown.</p>
1619 * <p>This must never be less than 2. The default value is 10.
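*
* <p>A sketch of the direct route, assuming the writer was configured
* with a {@link LogMergePolicy}:</p>
*
* <pre>
* LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
* lmp.setMergeFactor(20); // favor batch-indexing throughput over search speed
* </pre>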
1620 * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
1623 public void setMergeFactor(int mergeFactor) {
1624 getLogMergePolicy().setMergeFactor(mergeFactor);
1628 * <p>Returns the number of segments that are merged at
1629 * once and also controls the total number of segments
1630 * allowed to accumulate in the index.</p>
1632 * <p>Note that this method is a convenience method: it
1633 * just calls mergePolicy.getMergeFactor as long as
1634 * mergePolicy is an instance of {@link LogMergePolicy}.
1635 * Otherwise an IllegalArgumentException is thrown.</p>
1637 * @see #setMergeFactor
1638 * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
1641 public int getMergeFactor() {
1642 return getLogMergePolicy().getMergeFactor();
1645 /** If non-null, this will be the default infoStream used
1646 * by a newly instantiated IndexWriter.
1647 * @see #setInfoStream
1649 public static void setDefaultInfoStream(PrintStream infoStream) {
1650 IndexWriter.defaultInfoStream = infoStream;
1654 * Returns the current default infoStream for newly
1655 * instantiated IndexWriters.
1656 * @see #setDefaultInfoStream
1658 public static PrintStream getDefaultInfoStream() {
1659 return IndexWriter.defaultInfoStream;
1662 /** If non-null, information about merges, deletes and a
1663 * message when maxFieldLength is reached will be printed to this.
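*
* <p>For example, to echo diagnostics to stdout while debugging (pass
* <code>null</code> to turn verbosity off again):</p>
*
* <pre>
* writer.setInfoStream(System.out);
* </pre>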
1666 public void setInfoStream(PrintStream infoStream) throws IOException {
1668 this.infoStream = infoStream;
1669 docWriter.setInfoStream(infoStream);
1670 deleter.setInfoStream(infoStream);
1671 bufferedDeletesStream.setInfoStream(infoStream);
1672 if (infoStream != null)
1676 private void messageState() throws IOException {
1677 message("\ndir=" + directory + "\n" +
1678 "index=" + segString() + "\n" +
1679 "version=" + Constants.LUCENE_VERSION + "\n" +
1684 * Returns the current infoStream in use by this writer.
1685 * @see #setInfoStream
1687 public PrintStream getInfoStream() {
 return infoStream;
1692 /** Returns true if verbose output is enabled (i.e., infoStream != null). */
1693 public boolean verbose() {
1694 return infoStream != null;
1698 * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
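*
* <p>A minimal sketch of the replacement API, waiting up to two
* seconds for the lock (the timeout is arbitrary):</p>
*
* <pre>
* IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
* conf.setWriteLockTimeout(2000L);
* </pre>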
1699 * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1700 * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
1703 public void setWriteLockTimeout(long writeLockTimeout) {
1705 this.writeLockTimeout = writeLockTimeout;
1706 // Required so config.getWriteLockTimeout returns the right value. But this
1707 // will go away together with the method in 4.0.
1708 config.setWriteLockTimeout(writeLockTimeout);
1712 * Returns allowed timeout when acquiring the write lock.
1713 * @see #setWriteLockTimeout
1714 * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
1717 public long getWriteLockTimeout() {
1719 return writeLockTimeout;
1723 * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in milliseconds).
1725 * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
1728 public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1729 IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
1733 * Returns default write lock timeout for newly
1734 * instantiated IndexWriters.
1735 * @see #setDefaultWriteLockTimeout
1736 * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
1739 public static long getDefaultWriteLockTimeout() {
1740 return IndexWriterConfig.getDefaultWriteLockTimeout();
1744 * Commits all changes to an index and closes all
1745 * associated files. Note that this may be a costly
1746 * operation, so, try to re-use a single writer instead of
1747 * closing and opening a new one. See {@link #commit()} for
1748 * caveats about write caching done by some IO devices.
1750 * <p> If an Exception is hit during close, eg due to disk
1751 * full or some other reason, then both the on-disk index
1752 * and the internal state of the IndexWriter instance will
1753 * be consistent. However, the close will not be complete
1754 * even though part of it (flushing buffered documents)
1755 * may have succeeded, so the write lock will still be held.
1758 * <p> If you can correct the underlying cause (eg free up
1759 * some disk space) then you can call close() again.
1760 * Failing that, if you want to force the write lock to be
1761 * released (dangerous, because you may then lose buffered
1762 * docs in the IndexWriter instance) then you can do
1763 * something like this:</p>
*
* <pre>
* try {
*   writer.close();
* } finally {
*   if (IndexWriter.isLocked(directory)) {
*     IndexWriter.unlock(directory);
*   }
* }
* </pre>
1775 * after which, you must be certain not to use the writer
1776 * instance anymore.</p>
1778 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1779 * you should immediately close the writer, again. See <a
1780 * href="#OOME">above</a> for details.</p>
1782 * @throws CorruptIndexException if the index is corrupt
1783 * @throws IOException if there is a low-level IO error
1785 public void close() throws CorruptIndexException, IOException {
1790 * Closes the index with or without waiting for currently
1791 * running merges to finish. This is only meaningful when
1792 * using a MergeScheduler that runs merges in background
1795 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1796 * you should immediately close the writer, again. See <a
1797 * href="#OOME">above</a> for details.</p>
1799 * <p><b>NOTE</b>: it is dangerous to always call
1800 * close(false), especially when IndexWriter is not open
1801 * for very long, because this can result in "merge
1802 * starvation" whereby long merges will never have a
1803 * chance to finish. This will cause too many segments in
1804 * your index over time.</p>
1806 * @param waitForMerges if true, this call will block
1807 * until all merges complete; else, it will ask all
1808 * running merges to abort, wait until those merges have
1809 * finished (which should be at most a few seconds), and then return.
1812 public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
1814 // Ensure that only one thread actually gets to do the closing:
1815 if (shouldClose()) {
1816 // If any methods have hit OutOfMemoryError, then abort
1817 // on close, in case the internal state of IndexWriter
1818 // or DocumentsWriter is corrupt
1822 closeInternal(waitForMerges);
1826 // Returns true if this thread should attempt to close, or
1827 // false if IndexWriter is now closed; else, waits until
1828 // another thread finishes closing
1829 synchronized private boolean shouldClose() {
1836 // Another thread is presently trying to close;
1837 // wait until it finishes one way (closes
1838 // successfully) or another (fails to close)
1846 private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
1849 if (infoStream != null) {
1850 message("now flush at close waitForMerges=" + waitForMerges);
1855 // Only allow a new merge to be triggered if we are
1856 // going to wait for merges:
1858 flush(waitForMerges, true);
1862 // Give merge scheduler last chance to run, in case
1863 // any pending merges are waiting:
1864 mergeScheduler.merge(this);
1866 mergePolicy.close();
1868 synchronized(this) {
1869 finishMerges(waitForMerges);
1873 mergeScheduler.close();
1875 if (infoStream != null)
1876 message("now call final commit()");
1879 commitInternal(null);
1882 if (infoStream != null)
1883 message("at close: " + segString());
1885 synchronized(this) {
1891 if (writeLock != null) {
1892 writeLock.release(); // release write lock
1895 synchronized(this) {
1898 } catch (OutOfMemoryError oom) {
1899 handleOOM(oom, "closeInternal");
1901 synchronized(this) {
1905 if (infoStream != null)
1906 message("hit exception while closing");
1912 /** Returns the Directory used by this index. */
1913 public Directory getDirectory() {
1914 // Pass false because the flush during closing calls getDirectory
 ensureOpen(false);
 return directory;
1919 /** Returns the analyzer used by this index. */
1920 public Analyzer getAnalyzer() {
1925 /** Returns total number of docs in this index, including
1926 * docs not yet flushed (still in the RAM buffer),
1927 * not counting deletions.
1929 public synchronized int maxDoc() {
1932 if (docWriter != null)
1933 count = docWriter.getNumDocs();
1937 count += segmentInfos.totalDocCount();
1941 /** Returns total number of docs in this index, including
1942 * docs not yet flushed (still in the RAM buffer), and
1943 * including deletions. <b>NOTE:</b> buffered deletions
1944 * are not counted. If you really need these to be
1945 * counted you should call {@link #commit()} first.
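*
* <p>A sketch of the distinction, assuming <code>writer</code> holds
* ten committed documents:</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "7")); // buffered, not yet flushed
* int max = writer.maxDoc();  // 10: deletions are never subtracted
* int num = writer.numDocs(); // still 10: the deletion is only buffered
* writer.commit();            // flushes and applies the deletion
* num = writer.numDocs();     // now 9
* </pre>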
1947 public synchronized int numDocs() throws IOException {
1950 if (docWriter != null)
1951 count = docWriter.getNumDocs();
1955 for (final SegmentInfo info : segmentInfos) {
1956 count += info.docCount - numDeletedDocs(info);
1961 public synchronized boolean hasDeletions() throws IOException {
1963 if (bufferedDeletesStream.any()) {
1966 if (docWriter.anyDeletions()) {
1969 for (final SegmentInfo info : segmentInfos) {
1970 if (info.hasDeletions()) {
1978 * The maximum number of terms that will be indexed for a single field in a
1979 * document. This limits the amount of memory required for indexing, so that
1980 * collections with very large files will not crash the indexing process by
1981 * running out of memory.<p/>
1982 * Note that this effectively truncates large documents, excluding from the
1983 * index terms that occur further in the document. If you know your source
1984 * documents are large, be sure to set this value high enough to accommodate
1985 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1986 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1987 * By default, no more than 10,000 terms will be indexed for a field.
1989 * @see MaxFieldLength
1990 * @deprecated remove in 4.0
1993 private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
1996 * Adds a document to this index. If the document contains more than
1997 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2000 * <p> Note that if an Exception is hit (for example disk full)
2001 * then the index will be consistent, but this document
2002 * may not have been added. Furthermore, it's possible
2003 * the index will have one segment in non-compound format
2004 * even when using compound files (when a merge has
2005 * partially succeeded).</p>
2007 * <p> This method periodically flushes pending documents
2008 * to the Directory (see <a href="#flush">above</a>), and
2009 * also periodically triggers segment merges in the index
2010 * according to the {@link MergePolicy} in use.</p>
2012 * <p>Merges temporarily consume space in the
2013 * directory. The amount of space required is up to 1X the
2014 * size of all segments being merged, when no
2015 * readers/searchers are open against the index, and up to
2016 * 2X the size of all segments being merged when
2017 * readers/searchers are open against the index (see
2018 * {@link #forceMerge(int)} for details). The sequence of
2019 * primitive merge operations performed is governed by the merge policy.
2022 * <p>Note that each term in the document can be no longer
2023 * than 16383 characters, otherwise an
2024 * IllegalArgumentException will be thrown.</p>
2026 * <p>Note that it's possible to create an invalid Unicode
2027 * string in java if a UTF16 surrogate pair is malformed.
2028 * In this case, the invalid characters are silently
2029 * replaced with the Unicode replacement character U+FFFD.</p>
2032 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2033 * you should immediately close the writer. See <a
2034 * href="#OOME">above</a> for details.</p>
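*
* <p>A minimal sketch (field names and flags are illustrative):</p>
*
* <pre>
* Document doc = new Document();
* doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
* doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
* writer.addDocument(doc);
* </pre>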
2036 * @throws CorruptIndexException if the index is corrupt
2037 * @throws IOException if there is a low-level IO error
2039 public void addDocument(Document doc) throws CorruptIndexException, IOException {
2040 addDocument(doc, analyzer);
2044 * Adds a document to this index, using the provided analyzer instead of the
2045 * value of {@link #getAnalyzer()}. If the document contains more than
2046 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are discarded.
2049 * <p>See {@link #addDocument(Document)} for details on
2050 * index and IndexWriter state after an Exception, and
2051 * flushing/merging temporary free space requirements.</p>
2053 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2054 * you should immediately close the writer. See <a
2055 * href="#OOME">above</a> for details.</p>
2057 * @throws CorruptIndexException if the index is corrupt
2058 * @throws IOException if there is a low-level IO error
2060 public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
2062 boolean doFlush = false;
2063 boolean success = false;
2066 doFlush = docWriter.updateDocument(doc, analyzer, null);
2069 if (!success && infoStream != null)
2070 message("hit exception adding document");
2074 } catch (OutOfMemoryError oom) {
2075 handleOOM(oom, "addDocument");
2080 * Atomically adds a block of documents with sequentially
2081 * assigned document IDs, such that an external reader
2082 * will see all or none of the documents.
2084 * <p><b>WARNING</b>: the index does not currently record
2085 * which documents were added as a block. Today this is
2086 * fine, because merging will preserve the block (as long
2087 * as none of them were deleted). But it's possible in the
2088 * future that Lucene may more aggressively re-order
2089 * documents (for example, perhaps to obtain better index
2090 * compression), in which case you may need to fully
2091 * re-index your documents at that time.
2093 * <p>See {@link #addDocument(Document)} for details on
2094 * index and IndexWriter state after an Exception, and
2095 * flushing/merging temporary free space requirements.</p>
2097 * <p><b>NOTE</b>: tools that do offline splitting of an index
2098 * (for example, IndexSplitter in contrib) or
2099 * re-sorting of documents (for example, IndexSorter in
2100 * contrib) are not aware of these atomically added documents
2101 * and will likely break them up. Use such tools at your own risk!
2104 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2105 * you should immediately close the writer. See <a
2106 * href="#OOME">above</a> for details.</p>
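*
* <p>A sketch of adding a parent document and its children as one
* block (<code>childDoc1</code>, <code>childDoc2</code> and
* <code>parentDoc</code> are Documents built elsewhere; putting the
* parent last is the convention block-join search expects):</p>
*
* <pre>
* List<Document> block = new ArrayList<Document>();
* block.add(childDoc1);
* block.add(childDoc2);
* block.add(parentDoc); // parent last
* writer.addDocuments(block);
* </pre>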
2108 * @throws CorruptIndexException if the index is corrupt
2109 * @throws IOException if there is a low-level IO error
2111 * @lucene.experimental
2113 public void addDocuments(Collection<Document> docs) throws CorruptIndexException, IOException {
2114 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2115 addDocuments(docs, analyzer);
2119 * Atomically adds a block of documents, analyzed using the
2120 * provided analyzer, with sequentially assigned document
2121 * IDs, such that an external reader will see all or none of the documents.
2124 * @throws CorruptIndexException if the index is corrupt
2125 * @throws IOException if there is a low-level IO error
2127 * @lucene.experimental
2129 public void addDocuments(Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2130 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2131 updateDocuments(null, docs, analyzer);
2135 * Atomically deletes documents matching the provided
2136 * delTerm and adds a block of documents with sequentially
2137 * assigned document IDs, such that an external reader
2138 * will see all or none of the documents.
2140 * See {@link #addDocuments(Collection)}.
2142 * @throws CorruptIndexException if the index is corrupt
2143 * @throws IOException if there is a low-level IO error
2145 * @lucene.experimental
2147 public void updateDocuments(Term delTerm, Collection<Document> docs) throws CorruptIndexException, IOException {
2148 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2149 updateDocuments(delTerm, docs, analyzer);
2153 * Atomically deletes documents matching the provided
2154 * delTerm and adds a block of documents, analyzed using
2155 * the provided analyzer, with sequentially
2156 * assigned document IDs, such that an external reader
2157 * will see all or none of the documents.
2159 * See {@link #addDocuments(Collection)}.
2161 * @throws CorruptIndexException if the index is corrupt
2162 * @throws IOException if there is a low-level IO error
2164 * @lucene.experimental
2166 public void updateDocuments(Term delTerm, Collection<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
2167 // TODO: if we backport DWPT we should change arg to Iterable<Document>
2170 boolean success = false;
2171 boolean doFlush = false;
2173 doFlush = docWriter.updateDocuments(docs, analyzer, delTerm);
2176 if (!success && infoStream != null) {
2177 message("hit exception updating document");
2183 } catch (OutOfMemoryError oom) {
2184 handleOOM(oom, "updateDocuments");
2189 * Deletes the document(s) containing <code>term</code>.
2191 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2192 * you should immediately close the writer. See <a
2193 * href="#OOME">above</a> for details.</p>
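*
* <p>For example (a sketch, keyed on a unique id field):</p>
*
* <pre>
* writer.deleteDocuments(new Term("id", "42"));
* </pre>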
2195 * @param term the term to identify the documents to be deleted
2196 * @throws CorruptIndexException if the index is corrupt
2197 * @throws IOException if there is a low-level IO error
2199 public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
2202 if (docWriter.deleteTerm(term, false)) {
2205 } catch (OutOfMemoryError oom) {
2206 handleOOM(oom, "deleteDocuments(Term)");
2211 * Deletes the document(s) containing any of the
2212 * terms. All deletes are flushed at the same time.
2214 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2215 * you should immediately close the writer. See <a
2216 * href="#OOME">above</a> for details.</p>
2218 * @param terms array of terms to identify the documents
2220 * @throws CorruptIndexException if the index is corrupt
2221 * @throws IOException if there is a low-level IO error
2223 public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
2226 if (docWriter.deleteTerms(terms)) {
2229 } catch (OutOfMemoryError oom) {
2230 handleOOM(oom, "deleteDocuments(Term..)");
2235 * Deletes the document(s) matching the provided query.
2237 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2238 * you should immediately close the writer. See <a
2239 * href="#OOME">above</a> for details.</p>
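*
* <p>For example (a sketch), to delete every document tagged "old":</p>
*
* <pre>
* writer.deleteDocuments(new TermQuery(new Term("tag", "old")));
* </pre>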
2241 * @param query the query to identify the documents to be deleted
2242 * @throws CorruptIndexException if the index is corrupt
2243 * @throws IOException if there is a low-level IO error
2245 public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
2248 if (docWriter.deleteQuery(query)) {
2251 } catch (OutOfMemoryError oom) {
2252 handleOOM(oom, "deleteDocuments(Query)");
2257 * Deletes the document(s) matching any of the provided queries.
2258 * All deletes are flushed at the same time.
2260 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2261 * you should immediately close the writer. See <a
2262 * href="#OOME">above</a> for details.</p>
2264 * @param queries array of queries to identify the documents
2266 * @throws CorruptIndexException if the index is corrupt
2267 * @throws IOException if there is a low-level IO error
2269 public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
2272 if (docWriter.deleteQueries(queries)) {
2275 } catch (OutOfMemoryError oom) {
2276 handleOOM(oom, "deleteDocuments(Query..)");
2281 * Updates a document by first deleting the document(s)
2282 * containing <code>term</code> and then adding the new
2283 * document. The delete and then add are atomic as seen
2284 * by a reader on the same index (flush may happen only after the add).
2287 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2288 * you should immediately close the writer. See <a
2289 * href="#OOME">above</a> for details.</p>
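*
* <p>A minimal sketch, keyed on a unique id field:</p>
*
* <pre>
* Document doc = new Document();
* doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
* // ... add the remaining fields ...
* writer.updateDocument(new Term("id", "42"), doc);
* </pre>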
2291 * @param term the term to identify the document(s) to be deleted
2293 * @param doc the document to be added
2294 * @throws CorruptIndexException if the index is corrupt
2295 * @throws IOException if there is a low-level IO error
2297 public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
2299 updateDocument(term, doc, getAnalyzer());
2303 * Updates a document by first deleting the document(s)
2304 * containing <code>term</code> and then adding the new
2305 * document. The delete and then add are atomic as seen
2306 * by a reader on the same index (flush may happen only after the add).
2309 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2310 * you should immediately close the writer. See <a
2311 * href="#OOME">above</a> for details.</p>
2313 * @param term the term to identify the document(s) to be deleted
2315 * @param doc the document to be added
2316 * @param analyzer the analyzer to use when analyzing the document
2317 * @throws CorruptIndexException if the index is corrupt
2318 * @throws IOException if there is a low-level IO error
2320 public void updateDocument(Term term, Document doc, Analyzer analyzer)
2321 throws CorruptIndexException, IOException {
2324 boolean doFlush = false;
2325 boolean success = false;
2327 doFlush = docWriter.updateDocument(doc, analyzer, term);
2330 if (!success && infoStream != null)
2331 message("hit exception updating document");
2336 } catch (OutOfMemoryError oom) {
2337 handleOOM(oom, "updateDocument");
2342 final synchronized int getSegmentCount(){
2343 return segmentInfos.size();
2347 final synchronized int getNumBufferedDocuments(){
2348 return docWriter.getNumDocs();
2352 final synchronized int getDocCount(int i) {
2353 if (i >= 0 && i < segmentInfos.size()) {
2354 return segmentInfos.info(i).docCount;
2361 final int getFlushCount() {
2362 return flushCount.get();
2366 final int getFlushDeletesCount() {
2367 return flushDeletesCount.get();
2370 final String newSegmentName() {
2371 // Cannot synchronize on IndexWriter because that causes deadlock
2373 synchronized(segmentInfos) {
2374 // Important to increment changeCount so that the
2375 // segmentInfos is written on close. Otherwise we
2376 // could close, re-open and re-return the same segment
2377 // name that was previously returned which can cause
2378 // problems at least with ConcurrentMergeScheduler.
2380 segmentInfos.changed();
2381 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
2385 /** If non-null, information about merges will be printed to this.
2387 private PrintStream infoStream;
2388 private static PrintStream defaultInfoStream;
2390 /** This method has been deprecated, as it is horribly
2391 * inefficient and very rarely justified. Lucene's
2392 * multi-segment search performance has improved over
2393 * time, and the default TieredMergePolicy now targets
2394 * segments with deletions.
2398 public void optimize() throws CorruptIndexException, IOException {
2399 forceMerge(1, true);
2402 /** This method has been deprecated, as it is horribly
2403 * inefficient and very rarely justified. Lucene's
2404 * multi-segment search performance has improved over
2405 * time, and the default TieredMergePolicy now targets
2406 * segments with deletions.
2410 public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
2411 forceMerge(maxNumSegments, true);
2414 /** This method has been deprecated, as it is horribly
2415 * inefficient and very rarely justified. Lucene's
2416 * multi-segment search performance has improved over
2417 * time, and the default TieredMergePolicy now targets
2418 * segments with deletions.
2422 public void optimize(boolean doWait) throws CorruptIndexException, IOException {
2423 forceMerge(1, doWait);
2427 * Forces merge policy to merge segments until there are <=
2428 * maxNumSegments. The actual merges to be
2429 * executed are determined by the {@link MergePolicy}.
2431 * <p>This is a horribly costly operation, especially when
2432 * you pass a small {@code maxNumSegments}; usually you
2433 * should only call this if the index is static (will no
2434 * longer be changed).</p>
2436 * <p>Note that this requires up to 2X the index size free
2437 * space in your Directory (3X if you're using compound
2438 * file format). For example, if your index size is 10 MB
2439 * then you need up to 20 MB free for this to complete (30
2440 * MB if you're using compound file format). Also,
2441 * it's best to call {@link #commit()} afterwards,
2442 * to allow IndexWriter to free up disk space.</p>
2444 * <p>If some but not all readers re-open while merging
2445 * is underway, this will cause > 2X temporary
2446 * space to be consumed as those new readers will then
2447 * hold open the temporary segments at that time. It is
2448 * best not to re-open readers while merging is running.</p>
2450 * <p>The actual temporary usage could be much less than
2451 * these figures (it depends on many factors).</p>
2453 * <p>In general, once this completes, the total size of the
2454 * index will be less than the size of the starting index.
2455 * It could be quite a bit smaller (if there were many
2456 * pending deletes) or just slightly smaller.</p>
2458 * <p>If an Exception is hit, for example
2459 * due to disk full, the index will not be corrupt and no
2460 * documents will have been lost. However, it may have
2461 * been partially merged (some segments were merged but
2462 * not all), and it's possible that one of the segments in
2463 * the index will be in non-compound format even when
2464 * using compound file format. This will occur when the
2465 * Exception is hit during conversion of the segment into
2466 * compound format.</p>
2468 * <p>This call will merge those segments present in
2469 * the index when the call started. If other threads are
2470 * still adding documents and flushing segments, those
2471 * newly created segments will not be merged unless you
2472 * call forceMerge again.</p>
2474 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2475 * you should immediately close the writer. See <a
2476 * href="#OOME">above</a> for details.</p>
2478 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2479 * with <tt>false</tt>, which aborts all running merges,
2480 * then any thread still running this method might hit a
2481 * {@link MergePolicy.MergeAbortedException}.
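*
* <p>Typical usage on an index that will no longer change, as a
* sketch:</p>
*
* <pre>
* writer.forceMerge(1); // merge down to a single segment
* writer.commit();      // let IndexWriter free the merged-away files
* </pre>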
2483 * @throws CorruptIndexException if the index is corrupt
2484 * @throws IOException if there is a low-level IO error
2485 * @see MergePolicy#findMerges
2487 * @param maxNumSegments maximum number of segments left
2488 * in the index after merging finishes
2490 public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException {
2491 forceMerge(maxNumSegments, true);
2494 /** Just like {@link #forceMerge(int)}, except you can
2495 * specify whether the call should block until
2496 * all merging completes. This is only meaningful with a
2497 * {@link MergeScheduler} that is able to run merges in
2498 * background threads.
2500 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2501 * you should immediately close the writer. See <a
2502 * href="#OOME">above</a> for details.</p>
2504 public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
2507 if (maxNumSegments < 1)
2508 throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2510 if (infoStream != null) {
2511 message("forceMerge: index now " + segString());
2512 message("now flush at forceMerge");
2517 synchronized(this) {
2518 resetMergeExceptions();
2519 segmentsToMerge.clear();
2520 for(SegmentInfo info : segmentInfos) {
2521 segmentsToMerge.put(info, Boolean.TRUE);
2523 mergeMaxNumSegments = maxNumSegments;
2525 // Now mark all pending & running merges as isMaxNumSegments:
2526 for(final MergePolicy.OneMerge merge : pendingMerges) {
2527 merge.maxNumSegments = maxNumSegments;
2528 segmentsToMerge.put(merge.info, Boolean.TRUE);
2531 for (final MergePolicy.OneMerge merge : runningMerges) {
2532 merge.maxNumSegments = maxNumSegments;
2533 segmentsToMerge.put(merge.info, Boolean.TRUE);
2537 maybeMerge(maxNumSegments);
2540 synchronized(this) {
2544 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
2547 if (mergeExceptions.size() > 0) {
2548 // Forward any exceptions in background merge
2549 // threads to the current thread:
2550 final int size = mergeExceptions.size();
2551 for(int i=0;i<size;i++) {
2552 final MergePolicy.OneMerge merge = mergeExceptions.get(i);
2553 if (merge.maxNumSegments != -1) {
2554 IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
2555 final Throwable t = merge.getException();
2563 if (maxNumSegmentsMergesPending())
2570 // If close is called while we are still
2571 // running, throw an exception so the calling
2572 // thread will know merging did not complete
2577 // NOTE: in the ConcurrentMergeScheduler case, when
2578 // doWait is false, we can return immediately while
2579 // background threads accomplish the merging
2582 /** Returns true if any merges in pendingMerges or
2583 * runningMerges are maxNumSegments merges. */
2584 private synchronized boolean maxNumSegmentsMergesPending() {
2585 for (final MergePolicy.OneMerge merge : pendingMerges) {
2586 if (merge.maxNumSegments != -1)
2590 for (final MergePolicy.OneMerge merge : runningMerges) {
2591 if (merge.maxNumSegments != -1)
2598 /** This method has been deprecated, as it is horribly
2599 * inefficient and very rarely justified. Lucene's
2600 * multi-segment search performance has improved over
2601 * time, and the default TieredMergePolicy now targets
2602 * segments with deletions.
2606 public void expungeDeletes(boolean doWait) throws CorruptIndexException, IOException {
2607 forceMergeDeletes(doWait);
2610 /** Just like {@link #forceMergeDeletes()}, except you can
2611 * specify whether the call should block until the
2612 * operation completes. This is only meaningful with a
2613 * {@link MergeScheduler} that is able to run merges in
2614 * background threads.
2616 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2617 * you should immediately close the writer. See <a
2618 * href="#OOME">above</a> for details.</p>
2620 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
2621 * with <tt>false</tt>, which aborts all running merges,
2622 * then any thread still running this method might hit a
2623 * {@link MergePolicy.MergeAbortedException}.
2625 public void forceMergeDeletes(boolean doWait)
2626 throws CorruptIndexException, IOException {
2631 if (infoStream != null)
2632 message("forceMergeDeletes: index now " + segString());
2634 MergePolicy.MergeSpecification spec;
2636 synchronized(this) {
2637 spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
2639 final int numMerges = spec.merges.size();
2640 for(int i=0;i<numMerges;i++)
2641 registerMerge(spec.merges.get(i));
2645 mergeScheduler.merge(this);
2647 if (spec != null && doWait) {
2648 final int numMerges = spec.merges.size();
2649 synchronized(this) {
2650 boolean running = true;
2654 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
2657 // Check each merge that MergePolicy asked us to
2658 // do, to see if any of them are still running and
2659 // if any of them have hit an exception.
2661 for(int i=0;i<numMerges;i++) {
2662 final MergePolicy.OneMerge merge = spec.merges.get(i);
2663 if (pendingMerges.contains(merge) || runningMerges.contains(merge))
2665 Throwable t = merge.getException();
2667 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
2673 // If any of our merges are still running, wait:
2680 // NOTE: in the ConcurrentMergeScheduler case, when
2681 // doWait is false, we can return immediately while
2682 // background threads accomplish the merging
2686 /** This method has been deprecated, as it is horribly
2687 * inefficient and very rarely justified. Lucene's
2688 * multi-segment search performance has improved over
2689 * time, and the default TieredMergePolicy now targets
2690 * segments with deletions.
2694 public void expungeDeletes() throws CorruptIndexException, IOException {
2695 forceMergeDeletes();
2699 * Forces merging of all segments that have deleted
2700 * documents. The actual merges to be executed are
2701 * determined by the {@link MergePolicy}. For example,
2702 * the default {@link TieredMergePolicy} will only
2703 * pick a segment if the percentage of
2704 * deleted docs is over 10%.
2706 * <p>This is often a horribly costly operation; rarely
2707 * is it warranted.</p>
2710 * <p>To see how many deletions you have pending in your index, call
2711 * {@link IndexReader#numDeletedDocs}.</p>
2713 * <p><b>NOTE</b>: this method first flushes a new
2714 * segment (if there are indexed documents), and applies
2715 * all buffered deletes.
2717 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2718 * you should immediately close the writer. See <a
2719 * href="#OOME">above</a> for details.</p>
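*
* <p>For example (a sketch), to reclaim space after bulk deletions:</p>
*
* <pre>
* writer.deleteDocuments(new Term("status", "expired")); // illustrative term
* writer.forceMergeDeletes(); // blocks until the merges finish
* </pre>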
2721 public void forceMergeDeletes() throws CorruptIndexException, IOException {
2722 forceMergeDeletes(true);
2726 * Expert: asks the mergePolicy whether any merges are
2727 * necessary now and if so, runs the requested merges and
2728 * then iterate (test again if merges are needed) until no
2729 * more merges are returned by the mergePolicy.
2731 * Explicit calls to maybeMerge() are usually not
2732 * necessary. The most common case is when merge policy
2733 * parameters have changed.
2735 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2736 * you should immediately close the writer. See <a
2737 * href="#OOME">above</a> for details.</p>
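*
* <p>For example (a sketch), after changing merge policy parameters
* (assumes a {@link LogMergePolicy} is in use):</p>
*
* <pre>
* ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(5);
* writer.maybeMerge(); // pick up the new parameters right away
* </pre>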
2739 public final void maybeMerge() throws CorruptIndexException, IOException {
2743 private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
2745 updatePendingMerges(maxNumSegments);
2746 mergeScheduler.merge(this);
2749 private synchronized void updatePendingMerges(int maxNumSegments)
2750 throws CorruptIndexException, IOException {
2751 assert maxNumSegments == -1 || maxNumSegments > 0;
2757 // Do not start new merges if we've hit OOME
2762 final MergePolicy.MergeSpecification spec;
2763 if (maxNumSegments != -1) {
2764 spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
2766 final int numMerges = spec.merges.size();
2767 for(int i=0;i<numMerges;i++) {
2768 final MergePolicy.OneMerge merge = spec.merges.get(i);
2769 merge.maxNumSegments = maxNumSegments;
2774 spec = mergePolicy.findMerges(segmentInfos);
2778 final int numMerges = spec.merges.size();
2779 for(int i=0;i<numMerges;i++) {
2780 registerMerge(spec.merges.get(i));
2785 /** Expert: to be used by a {@link MergePolicy} to avoid
2786 * selecting merges for segments already being merged.
2787 * The returned collection is not cloned, and thus is
2788 * only safe to access if you hold IndexWriter's lock
2789 * (which you do when IndexWriter invokes the
2792 * <p>Do not alter the returned collection! */
2793 public synchronized Collection<SegmentInfo> getMergingSegments() {
2794 return mergingSegments;
2797 /** Expert: the {@link MergeScheduler} calls this method
2798 * to retrieve the next merge requested by the MergePolicy.
2801 * @lucene.experimental
2803 public synchronized MergePolicy.OneMerge getNextMerge() {
2804 if (pendingMerges.size() == 0)
 return null;
2807 // Advance the merge from pending to running
2808 MergePolicy.OneMerge merge = pendingMerges.removeFirst();
2809 runningMerges.add(merge);
2815 * Close the <code>IndexWriter</code> without committing
2816 * any changes that have occurred since the last commit
2817 * (or since it was opened, if commit hasn't been called).
2818 * This removes any temporary files that had been created,
2819 * after which the state of the index will be the same as
2820 * it was when commit() was last called or when this
2821 * writer was first opened. This also clears a previous
2822 * call to {@link #prepareCommit}.
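*
* <p>A minimal sketch; note that the writer is closed once this
* returns:</p>
*
* <pre>
* writer.rollback(); // discard everything since the last commit
* </pre>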
2823 * @throws IOException if there is a low-level IO error
2825 public void rollback() throws IOException {
2828 // Ensure that only one thread actually gets to do the closing:
2833 private void rollbackInternal() throws IOException {
2835 boolean success = false;
2837 if (infoStream != null ) {
2838 message("rollback");
2842 synchronized(this) {
2843 finishMerges(false);
2847 if (infoStream != null ) {
2848 message("rollback: done finish merges");
2851 // Must pre-close these two, in case they increment
2852 // changeCount so that we can then set it to false
2853 // before calling closeInternal
2854 mergePolicy.close();
2855 mergeScheduler.close();
2857 bufferedDeletesStream.clear();
2859 synchronized(this) {
2861 if (pendingCommit != null) {
2862 pendingCommit.rollbackCommit(directory);
2863 deleter.decRef(pendingCommit);
2864 pendingCommit = null;
2868 // Keep the same segmentInfos instance but replace all
2869 // of its SegmentInfo instances. This is so the next
2870 // attempt to commit using this instance of IndexWriter
2871 // will always write to a new generation ("write once").
2873 segmentInfos.rollbackSegmentInfos(rollbackSegments);
2874 if (infoStream != null ) {
2875 message("rollback: infos=" + segString(segmentInfos));
2880 assert testPoint("rollback before checkpoint");
2882 // Ask deleter to locate unreferenced files & remove
2884 deleter.checkpoint(segmentInfos, false);
2888 // Don't bother saving any changes in our segmentInfos
2889 readerPool.clear(null);
2891 lastCommitChangeCount = changeCount;
2894 } catch (OutOfMemoryError oom) {
2895 handleOOM(oom, "rollbackInternal");
2897 synchronized(this) {
2901 if (infoStream != null)
2902 message("hit exception during rollback");
2907 closeInternal(false);
2911 * Delete all documents in the index.
2913 * <p>This method will drop all buffered documents and will
2914 * remove all segments from the index. This change will not be
2915 * visible until a {@link #commit()} has been called. This method
2916 * can be rolled back using {@link #rollback()}.</p>
2918 * <p>NOTE: this method is much faster than using deleteDocuments(new MatchAllDocsQuery()).</p>
2920 * <p>NOTE: this method will forcefully abort all merges
2921 * in progress. If other threads are running {@link
2922 * #forceMerge}, {@link #addIndexes(IndexReader[])} or
2923 * {@link #forceMergeDeletes} methods, they may receive
2924 * {@link MergePolicy.MergeAbortedException}s.
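*
* <p>A sketch of rebuilding an index in place:</p>
*
* <pre>
* writer.deleteAll(); // drops all docs; not visible to readers yet
* // ... re-add the documents ...
* writer.commit();    // make the rebuilt index visible
* </pre>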
2926 public synchronized void deleteAll() throws IOException {
2930 // Abort any running merges
2931 finishMerges(false);
2933 // Remove any buffered docs
2936 // Remove all segments
2937 segmentInfos.clear();
2939 // Ask deleter to locate unreferenced files & remove them:
2940 deleter.checkpoint(segmentInfos, false);
2943 // Don't bother saving any changes in our segmentInfos
2944 readerPool.dropAll();
2946 // Mark that the index has changed
2948 segmentInfos.changed();
2949 } catch (OutOfMemoryError oom) {
2950 handleOOM(oom, "deleteAll");
2952 if (infoStream != null) {
2953 message("hit exception during deleteAll");
2958 private synchronized void finishMerges(boolean waitForMerges) throws IOException {
2959 if (!waitForMerges) {
2963 // Abort all pending & running merges:
2964 for (final MergePolicy.OneMerge merge : pendingMerges) {
2965 if (infoStream != null)
2966 message("now abort pending merge " + merge.segString(directory));
2970 pendingMerges.clear();
2972 for (final MergePolicy.OneMerge merge : runningMerges) {
2973 if (infoStream != null)
2974 message("now abort running merge " + merge.segString(directory));
2978 // These merges periodically check whether they have
2979 // been aborted, and stop if so. We wait here to make
2980 // sure they all stop. It should not take very long
2981 // because the merge threads periodically check if
2982 // they are aborted.
2983 while(runningMerges.size() > 0) {
2984 if (infoStream != null)
2985 message("now wait for " + runningMerges.size() + " running merge to abort");
2992 assert 0 == mergingSegments.size();
2994 if (infoStream != null)
2995 message("all running merges have aborted");
2998 // waitForMerges() will ensure any running addIndexes finishes.
2999 // It's fine if a new one attempts to start, because our
3000 // caller above will see that we are in the
3001 // process of closing, and will throw an
3002 // AlreadyClosedException.
3008 * Wait for any currently outstanding merges to finish.
3010 * <p>It is guaranteed that any merges started prior to calling this method
3011 * will have completed once this method completes.</p>
3013 public synchronized void waitForMerges() {
3015 if (infoStream != null) {
3016 message("waitForMerges");
3018 while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
3023 assert 0 == mergingSegments.size();
3025 if (infoStream != null) {
3026 message("waitForMerges done");
3031 * Called whenever the SegmentInfos has been updated and
3032 * the index files referenced exist (correctly) in the
3035 synchronized void checkpoint() throws IOException {
3037 segmentInfos.changed();
3038 deleter.checkpoint(segmentInfos, false);
3041 private synchronized void resetMergeExceptions() {
3042 mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
3046 private void noDupDirs(Directory... dirs) {
3047 HashSet<Directory> dups = new HashSet<Directory>();
3048 for (Directory dir : dirs) {
3049 if (dups.contains(dir))
3050 throw new IllegalArgumentException("Directory " + dir + " appears more than once");
3051 if (dir == directory)
3052 throw new IllegalArgumentException("Cannot add directory to itself");
3058 * @deprecated use {@link #addIndexes(Directory...)} instead
3061 public void addIndexesNoOptimize(Directory... dirs)
3062 throws CorruptIndexException, IOException {
3067 * Adds all segments from an array of indexes into this index.
3069 * <p>This may be used to parallelize batch indexing. A large document
3070 * collection can be broken into sub-collections. Each sub-collection can be
3071 * indexed in parallel, on a different thread, process or machine. The
3072 * complete index can then be created by merging sub-collection indexes
3076 * <b>NOTE:</b> the index in each {@link Directory} must not be
3077 * changed (opened by a writer) while this method is
3078 * running. This method does not acquire a write lock in
3079 * each input Directory, so it is up to the caller to enforce this.
3082 * <p>This method is transactional in how Exceptions are
3083 * handled: it does not commit a new segments_N file until
3084 * all indexes are added. This means if an Exception
3085 * occurs (for example disk full), then either no indexes
3086 * will have been added or they all will have been.
3088 * <p>Note that this requires temporary free space in the
3089 * {@link Directory} up to 2X the sum of all input indexes
3090 * (including the starting index). If readers/searchers
3091 * are open against the starting index, then temporary
3092 * free space required will be higher by the size of the
3093 * starting index (see {@link #forceMerge(int)} for details).
3096 * <b>NOTE:</b> this method only copies the segments of the incoming indexes
3097 * and does not merge them. Therefore deleted documents are not removed and
3098 * the new segments are not merged with the existing ones. Also, if the merge
3099 * policy allows compound files, then any segment that is not compound is
3100 * converted to compound format. However, if the segment is already compound, it is copied as-is
3101 * even if the merge policy does not allow compound files.
3104 * <p>This requires this index not be among those to be added.
3107 * <b>NOTE</b>: if this method hits an OutOfMemoryError
3108 * you should immediately close the writer. See <a
3109 * href="#OOME">above</a> for details.
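*
* <p>A sketch of merging two sub-indexes built elsewhere (the paths
* are illustrative):</p>
*
* <pre>
* writer.addIndexes(FSDirectory.open(new File("/indexes/part1")),
*                   FSDirectory.open(new File("/indexes/part2")));
* writer.commit();
* </pre>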
3111 * @throws CorruptIndexException if the index is corrupt
3112 * @throws IOException if there is a low-level IO error
3114 public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
3120 if (infoStream != null)
3121 message("flush at addIndexes(Directory...)");
3125 List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
3126 Comparator<String> versionComparator = StringHelper.getVersionComparator();
3127 for (Directory dir : dirs) {
3128 if (infoStream != null) {
3129 message("addIndexes: process directory " + dir);
3131 SegmentInfos sis = new SegmentInfos(); // read infos from dir
 sis.read(dir);
3133 final Set<String> dsFilesCopied = new HashSet<String>();
3134 final Map<String, String> dsNames = new HashMap<String, String>();
3135 for (SegmentInfo info : sis) {
3136 assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
3138 docCount += info.docCount;
3139 String newSegName = newSegmentName();
3140 String dsName = info.getDocStoreSegment();
3142 if (infoStream != null) {
3143 message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
3146 // create CFS only if the source segment is not CFS, and MP agrees it
3149 synchronized (this) { // Guard segmentInfos
3150 createCFS = !info.getUseCompoundFile()
3151 && mergePolicy.useCompoundFile(segmentInfos, info)
3152 // optimize case only for segments that don't share doc stores
3153 && versionComparator.compare(info.getVersion(), "3.1") >= 0;
3157 copySegmentIntoCFS(info, newSegName);
3159 copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
3165 synchronized (this) {
3167 segmentInfos.addAll(infos);
3171 } catch (OutOfMemoryError oom) {
3172 handleOOM(oom, "addIndexes(Directory...)");
3177 * Merges the provided indexes into this index. This method is useful
3178 * if you use extensions of {@link IndexReader}. Otherwise, using
3179 * {@link #addIndexes(Directory...)} is highly recommended for performance
3180 * reasons. It uses the {@link MergeScheduler} and {@link MergePolicy} set
3181 * on this writer, which may perform merges in parallel.
3183 * <p>The provided IndexReaders are not closed.
3185 * <p><b>NOTE:</b> this method does not merge the current segments,
3186 * only the incoming ones.
3188 * <p>See {@link #addIndexes(Directory...)} for details on transactional
3189 * semantics, temporary free space required in the Directory,
3190 * and non-CFS segments on an Exception.
3192 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3193 * you should immediately close the writer. See <a
3194 * href="#OOME">above</a> for details.
3196 * <p><b>NOTE</b>: if you call {@link #close(boolean)}
3197 * with <tt>false</tt>, which aborts all running merges,
3198 * then any thread still running this method might hit a
3199 * {@link MergePolicy.MergeAbortedException}.
3201 * @throws CorruptIndexException if the index is corrupt
3202 * @throws IOException if there is a low-level IO error
3204 public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
3209 if (infoStream != null)
3210 message("flush at addIndexes(IndexReader...)");
3213 String mergedName = newSegmentName();
3214 // TODO: somehow we should fix this merge so it's
3215 // abortable so that IW.close(false) is able to stop it
3216 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
3217 mergedName, null, payloadProcessorProvider,
3218 ((FieldInfos) docWriter.getFieldInfos().clone()));
3220 for (IndexReader reader : readers) // add new indexes
 merger.add(reader);
3223 int docCount = merger.merge(); // merge 'em
3225 SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
3227 merger.fieldInfos().hasProx(),
3228 merger.fieldInfos().hasVectors());
3229 setDiagnostics(info, "addIndexes(IndexReader...)");
3231 boolean useCompoundFile;
3232 synchronized(this) { // Guard segmentInfos
3234 deleter.deleteNewFiles(info.files());
3238 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
3241 // Now create the compound file if needed
3242 if (useCompoundFile) {
3243 merger.createCompoundFile(mergedName + ".cfs", info);
3245 // delete new non cfs files directly: they were never
3246 // registered with IFD
3247 synchronized(this) {
3248 deleter.deleteNewFiles(info.files());
3250 info.setUseCompoundFile(true);
3253 // Register the new segment
3254 synchronized(this) {
3256 deleter.deleteNewFiles(info.files());
3260 segmentInfos.add(info);
3264 } catch (OutOfMemoryError oom) {
3265 handleOOM(oom, "addIndexes(IndexReader...)");
3269 /** Copies the segment into the IndexWriter's directory, as a compound segment. */
3270 private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
3271 String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
3272 Collection<String> files = info.files();
3273 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
3274 for (String file : files) {
3275 String newFileName = segName + IndexFileNames.stripSegmentName(file);
3276 if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
3277 && !IndexFileNames.isSeparateNormsFile(file)) {
3278 cfsWriter.addFile(file, info.dir);
3280 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3281 info.dir.copy(directory, file, newFileName);
3288 info.dir = directory;
3289 info.name = segName;
3290 info.setUseCompoundFile(true);
3293 /** Copies the segment files as-is into the IndexWriter's directory. */
3294 private void copySegmentAsIs(SegmentInfo info, String segName,
3295 Map<String, String> dsNames, Set<String> dsFilesCopied)
3296 throws IOException {
3297 // Determine if the doc store of this segment needs to be copied. It's
3298 // only relevant for segments that share doc store with others,
3299 // because the DS might have been copied already, in which case we
3300 // just want to update the DS name of this SegmentInfo.
3301 // NOTE: pre-3x segments include a null DSName if they don't share doc
3302 // store. The following code ensures we don't accidentally insert
3303 // 'null' to the map.
3304 String dsName = info.getDocStoreSegment();
3305 final String newDsName;
3306 if (dsName != null) {
3307 if (dsNames.containsKey(dsName)) {
3308 newDsName = dsNames.get(dsName);
3310 dsNames.put(dsName, segName);
3311 newDsName = segName;
3314 newDsName = segName;
3317 // Copy the segment files
3318 for (String file: info.files()) {
3319 final String newFileName;
3320 if (IndexFileNames.isDocStoreFile(file)) {
3321 newFileName = newDsName + IndexFileNames.stripSegmentName(file);
3322 if (dsFilesCopied.contains(newFileName)) {
3325 dsFilesCopied.add(newFileName);
3327 newFileName = segName + IndexFileNames.stripSegmentName(file);
3330 assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
3331 info.dir.copy(directory, file, newFileName);
3334 info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
3335 info.dir = directory;
3336 info.name = segName;
3340 * A hook for extending classes to execute operations after pending added and
3341 * deleted documents have been flushed to the Directory but before the change
3342 * is committed (new segments_N file written).
3344 protected void doAfterFlush() throws IOException {}
3347 * A hook for extending classes to execute operations before pending added and
3348 * deleted documents are flushed to the Directory.
3350 protected void doBeforeFlush() throws IOException {}
3352 /** Expert: prepare for commit.
3354 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3355 * you should immediately close the writer. See <a
3356 * href="#OOME">above</a> for details.</p>
3358 * @see #prepareCommit(Map) */
3359 public final void prepareCommit() throws CorruptIndexException, IOException {
3361 prepareCommit(null);
3364 /** <p>Expert: prepare for commit, specifying
3365 * commitUserData Map (String -> String). This does the
3366 * first phase of 2-phase commit. This method does all
3367 * steps necessary to commit changes since this writer
3368 * was opened: flushes pending added and deleted docs,
3369 * syncs the index files, writes most of the next segments_N
3370 * file. After calling this you must call either {@link
3371 * #commit()} to finish the commit, or {@link
3372 * #rollback()} to revert the commit and undo all changes
3373 * done since the writer was opened.</p>
3375 * You can also just call {@link #commit(Map)} directly
3376 * without prepareCommit first in which case that method
3377 * will internally call prepareCommit.
3379 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3380 * you should immediately close the writer. See <a
3381 * href="#OOME">above</a> for details.</p>
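*
* <p>A sketch of coordinating the index commit with a second
* participant (<code>otherResource</code> is hypothetical, not a
* Lucene API):</p>
*
* <pre>
* writer.prepareCommit();
* try {
*   otherResource.prepare();
*   writer.commit();       // finish the Lucene side
*   otherResource.commit();
* } catch (Exception e) {
*   writer.rollback();     // also discards the prepared commit
* }
* </pre>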
3383 * @param commitUserData Opaque Map (String->String)
3384 * that's recorded into the segments file in the index,
3385 * and retrievable by {@link
3386 * IndexReader#getCommitUserData}. Note that when
3387 * IndexWriter commits itself during {@link #close}, the
3388 * commitUserData is unchanged (just carried over from
3389 * the prior commit). If this is null then the previous
3390 * commitUserData is kept. Also, the commitUserData will
3391 * only "stick" if there are actually changes in the index to commit.
3394 public final void prepareCommit(Map<String, String> commitUserData)
3395 throws CorruptIndexException, IOException {
3399 throw new IllegalStateException(
3400 "this writer hit an OutOfMemoryError; cannot commit");
3403 if (pendingCommit != null)
3404 throw new IllegalStateException(
3405 "prepareCommit was already called with no corresponding call to commit");
3407 if (infoStream != null)
3408 message("prepareCommit: flush");
3411 boolean anySegmentsFlushed = false;
3412 SegmentInfos toCommit = null;
3413 boolean success = false;
3416 synchronized (this) {
3417 anySegmentsFlushed = doFlush(true);
3418 readerPool.commit(segmentInfos);
3419 toCommit = (SegmentInfos) segmentInfos.clone();
3420 pendingCommitChangeCount = changeCount;
3421 // This protects the segmentInfos we are now going
3422 // to commit. This is important in case, eg, while
3423 // we are trying to sync all referenced files, a
3424 // merge completes which would otherwise have
3425 // removed the files we are now syncing.
3426 deleter.incRef(toCommit, false);
3430 if (!success && infoStream != null) {
3431 message("hit exception during prepareCommit");
3435 } catch (OutOfMemoryError oom) {
3436 handleOOM(oom, "prepareCommit");
3441 if (anySegmentsFlushed) {
3447 synchronized (this) {
3448 deleter.decRef(toCommit);
3453 startCommit(toCommit, commitUserData);
3456 // Used only by commit, below; lock order is commitLock -> IW
3457 private final Object commitLock = new Object();
3460 * <p>Commits all pending changes (added & deleted
3461 * documents, segment merges, added
3462 * indexes, etc.) to the index, and syncs all referenced
3463 * index files, such that a reader will see the changes
3464 * and the index updates will survive an OS or machine
3465 * crash or power loss. Note that this does not wait for
3466 * any running background merges to finish. This may be a
3467 * costly operation, so you should test the cost in your
3468 * application and do it only when really necessary.</p>
3470 * <p> Note that this operation calls Directory.sync on
3471 * the index files. That call should not return until the
3472 * file contents & metadata are on stable storage. For
3473 * FSDirectory, this calls the OS's fsync. But, beware:
3474 * some hardware devices may in fact cache writes even
3475 * during fsync, and return before the bits are actually
3476 * on stable storage, to give the appearance of faster
3477 * performance. If you have such a device, and it does
3478 * not have a battery backup (for example) then on power
3479 * loss it may still lose data. Lucene cannot guarantee
3480 * consistency on such devices. </p>
3482 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3483 * you should immediately close the writer. See <a
3484 * href="#OOME">above</a> for details.</p>
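 *
 * <p>Typical usage sketch (assumes an open <code>IndexWriter writer</code>
 * and a <code>Document doc</code>):</p>
 * <pre>
 * writer.addDocument(doc);  // buffered in RAM; not yet visible to new readers
 * writer.commit();          // flushed and fsync'd; new readers will see it
 * </pre>
 *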
3486 * @see #prepareCommit
3488 */
3489 public final void commit() throws CorruptIndexException, IOException {
3490 ensureOpen();
3491 commitInternal(null);
3492 }
3493 /** Commits all changes to the index, specifying a
3494 * commitUserData Map (String -> String). This just
3495 * calls {@link #prepareCommit(Map)} (if you didn't
3496 * already call it) and then {@link #finishCommit}.
3498 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3499 * you should immediately close the writer. See <a
3500 * href="#OOME">above</a> for details.</p>
3501 */
3502 public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3503 ensureOpen();
3506 commitInternal(commitUserData);
3507 }
3509 private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3511 if (infoStream != null) {
3512 message("commit: start");
3515 synchronized(commitLock) {
3516 if (infoStream != null) {
3517 message("commit: enter lock");
3520 if (pendingCommit == null) {
3521 if (infoStream != null) {
3522 message("commit: now prepare");
3524 prepareCommit(commitUserData);
3525 } else if (infoStream != null) {
3526 message("commit: already prepared");
3527 }
3529 finishCommit();
3530 }
3531 }
3533 private synchronized final void finishCommit() throws CorruptIndexException, IOException {
3535 if (pendingCommit != null) {
3536 try {
3537 if (infoStream != null)
3538 message("commit: pendingCommit != null");
3539 pendingCommit.finishCommit(directory);
3540 if (infoStream != null)
3541 message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
3542 lastCommitChangeCount = pendingCommitChangeCount;
3543 segmentInfos.updateGeneration(pendingCommit);
3544 segmentInfos.setUserData(pendingCommit.getUserData());
3545 rollbackSegments = pendingCommit.createBackupSegmentInfos(true);
3546 deleter.checkpoint(pendingCommit, true);
3547 } finally {
3548 // Matches the incRef done in startCommit:
3549 deleter.decRef(pendingCommit);
3550 pendingCommit = null;
3551 notifyAll();
3552 }
3554 } else if (infoStream != null) {
3555 message("commit: pendingCommit == null; skip");
3558 if (infoStream != null) {
3559 message("commit: done");
3563 /** NOTE: flushDocStores is ignored now (hardwired to
3564 * true); this method is only here for backwards
3565 * compatibility. */
3566 protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3567 flush(triggerMerge, flushDeletes);
3571 * Flush all in-memory buffered updates (adds and deletes)
3572 * to the Directory.
3573 * @param triggerMerge if true, we may merge segments (if
3574 * deletes or docs were flushed) if necessary
3575 * @param applyAllDeletes whether pending deletes should also
3576 * be applied. */
3577 protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
3579 // NOTE: this method cannot be sync'd because
3580 // maybeMerge() in turn calls mergeScheduler.merge which
3581 // in turn can take a long time to run and we don't want
3582 // to hold the lock for that. In the case of
3583 // ConcurrentMergeScheduler this can lead to deadlock
3584 // when it stalls due to too many running merges.
3586 // We can be called during close, when closing==true, so we must pass false to ensureOpen:
3587 ensureOpen(false);
3588 if (doFlush(applyAllDeletes) && triggerMerge) {
3589 maybeMerge();
3590 }
3593 // TODO: this method should not have to be entirely
3594 // synchronized, ie, merges should be allowed to commit
3595 // even while a flush is happening
3596 private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
3598 if (hitOOM) {
3599 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
3600 }
3604 assert testPoint("startDoFlush");
3606 // We may be flushing because it was triggered by doc
3607 // count, del count, ram usage (in which case flush
3608 // pending is already set), or we may be flushing
3609 // due to external event eg getReader or commit is
3610 // called (in which case we now set it, and this will
3611 // pause all threads):
3612 flushControl.setFlushPendingNoWait("explicit flush");
3614 boolean success = false;
3615 try {
3618 if (infoStream != null) {
3619 message(" start flush: applyAllDeletes=" + applyAllDeletes);
3620 message(" index before flush " + segString());
3623 final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
3624 if (newSegment != null) {
3625 setDiagnostics(newSegment, "flush");
3626 segmentInfos.add(newSegment);
3630 if (!applyAllDeletes) {
3631 // If deletes alone are consuming > 1/2 our RAM
3632 // buffer, force them all to apply now. This is to
3633 // prevent too-frequent flushing of a long tail of
3634 // tiny segments:
3635 if (flushControl.getFlushDeletes() ||
3636 (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
3637 bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
3638 applyAllDeletes = true;
3639 if (infoStream != null) {
3640 message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
3645 if (applyAllDeletes) {
3646 if (infoStream != null) {
3647 message("apply all deletes during flush");
3650 flushDeletesCount.incrementAndGet();
3651 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
3652 .applyDeletes(readerPool, segmentInfos.asList());
3653 if (result.anyDeletes) {
3654 checkpoint();
3655 }
3656 if (!keepFullyDeletedSegments && result.allDeleted != null) {
3657 if (infoStream != null) {
3658 message("drop 100% deleted segments: " + result.allDeleted);
3660 for (SegmentInfo info : result.allDeleted) {
3661 // If a merge has already registered for this
3662 // segment, we leave it in the readerPool; the
3663 // merge will skip merging it and will then drop
3664 // it once it's done:
3665 if (!mergingSegments.contains(info)) {
3666 segmentInfos.remove(info);
3667 if (readerPool != null) {
3668 readerPool.drop(info);
3674 bufferedDeletesStream.prune(segmentInfos);
3676 assert !bufferedDeletesStream.any();
3677 flushControl.clearDeletes();
3678 } else if (infoStream != null) {
3679 message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
3684 flushCount.incrementAndGet();
3686 success = true;
3688 return newSegment != null;
3690 } catch (OutOfMemoryError oom) {
3691 handleOOM(oom, "doFlush");
3692 // never hit
3693 return false;
3694 } finally {
3695 flushControl.clearFlushPending();
3696 if (!success && infoStream != null)
3697 message("hit exception during flush");
3701 /** Expert: Return the total RAM (in bytes) currently consumed by
3702 * buffered documents and buffered deletes. Useful for monitoring RAM
3703 * usage against {@link IndexWriterConfig#setRAMBufferSizeMB}. */
3704 public final long ramSizeInBytes() {
3706 return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
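}

// Illustrative application-side sketch (not part of this class; the writer
// variable and the 64 MB budget are hypothetical):
//
//   if (writer.ramSizeInBytes() > 64L * 1024 * 1024) {
//     writer.commit(); // flushes buffered docs and applies buffered deletes
//   }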
3709 /** Expert: Return the number of documents currently
3710 * buffered in RAM. */
3711 public final synchronized int numRamDocs() {
3713 return docWriter.getNumDocs();
3716 private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
3717 for(SegmentInfo info : merge.segments) {
3718 if (!segmentInfos.contains(info)) {
3719 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
3724 /** Carefully merges deletes for the segments we just
3725 * merged. This is tricky because, although merging will
3726 * clear all deletes (compacts the documents), new
3727 * deletes may have been flushed to the segments since
3728 * the merge was started. This method "carries over"
3729 * such new deletes onto the newly merged segment, and
3730 * saves the resulting deletes file (incrementing the
3731 * delete generation for merge.info). If no deletes were
3732 * flushed, no new deletes file is saved.
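 *
 * <p>Worked example (hypothetical numbers): a source segment has 10 docs,
 * 2 of which were already deleted when the merge started; the merge
 * compacts those away. If doc 7 is deleted while the merge runs, the live
 * reader shows 3 deletions vs. the clone's 2, so the delete of doc 7 is
 * re-applied at its remapped position in the merged segment. */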
3733 synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3735 assert testPoint("startCommitMergeDeletes");
3737 final List<SegmentInfo> sourceSegments = merge.segments;
3739 if (infoStream != null)
3740 message("commitMergeDeletes " + merge.segString(directory));
3742 // Carefully merge deletes that occurred after we
3743 // started merging:
3744 int docUpto = 0;
3745 int delCount = 0;
3746 long minGen = Long.MAX_VALUE;
3748 for(int i=0; i < sourceSegments.size(); i++) {
3749 SegmentInfo info = sourceSegments.get(i);
3750 minGen = Math.min(info.getBufferedDeletesGen(), minGen);
3751 int docCount = info.docCount;
3752 final SegmentReader previousReader = merge.readerClones.get(i);
3753 if (previousReader == null) {
3754 // Reader was skipped because it was 100% deletions
3755 continue;
3756 }
3757 final SegmentReader currentReader = merge.readers.get(i);
3758 if (previousReader.hasDeletions()) {
3760 // There were deletes on this segment when the merge
3761 // started. The merge has collapsed away those
3762 // deletes, but, if new deletes were flushed since
3763 // the merge started, we must now carefully carry any
3764 // newly flushed deletes over, mapping them to the new
3765 // docIDs:
3767 if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
3768 // This means this segment has had new deletes
3769 // committed since we started the merge, so we
3770 // must merge them:
3771 for(int j=0;j<docCount;j++) {
3772 if (previousReader.isDeleted(j))
3773 assert currentReader.isDeleted(j);
3775 if (currentReader.isDeleted(j)) {
3776 mergedReader.doDelete(docUpto);
3783 docUpto += docCount - previousReader.numDeletedDocs();
3785 } else if (currentReader.hasDeletions()) {
3786 // This segment had no deletes before but now it
3787 // does:
3788 for(int j=0; j<docCount; j++) {
3789 if (currentReader.isDeleted(j)) {
3790 mergedReader.doDelete(docUpto);
3796 // No deletes before or after
3797 docUpto += info.docCount;
3800 assert mergedReader.numDeletedDocs() == delCount;
3802 mergedReader.hasChanges = delCount > 0;
3804 // If new deletes were applied while we were merging
3805 // (which happens if eg commit() or getReader() is
3806 // called during our merge), then it better be the case
3807 // that the delGen has increased for all our merged
3809 assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
3811 mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
3814 synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
3816 assert testPoint("startCommitMerge");
3818 if (hitOOM) {
3819 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
3820 }
3822 if (infoStream != null)
3823 message("commitMerge: " + merge.segString(directory) + " index=" + segString());
3825 assert merge.registerDone;
3827 // If merge was explicitly aborted, or, if rollback() or
3828 // rollbackTransaction() had been called since our merge
3829 // started (which results in an unqualified
3830 // deleter.refresh() call that will remove any index
3831 // file that current segments does not reference), we
3833 if (merge.isAborted()) {
3834 if (infoStream != null)
3835 message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
3836 return false;
3837 }
3839 commitMergedDeletes(merge, mergedReader);
3841 // If the doc store we are using has been closed and
3842 // is now in compound format (but wasn't when we
3843 // started), then we will switch to the compound
3844 // format as well:
3846 assert !segmentInfos.contains(merge.info);
3848 final boolean allDeleted = mergedReader.numDocs() == 0;
3850 if (infoStream != null && allDeleted) {
3851 message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
3854 final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
3855 segmentInfos.applyMergeChanges(merge, dropSegment);
3857 if (dropSegment) {
3858 readerPool.drop(merge.info);
3859 }
3861 if (infoStream != null) {
3862 message("after commit: " + segString());
3865 closeMergeReaders(merge, false);
3867 // Must note the change to segmentInfos so any commits
3868 // in-flight don't lose it:
3869 checkpoint();
3871 // If the merged segments had pending changes, clear
3872 // them so that they don't bother writing them to
3873 // disk, updating SegmentInfo, etc.:
3874 readerPool.clear(merge.segments);
3876 if (merge.maxNumSegments != -1) {
3877 // cascade the forceMerge:
3878 if (!segmentsToMerge.containsKey(merge.info)) {
3879 segmentsToMerge.put(merge.info, Boolean.FALSE);
3880 }
3881 }
3883 return true;
3884 }
3886 final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
3888 if (infoStream != null) {
3889 message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
3892 // Set the exception on the merge, so if
3893 // forceMerge is waiting on us it sees the root
3895 merge.setException(t);
3896 addMergeException(merge);
3898 if (t instanceof MergePolicy.MergeAbortedException) {
3899 // We can ignore this exception (it happens when
3900 // close(false) or rollback is called), unless the
3901 // merge involves segments from external directories,
3902 // in which case we must throw it so, for example, the
3903 // rollbackTransaction code in addIndexes* is
3905 if (merge.isExternal)
3906 throw (MergePolicy.MergeAbortedException) t;
3907 } else if (t instanceof IOException)
3908 throw (IOException) t;
3909 else if (t instanceof RuntimeException)
3910 throw (RuntimeException) t;
3911 else if (t instanceof Error)
3912 throw (Error) t;
3913 else
3914 // Should not get here
3915 throw new RuntimeException(t);
3919 * Merges the indicated segments, replacing them in the stack with a
3920 * new segment.
3922 * @lucene.experimental
3924 public void merge(MergePolicy.OneMerge merge)
3925 throws CorruptIndexException, IOException {
3927 boolean success = false;
3929 final long t0 = System.currentTimeMillis();
3930 //System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());
3937 if (infoStream != null)
3938 message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());
3940 mergeMiddle(merge);
3941 mergeSuccess(merge);
3942 success = true;
3943 } catch (Throwable t) {
3944 handleMergeException(t, merge);
3947 synchronized(this) {
3948 mergeFinish(merge);
3950 if (!success) {
3951 if (infoStream != null)
3952 message("hit exception during merge");
3953 if (merge.info != null && !segmentInfos.contains(merge.info))
3954 deleter.refresh(merge.info.name);
3957 // This merge (and, generally, any change to the
3958 // segments) may now enable new merges, so we call
3959 // merge policy & update pending merges.
3960 if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
3961 updatePendingMerges(merge.maxNumSegments);
3965 } catch (OutOfMemoryError oom) {
3966 handleOOM(oom, "merge");
3968 if (infoStream != null && merge.info != null) {
3969 message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
3971 //System.out.println(Thread.currentThread().getName() + ": merge end");
3974 /** Hook that's called when the specified merge is complete. */
3975 void mergeSuccess(MergePolicy.OneMerge merge) {
3978 /** Checks whether this merge involves any segments
3979 * already participating in a merge. If not, this merge
3980 * is "registered", meaning we record that its segments
3981 * are now participating in a merge, and true is
3982 * returned. Else (the merge conflicts) false is
3983 * returned. */
3984 final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
3986 if (merge.registerDone)
3987 return true;
3989 if (stopMerges) {
3990 merge.abort();
3991 throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
3992 }
3994 boolean isExternal = false;
3995 for(SegmentInfo info : merge.segments) {
3996 if (mergingSegments.contains(info)) {
3997 return false;
3998 }
3999 if (!segmentInfos.contains(info)) {
4000 return false;
4001 }
4002 if (info.dir != directory) {
4003 isExternal = true;
4004 }
4005 if (segmentsToMerge.containsKey(info)) {
4006 merge.maxNumSegments = mergeMaxNumSegments;
4007 }
4010 ensureValidMerge(merge);
4012 pendingMerges.add(merge);
4014 if (infoStream != null)
4015 message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
4017 merge.mergeGen = mergeGen;
4018 merge.isExternal = isExternal;
4020 // OK it does not conflict; now record that this merge
4021 // is running (while synchronized) to avoid race
4022 // condition where two conflicting merges from different
4023 // threads both start:
4024 message("registerMerge merging=" + mergingSegments);
4025 for(SegmentInfo info : merge.segments) {
4026 message("registerMerge info=" + info);
4027 mergingSegments.add(info);
4030 // Merge is now registered
4031 merge.registerDone = true;
4035 /** Does initial setup for a merge, which is fast but holds
4036 * the synchronized lock on the IndexWriter instance. */
4037 final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
4038 boolean success = false;
4039 try {
4040 _mergeInit(merge);
4041 success = true;
4042 } finally {
4043 if (!success) {
4044 if (infoStream != null) {
4045 message("hit exception in mergeInit");
4046 }
4047 mergeFinish(merge);
4048 }
4049 }
4050 }
4052 synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
4054 assert testPoint("startMergeInit");
4056 assert merge.registerDone;
4057 assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;
4059 if (hitOOM) {
4060 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
4061 }
4063 // TODO: is there any perf benefit to sorting
4064 // merged segments? eg biggest to smallest?
4066 if (merge.info != null)
4067 // mergeInit already done
4068 return;
4070 if (merge.isAborted())
4071 return;
4073 boolean hasVectors = false;
4074 for (SegmentInfo sourceSegment : merge.segments) {
4075 if (sourceSegment.getHasVectors()) {
4076 hasVectors = true;
4077 }
4078 }
4080 // Bind a new segment name here so even with
4081 // ConcurrentMergeScheduler we keep deterministic segment
4082 // names:
4083 merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, true, false, hasVectors);
4085 // Lock order: IW -> BD
4086 final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
4088 if (result.anyDeletes) {
4089 checkpoint();
4090 }
4092 if (!keepFullyDeletedSegments && result.allDeleted != null) {
4093 if (infoStream != null) {
4094 message("drop 100% deleted segments: " + result.allDeleted);
4096 for(SegmentInfo info : result.allDeleted) {
4097 segmentInfos.remove(info);
4098 if (merge.segments.contains(info)) {
4099 mergingSegments.remove(info);
4100 merge.segments.remove(info);
4103 if (readerPool != null) {
4104 readerPool.drop(result.allDeleted);
4109 merge.info.setBufferedDeletesGen(result.gen);
4111 // Lock order: IW -> BD
4112 bufferedDeletesStream.prune(segmentInfos);
4114 Map<String,String> details = new HashMap<String,String>();
4115 details.put("mergeMaxNumSegments", ""+merge.maxNumSegments);
4116 details.put("mergeFactor", Integer.toString(merge.segments.size()));
4117 setDiagnostics(merge.info, "merge", details);
4119 if (infoStream != null) {
4120 message("merge seg=" + merge.info.name);
4123 assert merge.estimatedMergeBytes == 0;
4124 for(SegmentInfo info : merge.segments) {
4125 if (info.docCount > 0) {
4126 final int delCount = numDeletedDocs(info);
4127 assert delCount <= info.docCount;
4128 final double delRatio = ((double) delCount)/info.docCount;
4129 merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
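// e.g. (hypothetical numbers): a 100 MB segment with 20% of its docs
// deleted contributes roughly 80 MB to estimatedMergeBytes.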
4133 // TODO: I think this should no longer be needed (we
4134 // now build CFS before adding segment to the infos);
4135 // however, on removing it, tests fail for some reason!
4137 // Also enroll the merged segment into mergingSegments;
4138 // this prevents it from getting selected for a merge
4139 // after our merge is done but while we are building the
4140 // CFS:
4141 mergingSegments.add(merge.info);
4144 private void setDiagnostics(SegmentInfo info, String source) {
4145 setDiagnostics(info, source, null);
4148 private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
4149 Map<String,String> diagnostics = new HashMap<String,String>();
4150 diagnostics.put("source", source);
4151 diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
4152 diagnostics.put("os", Constants.OS_NAME);
4153 diagnostics.put("os.arch", Constants.OS_ARCH);
4154 diagnostics.put("os.version", Constants.OS_VERSION);
4155 diagnostics.put("java.version", Constants.JAVA_VERSION);
4156 diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
4157 if (details != null) {
4158 diagnostics.putAll(details);
4160 info.setDiagnostics(diagnostics);
4163 /** Does the finishing work for a merge, which is fast but
4164 * holds the synchronized lock on the IndexWriter instance. */
4165 final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
4167 // forceMerge, addIndexes or finishMerges may be waiting
4168 // on merges to finish.
4169 notifyAll();
4171 // It's possible we are called twice, eg if there was an
4172 // exception inside mergeInit
4173 if (merge.registerDone) {
4174 final List<SegmentInfo> sourceSegments = merge.segments;
4175 for(SegmentInfo info : sourceSegments) {
4176 mergingSegments.remove(info);
4178 // TODO: if we remove the add in _mergeInit, we should
4179 // also remove this:
4180 mergingSegments.remove(merge.info);
4181 merge.registerDone = false;
4184 runningMerges.remove(merge);
4187 private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
4188 final int numSegments = merge.readers.size();
4189 Throwable th = null;
4191 boolean anyChanges = false;
4192 boolean drop = !suppressExceptions;
4193 for (int i = 0; i < numSegments; i++) {
4194 if (merge.readers.get(i) != null) {
4196 anyChanges |= readerPool.release(merge.readers.get(i), drop);
4197 } catch (Throwable t) {
4202 merge.readers.set(i, null);
4205 if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
4207 merge.readerClones.get(i).close();
4208 } catch (Throwable t) {
4213 // This was a private clone and we had the
4214 // only reference to it:
4215 assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
4216 merge.readerClones.set(i, null);
4220 if (suppressExceptions && anyChanges) {
4221 checkpoint();
4222 }
4224 // If any error occurred, throw it.
4225 if (!suppressExceptions && th != null) {
4226 if (th instanceof IOException) throw (IOException) th;
4227 if (th instanceof RuntimeException) throw (RuntimeException) th;
4228 if (th instanceof Error) throw (Error) th;
4229 throw new RuntimeException(th);
4233 /** Does the actual (time-consuming) work of the merge,
4234 * but without holding synchronized lock on IndexWriter
4236 final private int mergeMiddle(MergePolicy.OneMerge merge)
4237 throws CorruptIndexException, IOException {
4239 merge.checkAborted(directory);
4241 final String mergedName = merge.info.name;
4243 int mergedDocCount = 0;
4245 List<SegmentInfo> sourceSegments = merge.segments;
4247 SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
4248 payloadProcessorProvider,
4249 ((FieldInfos) docWriter.getFieldInfos().clone()));
4251 if (infoStream != null) {
4252 message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getHasVectors());
4255 merge.readers = new ArrayList<SegmentReader>();
4256 merge.readerClones = new ArrayList<SegmentReader>();
4258 // This is try/finally to make sure merger's readers are
4259 // closed:
4260 boolean success = false;
4261 try {
4262 int totDocCount = 0;
4263 int segUpto = 0;
4264 while(segUpto < sourceSegments.size()) {
4266 final SegmentInfo info = sourceSegments.get(segUpto);
4268 // Hold onto the "live" reader; we will use this to
4269 // commit merged deletes
4270 final SegmentReader reader = readerPool.get(info, true,
4271 MERGE_READ_BUFFER_SIZE,
4273 merge.readers.add(reader);
4275 // We clone the segment readers because other
4276 // deletes may come in while we're merging so we
4277 // need readers that will not change
4278 final SegmentReader clone = (SegmentReader) reader.clone(true);
4279 merge.readerClones.add(clone);
4281 if (clone.numDocs() > 0) {
4282 merger.add(clone);
4283 totDocCount += clone.numDocs();
4284 }
4285 segUpto++;
4286 }
4288 if (infoStream != null) {
4289 message("merge: total " + totDocCount + " docs");
4292 merge.checkAborted(directory);
4294 // This is where all the work happens:
4295 mergedDocCount = merge.info.docCount = merger.merge();
4297 // LUCENE-3403: set hasVectors after merge(), so that it is properly set.
4298 merge.info.setHasVectors(merger.fieldInfos().hasVectors());
4300 assert mergedDocCount == totDocCount;
4302 if (infoStream != null) {
4303 message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
4306 anyNonBulkMerges |= merger.getAnyNonBulkMerges();
4308 assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
4310 // Very important to do this before opening the reader
4311 // because SegmentReader must know if prox was written for
4313 merge.info.setHasProx(merger.fieldInfos().hasProx());
4315 boolean useCompoundFile;
4316 synchronized (this) { // Guard segmentInfos
4317 useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
4320 if (useCompoundFile) {
4323 final String compoundFileName = IndexFileNames.segmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION);
4326 if (infoStream != null) {
4327 message("create compound file " + compoundFileName);
4329 merger.createCompoundFile(compoundFileName, merge.info);
4331 } catch (IOException ioe) {
4332 synchronized(this) {
4333 if (merge.isAborted()) {
4334 // This can happen if rollback or close(false)
4335 // is called -- fall through to logic below to
4336 // remove the partially created CFS:
4338 handleMergeException(ioe, merge);
4341 } catch (Throwable t) {
4342 handleMergeException(t, merge);
4345 if (infoStream != null) {
4346 message("hit exception creating compound file during merge");
4349 synchronized(this) {
4350 deleter.deleteFile(compoundFileName);
4351 deleter.deleteNewFiles(merge.info.files());
4358 synchronized(this) {
4360 // delete new non cfs files directly: they were never
4361 // registered with IFD
4362 deleter.deleteNewFiles(merge.info.files());
4364 if (merge.isAborted()) {
4365 if (infoStream != null) {
4366 message("abort merge after building CFS");
4368 deleter.deleteFile(compoundFileName);
4369 return 0;
4370 }
4371 }
4373 merge.info.setUseCompoundFile(true);
4376 if (infoStream != null) {
4377 message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
4380 final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
4382 final int termsIndexDivisor;
4383 final boolean loadDocStores;
4385 if (mergedSegmentWarmer != null) {
4386 // Load terms index & doc stores so the segment
4387 // warmer can run searches, load documents/term
4388 // vectors:
4389 termsIndexDivisor = config.getReaderTermsIndexDivisor();
4390 loadDocStores = true;
4391 } else {
4392 termsIndexDivisor = -1;
4393 loadDocStores = false;
4396 // TODO: in the non-realtime case, we may want to only
4397 // keep deletes (it's costly to open entire reader
4398 // when we just need deletes)
4400 final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
4402 if (poolReaders && mergedSegmentWarmer != null) {
4403 mergedSegmentWarmer.warm(mergedReader);
4406 if (!commitMerge(merge, mergedReader)) {
4407 // commitMerge will return false if this merge was aborted
4408 return 0;
4409 }
4411 synchronized(this) {
4412 if (readerPool.release(mergedReader)) {
4413 // Must checkpoint after releasing the
4414 // mergedReader since it may have written a new
4415 // deletes file:
4416 checkpoint();
4417 }
4418 }
4420 success = true;
4421 } finally {
4424 // Readers are already closed in commitMerge if we didn't hit
4425 // an exception:
4426 if (!success) {
4427 closeMergeReaders(merge, true);
4431 return mergedDocCount;
4434 synchronized void addMergeException(MergePolicy.OneMerge merge) {
4435 assert merge.getException() != null;
4436 if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
4437 mergeExceptions.add(merge);
4440 // For test purposes.
4441 final int getBufferedDeleteTermsSize() {
4442 return docWriter.getPendingDeletes().terms.size();
4445 // For test purposes.
4446 final int getNumBufferedDeleteTerms() {
4447 return docWriter.getPendingDeletes().numTermDeletes.get();
4450 // utility routines for tests
4451 synchronized SegmentInfo newestSegment() {
4452 return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
4455 /** @lucene.internal */
4456 public synchronized String segString() throws IOException {
4457 return segString(segmentInfos);
4460 /** @lucene.internal */
4461 public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
4462 final StringBuilder buffer = new StringBuilder();
4463 for(final SegmentInfo s : infos) {
4464 if (buffer.length() > 0) {
4465 buffer.append(' ');
4466 }
4467 buffer.append(segString(s));
4469 return buffer.toString();
4472 /** @lucene.internal */
4473 public synchronized String segString(SegmentInfo info) throws IOException {
4474 StringBuilder buffer = new StringBuilder();
4475 SegmentReader reader = readerPool.getIfExists(info);
4477 if (reader != null) {
4478 buffer.append(reader.toString());
4479 } else {
4480 buffer.append(info.toString(directory, 0));
4481 if (info.dir != directory) {
4482 buffer.append("**");
4486 if (reader != null) {
4487 readerPool.release(reader);
4490 return buffer.toString();
4493 private synchronized void doWait() {
4494 // NOTE: the callers of this method should in theory
4495 // be able to do simply wait(), but, as a defense
4496 // against thread timing hazards where notifyAll()
4497 // fails to be called, we wait for at most 1 second
4498 // and then return so caller can check if wait
4499 // conditions are satisfied:
4500 try {
4501 wait(1000);
4502 } catch (InterruptedException ie) {
4503 throw new ThreadInterruptedException(ie);
4507 private boolean keepFullyDeletedSegments;
4509 /** Only for testing.
4511 * @lucene.internal */
4512 void keepFullyDeletedSegments() {
4513 keepFullyDeletedSegments = true;
4516 boolean getKeepFullyDeletedSegments() {
4517 return keepFullyDeletedSegments;
4520 // called only from assert
4521 private boolean filesExist(SegmentInfos toSync) throws IOException {
4522 Collection<String> files = toSync.files(directory, false);
4523 for(final String fileName: files) {
4524 assert directory.fileExists(fileName): "file " + fileName + " does not exist";
4525 // If this trips it means we are missing a call to
4526 // .checkpoint somewhere, because by the time we
4527 // are called, deleter should know about every
4528 // file referenced by the current head
4530 assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
4531 }
4533 return true;
4534 }
4535 /** Walk through all files referenced by the current
4536 * segmentInfos and ask the Directory to sync each file,
4537 * if it wasn't already. If that succeeds, then we
4538 * prepare a new segments_N file but do not fully commit
4539 * it. */
4540 private void startCommit(SegmentInfos toSync, Map<String,String> commitUserData) throws IOException {
4542 assert testPoint("startStartCommit");
4543 assert pendingCommit == null;
4546 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
4551 if (infoStream != null)
4552 message("startCommit(): start");
4555 synchronized(this) {
4557 assert lastCommitChangeCount <= changeCount;
4559 if (pendingCommitChangeCount == lastCommitChangeCount) {
4560 if (infoStream != null) {
4561 message(" skip startCommit(): no changes pending");
4563 deleter.decRef(toSync);
4564 return;
4565 }
4567 // First, we clone & incref the segmentInfos we intend
4568 // to sync, then, without locking, we sync() all files
4569 // referenced by toSync, in the background.
4571 if (infoStream != null)
4572 message("startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
4574 assert filesExist(toSync);
4576 if (commitUserData != null) {
4577 toSync.setUserData(commitUserData);
4581 assert testPoint("midStartCommit");
4583 boolean pendingCommitSet = false;
4586 // This call can take a long time -- 10s of seconds
4587 // or more. We do it without sync:
4588 directory.sync(toSync.files(directory, false));
4590 assert testPoint("midStartCommit2");
4592 synchronized(this) {
4594 assert pendingCommit == null;
4596 assert segmentInfos.getGeneration() == toSync.getGeneration();
4598 // Exception here means nothing is prepared
4599 // (this method unwinds everything it did on
4600 // an exception):
4601 toSync.prepareCommit(directory);
4602 pendingCommitSet = true;
4603 pendingCommit = toSync;
4606 if (infoStream != null) {
4607 message("done all syncs");
4610 assert testPoint("midStartCommitSuccess");
4613 synchronized(this) {
4615 // Have our master segmentInfos record the
4616 // generations we just prepared. We do this
4617 // on error or success so we don't
4618 // double-write a segments_N file.
4619 segmentInfos.updateGeneration(toSync);
4621 if (!pendingCommitSet) {
4622 if (infoStream != null) {
4623 message("hit exception committing segments file");
4626 deleter.decRef(toSync);
4630 } catch (OutOfMemoryError oom) {
4631 handleOOM(oom, "startCommit");
4633 assert testPoint("finishStartCommit");
4637 * Returns <code>true</code> iff the index in the named directory is
4638 * currently locked.
4639 * @param directory the directory to check for a lock
4640 * @throws IOException if there is a low-level IO error
4642 public static boolean isLocked(Directory directory) throws IOException {
4643 return directory.makeLock(WRITE_LOCK_NAME).isLocked();
4647 * Forcibly unlocks the index in the named directory.
4649 * Caution: this should only be used by failure recovery code,
4650 * when it is known that no other process nor thread is in fact
4651 * currently accessing this index.
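 *
 * <p>Example sketch (<code>dir</code> is assumed; only safe when no other
 * process or thread can be writing to the index, e.g. after a crashed
 * process left the lock behind):</p>
 * <pre>
 * if (IndexWriter.isLocked(dir)) {
 *   IndexWriter.unlock(dir);
 * }
 * </pre>
 */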
4653 public static void unlock(Directory directory) throws IOException {
4654 directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
4658 * Specifies maximum field length (in number of tokens/terms) in
4659 * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
4660 * the value set by the constructor.
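 *
 * <p>Example sketch (<code>dir</code> and <code>analyzer</code> are assumed):
 * <code>new IndexWriter(dir, analyzer, new IndexWriter.MaxFieldLength(10000))</code>
 * indexes at most the first 10,000 tokens of each field.</p>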
4662 * @deprecated use {@link LimitTokenCountAnalyzer} instead.
4665 public static final class MaxFieldLength {
4667 private int limit;
4668 private String name;
4671 * Private type-safe-enum-pattern constructor.
4673 * @param name instance name
4674 * @param limit maximum field length
4676 private MaxFieldLength(String name, int limit) {
4677 this.name = name;
4678 this.limit = limit;
4679 }
4682 * Public constructor to allow users to specify the maximum field size limit.
4684 * @param limit The maximum field length
4686 public MaxFieldLength(int limit) {
4687 this("User-specified", limit);
4690 public int getLimit() {
4691 return limit;
4692 }
4695 public String toString() {
4697 return name + ":" + limit;
4698 }
4700 /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
4701 public static final MaxFieldLength UNLIMITED
4702 = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
4705 * Sets the maximum field length to
4706 * {@link #DEFAULT_MAX_FIELD_LENGTH}
4708 public static final MaxFieldLength LIMITED
4709 = new MaxFieldLength("LIMITED", 10000);
4712 /** If {@link #getReader} has been called (ie, this writer
4713 * is in near real-time mode), then after a merge
4714 * completes, this class can be invoked to warm the
4715 * reader on the newly merged segment, before the merge
4716 * commits. This is not required for near real-time
4717 * search, but will reduce search latency on opening a
4718 * new near real-time reader after a merge completes.
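 *
 * <p>Sketch of a warmer that runs a cheap query so the first near
 * real-time search after a merge is fast (the field and term here are
 * hypothetical):</p>
 * <pre>
 * writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
 *   public void warm(IndexReader reader) throws IOException {
 *     IndexSearcher searcher = new IndexSearcher(reader);
 *     try {
 *       searcher.search(new TermQuery(new Term("id", "0")), 10);
 *     } finally {
 *       searcher.close();
 *     }
 *   }
 * });
 * </pre>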
4720 * @lucene.experimental
4722 * <p><b>NOTE</b>: warm is called before any deletes have
4723 * been carried over to the merged segment. */
4724 public static abstract class IndexReaderWarmer {
4725 public abstract void warm(IndexReader reader) throws IOException;
4729 * Set the merged segment warmer. See {@link IndexReaderWarmer}.
4731 * @deprecated use
4732 * {@link IndexWriterConfig#setMergedSegmentWarmer}
4733 * instead. */
4736 public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
4737 config.setMergedSegmentWarmer(warmer);
4741 * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
4743 * @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
4746 public IndexReaderWarmer getMergedSegmentWarmer() {
4747 return config.getMergedSegmentWarmer();
4750 private void handleOOM(OutOfMemoryError oom, String location) {
4751 if (infoStream != null) {
4752 message("hit OutOfMemoryError inside " + location);
4753 }
4754 hitOOM = true;
4755 throw oom;
4756 }
4758 // Used only by assert for testing. Current points:
4764 // midStartCommitSuccess
4765 // finishStartCommit
4766 // startCommitMergeDeletes
4768 // DocumentsWriter.ThreadState.init start
4769 boolean testPoint(String name) {
4770 return true;
4771 }
4773 synchronized boolean nrtIsCurrent(SegmentInfos infos) {
4774 //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
4776 return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
4779 synchronized boolean isClosed() {
4780 return closed;
4781 }
4783 /** Expert: remove any index files that are no longer
4784 * used.
4786 * <p> IndexWriter normally deletes unused files itself,
4787 * during indexing. However, on Windows, which disallows
4788 * deletion of open files, if there is a reader open on
4789 * the index then those files cannot be deleted. This is
4790 * fine, because IndexWriter will periodically retry
4791 * the deletion.</p>
4793 * <p> However, IndexWriter doesn't try that often: only
4794 * on open, close, flushing a new segment, and finishing
4795 * a merge. If you don't do any of these actions with your
4796 * IndexWriter, you'll see the unused files linger. If
4797 * that's a problem, call this method to delete them
4798 * (once you've closed the open readers that were
4799 * preventing their deletion).
4801 * <p> In addition, you can call this method to delete
4802 * unreferenced index commits. This might be useful if you
4803 * are using an {@link IndexDeletionPolicy} which holds
4804 * onto index commits until some criteria are met, but those
4805 * commits are no longer needed. Otherwise, those commits will
4806 * be deleted the next time commit() is called.
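 *
 * <p>Typical use (sketch; <code>reader</code> is an open IndexReader that
 * was pinning now-unreferenced files):</p>
 * <pre>
 * reader.close();             // release the files the reader was holding open
 * writer.deleteUnusedFiles(); // now the writer can delete them
 * </pre>
 */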
4808 public synchronized void deleteUnusedFiles() throws IOException {
4810 deleter.deletePendingFiles();
4811 deleter.revisitPolicy();
4814 // Called by DirectoryReader.doClose
4815 synchronized void deletePendingFiles() throws IOException {
4816 deleter.deletePendingFiles();
4820 * Sets the {@link PayloadProcessorProvider} to use when merging payloads.
4821 * Note that the given <code>pcp</code> will be invoked for every segment that
4822 * is merged, not only external ones that are given through
4823 * {@link #addIndexes}. If you want only the payloads of the external segments
4824 * to be processed, you can return <code>null</code> whenever a
4825 * {@link DirPayloadProcessor} is requested for the {@link Directory} of the
4826 * {@link IndexWriter}.
4828 * The default is <code>null</code>, which means payloads are processed
4829 * normally (copied as-is) during segment merges; passing <code>null</code>
4830 * again later unsets any previously set provider.
4832 * <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
4833 * immediately, potentially for already running merges too. If you want to be
4834 * sure it is used for further operations only, such as {@link #addIndexes} or
4835 * {@link #forceMerge}, you can call {@link #waitForMerges()} before.
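 *
 * <p>Sketch: process payloads only for segments coming from other
 * directories; <code>myProvider</code> is a hypothetical
 * {@link PayloadProcessorProvider}:</p>
 * <pre>
 * writer.setPayloadProcessorProvider(new PayloadProcessorProvider() {
 *   public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
 *     return dir == writer.getDirectory() ? null : myProvider.getDirProcessor(dir);
 *   }
 * });
 * </pre>
 */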
4837 public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
4839 payloadProcessorProvider = pcp;
4843 * Returns the {@link PayloadProcessorProvider} that is used during segment
4844 * merges to process payloads.
4846 public PayloadProcessorProvider getPayloadProcessorProvider() {
4848 return payloadProcessorProvider;
4851 // decides when flushes happen
4852 final class FlushControl {
4854 private boolean flushPending;
4855 private boolean flushDeletes;
4856 private int delCount;
4857 private int docCount;
4858 private boolean flushing;
4860 private synchronized boolean setFlushPending(String reason, boolean doWait) {
4861 if (flushPending || flushing) {
4862 if (doWait) {
4863 while(flushPending || flushing) {
4866 } catch (InterruptedException ie) {
4867 throw new ThreadInterruptedException(ie);
4868 }
4869 }
4870 }
4871 return false;
4872 } else {
4873 if (infoStream != null) {
4874 message("now trigger flush reason=" + reason);
4876 flushPending = true;
4877 return flushPending;
4881 public synchronized void setFlushPendingNoWait(String reason) {
4882 setFlushPending(reason, false);
4885 public synchronized boolean getFlushPending() {
4886 return flushPending;
4889 public synchronized boolean getFlushDeletes() {
4890 return flushDeletes;
4893 public synchronized void clearFlushPending() {
4894 if (infoStream != null) {
4895 message("clearFlushPending");
4897 flushPending = false;
4898 flushDeletes = false;
4903 public synchronized void clearDeletes() {
4904 delCount = 0;
4905 }
4907 public synchronized boolean waitUpdate(int docInc, int delInc) {
4908 return waitUpdate(docInc, delInc, false);
4911 public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
4912 while(flushPending) {
4913 try {
4914 wait();
4915 } catch (InterruptedException ie) {
4916 throw new ThreadInterruptedException(ie);
4923 // skipWait is only used when a thread is BOTH adding
4924 // a doc and buffering a del term, and, the adding of
4925 // the doc already triggered a flush
4930 final int maxBufferedDocs = config.getMaxBufferedDocs();
4931 if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4932 docCount >= maxBufferedDocs) {
4933 return setFlushPending("maxBufferedDocs", true);
4936 final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
4937 if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
4938 delCount >= maxBufferedDeleteTerms) {
4939 flushDeletes = true;
4940 return setFlushPending("maxBufferedDeleteTerms", true);
4943 return flushByRAMUsage("add delete/doc");
4946 public synchronized boolean flushByRAMUsage(String reason) {
4947 final double ramBufferSizeMB = config.getRAMBufferSizeMB();
4948 if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
4949 final long limit = (long) (ramBufferSizeMB*1024*1024);
4950 long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4951 if (used >= limit) {
4953 // DocumentsWriter may be able to free up some
4955 // Lock order: FC -> DW
4956 docWriter.balanceRAM();
4958 used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
4959 if (used >= limit) {
4960 return setFlushPending("ram full: " + reason, false);
4961 }
4962 }
4963 }
4964 return false;
4965 }
4966 }
4968 final FlushControl flushControl = new FlushControl();