diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexWriter.java
new file mode 100644
index 0000000..e847277
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -0,0 +1,4969 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.MapBackedSet;
+import org.apache.lucene.util.TwoPhaseCommit;
+
+/**
+  An <code>IndexWriter</code> creates and maintains an index.
+
+  <p>The <code>create</code> argument to the {@link
+  #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
+  whether a new index is created, or whether an existing index is
+  opened. Note that you can open an index with <code>create=true</code>
+  even while readers are using the index. The old readers will
+  continue to search the "point in time" snapshot they had opened,
+  and won't see the newly created index until they re-open. There are
+  also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
+  with no <code>create</code> argument which will create a new index
+  if there is not already an index at the provided path and otherwise
+  open the existing index.</p>
+
+  <p>In either case, documents are added with {@link #addDocument(Document)
+  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
+  #deleteDocuments(Query)}. A document can be updated with {@link
+  #updateDocument(Term, Document) updateDocument} (which just deletes
+  and then adds the entire document). When finished adding, deleting
+  and updating documents, {@link #close() close} should be called.</p>
+
+  <p>These changes are buffered in memory and periodically
+  flushed to the {@link Directory} (during the above method
+  calls). A flush is triggered when there are enough
+  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+  or enough added documents since the last flush, whichever
+  is sooner. For the added documents, flushing is triggered
+  either by RAM usage of the documents (see {@link
+  #setRAMBufferSizeMB}) or the number of added documents.
+  The default is to flush when RAM usage hits 16 MB. For
+  best indexing speed you should flush by RAM usage with a
+  large RAM buffer. Note that flushing just moves the
+  internal buffered state in IndexWriter into the index, but
+  these changes are not visible to IndexReader until either
+  {@link #commit()} or {@link #close} is called. A flush may
+  also trigger one or more segment merges which by default
+  run with a background thread so as not to block the
+  addDocument calls (see below
+  for changing the {@link MergeScheduler}).</p>
+
+  <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
+  another <code>IndexWriter</code> on the same directory will lead to a
+  {@link LockObtainFailedException}. The {@link LockObtainFailedException}
+  is also thrown if an IndexReader on the same directory is used to delete documents
+  from the index.</p>
+
+  <p>Expert: <code>IndexWriter</code> allows an optional
+  {@link IndexDeletionPolicy} implementation to be
+  specified. You can use this to control when prior commits
+  are deleted from the index. The default policy is {@link
+  KeepOnlyLastCommitDeletionPolicy} which removes all prior
+  commits as soon as a new commit is done (this matches
+  behavior before 2.2). Creating your own policy can allow
+  you to explicitly keep previous "point in time" commits
+  alive in the index for some time, to allow readers to
+  refresh to the new commit without having the old commit
+  deleted out from under them. This is necessary on
+  filesystems like NFS that do not support "delete on last
+  close" semantics, which Lucene's "point in time" search
+  normally relies on.</p>
+
+  <p>Expert: <code>IndexWriter</code> allows you to separately change
+  the {@link MergePolicy} and the {@link MergeScheduler}.
+  The {@link MergePolicy} is invoked whenever there are
+  changes to the segments in the index. Its role is to
+  select which merges to do, if any, and return a {@link
+  MergePolicy.MergeSpecification} describing the merges.
+  The default is {@link LogByteSizeMergePolicy}. Then, the {@link
+  MergeScheduler} is invoked with the requested merges and
+  it decides when and how to run the merges. The default is
+  {@link ConcurrentMergeScheduler}.</p>
+
+  <p><b>NOTE</b>: if you hit an
+  OutOfMemoryError then IndexWriter will quietly record this
+  fact and block all future segment commits. This is a
+  defensive measure in case any internal state (buffered
+  documents and deletions) were corrupted. Any subsequent
+  calls to {@link #commit()} will throw an
+  IllegalStateException. The only course of action is to
+  call {@link #close()}, which internally will call {@link
+  #rollback()}, to undo any changes to the index since the
+  last commit. You can also just call {@link #rollback()}
+  directly.</p>
+
+  <p><b>NOTE</b>: {@link
+  IndexWriter} instances are completely thread
+  safe, meaning multiple threads can call any of its
+  methods, concurrently. If your application requires
+  external synchronization, you should <b>not</b>
+  synchronize on the <code>IndexWriter</code> instance as
+  this may cause deadlock; use your own (non-Lucene) objects
+  instead.</p>
+
+  <p><b>NOTE</b>: If you call
+  <code>Thread.interrupt()</code> on a thread that's within
+  IndexWriter, IndexWriter will try to catch this (eg, if
+  it's in a wait() or Thread.sleep()), and will then throw
+  the unchecked exception {@link ThreadInterruptedException}
+  and clear the interrupt status on the thread.</p>
+*/
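[Editorial example — not part of the patch. A minimal sketch of the add/update/commit/close workflow the javadoc above describes, against the 3.5 API; the index path and field names are illustrative assumptions.]

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import java.io.File;

    public class IndexWriterUsageSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/example-index")); // hypothetical path
        IndexWriterConfig conf =
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        IndexWriter writer = new IndexWriter(dir, conf);
        try {
          Document doc = new Document();
          doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
          doc.add(new Field("body", "hello lucene", Field.Store.NO, Field.Index.ANALYZED));
          writer.addDocument(doc);                          // buffered in RAM until a flush
          writer.updateDocument(new Term("id", "1"), doc);  // delete-then-add, atomic to readers
          writer.commit();                                  // make changes visible to IndexReaders
        } finally {
          writer.close();                                   // flushes, commits, releases write.lock
        }
      }
    }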
+ * <p>Note that this is functionally equivalent to calling
+ * {@link #flush} and then using {@link IndexReader#open} to
+ * open a new reader. But the turnaround time of this
+ * method should be faster since it avoids the potentially
+ * costly {@link #commit}.</p>
+ *
+ * <p>You must close the {@link IndexReader} returned by
+ * this method once you are done using it.</p>
+ *
+ * <p>It's near real-time because there is no hard
+ * guarantee on how quickly you can get a new reader after
+ * making changes with IndexWriter. You'll have to
+ * experiment in your situation to determine if it's
+ * fast enough. As this is a new and experimental
+ * feature, please report back on your findings so we can
+ * learn, improve and iterate.</p>
+ *
+ * <p>The resulting reader supports {@link
+ * IndexReader#reopen}, but that call will simply forward
+ * back to this method (though this may change in the
+ * future).</p>
+ *
+ * <p>The very first time this method is called, this
+ * writer instance will make every effort to pool the
+ * readers that it opens for doing merges, applying
+ * deletes, etc. This means additional resources (RAM,
+ * file descriptors, CPU time) will be consumed.</p>
+ *
+ * <p>For lower latency on reopening a reader, you should
+ * call {@link #setMergedSegmentWarmer} to
+ * pre-warm a newly merged segment before it's committed
+ * to the index. This is important for minimizing
+ * index-to-search delay after a large merge.</p>
+ *
+ * <p>If an addIndexes* call is running in another thread,
+ * then this reader will only search those segments from
+ * the foreign index that have been successfully copied
+ * over, so far.</p>
+ *
+ * <p><b>NOTE</b>: Once the writer is closed, any
+ * outstanding readers may continue to be used. However,
+ * if you attempt to reopen any of those readers, you'll
+ * hit an {@link AlreadyClosedException}.</p>
+ *
+ * @lucene.experimental
+ *
+ * @return IndexReader that covers entire index plus all
+ * changes made so far by this IndexWriter instance
+ *
+ * @deprecated Please use {@link
+ * IndexReader#open(IndexWriter,boolean)} instead.
+ *
+ * @throws IOException
+ */
+ @Deprecated
+ public IndexReader getReader() throws IOException {
+ return getReader(config.getReaderTermsIndexDivisor(), true);
+ }
+
+ IndexReader getReader(boolean applyAllDeletes) throws IOException {
+ return getReader(config.getReaderTermsIndexDivisor(), applyAllDeletes);
+ }
+
+ /** Expert: like {@link #getReader}, except you can
+ * specify which termInfosIndexDivisor should be used for
+ * any newly opened readers.
+ * @param termInfosIndexDivisor Subsamples which indexed
+ * terms are loaded into RAM. This has the same effect as {@link
+ * IndexWriter#setTermIndexInterval} except that setting
+ * must be done at indexing time while this setting can be
+ * set per reader. When set to N, then one in every
+ * N*termIndexInterval terms in the index is loaded into
+ * memory. By setting this to a value > 1 you can reduce
+ * memory usage, at the expense of higher latency when
+ * loading a TermInfo. The default value is 1. Set this
+ * to -1 to skip loading the terms index entirely.
+ *
+ * @deprecated Please use {@link
+ * IndexReader#open(IndexWriter,boolean)} instead. Furthermore,
+ * this method cannot guarantee the reader (and its
+ * sub-readers) will be opened with the
+ * termInfosIndexDivisor setting because some of them may
+ * have already been opened according to {@link
+ * IndexWriterConfig#setReaderTermsIndexDivisor}. You
+ * should set the requested termInfosIndexDivisor through
+ * {@link IndexWriterConfig#setReaderTermsIndexDivisor} and use
+ * {@link #getReader()}. */
+ @Deprecated
+ public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
+ return getReader(termInfosIndexDivisor, true);
+ }
+
+ IndexReader getReader(int termInfosIndexDivisor, boolean applyAllDeletes) throws IOException {
+ ensureOpen();
+
+ final long tStart = System.currentTimeMillis();
+
+ if (infoStream != null) {
+ message("flush at getReader");
+ }
+
+ // Do this up front before flushing so that the readers
+ // obtained during this flush are pooled, the first time
+ // this method is called:
+ poolReaders = true;
+
+ // Prevent segmentInfos from changing while opening the
+ // reader; in theory we could do similar retry logic,
+ // just like we do when loading segments_N
+ IndexReader r;
+ synchronized(this) {
+ flush(false, applyAllDeletes);
+ r = new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor, applyAllDeletes);
+ if (infoStream != null) {
+ message("return reader version=" + r.getVersion() + " reader=" + r);
+ }
+ }
+
+ maybeMerge();
+
+ if (infoStream != null) {
+ message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
+ }
+ return r;
+ }
+
+ // Used for all SegmentReaders we open
+ private final Collection
+
+ /** <p>Get the current setting of whether newly flushed
+ * segments will use the compound file format. Note that
+ * this just returns the value previously set with
+ * setUseCompoundFile(boolean), or the default value
+ * (true). You cannot use this to query the status of
+ * previously flushed segments.</p>
+ *
+ * <p>Note that this method is a convenience method: it
+ * just calls mergePolicy.getUseCompoundFile as long as
+ * mergePolicy is an instance of {@link LogMergePolicy}.
+ * Otherwise an IllegalArgumentException is thrown.</p>
+ * + *Note that this method is a convenience method: it + * just calls mergePolicy.getUseCompoundFile as long as + * mergePolicy is an instance of {@link LogMergePolicy}. + * Otherwise an IllegalArgumentException is thrown.
+ * + * @see #setUseCompoundFile(boolean) + * @deprecated use {@link LogMergePolicy#getUseCompoundFile()} + */ + @Deprecated + public boolean getUseCompoundFile() { + return getLogMergePolicy().getUseCompoundFile(); + } + + /** + *+ * Setting to turn on usage of a compound file. When on, multiple files for + * each segment are merged into a single file when a new segment is flushed. + *
+ * + *+ * Note that this method is a convenience method: it just calls + * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of + * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown. + *
+ * + * @deprecated use {@link LogMergePolicy#setUseCompoundFile(boolean)}. + */ + @Deprecated + public void setUseCompoundFile(boolean value) { + getLogMergePolicy().setUseCompoundFile(value); + } + + /** Expert: Set the Similarity implementation used by this IndexWriter. + * + * @see Similarity#setDefault(Similarity) + * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead + */ + @Deprecated + public void setSimilarity(Similarity similarity) { + ensureOpen(); + this.similarity = similarity; + docWriter.setSimilarity(similarity); + // Required so config.getSimilarity returns the right value. But this will + // go away together with the method in 4.0. + config.setSimilarity(similarity); + } + + /** Expert: Return the Similarity implementation used by this IndexWriter. + * + *This defaults to the current value of {@link Similarity#getDefault()}.
+ * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
+ */
+ @Deprecated
+ public Similarity getSimilarity() {
+ ensureOpen();
+ return similarity;
+ }
+
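[Editorial example — not part of the patch. A sketch of near-real-time search with the reader-from-writer API discussed above; IndexReader.open(writer, true) is the replacement the deprecation notes point to, and `writer` is assumed to be an open IndexWriter.]

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.IndexSearcher;
    import java.io.IOException;

    public class NrtReaderSketch {
      static void searchLive(IndexWriter writer) throws IOException {
        // Flushes buffered docs/deletes and opens a pooled NRT reader:
        IndexReader reader = IndexReader.open(writer, true);
        try {
          IndexSearcher searcher = new IndexSearcher(reader);
          // ... run queries here; results include uncommitted changes ...
          searcher.close();
        } finally {
          reader.close(); // callers must close NRT readers themselves
        }
      }
    }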
+ /** Expert: Set the interval between indexed terms. Large values cause less
+ * memory to be used by IndexReader, but slow random-access to terms. Small
+ * values cause more memory to be used by an IndexReader, and speed
+ * random-access to terms.
+ *
+ * This parameter determines the amount of computation required per query
+ * term, regardless of the number of documents that contain that term. In
+ * particular, it is the maximum number of other terms that must be
+ * scanned before a term is located and its frequency and position information
+ * may be processed. In a large index with user-entered query terms, query
+ * processing time is likely to be dominated not by term lookup but rather
+ * by the processing of frequency and positional data. In a small index
+ * or when many uncommon query terms are generated (e.g., by wildcard
+ * queries) term lookup may become a dominant cost.
+ *
+ * In particular, <code>numUniqueTerms/interval</code> terms are read into
+ * memory by an IndexReader, and, on average, <code>interval/2</code> terms
+ * must be scanned for each random term access.
+ *
+ * @see #DEFAULT_TERM_INDEX_INTERVAL
+ * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
+ */
+ @Deprecated
+ public void setTermIndexInterval(int interval) {
+ ensureOpen();
+ config.setTermIndexInterval(interval);
+ }
+
+ /** Expert: Return the interval between indexed terms.
+ *
+ * @see #setTermIndexInterval(int)
+ * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
+ */
+ @Deprecated
+ public int getTermIndexInterval() {
+ // We pass false because this method is called by SegmentMerger while we are in the process of closing
+ ensureOpen(false);
+ return config.getTermIndexInterval();
+ }
+
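[Editorial note — not part of the patch. A worked instance of the <code>numUniqueTerms/interval</code> and <code>interval/2</code> formulas quoted above, with hypothetical numbers and the stock default interval of 128:

    // 10,000,000 unique terms, interval = 128 (DEFAULT_TERM_INDEX_INTERVAL):
    //   terms held in RAM  = 10_000_000 / 128  ≈ 78,125 index entries
    //   avg terms scanned  = 128 / 2           = 64 per random term lookup
    // Doubling the interval roughly halves the RAM footprint but doubles
    // the average scan length per lookup.
]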
+ /**
+ * Constructs an IndexWriter for the index in <code>d</code>.
+ * Text will be analyzed with <code>a</code>. If <code>create</code>
+ * is true, then a new, empty index will be created in
+ * <code>d</code>, replacing the index already there, if any.
+ *
+ * @param d the index directory
+ * @param a the analyzer to use
+ * @param create <code>true</code> to create the index or overwrite
+ * the existing one; <code>false</code> to append to the existing
+ * index
+ * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ * via the MaxFieldLength constructor.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and <code>create</code> is
+ * <code>false</code> or if there is any other low-level
+ * IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
+ */
+ @Deprecated
+ public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
+ create ? OpenMode.CREATE : OpenMode.APPEND));
+ setMaxFieldLength(mfl.getLimit());
+ }
+
+ /**
+ * Constructs an IndexWriter for the index in
+ * <code>d</code>, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * <code>a</code>.
+ *
+ * @param d the index directory
+ * @param a the analyzer to use
+ * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ * via the MaxFieldLength constructor.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
+ */
+ @Deprecated
+ public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ this(d, new IndexWriterConfig(Version.LUCENE_31, a));
+ setMaxFieldLength(mfl.getLimit());
+ }
+
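[Editorial example — not part of the patch. The deprecated constructor above with the two stock MaxFieldLength policies; `dir` and `analyzer` are assumed, and only one writer may hold a directory's lock at a time, so these are alternatives, not simultaneous.]

    // Truncate fields at 10,000 terms (the historical default):
    IndexWriter limited = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    // ...or index every term, at the risk of OutOfMemoryError on huge docs:
    // IndexWriter unlimited = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);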
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>,
+ * first creating it if it does not already exist. Text
+ * will be analyzed with <code>a</code>.
+ *
+ * @param d the index directory
+ * @param a the analyzer to use
+ * @param deletionPolicy see above
+ * @param mfl whether or not to limit field lengths
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
+ */
+ @Deprecated
+ public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ this(d, new IndexWriterConfig(Version.LUCENE_31, a).setIndexDeletionPolicy(deletionPolicy));
+ setMaxFieldLength(mfl.getLimit());
+ }
+
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>.
+ * Text will be analyzed with <code>a</code>. If
+ * <code>create</code> is true, then a new, empty index
+ * will be created in <code>d</code>, replacing the index
+ * already there, if any.
+ *
+ * @param d the index directory
+ * @param a the analyzer to use
+ * @param create <code>true</code> to create the index or overwrite
+ * the existing one; <code>false</code> to append to the existing
+ * index
+ * @param deletionPolicy see above
+ * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and <code>create</code> is
+ * <code>false</code> or if there is any other low-level
+ * IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
+ */
+ @Deprecated
+ public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
+ create ? OpenMode.CREATE : OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy));
+ setMaxFieldLength(mfl.getLimit());
+ }
+
+ /**
+ * Expert: constructs an IndexWriter on specific commit
+ * point, with a custom {@link IndexDeletionPolicy}, for
+ * the index in <code>d</code>. Text will be analyzed
+ * with <code>a</code>.
+ *
+ * <p>This is only meaningful if you've used a {@link
+ * IndexDeletionPolicy} in the past that keeps more than
+ * just the last commit.
+ *
+ * <p>This operation is similar to {@link #rollback()},
+ * except that method can only rollback what's been done
+ * with the current instance of IndexWriter since its last
+ * commit, whereas this method can rollback to an
+ * arbitrary commit point from the past, assuming the
+ * {@link IndexDeletionPolicy} has preserved past
+ * commits.
+ *
+ * @param d the index directory
+ * @param a the analyzer to use
+ * @param deletionPolicy see above
+ * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
+ * @param commit which commit to open
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and <code>create</code> is
+ * <code>false</code> or if there is any other low-level
+ * IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
+ */
+ @Deprecated
+ public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ this(d, new IndexWriterConfig(Version.LUCENE_31, a)
+ .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
+ setMaxFieldLength(mfl.getLimit());
+ }
+
+ /**
+ * Constructs a new IndexWriter per the settings given in <code>conf</code>.
+ * Note that the passed in {@link IndexWriterConfig} is
+ * privately cloned; if you need to make subsequent "live"
+ * changes to the configuration use {@link #getConfig}.
+ *
+ *
+ * @param d
+ * the index directory. The index is either created or appended
+ * according <code>conf.getOpenMode()</code>.
+ * @param conf
+ * the configuration settings according to which IndexWriter should
+ * be initialized.
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws LockObtainFailedException
+ * if another writer has this index open (write.lock
+ * could not be obtained)
+ * @throws IOException
+ * if the directory cannot be read/written to, or if it does not
+ * exist and conf.getOpenMode()
is
+ * OpenMode.APPEND
or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, IndexWriterConfig conf)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ config = (IndexWriterConfig) conf.clone();
+ directory = d;
+ analyzer = conf.getAnalyzer();
+ infoStream = defaultInfoStream;
+ writeLockTimeout = conf.getWriteLockTimeout();
+ similarity = conf.getSimilarity();
+ mergePolicy = conf.getMergePolicy();
+ mergePolicy.setIndexWriter(this);
+ mergeScheduler = conf.getMergeScheduler();
+ bufferedDeletesStream = new BufferedDeletesStream(messageID);
+ bufferedDeletesStream.setInfoStream(infoStream);
+ poolReaders = conf.getReaderPooling();
+
+ writeLock = directory.makeLock(WRITE_LOCK_NAME);
+
+ if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+
+ OpenMode mode = conf.getOpenMode();
+ boolean create;
+ if (mode == OpenMode.CREATE) {
+ create = true;
+ } else if (mode == OpenMode.APPEND) {
+ create = false;
+ } else {
+ // CREATE_OR_APPEND - create only if an index does not exist
+ create = !IndexReader.indexExists(directory);
+ }
+
+ boolean success = false;
+
+ // TODO: we should check whether this index is too old,
+ // and throw an IndexFormatTooOldExc up front, here,
+ // instead of later when merge, applyDeletes, getReader
+ // is attempted. I think to do this we should store the
+ // oldest segment's version in segments_N.
+
+ try {
+ if (create) {
+ // Try to read first. This is to allow create
+ // against an index that's currently open for
+ // searching. In this case we write the next
+ // segments_N file with no segments:
+ try {
+ segmentInfos.read(directory);
+ segmentInfos.clear();
+ } catch (IOException e) {
+ // Likely this means it's a fresh directory
+ }
+
+ // Record that we have a change (zero out all
+ // segments) pending:
+ changeCount++;
+ segmentInfos.changed();
+ } else {
+ segmentInfos.read(directory);
+
+ IndexCommit commit = conf.getIndexCommit();
+ if (commit != null) {
+ // Swap out all segments, but, keep metadata in
+ // SegmentInfos, like version & generation, to
+ // preserve write-once. This is important if
+ // readers are open against the future commit
+ // points.
+ if (commit.getDirectory() != directory)
+ throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
+ SegmentInfos oldInfos = new SegmentInfos();
+ oldInfos.read(directory, commit.getSegmentsFileName());
+ segmentInfos.replace(oldInfos);
+ changeCount++;
+ segmentInfos.changed();
+ if (infoStream != null)
+ message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
+ }
+ }
+
+ rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
+
+ docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
+ docWriter.setInfoStream(infoStream);
+ docWriter.setMaxFieldLength(maxFieldLength);
+
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ synchronized(this) {
+ deleter = new IndexFileDeleter(directory,
+ conf.getIndexDeletionPolicy(),
+ segmentInfos, infoStream,
+ this);
+ }
+
+ if (deleter.startingCommitDeleted) {
+ // Deletion policy deleted the "head" commit point.
+ // We have to mark ourself as changed so that if we
+ // are closed w/o any further changes we write a new
+ // segments_N file.
+ changeCount++;
+ segmentInfos.changed();
+ }
+
+ if (infoStream != null) {
+ messageState();
+ }
+
+ success = true;
+
+ } finally {
+ if (!success) {
+ if (infoStream != null) {
+ message("init: hit exception on init; releasing write lock");
+ }
+ try {
+ writeLock.release();
+ } catch (Throwable t) {
+ // don't mask the original exception
+ }
+ writeLock = null;
+ }
+ }
+ }
+
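[Editorial example — not part of the patch. How <code>conf.getOpenMode()</code> drives the create/append decision implemented in the constructor above; `dir` and `analyzer` are assumed.]

    // CREATE_OR_APPEND (the default) creates a new index only when none exists;
    // CREATE wipes any existing index; APPEND requires one to be present.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer)
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, conf); // acquires write.lock or throws LockObtainFailedException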
+ private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
+ Directory cfsDir = null;
+ try {
+ if (info.getUseCompoundFile()) {
+ cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
+ } else {
+ cfsDir = directory;
+ }
+ return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
+ } finally {
+ if (info.getUseCompoundFile() && cfsDir != null) {
+ cfsDir.close();
+ }
+ }
+ }
+
+ private FieldInfos getCurrentFieldInfos() throws IOException {
+ final FieldInfos fieldInfos;
+ if (segmentInfos.size() > 0) {
+ if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
+ // Pre-3.1 index. In this case we sweep all
+ // segments, merging their FieldInfos:
+ fieldInfos = new FieldInfos();
+ for(SegmentInfo info : segmentInfos) {
+ final FieldInfos segFieldInfos = getFieldInfos(info);
+ final int fieldCount = segFieldInfos.size();
+ for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
+
+ * <p>Determines the largest segment (measured by
+ * document count) that may be merged with other segments.
+ * Small values (e.g., less than 10,000) are best for
+ * interactive indexing, as this limits the length of
+ * pauses while indexing to a few seconds. Larger values
+ * are best for batched indexing and speedier
+ * searches. The default value is {@link Integer#MAX_VALUE}.</p>
+ *
+ * <p>Note that this method is a convenience method: it
+ * just calls mergePolicy.setMaxMergeDocs as long as
+ * mergePolicy is an instance of {@link LogMergePolicy}.
+ * Otherwise an IllegalArgumentException is thrown.</p>
+ *
+ * <p>The default merge policy ({@link
+ * LogByteSizeMergePolicy}) also allows you to set this
+ * limit by net size (in MB) of the segment, using {@link
+ * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
+ *
+ * <p>Returns the largest segment (measured by document
+ * count) that may be merged with other segments.</p>
+ *
+ * <p>Note that this method is a convenience method: it
+ * just calls mergePolicy.getMaxMergeDocs as long as
+ * mergePolicy is an instance of {@link LogMergePolicy}.
+ * Otherwise an IllegalArgumentException is thrown.</p>
+ *
+ * <p>When this is set, the writer will flush every
+ * maxBufferedDocs added documents. Pass in {@link
+ * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
+ * to number of buffered documents. Note that if flushing
+ * by RAM usage is also enabled, then the flush will be
+ * triggered by whichever comes first.</p>
+ *
+ * <p>Disabled by default (writer flushes by RAM usage).</p>
+ *
+ * <p>When this is set, the writer will flush whenever
+ * buffered documents and deletions use this much RAM.
+ * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
+ * triggering a flush due to RAM usage. Note that if
+ * flushing by document count is also enabled, then the
+ * flush will be triggered by whichever comes first.</p>
+ *
+ * <p><b>NOTE</b>: the account of RAM usage for pending
+ * deletions is only approximate. Specifically, if you
+ * delete by Query, Lucene currently has no way to measure
+ * the RAM usage of individual Queries so the accounting
+ * will under-estimate and you should compensate by either
+ * calling commit() periodically yourself, or by using
+ * {@link #setMaxBufferedDeleteTerms} to flush by count
+ * instead of RAM usage (each buffered delete Query counts
+ * as one).</p>
+ *
+ * <p><b>NOTE</b>: because IndexWriter uses
+ * <code>int</code>s when managing its internal storage,
+ * the absolute maximum value for this setting is somewhat
+ * less than 2048 MB. The precise limit depends on
+ * various factors, such as how large your documents are,
+ * how many fields have norms, etc., so it's best to set
+ * this value comfortably under 2048.
+ *
+ * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
+ *
+ * <p>Determines the minimal number of delete terms required before the buffered
+ * in-memory delete terms are applied and flushed. If there are documents
+ * buffered in memory at the time, they are merged and a new segment is
+ * created.</p>
+ *
+ * <p>Disabled by default (writer flushes by RAM usage).</p>
+ *
+ * <p>Note that this method is a convenience method: it
+ * just calls mergePolicy.setMergeFactor as long as
+ * mergePolicy is an instance of {@link LogMergePolicy}.
+ * Otherwise an IllegalArgumentException is thrown.</p>
+ *
+ * <p>This must never be less than 2. The default value is 10.
+ * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
+ */
+ @Deprecated
+ public void setMergeFactor(int mergeFactor) {
+ getLogMergePolicy().setMergeFactor(mergeFactor);
+ }
+
+ /**
+ * Returns the number of segments that are merged at
+ * once and also controls the total number of segments
+ * allowed to accumulate in the index.
+ *
+ * <p>Note that this method is a convenience method: it
+ * just calls mergePolicy.getMergeFactor as long as
+ * mergePolicy is an instance of {@link LogMergePolicy}.
+ * Otherwise an IllegalArgumentException is thrown.</p>
+ *
+ * <p>If an Exception is hit during close, eg due to disk
+ * full or some other reason, then both the on-disk index
+ * and the internal state of the IndexWriter instance will
+ * be consistent. However, the close will not be complete
+ * even though part of it (flushing buffered documents)
+ * may have succeeded, so the write lock will still be
+ * held. If you can correct the underlying cause (eg free up
+ * some disk space) then you can call close() again.
+ * Failing that, if you want to force the write lock to be
+ * released (dangerous, because you may then lose buffered
+ * docs in the IndexWriter instance) then you can do
+ * something like this:
+ * try {
+ * writer.close();
+ * } finally {
+ * if (IndexWriter.isLocked(directory)) {
+ * IndexWriter.unlock(directory);
+ * }
+ * }
+ *
+ *
+ * after which, you must be certain not to use the writer
+ * instance anymore.
NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer, again. See above for details.
+ * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void close() throws CorruptIndexException, IOException { + close(true); + } + + /** + * Closes the index with or without waiting for currently + * running merges to finish. This is only meaningful when + * using a MergeScheduler that runs merges in background + * threads. + * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer, again. See above for details.
+ * + *NOTE: it is dangerous to always call + * close(false), especially when IndexWriter is not open + * for very long, because this can result in "merge + * starvation" whereby long merges will never have a + * chance to finish. This will cause too many segments in + * your index over time.
+ * + * @param waitForMerges if true, this call will block + * until all merges complete; else, it will ask all + * running merges to abort, wait until those merges have + * finished (which should be at most a few seconds), and + * then return. + */ + public void close(boolean waitForMerges) throws CorruptIndexException, IOException { + + // Ensure that only one thread actually gets to do the closing: + if (shouldClose()) { + // If any methods have hit OutOfMemoryError, then abort + // on close, in case the internal state of IndexWriter + // or DocumentsWriter is corrupt + if (hitOOM) + rollbackInternal(); + else + closeInternal(waitForMerges); + } + } + + // Returns true if this thread should attempt to close, or + // false if IndexWriter is now closed; else, waits until + // another thread finishes closing + synchronized private boolean shouldClose() { + while(true) { + if (!closed) { + if (!closing) { + closing = true; + return true; + } else { + // Another thread is presently trying to close; + // wait until it finishes one way (closes + // successfully) or another (fails to close) + doWait(); + } + } else + return false; + } + } + + private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException { + + try { + if (infoStream != null) { + message("now flush at close waitForMerges=" + waitForMerges); + } + + docWriter.close(); + + // Only allow a new merge to be triggered if we are + // going to wait for merges: + if (!hitOOM) { + flush(waitForMerges, true); + } + + if (waitForMerges) + // Give merge scheduler last chance to run, in case + // any pending merges are waiting: + mergeScheduler.merge(this); + + mergePolicy.close(); + + synchronized(this) { + finishMerges(waitForMerges); + stopMerges = true; + } + + mergeScheduler.close(); + + if (infoStream != null) + message("now call final commit()"); + + if (!hitOOM) { + commitInternal(null); + } + + if (infoStream != null) + message("at close: " + segString()); + + synchronized(this) { + readerPool.close(); + docWriter = null; + deleter.close(); + } + + if (writeLock != null) { + writeLock.release(); // release write lock + writeLock = null; + } + synchronized(this) { + closed = true; + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "closeInternal"); + } finally { + synchronized(this) { + closing = false; + notifyAll(); + if (!closed) { + if (infoStream != null) + message("hit exception while closing"); + } + } + } + } + + /** Returns the Directory used by this index. */ + public Directory getDirectory() { + // Pass false because the flush during closing calls getDirectory + ensureOpen(false); + return directory; + } + + /** Returns the analyzer used by this index. */ + public Analyzer getAnalyzer() { + ensureOpen(); + return analyzer; + } + + /** Returns total number of docs in this index, including + * docs not yet flushed (still in the RAM buffer), + * not counting deletions. + * @see #numDocs */ + public synchronized int maxDoc() { + ensureOpen(); + int count; + if (docWriter != null) + count = docWriter.getNumDocs(); + else + count = 0; + + count += segmentInfos.totalDocCount(); + return count; + } + + /** Returns total number of docs in this index, including + * docs not yet flushed (still in the RAM buffer), and + * including deletions. NOTE: buffered deletions + * are not counted. If you really need these to be + * counted you should call {@link #commit()} first. 
+ * @see #numDocs */ + public synchronized int numDocs() throws IOException { + ensureOpen(); + int count; + if (docWriter != null) + count = docWriter.getNumDocs(); + else + count = 0; + + for (final SegmentInfo info : segmentInfos) { + count += info.docCount - numDeletedDocs(info); + } + return count; + } + + public synchronized boolean hasDeletions() throws IOException { + ensureOpen(); + if (bufferedDeletesStream.any()) { + return true; + } + if (docWriter.anyDeletions()) { + return true; + } + for (final SegmentInfo info : segmentInfos) { + if (info.hasDeletions()) { + return true; + } + } + return false; + } + + /** + * The maximum number of terms that will be indexed for a single field in a + * document. This limits the amount of memory required for indexing, so that + * collections with very large files will not crash the indexing process by + * running out of memory. + * Note that this effectively truncates large documents, excluding from the + * index terms that occur further in the document. If you know your source + * documents are large, be sure to set this value high enough to accommodate + * the expected size. If you set it to Integer.MAX_VALUE, then the only limit + * is your memory, but you should anticipate an OutOfMemoryError. + * By default, no more than 10,000 terms will be indexed for a field. + * + * @see MaxFieldLength + * @deprecated remove in 4.0 + */ + @Deprecated + private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH; + + /** + * Adds a document to this index. If the document contains more than + * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are + * discarded. + * + *Note that if an Exception is hit (for example disk full) + * then the index will be consistent, but this document + * may not have been added. Furthermore, it's possible + * the index will have one segment in non-compound format + * even when using compound files (when a merge has + * partially succeeded).
+ * + *This method periodically flushes pending documents + * to the Directory (see above), and + * also periodically triggers segment merges in the index + * according to the {@link MergePolicy} in use.
+ * + *Merges temporarily consume space in the + * directory. The amount of space required is up to 1X the + * size of all segments being merged, when no + * readers/searchers are open against the index, and up to + * 2X the size of all segments being merged when + * readers/searchers are open against the index (see + * {@link #forceMerge(int)} for details). The sequence of + * primitive merge operations performed is governed by the + * merge policy. + * + *
Note that each term in the document can be no longer + * than 16383 characters, otherwise an + * IllegalArgumentException will be thrown.
+ * + *Note that it's possible to create an invalid Unicode + * string in java if a UTF16 surrogate pair is malformed. + * In this case, the invalid characters are silently + * replaced with the Unicode replacement character + * U+FFFD.
+ * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void addDocument(Document doc) throws CorruptIndexException, IOException { + addDocument(doc, analyzer); + } + + /** + * Adds a document to this index, using the provided analyzer instead of the + * value of {@link #getAnalyzer()}. If the document contains more than + * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are + * discarded. + * + *See {@link #addDocument(Document)} for details on + * index and IndexWriter state after an Exception, and + * flushing/merging temporary free space requirements.
+ * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { + ensureOpen(); + boolean doFlush = false; + boolean success = false; + try { + try { + doFlush = docWriter.updateDocument(doc, analyzer, null); + success = true; + } finally { + if (!success && infoStream != null) + message("hit exception adding document"); + } + if (doFlush) + flush(true, false); + } catch (OutOfMemoryError oom) { + handleOOM(oom, "addDocument"); + } + } + + /** + * Atomically adds a block of documents with sequentially + * assigned document IDs, such that an external reader + * will see all or none of the documents. + * + *WARNING: the index does not currently record + * which documents were added as a block. Today this is + * fine, because merging will preserve the block (as long + * as none them were deleted). But it's possible in the + * future that Lucene may more aggressively re-order + * documents (for example, perhaps to obtain better index + * compression), in which case you may need to fully + * re-index your documents at that time. + * + *
See {@link #addDocument(Document)} for details on + * index and IndexWriter state after an Exception, and + * flushing/merging temporary free space requirements.
+ * + *NOTE: tools that do offline splitting of an index + * (for example, IndexSplitter in contrib) or + * re-sorting of documents (for example, IndexSorter in + * contrib) are not aware of these atomically added documents + * and will likely break them up. Use such tools at your + * own risk! + * + *
NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @lucene.experimental + */ + public void addDocuments(Collectionterm
.
+ *
+ * NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param term the term to identify the documents to be deleted + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void deleteDocuments(Term term) throws CorruptIndexException, IOException { + ensureOpen(); + try { + if (docWriter.deleteTerm(term, false)) { + flush(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "deleteDocuments(Term)"); + } + } + + /** + * Deletes the document(s) containing any of the + * terms. All deletes are flushed at the same time. + * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param terms array of terms to identify the documents + * to be deleted + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException { + ensureOpen(); + try { + if (docWriter.deleteTerms(terms)) { + flush(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "deleteDocuments(Term..)"); + } + } + + /** + * Deletes the document(s) matching the provided query. + * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param query the query to identify the documents to be deleted + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void deleteDocuments(Query query) throws CorruptIndexException, IOException { + ensureOpen(); + try { + if (docWriter.deleteQuery(query)) { + flush(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "deleteDocuments(Query)"); + } + } + + /** + * Deletes the document(s) matching any of the provided queries. + * All deletes are flushed at the same time. + * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param queries array of queries to identify the documents + * to be deleted + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException { + ensureOpen(); + try { + if (docWriter.deleteQueries(queries)) { + flush(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "deleteDocuments(Query..)"); + } + } + + /** + * Updates a document by first deleting the document(s) + * containingterm
and then adding the new
+ * document. The delete and then add are atomic as seen
+ * by a reader on the same index (flush may happen only after
+ * the add).
+ *
+ * NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param term the term to identify the document(s) to be + * deleted + * @param doc the document to be added + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException { + ensureOpen(); + updateDocument(term, doc, getAnalyzer()); + } + + /** + * Updates a document by first deleting the document(s) + * containingterm
and then adding the new
+ * document. The delete and then add are atomic as seen
+ * by a reader on the same index (flush may happen only after
+ * the add).
+ *
+ * NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + * @param term the term to identify the document(s) to be + * deleted + * @param doc the document to be added + * @param analyzer the analyzer to use when analyzing the document + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public void updateDocument(Term term, Document doc, Analyzer analyzer) + throws CorruptIndexException, IOException { + ensureOpen(); + try { + boolean doFlush = false; + boolean success = false; + try { + doFlush = docWriter.updateDocument(doc, analyzer, term); + success = true; + } finally { + if (!success && infoStream != null) + message("hit exception updating document"); + } + if (doFlush) { + flush(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "updateDocument"); + } + } + + // for test purpose + final synchronized int getSegmentCount(){ + return segmentInfos.size(); + } + + // for test purpose + final synchronized int getNumBufferedDocuments(){ + return docWriter.getNumDocs(); + } + + // for test purpose + final synchronized int getDocCount(int i) { + if (i >= 0 && i < segmentInfos.size()) { + return segmentInfos.info(i).docCount; + } else { + return -1; + } + } + + // for test purpose + final int getFlushCount() { + return flushCount.get(); + } + + // for test purpose + final int getFlushDeletesCount() { + return flushDeletesCount.get(); + } + + final String newSegmentName() { + // Cannot synchronize on IndexWriter because that causes + // deadlock + synchronized(segmentInfos) { + // Important to increment changeCount so that the + // segmentInfos is written on close. Otherwise we + // could close, re-open and re-return the same segment + // name that was previously returned which can cause + // problems at least with ConcurrentMergeScheduler. + changeCount++; + segmentInfos.changed(); + return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); + } + } + + /** If non-null, information about merges will be printed to this. + */ + private PrintStream infoStream; + private static PrintStream defaultInfoStream; + + /** This method has been deprecated, as it is horribly + * inefficient and very rarely justified. Lucene's + * multi-segment search performance has improved over + * time, and the default TieredMergePolicy now targets + * segments with deletions. + * + * @deprecated */ + @Deprecated + public void optimize() throws CorruptIndexException, IOException { + forceMerge(1, true); + } + + /** This method has been deprecated, as it is horribly + * inefficient and very rarely justified. Lucene's + * multi-segment search performance has improved over + * time, and the default TieredMergePolicy now targets + * segments with deletions. + * + * @deprecated */ + @Deprecated + public void optimize(int maxNumSegments) throws CorruptIndexException, IOException { + forceMerge(maxNumSegments, true); + } + + /** This method has been deprecated, as it is horribly + * inefficient and very rarely justified. Lucene's + * multi-segment search performance has improved over + * time, and the default TieredMergePolicy now targets + * segments with deletions. + * + * @deprecated */ + @Deprecated + public void optimize(boolean doWait) throws CorruptIndexException, IOException { + forceMerge(1, doWait); + } + + /** + * Forces merge policy to merge segments until there's <= + * maxNumSegments. The actual merges to be + * executed are determined by the {@link MergePolicy}. 
+ * + *This is a horribly costly operation, especially when + * you pass a small {@code maxNumSegments}; usually you + * should only call this if the index is static (will no + * longer be changed).
+ * + *Note that this requires up to 2X the index size free + * space in your Directory (3X if you're using compound + * file format). For example, if your index size is 10 MB + * then you need up to 20 MB free for this to complete (30 + * MB if you're using compound file format). Also, + * it's best to call {@link #commit()} afterwards, + * to allow IndexWriter to free up disk space.
+ * + *If some but not all readers re-open while merging + * is underway, this will cause > 2X temporary + * space to be consumed as those new readers will then + * hold open the temporary segments at that time. It is + * best not to re-open readers while merging is running.
+ * + *The actual temporary usage could be much less than + * these figures (it depends on many factors).
+ * + *In general, once the this completes, the total size of the + * index will be less than the size of the starting index. + * It could be quite a bit smaller (if there were many + * pending deletes) or just slightly smaller.
+ * + *If an Exception is hit, for example + * due to disk full, the index will not be corrupt and no + * documents will have been lost. However, it may have + * been partially merged (some segments were merged but + * not all), and it's possible that one of the segments in + * the index will be in non-compound format even when + * using compound file format. This will occur when the + * Exception is hit during conversion of the segment into + * compound format.
+ * + *This call will merge those segments present in + * the index when the call started. If other threads are + * still adding documents and flushing segments, those + * newly created segments will not be merged unless you + * call forceMerge again.
+ * + *NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ * + *NOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * @see MergePolicy#findMerges + * + * @param maxNumSegments maximum number of segments left + * in the index after merging finishes + */ + public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException { + forceMerge(maxNumSegments, true); + } + + /** Just like {@link #forceMerge(int)}, except you can + * specify whether the call should block until + * all merging completes. This is only meaningful with a + * {@link MergeScheduler} that is able to run merges in + * background threads. + * + *
NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details.
+ */ + public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException { + ensureOpen(); + + if (maxNumSegments < 1) + throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); + + if (infoStream != null) { + message("forceMerge: index now " + segString()); + message("now flush at forceMerge"); + } + + flush(true, true); + + synchronized(this) { + resetMergeExceptions(); + segmentsToMerge.clear(); + for(SegmentInfo info : segmentInfos) { + segmentsToMerge.put(info, Boolean.TRUE); + } + mergeMaxNumSegments = maxNumSegments; + + // Now mark all pending & running merges as isMaxNumSegments: + for(final MergePolicy.OneMerge merge : pendingMerges) { + merge.maxNumSegments = maxNumSegments; + segmentsToMerge.put(merge.info, Boolean.TRUE); + } + + for ( final MergePolicy.OneMerge merge: runningMerges ) { + merge.maxNumSegments = maxNumSegments; + segmentsToMerge.put(merge.info, Boolean.TRUE); + } + } + + maybeMerge(maxNumSegments); + + if (doWait) { + synchronized(this) { + while(true) { + + if (hitOOM) { + throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge"); + } + + if (mergeExceptions.size() > 0) { + // Forward any exceptions in background merge + // threads to the current thread: + final int size = mergeExceptions.size(); + for(int i=0;iNOTE: if you call {@link #close(boolean)}
+ * with false, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ */
+ public void forceMergeDeletes(boolean doWait)
+ throws CorruptIndexException, IOException {
+ ensureOpen();
+
+ flush(true, true);
+
+ if (infoStream != null)
+ message("forceMergeDeletes: index now " + segString());
+
+ MergePolicy.MergeSpecification spec;
+
+ synchronized(this) {
+ spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
+ if (spec != null) {
+ final int numMerges = spec.merges.size();
+ for(int i=0;i<numMerges;i++)
+ registerMerge(spec.merges.get(i));
+ }
+ }
+
+ mergeScheduler.merge(this);
+
+ if (spec != null && doWait) {
+ final int numMerges = spec.merges.size();
+ synchronized(this) {
+ boolean running = true;
+ while(running) {
+
+ if (hitOOM) {
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
+ }
+
+ // Check each merge that MergePolicy asked us to
+ // do, to see if any of them are still running and
+ // if any of them have hit an exception.
+ running = false;
+ for(int i=0;i<numMerges;i++) {
+ final MergePolicy.OneMerge merge = spec.merges.get(i);
+ if (pendingMerges.contains(merge) || runningMerges.contains(merge)) {
+ running = true;
+ }
+ Throwable t = merge.getException();
+ if (t != null) {
+ IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
+ ioe.initCause(t);
+ throw ioe;
+ }
+ }
+
+ // If any of our merges are still running, wait:
+ if (running)
+ doWait();
+ }
+ }
+ }
+
+ // NOTE: in the ConcurrentMergeScheduler case, when
+ // doWait is false, we can return immediately while
+ // background threads accomplish the merging
+ }
+
+ /**
+ * Forces merging of all segments that have deleted
+ * documents. The actual merges to be executed are
+ * determined by the {@link MergePolicy}.
+ *
+ * To see how
+ * many deletions you have pending in your index, call
+ * {@link IndexReader#numDeletedDocs}.
+ *
+ * NOTE: this method first flushes a new
+ * segment (if there are indexed documents), and applies
+ * all buffered deletes.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ */
+ public void forceMergeDeletes() throws CorruptIndexException, IOException {
+ forceMergeDeletes(true);
+ }
+
+ /**
+ * Expert: asks the mergePolicy whether any merges are
+ * necessary now and, if so, runs the requested merges and
+ * then iterates (testing again whether merges are needed)
+ * until no more merges are returned by the mergePolicy.
+ *
+ * Explicit calls to maybeMerge() are usually not
+ * necessary. The most common case is when merge policy
+ * parameters have changed.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ */
+ public final void maybeMerge() throws CorruptIndexException, IOException {
+ maybeMerge(-1);
+ }
+
+ private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
+ ensureOpen(false);
+ updatePendingMerges(maxNumSegments);
+ mergeScheduler.merge(this);
+ }
+
+ private synchronized void updatePendingMerges(int maxNumSegments)
+ throws CorruptIndexException, IOException {
+ assert maxNumSegments == -1 || maxNumSegments > 0;
+
+ if (stopMerges) {
+ return;
+ }
+
+ // Do not start new merges if we've hit OOME
+ if (hitOOM) {
+ return;
+ }
+
+ final MergePolicy.MergeSpecification spec;
+ if (maxNumSegments != -1) {
+ spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
+ if (spec != null) {
+ final int numMerges = spec.merges.size();
+ for(int i=0;i<numMerges;i++) {
+ final MergePolicy.OneMerge merge = spec.merges.get(i);
+ merge.maxNumSegments = maxNumSegments;
+ }
+ }
+ } else {
+ spec = mergePolicy.findMerges(segmentInfos);
+ }
+
+ if (spec != null) {
+ final int numMerges = spec.merges.size();
+ for(int i=0;i<numMerges;i++) {
+ registerMerge(spec.merges.get(i));
+ }
+ }
+ }
+
+ /** Expert: to be used by a {@link MergePolicy} to avoid
+ * selecting merges for segments already being merged.
+ * The returned collection is not cloned, and thus is
+ * only safe to access if you hold IndexWriter's lock
+ * (which you do when IndexWriter invokes the
+ * MergePolicy).
+ *
+ * Do not alter the returned collection! */
+ public synchronized Collection<SegmentInfo> getMergingSegments() {
+ return mergingSegments;
+ }
+
+ /** Expert: the {@link MergeScheduler} calls this method
+ * to retrieve the next merge requested by the
+ * MergePolicy */
+ synchronized MergePolicy.OneMerge getNextMerge() {
+ if (pendingMerges.size() == 0)
+ return null;
+ else {
+ // Advance the merge from pending to running
+ MergePolicy.OneMerge merge = pendingMerges.removeFirst();
+ runningMerges.add(merge);
+ return merge;
+ }
+ }
+
+ /**
+ * Close the IndexWriter without committing
+ * any changes that have occurred since the last commit
+ * (or since it was opened, if commit hasn't been called).
+ * This removes any temporary files that had been created,
+ * after which the state of the index will be the same as
+ * it was when commit() was last called or when this
+ * writer was first opened. This also clears a previous
+ * call to {@link #prepareCommit}.
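+ *
+ * A minimal sketch (illustrative only; "writer" is an open
+ * IndexWriter; term and doc are hypothetical):
+ *
+ *   try {
+ *     writer.deleteDocuments(term);
+ *     writer.addDocument(doc);
+ *     writer.commit();
+ *   } catch (IOException e) {
+ *     writer.rollback(); // revert to the last commit; the writer is closed
+ *   }
+ *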
+ * @throws IOException if there is a low-level IO error
+ */
+ public void rollback() throws IOException {
+ ensureOpen();
+
+ // Ensure that only one thread actually gets to do the closing:
+ if (shouldClose())
+ rollbackInternal();
+ }
+
+ private void rollbackInternal() throws IOException {
+
+ boolean success = false;
+
+ if (infoStream != null ) {
+ message("rollback");
+ }
+
+ try {
+ synchronized(this) {
+ finishMerges(false);
+ stopMerges = true;
+ }
+
+ if (infoStream != null ) {
+ message("rollback: done finish merges");
+ }
+
+ // Must pre-close these two, in case they increment
+ // changeCount so that we can then set it to false
+ // before calling closeInternal
+ mergePolicy.close();
+ mergeScheduler.close();
+
+ bufferedDeletesStream.clear();
+
+ synchronized(this) {
+
+ if (pendingCommit != null) {
+ pendingCommit.rollbackCommit(directory);
+ deleter.decRef(pendingCommit);
+ pendingCommit = null;
+ notifyAll();
+ }
+
+ // Keep the same segmentInfos instance but replace all
+ // of its SegmentInfo instances. This is so the next
+ // attempt to commit using this instance of IndexWriter
+ // will always write to a new generation ("write
+ // once").
+ segmentInfos.rollbackSegmentInfos(rollbackSegments);
+ if (infoStream != null ) {
+ message("rollback: infos=" + segString(segmentInfos));
+ }
+
+ docWriter.abort();
+
+ assert testPoint("rollback before checkpoint");
+
+ // Ask deleter to locate unreferenced files & remove
+ // them:
+ deleter.checkpoint(segmentInfos, false);
+ deleter.refresh();
+ }
+
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.clear(null);
+
+ lastCommitChangeCount = changeCount;
+
+ success = true;
+ } catch (OutOfMemoryError oom) {
+ handleOOM(oom, "rollbackInternal");
+ } finally {
+ synchronized(this) {
+ if (!success) {
+ closing = false;
+ notifyAll();
+ if (infoStream != null)
+ message("hit exception during rollback");
+ }
+ }
+ }
+
+ closeInternal(false);
+ }
+
+ /**
+ * Delete all documents in the index.
+ *
+ * This method will drop all buffered documents and will
+ * remove all segments from the index. This change will not be
+ * visible until a {@link #commit()} has been called. This method
+ * can be rolled back using {@link #rollback()}.
+ *
+ * NOTE: this method is much faster than using
+ * deleteDocuments( new MatchAllDocsQuery() ).
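+ *
+ * For example (illustrative only; "writer" is an open IndexWriter):
+ *
+ *   writer.deleteAll();
+ *   writer.commit(); // make the now-empty index visible to readers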
+ *
+ * NOTE: this method will forcefully abort all merges
+ * in progress. If other threads are running {@link
+ * #forceMerge}, {@link #addIndexes(IndexReader[])} or
+ * {@link #forceMergeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
+ */
+ public synchronized void deleteAll() throws IOException {
+ ensureOpen();
+ try {
+
+ // Abort any running merges
+ finishMerges(false);
+
+ // Remove any buffered docs
+ docWriter.abort();
+
+ // Remove all segments
+ segmentInfos.clear();
+
+ // Ask deleter to locate unreferenced files & remove them:
+ deleter.checkpoint(segmentInfos, false);
+ deleter.refresh();
+
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.dropAll();
+
+ // Mark that the index has changed
+ ++changeCount;
+ segmentInfos.changed();
+ } catch (OutOfMemoryError oom) {
+ handleOOM(oom, "deleteAll");
+ } finally {
+ if (infoStream != null) {
+ message("hit exception during deleteAll");
+ }
+ }
+ }
+
+ private synchronized void finishMerges(boolean waitForMerges) throws IOException {
+ if (!waitForMerges) {
+
+ stopMerges = true;
+
+ // Abort all pending & running merges:
+ for (final MergePolicy.OneMerge merge : pendingMerges) {
+ if (infoStream != null)
+ message("now abort pending merge " + merge.segString(directory));
+ merge.abort();
+ mergeFinish(merge);
+ }
+ pendingMerges.clear();
+
+ for (final MergePolicy.OneMerge merge : runningMerges) {
+ if (infoStream != null)
+ message("now abort running merge " + merge.segString(directory));
+ merge.abort();
+ }
+
+ // These merges periodically check whether they have
+ // been aborted, and stop if so. We wait here to make
+ // sure they all stop. It should not take very long
+ // because the merge threads periodically check if
+ // they are aborted.
+ while(runningMerges.size() > 0) {
+ if (infoStream != null)
+ message("now wait for " + runningMerges.size() + " running merge to abort");
+ doWait();
+ }
+
+ stopMerges = false;
+ notifyAll();
+
+ assert 0 == mergingSegments.size();
+
+ if (infoStream != null)
+ message("all running merges have aborted");
+
+ } else {
+ // waitForMerges() will ensure any running addIndexes finishes.
+ // It's fine if a new one attempts to start because from our
+ // caller above the call will see that we are in the
+ // process of closing, and will throw an
+ // AlreadyClosedException.
+ waitForMerges();
+ }
+ }
+
+ /**
+ * Wait for any currently outstanding merges to finish.
+ *
+ * It is guaranteed that any merges started prior to calling this method
+ * will have completed once this method completes.
+ */
+ public synchronized void waitForMerges() {
+ ensureOpen(false);
+ if (infoStream != null) {
+ message("waitForMerges");
+ }
+ while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
+ doWait();
+ }
+
+ // sanity check
+ assert 0 == mergingSegments.size();
+
+ if (infoStream != null) {
+ message("waitForMerges done");
+ }
+ }
+
+ /**
+ * Called whenever the SegmentInfos has been updated and
+ * the index files referenced exist (correctly) in the
+ * index directory.
+ */
+ synchronized void checkpoint() throws IOException {
+ changeCount++;
+ segmentInfos.changed();
+ deleter.checkpoint(segmentInfos, false);
+ }
+
+ private synchronized void resetMergeExceptions() {
+ mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
+ mergeGen++;
+ }
+
+ private void noDupDirs(Directory... dirs) {
+ HashSet<Directory> dups = new HashSet<Directory>();
+ for(int i=0;i<dirs.length;i++) {
+ if (dups.contains(dirs[i]))
+ throw new IllegalArgumentException("Directory " + dirs[i] + " appears more than once");
+ if (dirs[i] == directory)
+ throw new IllegalArgumentException("Cannot add directory to itself");
+ dups.add(dirs[i]);
+ }
+ }
+
+ /**
+ * Adds all segments from an array of indexes into this index.
+ *
+ * This may be used to parallelize batch indexing. A large document
+ * collection can be broken into sub-collections. Each sub-collection can be
+ * indexed in parallel, on a different thread, process or machine. The
+ * complete index can then be created by merging sub-collection indexes
+ * with this method.
+ *
+ * NOTE: the index in each {@link Directory} must not be
+ * changed (opened by a writer) while this method is
+ * running. This method does not acquire a write lock in
+ * each input Directory, so it is up to the caller to
+ * enforce this.
+ *
+ * This method is transactional in how Exceptions are
+ * handled: it does not commit a new segments_N file until
+ * all indexes are added. This means if an Exception
+ * occurs (for example disk full), then either no indexes
+ * will have been added or they all will have been.
+ *
+ * Note that this requires temporary free space in the
+ * {@link Directory} up to 2X the sum of all input indexes
+ * (including the starting index). If readers/searchers
+ * are open against the starting index, then temporary
+ * free space required will be higher by the size of the
+ * starting index (see {@link #forceMerge(int)} for details).
+ *
+ * NOTE: this method only copies the segments of the incoming indexes
+ * and does not merge them. Therefore deleted documents are not removed and
+ * the new segments are not merged with the existing ones. Also, if the merge
+ * policy allows compound files, then any segment that is not compound is
+ * converted to such. However, if the segment is compound, it is copied as-is
+ * even if the merge policy does not allow compound files.
+ *
+ * This requires this index not be among those to be added.
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
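+ * A usage sketch (illustrative only; "writer" is the destination
+ * IndexWriter and dir1/dir2 are Directory instances holding the
+ * sub-indexes):
+ *
+ *   writer.addIndexes(dir1, dir2);
+ *   writer.commit();
+ *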
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
+ ensureOpen();
+
+ noDupDirs(dirs);
+
+ try {
+ if (infoStream != null)
+ message("flush at addIndexes(Directory...)");
+ flush(false, true);
+
+ int docCount = 0;
+ List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
+ Comparator<String> versionComparator = StringHelper.getVersionComparator();
+ for (Directory dir : dirs) {
+ if (infoStream != null) {
+ message("addIndexes: process directory " + dir);
+ }
+ SegmentInfos sis = new SegmentInfos(); // read infos from dir
+ sis.read(dir);
+ final Set<String> dsFilesCopied = new HashSet<String>();
+ final Map<String, String> dsNames = new HashMap<String, String>();
+ for (SegmentInfo info : sis) {
+ assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
+
+ docCount += info.docCount;
+ String newSegName = newSegmentName();
+ String dsName = info.getDocStoreSegment();
+
+ if (infoStream != null) {
+ message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
+ }
+
+ // create CFS only if the source segment is not CFS, and MP agrees it
+ // should be CFS.
+ boolean createCFS;
+ synchronized(this) { // Guard segmentInfos
+ createCFS = !info.getUseCompoundFile()
+ && mergePolicy.useCompoundFile(segmentInfos, info)
+ // optimize case only for segments that don't share doc stores
+ && versionComparator.compare(info.getVersion(), "3.1") >= 0;
+ }
+
+ if (createCFS) {
+ copySegmentIntoCFS(info, newSegName);
+ } else {
+ copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
+ }
+
+ infos.add(info);
+ }
+ }
+
+ synchronized (this) {
+ ensureOpen();
+ segmentInfos.addAll(infos);
+ checkpoint();
+ }
+
+ } catch (OutOfMemoryError oom) {
+ handleOOM(oom, "addIndexes(Directory...)");
+ }
+ }
+
+ /**
+ * Merges the provided indexes into this index.
+ *
+ * The provided IndexReaders are not closed.
+ *
+ * NOTE: this method does not merge the current segments,
+ * only the incoming ones.
+ *
+ * See {@link #addIndexes(Directory...)} for details on transactional
+ * semantics, temporary free space required in the Directory,
+ * and non-CFS segments on an Exception.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * NOTE: if you call {@link #close(boolean)}
+ * with false, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
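+ * A usage sketch (illustrative only; "writer" is the destination
+ * IndexWriter and "reader" is an open IndexReader over another index):
+ *
+ *   writer.addIndexes(reader);
+ *   writer.commit();
+ *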
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
+
+ ensureOpen();
+
+ try {
+ if (infoStream != null)
+ message("flush at addIndexes(IndexReader...)");
+ flush(false, true);
+
+ String mergedName = newSegmentName();
+ // TODO: somehow we should fix this merge so it's
+ // abortable so that IW.close(false) is able to stop it
+ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
+ mergedName, null, payloadProcessorProvider,
+ ((FieldInfos) docWriter.getFieldInfos().clone()));
+
+ for (IndexReader reader : readers) // add new indexes
+ merger.add(reader);
+
+ int docCount = merger.merge(); // merge 'em
+
+ SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
+ false, true,
+ merger.fieldInfos().hasProx(),
+ merger.fieldInfos().hasVectors());
+ setDiagnostics(info, "addIndexes(IndexReader...)");
+
+ boolean useCompoundFile;
+ synchronized(this) { // Guard segmentInfos
+ if (stopMerges) {
+ deleter.deleteNewFiles(info.files());
+ return;
+ }
+ ensureOpen();
+ useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
+ }
+
+ // Now create the compound file if needed
+ if (useCompoundFile) {
+ merger.createCompoundFile(mergedName + ".cfs", info);
+
+ // delete new non cfs files directly: they were never
+ // registered with IFD
+ synchronized(this) {
+ deleter.deleteNewFiles(info.files());
+ }
+ info.setUseCompoundFile(true);
+ }
+
+ // Register the new segment
+ synchronized(this) {
+ if (stopMerges) {
+ deleter.deleteNewFiles(info.files());
+ return;
+ }
+ ensureOpen();
+ segmentInfos.add(info);
+ checkpoint();
+ }
+
+ } catch (OutOfMemoryError oom) {
+ handleOOM(oom, "addIndexes(IndexReader...)");
+ }
+ }
+
+ /** Copies the segment into the IndexWriter's directory, as a compound segment. */
+ private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
+ String segFileName = IndexFileNames.segmentFileName(segName, IndexFileNames.COMPOUND_FILE_EXTENSION);
+ Collection<String> files = info.files();
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
+ for (String file : files) {
+ String ext = IndexFileNames.getExtension(file);
+ if (ext.equals(IndexFileNames.DELETES_EXTENSION)) {
+ // the deletes file must remain outside of the compound file
+ info.dir.copy(directory, file, IndexFileNames.segmentFileName(segName, ext));
+ } else {
+ cfsWriter.addFile(file, info.dir);
+ }
+ }
+
+ // Create the .cfs
+ cfsWriter.close();
+
+ info.dir = directory;
+ info.name = segName;
+ info.setUseCompoundFile(true);
+ }
+
+ /** Expert: prepare for commit.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @see #prepareCommit(Map) */
+ public final void prepareCommit() throws CorruptIndexException, IOException {
+ ensureOpen();
+ prepareCommit(null);
+ }
+
+ /** Expert: prepare for commit, specifying
+ * commitUserData Map (String -> String). This does the
+ * first phase of 2-phase commit. This method does all
+ * steps necessary to commit changes since this writer
+ * was opened: flushes pending added and deleted docs,
+ * syncs the index files, writes most of next segments_N
+ * file. After calling this you must call either {@link
+ * #commit()} to finish the commit, or {@link
+ * #rollback()} to revert the commit and undo all changes
+ * done since the writer was opened.
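+ *
+ * A two-phase-commit sketch (illustrative only; "writer" is an
+ * open IndexWriter):
+ *
+ *   writer.prepareCommit();
+ *   try {
+ *     writer.commit();
+ *   } catch (Exception e) {
+ *     writer.rollback();
+ *   }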
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ */
+
+ /**
+ * Commits all pending changes (added & deleted
+ * documents, segment merges, added
+ * indexes, etc.) to the index, and syncs all referenced
+ * index files, such that a reader will see the changes
+ * and the index updates will survive an OS or machine
+ * crash or power loss. Note that this does not wait for
+ * any running background merges to finish. This may be a
+ * costly operation, so you should test the cost in your
+ * application and do it only when really necessary.
+ *
+ * Note that this operation calls Directory.sync on
+ * the index files. That call should not return until the
+ * file contents & metadata are on stable storage. For
+ * FSDirectory, this calls the OS's fsync. But, beware:
+ * some hardware devices may in fact cache writes even
+ * during fsync, and return before the bits are actually
+ * on stable storage, to give the appearance of faster
+ * performance. If you have such a device, and it does
+ * not have a battery backup (for example) then on power
+ * loss it may still lose data. Lucene cannot guarantee
+ * consistency on such devices.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ */
+ public final void commit() throws CorruptIndexException, IOException {
+ commit(null);
+ }
+
+ /** Commits all changes to the index, specifying a
+ * commitUserData Map (String -> String). This just
+ * calls {@link #prepareCommit(Map)} (if you didn't
+ * already call it) and then {@link #finishCommit}.
+ *
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ */
+ public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
+ ensureOpen();
+ commitInternal(commitUserData);
+ }
+
+ /**
+ * Returns true iff the index in the named directory is
+ * currently locked.
+ * @param directory the directory to check for a lock
+ * @throws IOException if there is a low-level IO error
+ */
+ public static boolean isLocked(Directory directory) throws IOException {
+ return directory.makeLock(WRITE_LOCK_NAME).isLocked();
+ }
+
+ /**
+ * Forcibly unlocks the index in the named directory.
+ *
+ * Caution: this should only be used by failure recovery code,
+ * when it is known that no other process nor thread is in fact
+ * currently accessing this index.
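+ *
+ * For example (illustrative only; "dir" is the index Directory and
+ * no other process is using the index):
+ *
+ *   if (IndexWriter.isLocked(dir)) {
+ *     IndexWriter.unlock(dir);
+ *   }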
+ */
+ public static void unlock(Directory directory) throws IOException {
+ directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
+ }
+
+ /**
+ * Specifies maximum field length (in number of tokens/terms) in
+ * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
+ * the value set by the constructor.
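+ *
+ * The suggested replacement (illustrative only; "analyzer" is any
+ * existing Analyzer):
+ *
+ *   new LimitTokenCountAnalyzer(analyzer, 10000)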
+ *
+ * @deprecated use {@link LimitTokenCountAnalyzer} instead.
+ */
+ @Deprecated
+ public static final class MaxFieldLength {
+
+ private int limit;
+ private String name;
+
+ /**
+ * Private type-safe-enum-pattern constructor.
+ *
+ * @param name instance name
+ * @param limit maximum field length
+ */
+ private MaxFieldLength(String name, int limit) {
+ this.name = name;
+ this.limit = limit;
+ }
+
+ /**
+ * Public constructor to allow users to specify the maximum field size limit.
+ *
+ * @param limit The maximum field length
+ */
+ public MaxFieldLength(int limit) {
+ this("User-specified", limit);
+ }
+
+ public int getLimit() {
+ return limit;
+ }
+
+ @Override
+ public String toString()
+ {
+ return name + ":" + limit;
+ }
+
+ /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
+ public static final MaxFieldLength UNLIMITED
+ = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
+
+ /**
+ * Sets the maximum field length to
+ * {@link #DEFAULT_MAX_FIELD_LENGTH}
+ * */
+ public static final MaxFieldLength LIMITED
+ = new MaxFieldLength("LIMITED", 10000);
+ }
+
+ /** If {@link #getReader} has been called (ie, this writer
+ * is in near real-time mode), then after a merge
+ * completes, this class can be invoked to warm the
+ * reader on the newly merged segment, before the merge
+ * commits. This is not required for near real-time
+ * search, but will reduce search latency on opening a
+ * new near real-time reader after a merge completes.
+ *
+ * @lucene.experimental
+ *
+ * NOTE: warm is called before any deletes have
+ * been carried over to the merged segment. */
+ public static abstract class IndexReaderWarmer {
+ public abstract void warm(IndexReader reader) throws IOException;
+ }
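+
+ // Illustrative sketch (not part of the original source): warm the
+ // merged segment with a cheap search before NRT readers see it.
+ // "conf" is a hypothetical IndexWriterConfig:
+ //
+ //   conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
+ //     @Override
+ //     public void warm(IndexReader reader) throws IOException {
+ //       new IndexSearcher(reader).search(new MatchAllDocsQuery(), 10);
+ //     }
+ //   });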
+
+ /**
+ * Set the merged segment warmer. See {@link IndexReaderWarmer}.
+ *
+ * @deprecated use
+ * {@link IndexWriterConfig#setMergedSegmentWarmer}
+ * instead.
+ */
+ @Deprecated
+ public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
+ config.setMergedSegmentWarmer(warmer);
+ }
+
+ /**
+ * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
+ *
+ * @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
+ */
+ @Deprecated
+ public IndexReaderWarmer getMergedSegmentWarmer() {
+ return config.getMergedSegmentWarmer();
+ }
+
+ private void handleOOM(OutOfMemoryError oom, String location) {
+ if (infoStream != null) {
+ message("hit OutOfMemoryError inside " + location);
+ }
+ hitOOM = true;
+ throw oom;
+ }
+
+ // Used only by assert for testing. Current points:
+ // startDoFlush
+ // startCommitMerge
+ // startStartCommit
+ // midStartCommit
+ // midStartCommit2
+ // midStartCommitSuccess
+ // finishStartCommit
+ // startCommitMergeDeletes
+ // startMergeInit
+ // DocumentsWriter.ThreadState.init start
+ boolean testPoint(String name) {
+ return true;
+ }
+
+ synchronized boolean nrtIsCurrent(SegmentInfos infos) {
+ //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
+ ensureOpen();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
+ }
+
+ synchronized boolean isClosed() {
+ return closed;
+ }
+
+ /** Expert: remove any index files that are no longer
+ * used.
+ *
+ * IndexWriter normally deletes unused files itself,
+ * during indexing. However, on Windows, which disallows
+ * deletion of open files, if there is a reader open on
+ * the index then those files cannot be deleted. This is
+ * fine, because IndexWriter will periodically retry
+ * the deletion. However, IndexWriter doesn't try that often: only
+ * on open, close, flushing a new segment, and finishing
+ * a merge. If you don't do any of these actions with your
+ * IndexWriter, you'll see the unused files linger. If
+ * that's a problem, call this method to delete them
+ * (once you've closed the open readers that were
+ * preventing their deletion).
+ *
+ * In addition, you can call this method to delete
+ * unreferenced index commits. This might be useful if you
+ * are using an {@link IndexDeletionPolicy} which holds
+ * onto index commits until some criteria are met, but those
+ * commits are no longer needed. Otherwise, those commits will
+ * be deleted the next time commit() is called.
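+ *
+ * A typical sequence (illustrative only): close the reader that was
+ * pinning the files, then ask the writer to retry the deletions:
+ *
+ *   reader.close();
+ *   writer.deleteUnusedFiles();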
+ */
+ public synchronized void deleteUnusedFiles() throws IOException {
+ ensureOpen(false);
+ deleter.deletePendingFiles();
+ deleter.revisitPolicy();
+ }
+
+ // Called by DirectoryReader.doClose
+ synchronized void deletePendingFiles() throws IOException {
+ deleter.deletePendingFiles();
+ }
+
+ /**
+ * Sets the {@link PayloadProcessorProvider} to use when merging payloads.
+ * Note that the given pcp will be invoked for every segment that
+ * is merged, not only external ones that are given through
+ * {@link #addIndexes}. If you want only the payloads of the external segments
+ * to be processed, you can return null whenever a
+ * {@link DirPayloadProcessor} is requested for the {@link Directory} of the
+ * {@link IndexWriter}.
+ *
+ * The default is null, which means payloads are processed
+ * normally (copied) during segment merges. You can also unset it by passing
+ * null.
+ *
+ * NOTE: the set {@link PayloadProcessorProvider} will be in effect
+ * immediately, potentially for already running merges too. If you want to be
+ * sure it is used for further operations only, such as {@link #addIndexes} or
+ * {@link #forceMerge}, you can call {@link #waitForMerges()} before.
+ */
+ public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
+ ensureOpen();
+ payloadProcessorProvider = pcp;
+ }
+
+ /**
+ * Returns the {@link PayloadProcessorProvider} that is used during segment
+ * merges to process payloads.
+ */
+ public PayloadProcessorProvider getPayloadProcessorProvider() {
+ ensureOpen();
+ return payloadProcessorProvider;
+ }
+
+ // decides when flushes happen
+ final class FlushControl {
+
+ private boolean flushPending;
+ private boolean flushDeletes;
+ private int delCount;
+ private int docCount;
+ private boolean flushing;
+
+ private synchronized boolean setFlushPending(String reason, boolean doWait) {
+ if (flushPending || flushing) {
+ if (doWait) {
+ while(flushPending || flushing) {
+ try {
+ wait();
+ } catch (InterruptedException ie) {
+ throw new ThreadInterruptedException(ie);
+ }
+ }
+ }
+ return false;
+ } else {
+ if (infoStream != null) {
+ message("now trigger flush reason=" + reason);
+ }
+ flushPending = true;
+ return flushPending;
+ }
+ }
+
+ public synchronized void setFlushPendingNoWait(String reason) {
+ setFlushPending(reason, false);
+ }
+
+ public synchronized boolean getFlushPending() {
+ return flushPending;
+ }
+
+ public synchronized boolean getFlushDeletes() {
+ return flushDeletes;
+ }
+
+ public synchronized void clearFlushPending() {
+ if (infoStream != null) {
+ message("clearFlushPending");
+ }
+ flushPending = false;
+ flushDeletes = false;
+ docCount = 0;
+ notifyAll();
+ }
+
+ public synchronized void clearDeletes() {
+ delCount = 0;
+ }
+
+ public synchronized boolean waitUpdate(int docInc, int delInc) {
+ return waitUpdate(docInc, delInc, false);
+ }
+
+ public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
+ while(flushPending) {
+ try {
+ wait();
+ } catch (InterruptedException ie) {
+ throw new ThreadInterruptedException(ie);
+ }
+ }
+
+ docCount += docInc;
+ delCount += delInc;
+
+ // skipWait is only used when a thread is BOTH adding
+ // a doc and buffering a del term, and, the adding of
+ // the doc already triggered a flush
+ if (skipWait) {
+ return false;
+ }
+
+ final int maxBufferedDocs = config.getMaxBufferedDocs();
+ if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
+ docCount >= maxBufferedDocs) {
+ return setFlushPending("maxBufferedDocs", true);
+ }
+
+ final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
+ if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
+ delCount >= maxBufferedDeleteTerms) {
+ flushDeletes = true;
+ return setFlushPending("maxBufferedDeleteTerms", true);
+ }
+
+ return flushByRAMUsage("add delete/doc");
+ }
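+
+ // Illustrative only (not part of the original source): the three
+ // flush triggers checked above correspond to IndexWriterConfig
+ // settings ("analyzer" is a hypothetical Analyzer):
+ //
+ //   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
+ //   conf.setMaxBufferedDocs(1000);       // docCount trigger
+ //   conf.setMaxBufferedDeleteTerms(500); // delCount trigger
+ //   conf.setRAMBufferSizeMB(32.0);       // RAM trigger (flushByRAMUsage)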
+
+ public synchronized boolean flushByRAMUsage(String reason) {
+ final double ramBufferSizeMB = config.getRAMBufferSizeMB();
+ if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+ final long limit = (long) (ramBufferSizeMB*1024*1024);
+ long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
+ if (used >= limit) {
+
+ // DocumentsWriter may be able to free up some
+ // RAM:
+ // Lock order: FC -> DW
+ docWriter.balanceRAM();
+
+ used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
+ if (used >= limit) {
+ return setFlushPending("ram full: " + reason, false);
+ }
+ }
+ }
+ return false;
+ }
+ }
+
+ final FlushControl flushControl = new FlushControl();
+}