package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.Version;

/**
 * Holds all the configuration of {@link IndexWriter}. You
 * should instantiate this class, call the setters to set
 * your configuration, then pass it to {@link IndexWriter}.
 * Note that {@link IndexWriter} makes a private clone; if
 * you need to subsequently change settings use {@link
 * IndexWriter#getConfig}.
 *
 * <p>
 * All setter methods return {@link IndexWriterConfig} to allow chaining
 * settings conveniently, for example:
 *
 * <pre>
 * IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
 * conf.setter1().setter2();
 * </pre>
 */
public final class IndexWriterConfig implements Cloneable {

  /**
   * Specifies the open mode for {@link IndexWriter}:
   * <ul>
   * <li>{@link #CREATE} - creates a new index or overwrites an existing one.
   * <li>{@link #CREATE_OR_APPEND} - creates a new index if one does not exist,
   * otherwise it opens the index and documents will be appended.
   * <li>{@link #APPEND} - opens an existing index.
   * </ul>
   */
  public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }

  /** Default value is 128. Change using {@link #setTermIndexInterval(int)}. */
  public static final int DEFAULT_TERM_INDEX_INTERVAL = 128;

  /** Denotes a flush trigger is disabled. */
  public final static int DISABLE_AUTO_FLUSH = -1;

  /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;

  /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
  public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;

  /**
   * Default value is 16 MB (which means flush when buffered docs consume
   * approximately 16 MB RAM).
   */
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;

  /**
   * Default value for the write lock timeout (1,000 ms).
   *
   * @see #setDefaultWriteLockTimeout(long)
   */
  public static long WRITE_LOCK_TIMEOUT = 1000;

  /** The maximum number of simultaneous threads that may be
   *  indexing documents at once in IndexWriter; if more
   *  than this many threads arrive they will wait for
   *  others to finish. */
  public final static int DEFAULT_MAX_THREAD_STATES = 8;

  /** Default setting for {@link #setReaderPooling}. */
  public final static boolean DEFAULT_READER_POOLING = false;

  /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
  public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;

  /**
   * Sets the default (for any instance) maximum time to wait for a write lock
   * (in milliseconds).
   */
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    WRITE_LOCK_TIMEOUT = writeLockTimeout;
  }

  /**
   * Returns the default write lock timeout for newly instantiated
   * IndexWriterConfigs.
   *
   * @see #setDefaultWriteLockTimeout(long)
   */
  public static long getDefaultWriteLockTimeout() {
    return WRITE_LOCK_TIMEOUT;
  }

  private final Analyzer analyzer;
  private volatile IndexDeletionPolicy delPolicy;
  private volatile IndexCommit commit;
  private volatile OpenMode openMode;
  private volatile Similarity similarity;
  private volatile int termIndexInterval;
  private volatile MergeScheduler mergeScheduler;
  private volatile long writeLockTimeout;
  private volatile int maxBufferedDeleteTerms;
  private volatile double ramBufferSizeMB;
  private volatile int maxBufferedDocs;
  private volatile IndexingChain indexingChain;
  private volatile IndexReaderWarmer mergedSegmentWarmer;
  private volatile MergePolicy mergePolicy;
  private volatile int maxThreadStates;
  private volatile boolean readerPooling;
  private volatile int readerTermsIndexDivisor;

  private Version matchVersion;

  /**
   * Creates a new config, with defaults that match the specified
   * {@link Version} as well as the default {@link
   * Analyzer}. If matchVersion is >= {@link
   * Version#LUCENE_32}, {@link TieredMergePolicy} is used
   * for merging; else {@link LogByteSizeMergePolicy}.
   * Note that {@link TieredMergePolicy} is free to select
   * non-contiguous merges, which means docIDs may not
   * remain monotonic over time. If this is a problem you
   * should switch to {@link LogByteSizeMergePolicy} or
   * {@link LogDocMergePolicy}.
   */
  public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
    this.matchVersion = matchVersion;
    this.analyzer = analyzer;
    delPolicy = new KeepOnlyLastCommitDeletionPolicy();
    commit = null;
    openMode = OpenMode.CREATE_OR_APPEND;
    similarity = Similarity.getDefault();
    termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
    mergeScheduler = new ConcurrentMergeScheduler();
    writeLockTimeout = WRITE_LOCK_TIMEOUT;
    maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
    ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
    maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
    indexingChain = DocumentsWriter.defaultIndexingChain;
    mergedSegmentWarmer = null;
    if (matchVersion.onOrAfter(Version.LUCENE_32)) {
      mergePolicy = new TieredMergePolicy();
    } else {
      mergePolicy = new LogByteSizeMergePolicy();
    }
    maxThreadStates = DEFAULT_MAX_THREAD_STATES;
    readerPooling = DEFAULT_READER_POOLING;
    readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
  }
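
  // Illustrative sketch, not part of the original source: a typical way to build a
  // config and hand it to an IndexWriter. The "analyzer" and "dir" (Directory)
  // variables are assumed to exist in the caller's code.
  //
  //   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer)
  //       .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
  //       .setRAMBufferSizeMB(64.0);
  //   IndexWriter writer = new IndexWriter(dir, conf);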

  @Override
  public Object clone() {
    // Shallow clone is the only thing that's possible, since parameters like
    // analyzer, index commit etc. do not implement Cloneable.
    try {
      return super.clone();
    } catch (CloneNotSupportedException e) {
      // cannot happen, since this class implements Cloneable
      throw new RuntimeException(e);
    }
  }

  /** Returns the default analyzer to use for indexing documents. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /** Specifies the {@link OpenMode} of the index.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setOpenMode(OpenMode openMode) {
    this.openMode = openMode;
    return this;
  }

  /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
  public OpenMode getOpenMode() {
    return openMode;
  }
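
  // Illustrative sketch, not part of the original source: choosing an open mode
  // ("conf" is assumed to be an IndexWriterConfig built as in the example above).
  //
  //   conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);           // overwrite any existing index
  //   conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create if missing, else append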

  /**
   * Expert: allows an optional {@link IndexDeletionPolicy} implementation to be
   * specified. You can use this to control when prior commits are deleted from
   * the index. The default policy is {@link KeepOnlyLastCommitDeletionPolicy}
   * which removes all prior commits as soon as a new commit is done (this
   * matches behavior before 2.2). Creating your own policy can allow you to
   * explicitly keep previous "point in time" commits alive in the index for
   * some time, to allow readers to refresh to the new commit without having the
   * old commit deleted out from under them. This is necessary on filesystems
   * like NFS that do not support "delete on last close" semantics, which
   * Lucene's "point in time" search normally relies on.
   *
   * <p><b>NOTE:</b> the deletion policy cannot be null. If <code>null</code> is
   * passed, the deletion policy will be set to the default.
   *
   * <p>Only takes effect when IndexWriter is first created.
   */
  public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) {
    this.delPolicy = delPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : delPolicy;
    return this;
  }

  /**
   * Returns the {@link IndexDeletionPolicy} specified in
   * {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default
   * {@link KeepOnlyLastCommitDeletionPolicy}.
   */
  public IndexDeletionPolicy getIndexDeletionPolicy() {
    return delPolicy;
  }

  /**
   * Expert: allows opening a certain commit point. The default is null, which
   * opens the latest commit point.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setIndexCommit(IndexCommit commit) {
    this.commit = commit;
    return this;
  }

  /**
   * Returns the {@link IndexCommit} as specified in
   * {@link #setIndexCommit(IndexCommit)} or the default, <code>null</code>,
   * which specifies to open the latest index commit point.
   */
  public IndexCommit getIndexCommit() {
    return commit;
  }

  /**
   * Expert: set the {@link Similarity} implementation used by this IndexWriter.
   *
   * <p><b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
   * the similarity will be set to the default.
   *
   * @see Similarity#setDefault(Similarity)
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setSimilarity(Similarity similarity) {
    this.similarity = similarity == null ? Similarity.getDefault() : similarity;
    return this;
  }

  /**
   * Expert: returns the {@link Similarity} implementation used by this
   * IndexWriter. This defaults to the current value of
   * {@link Similarity#getDefault()}.
   */
  public Similarity getSimilarity() {
    return similarity;
  }

  /**
   * Expert: set the interval between indexed terms. Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms. Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * <p>
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term. In
   * particular, it is the maximum number of other terms that must be scanned
   * before a term is located and its frequency and position information may be
   * processed. In a large index with user-entered query terms, query processing
   * time is likely to be dominated not by term lookup but rather by the
   * processing of frequency and positional data. In a small index or when many
   * uncommon query terms are generated (e.g., by wildcard queries) term lookup
   * may become a dominant cost.
   *
   * <p>
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
   *
   * @see #DEFAULT_TERM_INDEX_INTERVAL
   *
   * <p>Takes effect immediately, but only applies to newly
   * flushed/merged segments. */
  public IndexWriterConfig setTermIndexInterval(int interval) {
    this.termIndexInterval = interval;
    return this;
  }

  /**
   * Returns the interval between indexed terms.
   *
   * @see #setTermIndexInterval(int)
   */
  public int getTermIndexInterval() {
    return termIndexInterval;
  }
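
  // Worked example, illustrative numbers only (not from the original source): with
  // the default interval of 128 and 1,000,000 unique terms, a reader keeps roughly
  // 1,000,000 / 128 ~= 7,800 index terms in memory and scans about 128 / 2 = 64
  // terms per random lookup. Doubling the interval halves the in-memory count
  // (~3,900 terms) but doubles the average scan length to 128 terms:
  //
  //   conf.setTermIndexInterval(256); // "conf" is assumed to be an IndexWriterConfig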

  /**
   * Expert: sets the merge scheduler used by this writer. The default is
   * {@link ConcurrentMergeScheduler}.
   *
   * <p><b>NOTE:</b> the merge scheduler cannot be null. If <code>null</code> is
   * passed, the merge scheduler will be set to the default.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setMergeScheduler(MergeScheduler mergeScheduler) {
    this.mergeScheduler = mergeScheduler == null ? new ConcurrentMergeScheduler() : mergeScheduler;
    return this;
  }

  /**
   * Returns the {@link MergeScheduler} that was set by
   * {@link #setMergeScheduler(MergeScheduler)}.
   */
  public MergeScheduler getMergeScheduler() {
    return mergeScheduler;
  }

  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this
   * instance. You can change the default value for all instances by calling
   * {@link #setDefaultWriteLockTimeout(long)}.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setWriteLockTimeout(long writeLockTimeout) {
    this.writeLockTimeout = writeLockTimeout;
    return this;
  }

  /**
   * Returns the allowed timeout when acquiring the write lock.
   *
   * @see #setWriteLockTimeout(long)
   */
  public long getWriteLockTimeout() {
    return writeLockTimeout;
  }

  /**
   * Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.
   *
   * <p>Disabled by default (writer flushes by RAM usage).
   *
   * @throws IllegalArgumentException if maxBufferedDeleteTerms
   * is enabled but smaller than 1
   * @see #setRAMBufferSizeMB
   *
   * <p>Takes effect immediately, but only the next time a
   * document is added, updated or deleted.
   */
  public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
        && maxBufferedDeleteTerms < 1)
      throw new IllegalArgumentException(
          "maxBufferedDeleteTerms must at least be 1 when enabled");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
    return this;
  }

  /**
   * Returns the number of buffered deleted terms that will trigger a flush if
   * enabled.
   *
   * @see #setMaxBufferedDeleteTerms(int)
   */
  public int getMaxBufferedDeleteTerms() {
    return maxBufferedDeleteTerms;
  }

  /**
   * Determines the amount of RAM that may be used for buffering added documents
   * and deletions before they are flushed to the Directory. Generally for
   * faster indexing performance it's best to flush by RAM usage instead of
   * document count and use as large a RAM buffer as you can.
   *
   * <p>
   * When this is set, the writer will flush whenever buffered documents and
   * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to RAM usage. Note that if flushing by document
   * count is also enabled, then the flush will be triggered by whichever comes
   * first.
   *
   * <p>
   * <b>NOTE</b>: the accounting of RAM usage for pending deletions is only
   * approximate. Specifically, if you delete by Query, Lucene currently has no
   * way to measure the RAM usage of individual Queries so the accounting will
   * under-estimate and you should compensate by either calling commit()
   * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
   * to flush by count instead of RAM usage (each buffered delete Query counts
   * as one).
   *
   * <p>
   * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its
   * internal storage, the absolute maximum value for this setting is somewhat
   * less than 2048 MB. The precise limit depends on various factors, such as
   * how large your documents are, how many fields have norms, etc., so it's
   * best to set this value comfortably under 2048.
   *
   * <p>
   * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
   *
   * <p>Takes effect immediately, but only the next time a
   * document is added, updated or deleted.
   *
   * @throws IllegalArgumentException
   *           if ramBufferSize is enabled but non-positive, or it disables
   *           ramBufferSize when maxBufferedDocs is already disabled
   */
  public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
    if (ramBufferSizeMB > 2048.0) {
      throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
          + " is too large; should be comfortably less than 2048");
    }
    if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
      throw new IllegalArgumentException(
          "ramBufferSize should be > 0.0 MB when enabled");
    if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    this.ramBufferSizeMB = ramBufferSizeMB;
    return this;
  }
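
  // Illustrative sketch, not part of the original source: bulk indexing often uses a
  // RAM buffer well above the 16 MB default, kept comfortably under the ~2048 MB
  // ceiling documented above ("conf" is assumed to be an IndexWriterConfig).
  //
  //   conf.setRAMBufferSizeMB(256.0); // flush roughly every 256 MB of buffered docs/deletes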

  /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
  public double getRAMBufferSizeMB() {
    return ramBufferSizeMB;
  }

  /**
   * Determines the minimal number of documents required before the buffered
   * in-memory documents are flushed as a new Segment. Large values generally
   * give faster indexing.
   *
   * <p>
   * When this is set, the writer will flush every maxBufferedDocs added
   * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
   * flush due to number of buffered documents. Note that if flushing by RAM
   * usage is also enabled, then the flush will be triggered by whichever comes
   * first.
   *
   * <p>
   * Disabled by default (writer flushes by RAM usage).
   *
   * <p>Takes effect immediately, but only the next time a
   * document is added, updated or deleted.
   *
   * @see #setRAMBufferSizeMB(double)
   *
   * @throws IllegalArgumentException
   *           if maxBufferedDocs is enabled but smaller than 2, or it disables
   *           maxBufferedDocs when ramBufferSize is already disabled
   */
  public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
    if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
      throw new IllegalArgumentException(
          "maxBufferedDocs must at least be 2 when enabled");
    if (maxBufferedDocs == DISABLE_AUTO_FLUSH
        && ramBufferSizeMB == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    this.maxBufferedDocs = maxBufferedDocs;
    return this;
  }
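
  // Illustrative sketch, not part of the original source: switching from RAM-based
  // flushing to document-count flushing ("conf" is assumed to be an IndexWriterConfig).
  // Order matters: enable the doc-count trigger before disabling the RAM trigger,
  // otherwise the "at least one ... must be enabled" check above throws.
  //
  //   conf.setMaxBufferedDocs(10000)
  //       .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);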

  /**
   * Returns the number of buffered added documents that will trigger a flush if
   * enabled.
   *
   * @see #setMaxBufferedDocs(int)
   */
  public int getMaxBufferedDocs() {
    return maxBufferedDocs;
  }

  /** Set the merged segment warmer. See {@link IndexReaderWarmer}.
   *
   * <p>Takes effect on the next merge. */
  public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
    this.mergedSegmentWarmer = mergeSegmentWarmer;
    return this;
  }

  /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */
  public IndexReaderWarmer getMergedSegmentWarmer() {
    return mergedSegmentWarmer;
  }

  /**
   * Expert: {@link MergePolicy} is invoked whenever there are changes to the
   * segments in the index. Its role is to select which merges to do, if any,
   * and return a {@link MergePolicy.MergeSpecification} describing the merges.
   * It also selects merges to do for optimize(). (The default is
   * {@link TieredMergePolicy} when matchVersion is {@link Version#LUCENE_32}
   * or later, otherwise {@link LogByteSizeMergePolicy}.)
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy) {
    this.mergePolicy = mergePolicy == null ? new LogByteSizeMergePolicy() : mergePolicy;
    return this;
  }

  /**
   * Returns the current MergePolicy in use by this writer.
   *
   * @see #setMergePolicy(MergePolicy)
   */
  public MergePolicy getMergePolicy() {
    return mergePolicy;
  }
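
  // Illustrative sketch, not part of the original source: if your application relies
  // on docIDs staying monotonic (see the constructor notes), an explicit log-based
  // policy can be set instead of TieredMergePolicy ("conf" is an IndexWriterConfig).
  //
  //   conf.setMergePolicy(new LogDocMergePolicy());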

  /**
   * Sets the max number of simultaneous threads that may be indexing documents
   * at once in IndexWriter. Values &lt; 1 are invalid and if passed
   * <code>maxThreadStates</code> will be set to
   * {@link #DEFAULT_MAX_THREAD_STATES}.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setMaxThreadStates(int maxThreadStates) {
    this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates;
    return this;
  }

  /** Returns the max number of simultaneous threads that
   *  may be indexing documents at once in IndexWriter. */
  public int getMaxThreadStates() {
    return maxThreadStates;
  }

  /** By default, IndexWriter does not pool the
   *  SegmentReaders it must open for deletions and
   *  merging, unless a near-real-time reader has been
   *  obtained by calling {@link IndexWriter#getReader}.
   *  This method lets you enable pooling without getting a
   *  near-real-time reader. NOTE: if you set this to
   *  false, IndexWriter will still pool readers once
   *  {@link IndexWriter#getReader} is called.
   *
   *  <p>Only takes effect when IndexWriter is first created. */
  public IndexWriterConfig setReaderPooling(boolean readerPooling) {
    this.readerPooling = readerPooling;
    return this;
  }

  /** Returns true if IndexWriter should pool readers even
   *  if {@link IndexWriter#getReader} has not been called. */
  public boolean getReaderPooling() {
    return readerPooling;
  }
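
  // Illustrative sketch, not part of the original source: pooling can be enabled up
  // front so the SegmentReaders opened for deletes and merges are reused even before
  // IndexWriter.getReader() is first called ("conf" is an IndexWriterConfig).
  //
  //   conf.setReaderPooling(true);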

  /** Expert: sets the {@link DocConsumer} chain to be used to process documents.
   *
   * <p>Only takes effect when IndexWriter is first created. */
  IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
    this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
    return this;
  }

  /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
  IndexingChain getIndexingChain() {
    return indexingChain;
  }

  /** Sets the termsIndexDivisor passed to any readers that
   *  IndexWriter opens, for example when applying deletes
   *  or creating a near-real-time reader in {@link
   *  IndexWriter#getReader}. If you pass -1, the terms index
   *  won't be loaded by the readers. This is only useful in
   *  advanced situations when you will only .next() through
   *  all terms; attempts to seek will hit an exception.
   *
   *  <p>Takes effect immediately, but only applies to
   *  readers opened after this call. */
  public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
    if (divisor <= 0 && divisor != -1) {
      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
    }
    readerTermsIndexDivisor = divisor;
    return this;
  }

  /** @see #setReaderTermsIndexDivisor(int) */
  public int getReaderTermsIndexDivisor() {
    return readerTermsIndexDivisor;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("matchVersion=").append(matchVersion).append("\n");
    sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n");
    sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
    sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
    sb.append("openMode=").append(openMode).append("\n");
    sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
    sb.append("termIndexInterval=").append(termIndexInterval).append("\n");
    sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
    sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
    sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n");
    sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n");
    sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n");
    sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n");
    sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n");
    sb.append("mergePolicy=").append(mergePolicy).append("\n");
    sb.append("maxThreadStates=").append(maxThreadStates).append("\n");
    sb.append("readerPooling=").append(readerPooling).append("\n");
    sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n");
    return sb.toString();
  }
}