package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.ThreadInterruptedException;
public class TestIndexWriter extends LuceneTestCase {

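  // Verifies that doc counts are tracked correctly across adds,
  // deletes, optimize and re-create: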
  public void testDocCount() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    long savedWriteLockTimeout = IndexWriterConfig.getDefaultWriteLockTimeout();
    try {
      IndexWriterConfig.setDefaultWriteLockTimeout(2000);
      assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout());
      writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    } finally {
      IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout);
    }

    // add 100 documents
    for (i = 0; i < 100; i++) {
      addDoc(writer);
    }
    assertEquals(100, writer.maxDoc());
    writer.close();

    // delete 40 documents
    reader = IndexReader.open(dir, false);
    for (i = 0; i < 40; i++) {
      reader.deleteDocument(i);
    }
    reader.close();

    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.numDocs());
    reader.close();

    // optimize the index and check that the new doc count is correct
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    assertEquals(60, writer.numDocs());
    writer.optimize();
    assertEquals(60, writer.maxDoc());
    assertEquals(60, writer.numDocs());
    writer.close();

    // check that the index reader gives the same numbers.
    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.maxDoc());
    assertEquals(60, reader.numDocs());
    reader.close();

    // make sure opening a new index for create over
    // this existing one works correctly:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals(0, writer.maxDoc());
    assertEquals(0, writer.numDocs());
    writer.close();
    dir.close();
  }

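  // Helpers that add trivial one- and two-field documents: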
  static void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }

  static void addDocWithIndex(IndexWriter writer, int index) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }

  public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
    String[] startFiles = dir.listAll();
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);
    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback();
    String[] endFiles = dir.listAll();

    Arrays.sort(startFiles);
    Arrays.sort(endFiles);

    if (!Arrays.equals(startFiles, endFiles)) {
      fail(message + ": before delete:\n    " + arrayToString(startFiles) + "\n  after delete:\n    " + arrayToString(endFiles));
    }
  }

  static final class StringSplitAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new StringSplitTokenizer(reader);
    }
  }

  private static class StringSplitTokenizer extends Tokenizer {
    private final String[] tokens;
    private int upto = 0;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public StringSplitTokenizer(Reader r) {
      try {
        final StringBuilder b = new StringBuilder();
        final char[] buffer = new char[1024];
        int n;
        while((n = r.read(buffer)) != -1) {
          b.append(buffer, 0, n);
        }
        tokens = b.toString().split(" ");
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
    }

    @Override
    public final boolean incrementToken() throws IOException {
      clearAttributes();
      if (upto < tokens.length) {
        termAtt.setEmpty();
        termAtt.append(tokens[upto]);
        upto++;
        return true;
      } else {
        return false;
      }
    }
  }

  /**
   * Make sure we skip wicked long terms.
   */
  public void testWickedLongTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));

    char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE-1];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);

    // Max length term is 16383, so this content produces
    // a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped:
    TermPositions tps = reader.termPositions(new Term("content", "another"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
  }

  static String arrayToString(String[] l) {
    String s = "";
    for(int i=0;i<l.length;i++) {
      if (i > 0) s += "\n    ";
      s += l[i];
    }
    return s;
  }

  // Make sure we can open an index for create even when a
  // reader holds it open (this fails pre lock-less
  // commits on windows):
  public void testCreateWithReader() throws IOException {
    Directory dir = newDirectory();

    // add one document & close writer
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);
    writer.close();

    // now open reader:
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals("should be one document", reader.numDocs(), 1);

    // now open index for create:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals("should be zero documents", writer.maxDoc(), 0);
    addDoc(writer);
    writer.close();

    assertEquals("should be one document", reader.numDocs(), 1);
    IndexReader reader2 = IndexReader.open(dir, true);
    assertEquals("should be one document", reader2.numDocs(), 1);
    reader.close();
    reader2.close();

    dir.close();
  }

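  // Writer must refuse further changes once it has been closed: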
  public void testChangesAfterClose() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);
    writer.close();

    try {
      addDoc(writer);
      fail("did not hit AlreadyClosedException");
    } catch (AlreadyClosedException e) {
      // expected
    }
    dir.close();
  }

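  // An index with zero documents should still commit and open cleanly: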
  public void testIndexNoDocuments() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.commit();
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.commit();
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();
    dir.close();
  }

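  // Lots of unique field names in one index; every term must remain findable: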
  public void testManyFields() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    for(int j=0;j<100;j++) {
      Document doc = new Document();
      doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(100, reader.maxDoc());
    assertEquals(100, reader.numDocs());
    for(int j=0;j<100;j++) {
      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
    }
    reader.close();
    dir.close();
  }

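  // A tiny RAM buffer should trigger a flush (new files) after every added doc: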
  public void testSmallRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setRAMBufferSizeMB(0.000001).
            setMergePolicy(newLogMergePolicy(10))
    );
    int lastNumFile = dir.listAll().length;
    for(int j=0;j<9;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      int numFile = dir.listAll().length;
      // Verify that with a tiny RAM buffer we see new
      // segment after every doc
      assertTrue(numFile > lastNumFile);
      lastNumFile = numFile;
    }
    writer.close();
    dir.close();
  }

  /**
   * Make sure it's OK to change RAM buffer size and
   * maxBufferedDocs in a write session.
   *
   * @deprecated after all the setters on IW go away (4.0), this test can be
   *             removed because changing ram buffer settings during a write
   *             session won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setRAMBufferSizeMB(
        IndexWriterConfig.DISABLE_AUTO_FLUSH));

    int lastFlushCount = -1;
    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  /**
   * @deprecated after setters on IW go away, this test can be deleted because
   *             changing those settings on IW won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer2() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10).setMaxBufferedDeleteTerms(
        10).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));

    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
    for(int j=1;j<52;j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  // Make sure it's OK to change RAM buffer size and
  // maxBufferedDocs in a write session, using IW.getConfig()
  public void testChangingRAMBufferWithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    int lastFlushCount = -1;
    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  public void testChangingRAMBuffer2WithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setMaxBufferedDeleteTerms(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
    for(int j=1;j<52;j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

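  // Mix of doc shapes: many unique terms, one repeated term, and very long
  // terms, to stress different in-memory indexing structures: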
  public void testDiverseDocs() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5));
    for(int i=0;i<3;i++) {
      // First, docs where every term is unique (heavy on
      // Posting instances)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        for(int k=0;k<100;k++) {
          doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED));
        }
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs (heavy on byte blocks)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs but the terms are very long (heavy on
      // char[] arrays)
      for(int j=0;j<100;j++) {
        StringBuilder b = new StringBuilder();
        String x = Integer.toString(j) + ".";
        for(int k=0;k<1000;k++)
          b.append(x);
        String longTerm = b.toString();

        Document doc = new Document();
        doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs;
    assertEquals(300, hits.length);
    searcher.close();

    dir.close();
  }

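  // Norms enabled on just one doc must survive both pre-flush and post-flush: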
  public void testEnablingNorms() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, pre flush
    for(int j=0;j<10;j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 8) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    Term searchTerm = new Term("field", "aaa");

    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(10, hits.length);
    searcher.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, post flush
    for(int j=0;j<27;j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 26) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(27, hits.length);
    searcher.close();

    IndexReader reader = IndexReader.open(dir, true);
    reader.close();

    dir.close();
  }

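  // One doc containing a single term 128K times; freq() must reflect that: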
  public void testHighFreqTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01));
    // Massive doc that has 128 K a's
    StringBuilder b = new StringBuilder(1024*1024);
    for(int i=0;i<4096;i++) {
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
    }
    Document doc = new Document();
    doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(1, reader.maxDoc());
    assertEquals(1, reader.numDocs());
    Term t = new Term("field", "a");
    assertEquals(1, reader.docFreq(t));
    TermDocs td = reader.termDocs(t);
    td.next();
    assertEquals(128*1024, td.freq());
    reader.close();
    dir.close();
  }

  // Make sure that a Directory implementation that does
  // not use LockFactory at all (ie overrides makeLock and
  // implements its own private locking) works OK.  This
  // was raised on java-dev as loss of backwards
  // compatibility.
  public void testNullLockFactory() throws IOException {

    final class MyRAMDirectory extends MockDirectoryWrapper {
      private LockFactory myLockFactory;
      MyRAMDirectory(Directory delegate) {
        super(random, delegate);
        lockFactory = null;
        myLockFactory = new SingleInstanceLockFactory();
      }
      @Override
      public Lock makeLock(String name) {
        return myLockFactory.makeLock(name);
      }
    }

    Directory dir = new MyRAMDirectory(new RAMDirectory());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("did not get right number of hits", 100, hits.length);
    searcher.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(OpenMode.CREATE));
    writer.close();
    dir.close();
  }

  public void testFlushWithNoMerging() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(2).
            setMergePolicy(newLogMergePolicy(10))
    );
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for(int i=0;i<19;i++)
      writer.addDocument(doc);
    writer.flush(false, true);
    writer.close();
    SegmentInfos sis = new SegmentInfos();
    sis.read(dir);
    // Since we flushed w/o allowing merging we should now
    // have 10 segments
    assertEquals(10, sis.size());
    dir.close();
  }

  // Make sure we can flush segment w/ norms, then add
  // empty doc (no norms) and flush
  public void testEmptyDocAfterFlushingRealDoc() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    if (VERBOSE) {
      System.out.println("\nTEST: now add empty doc");
    }
    writer.addDocument(new Document());
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();
    dir.close();
  }

  /**
   * Test that no NullPointerException will be raised,
   * when adding one document with a single, empty field
   * and term vectors enabled.
   * @throws IOException
   */
  public void testBadSegment() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document document = new Document();
    document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES));
    iw.addDocument(document);
    iw.close();
    dir.close();
  }

  public void testMaxThreadPriority() throws IOException {
    int pri = Thread.currentThread().getPriority();
    try {
      Directory dir = newDirectory();
      IndexWriterConfig conf = newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random))
          .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
      IndexWriter iw = new IndexWriter(dir, conf);
      Document document = new Document();
      document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
          Field.TermVector.YES));
      Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
      for(int i=0;i<4;i++)
        iw.addDocument(document);
      iw.close();
      dir.close();
    } finally {
      Thread.currentThread().setPriority(pri);
    }
  }

  // Just intercepts all merges & verifies that we are never
  // merging a segment with >= 20 (maxMergeDocs) docs
  private class MyMergeScheduler extends MergeScheduler {
    @Override
    synchronized public void merge(IndexWriter writer)
        throws CorruptIndexException, IOException {

      while(true) {
        MergePolicy.OneMerge merge = writer.getNextMerge();
        if (merge == null) {
          break;
        }
        for(int i=0;i<merge.segments.size();i++) {
          assert merge.segments.get(i).docCount < 20;
        }
        writer.merge(merge);
      }
    }

    @Override
    public void close() {}
  }

  public void testVariableSchema() throws Exception {
    Directory dir = newDirectory();
    int delID = 0;
    for(int i=0;i<20;i++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + i);
      }
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
      writer.setInfoStream(VERBOSE ? System.out : null);
      //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
      //lmp.setMergeFactor(2);
      //lmp.setUseCompoundFile(false);
      Document doc = new Document();
      String contents = "aa bb cc dd ee ff gg hh ii jj kk";

      if (i == 7) {
        // Add empty docs here
        doc.add(newField("content3", "", Field.Store.NO,
                         Field.Index.ANALYZED));
      } else {
        Field.Store storeVal;
        if (i%2 == 0) {
          doc.add(newField("content4", contents, Field.Store.YES,
                           Field.Index.ANALYZED));
          storeVal = Field.Store.YES;
        } else
          storeVal = Field.Store.NO;
        doc.add(newField("content1", contents, storeVal,
                         Field.Index.ANALYZED));
        doc.add(newField("content3", "", Field.Store.YES,
                         Field.Index.ANALYZED));
        doc.add(newField("content5", "", storeVal,
                         Field.Index.ANALYZED));
      }

      for(int j=0;j<4;j++)
        writer.addDocument(doc);
      writer.close();

      IndexReader reader = IndexReader.open(dir, false);
      reader.deleteDocument(delID++);
      reader.close();

      if (0 == i % 4) {
        writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
        //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy();
        //lmp2.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
      }
    }
    dir.close();
  }

  public void testNoWaitClose() throws Throwable {
    Directory directory = newDirectory();

    final Document doc = new Document();
    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(idField);

    for(int pass=0;pass<2;pass++) {
      if (VERBOSE) {
        System.out.println("TEST: pass=" + pass);
      }
      IndexWriterConfig conf = newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      if (pass == 2) {
        conf.setMergeScheduler(new SerialMergeScheduler());
      }
      IndexWriter writer = new IndexWriter(directory, conf);
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
      writer.setInfoStream(VERBOSE ? System.out : null);

      // have to use compound file to prevent running out of
      // descriptors when newDirectory returns a file-system
      // backed directory:
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true);

      for(int iter=0;iter<10;iter++) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter);
        }
        for(int j=0;j<199;j++) {
          idField.setValue(Integer.toString(iter*201+j));
          writer.addDocument(doc);
        }
        int delID = iter*199;
        for(int j=0;j<20;j++) {
          writer.deleteDocuments(new Term("id", Integer.toString(delID)));
          delID += 5;
        }

        // Force a bunch of merge threads to kick off so we
        // stress out aborting them on close:
        ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2);

        final IndexWriter finalWriter = writer;
        final ArrayList<Throwable> failure = new ArrayList<Throwable>();
        Thread t1 = new Thread() {
            @Override
            public void run() {
              boolean done = false;
              while(!done) {
                for(int i=0;i<100;i++) {
                  try {
                    finalWriter.addDocument(doc);
                  } catch (AlreadyClosedException e) {
                    done = true;
                    break;
                  } catch (NullPointerException e) {
                    done = true;
                    break;
                  } catch (Throwable e) {
                    e.printStackTrace(System.out);
                    failure.add(e);
                    done = true;
                    break;
                  }
                }
                Thread.yield();
              }
            }
          };

        if (failure.size() > 0) {
          throw failure.get(0);
        }
        t1.start();

        writer.close(false);
        t1.join();

        // Make sure reader can read
        IndexReader reader = IndexReader.open(directory, true);
        reader.close();

        // Reopen
        writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
        writer.setInfoStream(VERBOSE ? System.out : null);
      }
      writer.close();
    }
    directory.close();
  }

  // LUCENE-1084: test unlimited field length
  public void testUnlimitedMaxFieldLength() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document doc = new Document();
    StringBuilder b = new StringBuilder();
    for(int i=0;i<10000;i++)
      b.append(" a");
    b.append(" x");
    doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    reader.close();
    dir.close();
  }

  // LUCENE-1084: test user-specified field length
  public void testUserSpecifiedMaxFieldLength() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
    writer.setMaxFieldLength(100000);

    Document doc = new Document();
    StringBuilder b = new StringBuilder();
    for(int i=0;i<10000;i++)
      b.append(" a");
    b.append(" x");
    doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    reader.close();
    dir.close();
  }

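  // An empty string is a legal field name: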
  public void testEmptyFieldName() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    dir.close();
  }

  private static final class MockIndexWriter extends IndexWriter {

    public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
      super(dir, conf);
    }

    boolean afterWasCalled;
    boolean beforeWasCalled;

    @Override
    public void doAfterFlush() {
      afterWasCalled = true;
    }

    @Override
    protected void doBeforeFlush() throws IOException {
      beforeWasCalled = true;
    }
  }

  public void testDoBeforeAfterFlush() throws IOException {
    Directory dir = newDirectory();
    MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("field", "a field", Field.Store.YES,
                     Field.Index.ANALYZED));
    w.addDocument(doc);
    w.commit();
    assertTrue(w.beforeWasCalled);
    assertTrue(w.afterWasCalled);
    w.beforeWasCalled = false;
    w.afterWasCalled = false;
    w.deleteDocuments(new Term("field", "field"));
    w.commit();
    assertTrue(w.beforeWasCalled);
    assertTrue(w.afterWasCalled);
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    assertEquals(0, ir.numDocs());
    ir.close();

    dir.close();
  }

  public void testNegativePositions() throws Throwable {
    final TokenStream tokens = new TokenStream() {
      final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

      final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
      boolean first = true;

      @Override
      public boolean incrementToken() {
        if (!terms.hasNext()) return false;
        clearAttributes();
        termAtt.append(terms.next());
        posIncrAtt.setPositionIncrement(first ? 0 : 1);
        first = false;
        return true;
      }
    };

    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(new Field("field", tokens));
    w.addDocument(doc);
    w.commit();

    IndexSearcher s = new IndexSearcher(dir, false);
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("field", "a"));
    pq.add(new Term("field", "b"));
    pq.add(new Term("field", "c"));
    ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
    assertEquals(1, hits.length);

    Query q = new SpanTermQuery(new Term("field", "a"));
    hits = s.search(q, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(0, tps.nextPosition());
    w.close();

    s.close();
    dir.close();
  }

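  // Binary stored fields must honor the offset/length passed to Field: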
  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    byte[] b = new byte[50];
    for(int i=0;i<50;i++)
      b[i] = (byte) (i+77);

    Document doc = new Document();
    Field f = new Field("binary", b, 10, 17);
    byte[] bx = f.getBinaryValue();
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.getBinaryOffset());
    assertEquals(17, f.getBinaryLength());
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    doc = ir.document(0);
    f = doc.getField("binary");
    b = f.getBinaryValue();
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);

    ir.close();
    dir.close();
  }

  public void testPositionIncrementGapEmptyField() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new Analyzer(){
      Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader){
        return a.tokenStream(fieldName, reader);
      }
      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 100;
      }
    };
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    Field f = newField("field", "", Field.Store.NO,
                       Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    int[] poss = tpv.getTermPositions(0);
    assertEquals(1, poss.length);
    assertEquals(100, poss[0]);
    poss = tpv.getTermPositions(1);
    assertEquals(1, poss.length);
    assertEquals(101, poss[0]);
    r.close();
    dir.close();
  }

  // LUCENE-1468 -- make sure opening an IndexWriter with
  // create=true does not remove non-index files

  public void testOtherFiles() throws Throwable {
    Directory dir = newDirectory();
    try {
      // Create my own random file:
      IndexOutput out = dir.createOutput("myrandomfile");
      out.writeByte((byte) 42);
      out.close();

      new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();

      assertTrue(dir.fileExists("myrandomfile"));

      // Make sure this does not copy myrandomfile:
      Directory dir2 = new MockDirectoryWrapper(random, new RAMDirectory(dir));
      assertTrue(!dir2.fileExists("myrandomfile"));
      dir2.close();
    } finally {
      dir.close();
    }
  }

  public void testDeadlock() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
    Document doc = new Document();
    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
                     Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.commit();
    // index has 2 segments

    Directory dir2 = newDirectory();
    IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer2.addDocument(doc);
    writer2.close();

    IndexReader r1 = IndexReader.open(dir2, true);
    IndexReader r2 = (IndexReader) r1.clone();
    writer.addIndexes(new IndexReader[] {r1, r2});
    writer.close();

    IndexReader r3 = IndexReader.open(dir, true);
    assertEquals(5, r3.numDocs());
    r3.close();
    r1.close();
    r2.close();
    dir2.close();
    dir.close();
  }

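  // Background thread that keeps indexing while the test interrupts it
  // repeatedly; the index must stay intact no matter where the interrupt lands: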
  private class IndexerThreadInterrupt extends Thread {
    volatile boolean failed;
    volatile boolean finish;

    volatile boolean allowInterrupt = false;

    @Override
    public void run() {
      // LUCENE-2239: won't work with NIOFS/MMAP
      Directory dir = new MockDirectoryWrapper(random, new RAMDirectory());
      IndexWriter w = null;
      while(!finish) {
        try {
          while(!finish) {
            if (w != null) {
              w.close();
              w = null;
            }
            IndexWriterConfig conf = newIndexWriterConfig(
                TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2);
            w = new IndexWriter(dir, conf);

            Document doc = new Document();
            doc.add(newField("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED));
            for(int i=0;i<100;i++) {
              w.addDocument(doc);
              if (i%10 == 0) {
                w.commit();
              }
            }
            w.close();
            w = null;
            _TestUtil.checkIndex(dir);
            IndexReader.open(dir, true).close();

            // Strangely, if we interrupt a thread before
            // all classes are loaded, the class loader
            // seems to do scary things with the interrupt
            // status.  In java 1.5, it'll throw an
            // incorrect ClassNotFoundException.  In java
            // 1.6, it'll silently clear the interrupt.
            // So, on first iteration through here we
            // don't open ourselves up for interrupts
            // until we've done the above loop.
            allowInterrupt = true;
          }
        } catch (ThreadInterruptedException re) {
          if (VERBOSE) {
            System.out.println("TEST: got interrupt");
            re.printStackTrace(System.out);
          }
          Throwable e = re.getCause();
          assertTrue(e instanceof InterruptedException);
          if (finish) {
            break;
          }
        } catch (Throwable t) {
          System.out.println("FAILED; unexpected exception");
          t.printStackTrace(System.out);
          failed = true;
          break;
        }
      }

      if (!failed) {
        // clear interrupt state:
        Thread.interrupted();
        if (w != null) {
          try {
            w.rollback();
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
        try {
          _TestUtil.checkIndex(dir);
        } catch (Exception e) {
          failed = true;
          System.out.println("CheckIndex FAILED: unexpected exception");
          e.printStackTrace(System.out);
        }
        try {
          IndexReader r = IndexReader.open(dir, true);
          //System.out.println("doc count=" + r.numDocs());
          r.close();
        } catch (Exception e) {
          failed = true;
          System.out.println("IndexReader.open FAILED: unexpected exception");
          e.printStackTrace(System.out);
        }
      }
      try {
        dir.close();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }

  public void testThreadInterruptDeadlock() throws Exception {
    IndexerThreadInterrupt t = new IndexerThreadInterrupt();
    t.setDaemon(true);
    t.start();

    // Force class loader to load ThreadInterruptedException
    // up front... else we can see a false failure if 2nd
    // interrupt arrives while class loader is trying to
    // init this class (in servicing a first interrupt):
    assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException);

    // issue 100 interrupts to child thread
    int i = 0;
    while(i < 100) {
      Thread.sleep(10);
      if (t.allowInterrupt) {
        i++;
        t.interrupt();
      }
      if (!t.isAlive()) {
        break;
      }
    }
    t.finish = true;
    t.interrupt();
    t.join();
    assertFalse(t.failed);
  }

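  // Mix binary-stored fields with custom token streams, across in-memory
  // merging, a flushed segment and a forced merge; both the stored bytes
  // and the indexed terms must survive: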
  public void testIndexStoreCombos() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    byte[] b = new byte[50];
    for(int i=0;i<50;i++)
      b[i] = (byte) (i+77);

    Document doc = new Document();
    Field f = new Field("binary", b, 10, 17);
    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field1")));
    Field f2 = newField("string", "value", Field.Store.YES, Field.Index.ANALYZED);
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field2")));
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);

    // add 2 docs to test in-memory merging
    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field1")));
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field2")));
    w.addDocument(doc);

    // force segment flush so we can force a segment merge with doc3 later.
    w.commit();

    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field1")));
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field2")));
    w.addDocument(doc);
    w.commit();
    w.optimize(); // force segment merge.
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    doc = ir.document(0);
    f = doc.getField("binary");
    b = f.getBinaryValue();
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);

    assertTrue(ir.document(0).getFieldable("binary").isBinary());
    assertTrue(ir.document(1).getFieldable("binary").isBinary());
    assertTrue(ir.document(2).getFieldable("binary").isBinary());

    assertEquals("value", ir.document(0).get("string"));
    assertEquals("value", ir.document(1).get("string"));
    assertEquals("value", ir.document(2).get("string"));

    // test that the terms were indexed.
    assertTrue(ir.termDocs(new Term("binary","doc1field1")).next());
    assertTrue(ir.termDocs(new Term("binary","doc2field1")).next());
    assertTrue(ir.termDocs(new Term("binary","doc3field1")).next());
    assertTrue(ir.termDocs(new Term("string","doc1field2")).next());
    assertTrue(ir.termDocs(new Term("string","doc2field2")).next());
    assertTrue(ir.termDocs(new Term("string","doc3field2")).next());

    ir.close();
    dir.close();
  }

  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO));
    doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO));
    doc.add(newField("zzz", "1 2 3", Field.Store.YES, Field.Index.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    doc = r.document(0);
    Iterator<Fieldable> it = doc.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }

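  // An infoStream plus an essentially empty index must not trip up the writer: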
  public void testNoDocsIndex() throws Throwable {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    writer.setInfoStream(new PrintStream(bos));
    writer.addDocument(new Document());
    writer.close();

    dir.close();
  }

  public void testDeleteUnusedFiles() throws Exception {

    for(int iter=0;iter<2;iter++) {
      Directory dir = newDirectory();

      LogMergePolicy mergePolicy = newLogMergePolicy(true);
      mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS

      IndexWriter w = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
              setMergePolicy(mergePolicy)
      );
      Document doc = new Document();
      doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED));
      w.addDocument(doc);
      IndexReader r;
      if (iter == 0) {
        // use NRT
        r = w.getReader();
      } else {
        // don't use NRT
        w.commit();
        r = IndexReader.open(dir);
      }

      List<String> files = Arrays.asList(dir.listAll());
      assertTrue(files.contains("_0.cfs"));
      w.addDocument(doc);
      w.optimize();
      if (iter == 1) {
        w.commit();
      }
      IndexReader r2 = r.reopen();
      assertTrue(r != r2);
      files = Arrays.asList(dir.listAll());

      // NOTE: here we rely on "Windows" behavior, ie, even
      // though IW wanted to delete _0.cfs since it was
      // optimized away, because we have a reader open
      // against this file, it should still be here:
      assertTrue(files.contains("_0.cfs"));
      // optimize created this
      //assertTrue(files.contains("_2.cfs"));
      w.deleteUnusedFiles();

      files = Arrays.asList(dir.listAll());
      // r still holds this file open
      assertTrue(files.contains("_0.cfs"));
      //assertTrue(files.contains("_2.cfs"));

      r.close();
      if (iter == 0) {
        // on closing NRT reader, it calls writer.deleteUnusedFiles
        files = Arrays.asList(dir.listAll());
        assertFalse(files.contains("_0.cfs"));
      } else {
        // now writer can remove it
        w.deleteUnusedFiles();
        files = Arrays.asList(dir.listAll());
        assertFalse(files.contains("_0.cfs"));
      }
      //assertTrue(files.contains("_2.cfs"));

      w.close();
      r2.close();

      dir.close();
    }
  }

  public void testDeleteUnusedFiles2() throws Exception {
    // Validates that iw.deleteUnusedFiles() also deletes unused index commits
    // in case a deletion policy which holds onto commits is used.
    Directory dir = newDirectory();
    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setIndexDeletionPolicy(sdp));

    // First commit
    Document doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    assertEquals(1, IndexReader.listCommits(dir).size());

    // Keep that commit
    sdp.snapshot("id");

    // Second commit - now KeepOnlyLastCommit cannot delete the prev commit.
    doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    assertEquals(2, IndexReader.listCommits(dir).size());

    // Should delete the unreferenced commit
    sdp.release("id");
    writer.deleteUnusedFiles();
    assertEquals(1, IndexReader.listCommits(dir).size());

    writer.close();
    dir.close();
  }

  private static class FlushCountingIndexWriter extends IndexWriter {
    int flushCount;
    public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException {
      super(dir, iwc);
    }
    @Override
    public void doAfterFlush() {
      flushCount++;
    }
  }

  public void testEmptyFSDirWithNoLock() throws Exception {
    // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF),
    // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed
    // when listAll() was called in IndexFileDeleter.
    Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory());
    new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();
    dir.close();
  }

  public void testEmptyDirRollback() throws Exception {
    // Tests that if IW is created over an empty Directory, some documents are
    // indexed, flushed (but not committed) and then IW rolls back, then no
    // files are left in the Directory.
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
    String[] files = dir.listAll();

    writer.setInfoStream(VERBOSE ? System.out : null);

    // Creating over empty dir should not create any files,
    // or, at most the write.lock file
    final int extraFileCount;
    if (files.length == 1) {
      assertTrue(files[0].endsWith("write.lock"));
      extraFileCount = 1;
    } else {
      assertEquals(0, files.length);
      extraFileCount = 0;
    }

    Document doc = new Document();
    // create as many files as possible
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    // Adding just one document does not call flush yet.
    assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);

    doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);

    // The second document should cause a flush.
    assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount);

    // After rollback, IW should remove all files
    writer.rollback();
    assertEquals("no files should exist in the directory after rollback", 0, dir.listAll().length);

    // Since we rolled-back above, that close should be a no-op
    writer.close();
    assertEquals("expected a no-op close after IW.rollback()", 0, dir.listAll().length);
    dir.close();
  }

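  // With locking disabled, a second writer with OpenMode.CREATE can start
  // over an uncommitted first writer: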
  public void testNoSegmentFile() throws IOException {
    Directory dir = newDirectory();
    dir.setLockFactory(NoLockFactory.getNoLockFactory());
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));

    Document doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    w.addDocument(doc);
    w.addDocument(doc);
    IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)
        .setOpenMode(OpenMode.CREATE));

    w2.close();
    w.rollback();
    dir.close();
  }

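  // Randomized stored-field torture test: random docs, random deletes,
  // then verify stored fields of every surviving doc: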
  public void testRandomStoredFields() throws IOException {
    Directory dir = newDirectory();
    Random rand = random;
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
    //w.w.setInfoStream(System.out);
    //w.w.setUseCompoundFile(false);
    if (VERBOSE) {
      w.w.setInfoStream(System.out);
    }
    final int docCount = atLeast(200);
    final int fieldCount = _TestUtil.nextInt(rand, 1, 5);

    final List<Integer> fieldIDs = new ArrayList<Integer>();

    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

    for(int i=0;i<fieldCount;i++) {
      fieldIDs.add(i);
    }

    final Map<String,Document> docs = new HashMap<String,Document>();
    if (VERBOSE) {
      System.out.println("TEST: build index docCount=" + docCount);
    }
    for(int i=0;i<docCount;i++) {
      Document doc = new Document();
      doc.add(idField);
      final String id = ""+i;
      idField.setValue(id);
      docs.put(id, doc);

      for(int field: fieldIDs) {
        final String s;
        if (rand.nextInt(4) != 3) {
          s = _TestUtil.randomUnicodeString(rand, 1000);
          doc.add(newField("f"+field, s, Field.Store.YES, Field.Index.NO));
        } else {
          s = null;
        }
      }
      w.addDocument(doc);
      if (rand.nextInt(50) == 17) {
        // mixup binding of field name -> Number every so often
        Collections.shuffle(fieldIDs);
      }
      if (rand.nextInt(5) == 3 && i > 0) {
        final String delID = ""+rand.nextInt(i);
        if (VERBOSE) {
          System.out.println("TEST: delete doc " + delID);
        }
        w.deleteDocuments(new Term("id", delID));
        docs.remove(delID);
      }
    }
    if (VERBOSE) {
      System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
    }
    if (docs.size() > 0) {
      String[] idsList = docs.keySet().toArray(new String[docs.size()]);

      for(int x=0;x<2;x++) {
        IndexReader r = w.getReader();
        IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
          System.out.println("TEST: cycle x=" + x + " r=" + r);
        }
        int num = atLeast(1000);
        for(int iter=0;iter<num;iter++) {
          String testID = idsList[rand.nextInt(idsList.length)];
          TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
          assertEquals(1, hits.totalHits);
          Document doc = r.document(hits.scoreDocs[0].doc);
          Document docExp = docs.get(testID);
          for(int i=0;i<fieldCount;i++) {
            assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i), doc.get("f"+i));
          }
        }
        r.close();
        s.close();
        w.optimize();
      }
    }
    w.close();
    dir.close();
  }

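  // No term-vector files should be written when no field enables term vectors: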
  public void testNoUnwantedTVFiles() throws Exception {

    Directory dir = newDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy()));
    ((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false);

    String BIG = "alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
    BIG = BIG + BIG + BIG + BIG;

    for (int i=0; i<2; i++) {
      Document doc = new Document();
      doc.add(new Field("id", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field("str", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(new Field("str2", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("str3", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
      indexWriter.addDocument(doc);
    }

    indexWriter.close();

    assertNoUnreferencedFiles(dir, "no tv files");
    String[] files = dir.listAll();
    for(String file : files) {
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_FIELDS_EXTENSION));
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_INDEX_EXTENSION));
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    }

    dir.close();
  }

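  // Term index interval of 1 with an empty field name; term seeking must not fail: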
  public void testEmptyFieldNameTIIOne() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    iwc.setTermIndexInterval(1);
    iwc.setReaderTermsIndexDivisor(1);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    final IndexReader r = IndexReader.open(writer, true);
    writer.close();
    r.terms(new Term("", ""));
    r.terms(new Term("", ""));
    r.terms(new Term("", "a"));
    r.terms(new Term("", ""));