package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;

public class TestIndexWriter extends LuceneTestCase {
  public void testDocCount() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    long savedWriteLockTimeout = IndexWriterConfig.getDefaultWriteLockTimeout();
    try {
      IndexWriterConfig.setDefaultWriteLockTimeout(2000);
      assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout());
      writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    } finally {
      IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout);
    }

    // add 100 documents
    for (i = 0; i < 100; i++) {
      addDoc(writer);
    }
    assertEquals(100, writer.maxDoc());
    writer.close();

    // delete 40 documents
    reader = IndexReader.open(dir, false);
    for (i = 0; i < 40; i++) {
      reader.deleteDocument(i);
    }
    reader.close();

    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.numDocs());
    reader.close();

    // merge the index down and check that the new doc count is correct
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    assertEquals(60, writer.numDocs());
    writer.forceMerge(1);
    assertEquals(60, writer.maxDoc());
    assertEquals(60, writer.numDocs());
    writer.close();

    // check that the index reader gives the same numbers.
    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.maxDoc());
    assertEquals(60, reader.numDocs());
    reader.close();

    // make sure opening a new index for create over
    // this existing one works correctly:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals(0, writer.maxDoc());
    assertEquals(0, writer.numDocs());
    writer.close();
    dir.close();
  }
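
  /** Adds a single document with one unstored, analyzed "content" field. */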
  static void addDoc(IndexWriter writer) throws IOException
  {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
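
  /** Adds a document whose stored "content" and "id" fields encode {@code index}. */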
  static void addDocWithIndex(IndexWriter writer, int index) throws IOException
  {
    Document doc = new Document();
    doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }
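
  /**
   * Asserts that opening an IndexWriter on {@code dir} and immediately
   * rolling it back neither creates nor deletes any files. Typical use
   * from a test, as a sketch (any Directory whose writers and readers
   * have all been closed works here):
   *
   * <pre>
   *   assertNoUnreferencedFiles(dir, "IndexWriter left unreferenced files");
   * </pre>
   */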
  public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
    String[] startFiles = dir.listAll();
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);
    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback();
    String[] endFiles = dir.listAll();

    Arrays.sort(startFiles);
    Arrays.sort(endFiles);

    if (!Arrays.equals(startFiles, endFiles)) {
      fail(message + ": before delete:\n    " + arrayToString(startFiles) + "\n  after delete:\n    " + arrayToString(endFiles));
    }
  }

  /**
   * Make sure we skip wicked long terms.
   */
  public void testWickedLongTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));

    char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE-1];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);

    // Max length term is 16383, so this contents produces
    // a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped:
    TermPositions tps = reader.termPositions(new Term("content", "another"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
  }
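
  /** Joins file names into an indented, newline-separated string for failure messages. */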
  static String arrayToString(String[] l) {
    String s = "";
    for(int i=0;i<l.length;i++) {
      if (i > 0) {
        s += "\n    ";
      }
      s += l[i];
    }
    return s;
  }

  // Make sure we can open an index for create even when a
  // reader holds it open (this fails pre lock-less
  // commits on windows):
  public void testCreateWithReader() throws IOException {
    Directory dir = newDirectory();

    // add one document & close writer
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);
    writer.close();

    // now open reader:
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals("should be one document", reader.numDocs(), 1);

    // now open index for create:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals("should be zero documents", writer.maxDoc(), 0);
    addDoc(writer);
    writer.close();

    assertEquals("should be one document", reader.numDocs(), 1);
    IndexReader reader2 = IndexReader.open(dir, true);
    assertEquals("should be one document", reader2.numDocs(), 1);
    reader.close();
    reader2.close();

    dir.close();
  }
  public void testChangesAfterClose() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = null;

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);

    // close the writer, then try to add another document:
    writer.close();
    try {
      addDoc(writer);
      fail("did not hit AlreadyClosedException");
    } catch (AlreadyClosedException e) {
      // expected
    }
    dir.close();
  }

  public void testIndexNoDocuments() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.commit();
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.commit();
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();
    dir.close();
  }

  public void testManyFields() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    for(int j=0;j<100;j++) {
      Document doc = new Document();
      doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(100, reader.maxDoc());
    assertEquals(100, reader.numDocs());
    for(int j=0;j<100;j++) {
      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
    }
    reader.close();
    dir.close();
  }

  public void testSmallRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setRAMBufferSizeMB(0.000001).
            setMergePolicy(newLogMergePolicy(10))
    );
    int lastNumFile = dir.listAll().length;
    for(int j=0;j<9;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      int numFile = dir.listAll().length;
      // Verify that with a tiny RAM buffer we see new
      // segment after every doc
      assertTrue(numFile > lastNumFile);
      lastNumFile = numFile;
    }
    writer.close();
    dir.close();
  }

  /**
   * Make sure it's OK to change RAM buffer size and
   * maxBufferedDocs in a write session.
   *
   * @deprecated after all the setters on IW go away (4.0), this test can be
   *             removed because changing ram buffer settings during a write
   *             session won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setRAMBufferSizeMB(
        IndexWriterConfig.DISABLE_AUTO_FLUSH));

    int lastFlushCount = -1;
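    // Flush schedule verified below: docs 2-9 stay buffered, doc 10 hits
    // the maxBufferedDocs=10 trigger; a tiny RAM buffer then forces a
    // flush on nearly every doc, and re-enabling buffering stops flushes.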
    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  /**
   * @deprecated after setters on IW go away, this test can be deleted because
   *             changing those settings on IW won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer2() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10).setMaxBufferedDeleteTerms(
        10).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));

    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
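    // Same schedule as testChangingRAMBuffer, but the flushes below are
    // driven by buffered delete terms rather than by added documents.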
    for(int j=1;j<52;j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  // Make sure it's OK to change RAM buffer size and
  // maxBufferedDocs in a write session, using IW.getConfig()
  public void testChangingRAMBufferWithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    int lastFlushCount = -1;
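    // Same schedule as testChangingRAMBuffer, but the settings are changed
    // through the live IndexWriterConfig returned by getConfig().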
    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  public void testChangingRAMBuffer2WithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setMaxBufferedDeleteTerms(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    for(int j=1;j<52;j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
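    // Same schedule again, driven by buffered delete terms and mutated
    // through the live IndexWriterConfig.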
    for(int j=1;j<52;j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  public void testDiverseDocs() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5));
    for(int i=0;i<3;i++) {
      // First, docs where every term is unique (heavy on
      // Posting instances)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        for(int k=0;k<100;k++) {
          doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED));
        }
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs (heavy on byte blocks)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs but the terms are very long (heavy on
      // char[] arrays)
      for(int j=0;j<100;j++) {
        StringBuilder b = new StringBuilder();
        String x = Integer.toString(j) + ".";
        for(int k=0;k<1000;k++)
          b.append(x);
        String longTerm = b.toString();

        Document doc = new Document();
        doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir, false);
    IndexSearcher searcher = new IndexSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs;
    assertEquals(300, hits.length);
    searcher.close();
    reader.close();
    dir.close();
  }

  public void testEnablingNorms() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, pre flush
    for(int j=0;j<10;j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 8) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    Term searchTerm = new Term("field", "aaa");

    IndexReader reader = IndexReader.open(dir, false);
    IndexSearcher searcher = new IndexSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(10, hits.length);
    searcher.close();
    reader.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, post flush
    for(int j=0;j<27;j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 26) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    reader = IndexReader.open(dir, false);
    searcher = new IndexSearcher(reader);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(27, hits.length);
    searcher.close();
    reader.close();

    reader = IndexReader.open(dir, true);
    reader.close();

    dir.close();
  }

  public void testHighFreqTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01));
    // Massive doc that has 128 K a's
    StringBuilder b = new StringBuilder(1024*1024);
    for(int i=0;i<4096;i++) {
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
    }
    Document doc = new Document();
    doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(1, reader.maxDoc());
    assertEquals(1, reader.numDocs());
    Term t = new Term("field", "a");
    assertEquals(1, reader.docFreq(t));
    TermDocs td = reader.termDocs(t);
    assertTrue(td.next());
    assertEquals(128*1024, td.freq());
    reader.close();
    dir.close();
  }

  // Make sure that a Directory implementation that does
  // not use LockFactory at all (ie overrides makeLock and
  // implements its own private locking) works OK.  This
  // was raised on java-dev as loss of backwards
  // compatibility.
  public void testNullLockFactory() throws IOException {

    final class MyRAMDirectory extends MockDirectoryWrapper {
      private LockFactory myLockFactory;
      MyRAMDirectory(Directory delegate) {
        super(random, delegate);
        lockFactory = null;
        myLockFactory = new SingleInstanceLockFactory();
      }
      @Override
      public Lock makeLock(String name) {
        return myLockFactory.makeLock(name);
      }
    }

    Directory dir = new MyRAMDirectory(new RAMDirectory());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    IndexReader reader = IndexReader.open(dir, false);
    IndexSearcher searcher = new IndexSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("did not get right number of hits", 100, hits.length);
    searcher.close();
    reader.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(OpenMode.CREATE));
    writer.close();
    dir.close();
  }

  public void testFlushWithNoMerging() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(2).
            setMergePolicy(newLogMergePolicy(10))
    );
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for(int i=0;i<19;i++)
      writer.addDocument(doc);
    writer.flush(false, true);
    writer.close();
    SegmentInfos sis = new SegmentInfos();
    sis.read(dir);
    // Since we flushed w/o allowing merging we should now
    // have 10 segments
    assertEquals(10, sis.size());
    dir.close();
  }

  // Make sure we can flush segment w/ norms, then add
  // empty doc (no norms) and flush
  public void testEmptyDocAfterFlushingRealDoc() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    if (VERBOSE) {
      System.out.println("\nTEST: now add empty doc");
    }
    writer.addDocument(new Document());
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();
    dir.close();
  }

  /**
   * Test that no NullPointerException will be raised
   * when adding one document with a single, empty field
   * and term vectors enabled.
   *
   * @throws IOException
   */
  public void testBadSegment() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document document = new Document();
    document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES));
    iw.addDocument(document);
    iw.close();
    dir.close();
  }

  public void testMaxThreadPriority() throws IOException {
    int pri = Thread.currentThread().getPriority();
    try {
      Directory dir = newDirectory();
      IndexWriterConfig conf = newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random))
          .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
      IndexWriter iw = new IndexWriter(dir, conf);
      Document document = new Document();
      document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.YES));
      Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
      for(int i=0;i<4;i++)
        iw.addDocument(document);
      iw.close();
      dir.close();
    } finally {
      Thread.currentThread().setPriority(pri);
    }
  }

  // Just intercepts all merges & verifies that we are never
  // merging a segment with >= 20 (maxMergeDocs) docs
  private class MyMergeScheduler extends MergeScheduler {
    @Override
    synchronized public void merge(IndexWriter writer)
        throws CorruptIndexException, IOException {

      while(true) {
        MergePolicy.OneMerge merge = writer.getNextMerge();
        if (merge == null) {
          break;
        }
        for(int i=0;i<merge.segments.size();i++) {
          assert merge.segments.get(i).docCount < 20;
        }
        writer.merge(merge);
      }
    }

    @Override
    public void close() {}
  }
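
  // Indexes docs whose field sets and store flags differ from iteration to
  // iteration, then verifies that merging such "variable schema" segments works.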
  public void testVariableSchema() throws Exception {
    Directory dir = newDirectory();
    int delID = 0;
    for(int i=0;i<20;i++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + i);
      }
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
      writer.setInfoStream(VERBOSE ? System.out : null);
      //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
      //lmp.setMergeFactor(2);
      //lmp.setUseCompoundFile(false);
      Document doc = new Document();
      String contents = "aa bb cc dd ee ff gg hh ii jj kk";

      if (i == 7) {
        // Add empty docs here
        doc.add(newField("content3", "", Field.Store.NO,
                         Field.Index.ANALYZED));
      } else {
        Field.Store storeVal;
        if (i%2 == 0) {
          doc.add(newField("content4", contents, Field.Store.YES,
                           Field.Index.ANALYZED));
          storeVal = Field.Store.YES;
        } else
          storeVal = Field.Store.NO;
        doc.add(newField("content1", contents, storeVal,
                         Field.Index.ANALYZED));
        doc.add(newField("content3", "", Field.Store.YES,
                         Field.Index.ANALYZED));
        doc.add(newField("content5", "", storeVal,
                         Field.Index.ANALYZED));
      }

      for(int j=0;j<4;j++)
        writer.addDocument(doc);

      writer.close();
      IndexReader reader = IndexReader.open(dir, false);
      reader.deleteDocument(delID++);
      reader.close();

      if (0 == i % 4) {
        writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
        //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy();
        //lmp2.setUseCompoundFile(false);
        writer.forceMerge(1);
        writer.close();
      }
    }
    dir.close();
  }

  public void testNoWaitClose() throws Throwable {
    Directory directory = newDirectory();

    final Document doc = new Document();
    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(idField);

    for(int pass=0;pass<2;pass++) {
      if (VERBOSE) {
        System.out.println("TEST: pass=" + pass);
      }

      IndexWriterConfig conf = newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      if (pass == 2) {
        conf.setMergeScheduler(new SerialMergeScheduler());
      }
      IndexWriter writer = new IndexWriter(directory, conf);
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
      writer.setInfoStream(VERBOSE ? System.out : null);

      // have to use compound file to prevent running out of
      // descriptors when newDirectory returns a file-system
      // backed directory:
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true);

      for(int iter=0;iter<10;iter++) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter);
        }
        for(int j=0;j<199;j++) {
          idField.setValue(Integer.toString(iter*201+j));
          writer.addDocument(doc);
        }

        int delID = iter*199;
        for(int j=0;j<20;j++) {
          writer.deleteDocuments(new Term("id", Integer.toString(delID)));
          delID += 5;
        }

        // Force a bunch of merge threads to kick off so we
        // stress out aborting them on close:
        ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2);

        final IndexWriter finalWriter = writer;
        final ArrayList<Throwable> failure = new ArrayList<Throwable>();
        Thread t1 = new Thread() {
            @Override
            public void run() {
              boolean done = false;
              while(!done) {
                for(int i=0;i<100;i++) {
                  try {
                    finalWriter.addDocument(doc);
                  } catch (AlreadyClosedException e) {
                    done = true;
                    break;
                  } catch (NullPointerException e) {
                    done = true;
                    break;
                  } catch (Throwable e) {
                    e.printStackTrace(System.out);
                    failure.add(e);
                    done = true;
                    break;
                  }
                }
                Thread.yield();
              }
            }
          };

        if (failure.size() > 0) {
          throw failure.get(0);
        }

        t1.start();

        writer.close(false);
        t1.join();

        // Make sure reader can read
        IndexReader reader = IndexReader.open(directory, true);
        reader.close();

        // Reopen
        writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
        writer.setInfoStream(VERBOSE ? System.out : null);
      }
      writer.close();
    }

    directory.close();
  }

  // LUCENE-1084: test unlimited field length
  public void testUnlimitedMaxFieldLength() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document doc = new Document();
    StringBuilder b = new StringBuilder();
    for(int i=0;i<10000;i++)
      b.append(" a");
    b.append(" x");
    doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    reader.close();
    dir.close();
  }

  // LUCENE-1084: test user-specified field length
  public void testUserSpecifiedMaxFieldLength() throws IOException {
    Directory dir = newDirectory();

    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
    writer.setMaxFieldLength(100000);

    Document doc = new Document();
    StringBuilder b = new StringBuilder();
    for(int i=0;i<10000;i++)
      b.append(" a");
    b.append(" x");
    doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    Term t = new Term("field", "x");
    assertEquals(1, reader.docFreq(t));
    reader.close();
    dir.close();
  }

  public void testEmptyFieldName() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    dir.close();
  }

  public void testEmptyFieldNameTerms() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    IndexReader subreader = SegmentReader.getOnlySegmentReader(reader);
    TermEnum te = subreader.terms();
    assertTrue(te.next());
    assertEquals(new Term("", "a"), te.term());
    assertTrue(te.next());
    assertEquals(new Term("", "b"), te.term());
    assertTrue(te.next());
    assertEquals(new Term("", "c"), te.term());
    assertFalse(te.next());
    reader.close();
    dir.close();
  }

  public void testEmptyFieldNameEmptyTerm() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    // TODO: why do we throw IAE: name and value cannot both be empty in Field ctor?!
    doc.add(newField("", "", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(newField("", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(newField("", "b", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(newField("", "c", Field.Store.NO, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, true);
    IndexReader subreader = SegmentReader.getOnlySegmentReader(reader);
    TermEnum te = subreader.terms();
    assertTrue(te.next());
    assertEquals(new Term("", ""), te.term());
    assertTrue(te.next());
    assertEquals(new Term("", "a"), te.term());
    assertTrue(te.next());
    assertEquals(new Term("", "b"), te.term());
    assertTrue(te.next());
    assertEquals(new Term("", "c"), te.term());
    assertFalse(te.next());
    reader.close();
    dir.close();
  }
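
  /** IndexWriter subclass that records whether the doBeforeFlush/doAfterFlush hooks ran. */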
  private static final class MockIndexWriter extends IndexWriter {

    public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
      super(dir, conf);
    }

    boolean afterWasCalled;
    boolean beforeWasCalled;

    @Override
    public void doAfterFlush() {
      afterWasCalled = true;
    }

    @Override
    protected void doBeforeFlush() throws IOException {
      beforeWasCalled = true;
    }
  }

  public void testDoBeforeAfterFlush() throws IOException {
    Directory dir = newDirectory();
    MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("field", "a field", Field.Store.YES,
                     Field.Index.ANALYZED));
    w.addDocument(doc);
    w.commit();
    assertTrue(w.beforeWasCalled);
    assertTrue(w.afterWasCalled);
    w.beforeWasCalled = false;
    w.afterWasCalled = false;
    w.deleteDocuments(new Term("field", "field"));
    w.commit();
    assertTrue(w.beforeWasCalled);
    assertTrue(w.afterWasCalled);
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    assertEquals(0, ir.numDocs());
    ir.close();

    dir.close();
  }

  public void testNegativePositions() throws Throwable {
    final TokenStream tokens = new TokenStream() {
      final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

      final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
      boolean first = true;

      @Override
      public boolean incrementToken() {
        if (!terms.hasNext()) return false;
        clearAttributes();
        termAtt.append(terms.next());
        posIncrAtt.setPositionIncrement(first ? 0 : 1);
        first = false;
        return true;
      }
    };

    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(new Field("field", tokens));
    w.addDocument(doc);
    w.commit();

    IndexReader r = IndexReader.open(dir, false);
    IndexSearcher s = new IndexSearcher(r);
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("field", "a"));
    pq.add(new Term("field", "b"));
    pq.add(new Term("field", "c"));
    ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
    assertEquals(1, hits.length);

    Query q = new SpanTermQuery(new Term("field", "a"));
    hits = s.search(q, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(0, tps.nextPosition());

    s.close();
    r.close();
    w.close();
    dir.close();
  }

  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    byte[] b = new byte[50];
    for(int i=0;i<50;i++)
      b[i] = (byte) (i+77);

    Document doc = new Document();
    Field f = new Field("binary", b, 10, 17);
    byte[] bx = f.getBinaryValue();
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.getBinaryOffset());
    assertEquals(17, f.getBinaryLength());
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    doc = ir.document(0);
    f = doc.getField("binary");
    b = f.getBinaryValue();
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);
    ir.close();
    dir.close();
  }

  public void testPositionIncrementGapEmptyField() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new Analyzer(){
      Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader){
        return a.tokenStream(fieldName, reader);
      }
      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 100;
      }
    };
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    Field f = newField("field", "", Field.Store.NO,
                       Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
    Field f2 = newField("field", "crunch man", Field.Store.NO,
                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = IndexReader.open(dir, true);
    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
    int[] poss = tpv.getTermPositions(0);
    assertEquals(1, poss.length);
    assertEquals(100, poss[0]);
    poss = tpv.getTermPositions(1);
    assertEquals(1, poss.length);
    assertEquals(101, poss[0]);
    r.close();
    dir.close();
  }

  // LUCENE-1468 -- make sure opening an IndexWriter with
  // create=true does not remove non-index files

  public void testOtherFiles() throws Throwable {
    Directory dir = newDirectory();
    try {
      // Create my own random file:
      IndexOutput out = dir.createOutput("myrandomfile");
      out.writeByte((byte) 42);
      out.close();

      new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();

      assertTrue(dir.fileExists("myrandomfile"));

      // Make sure this does not copy myrandomfile:
      Directory dir2 = new MockDirectoryWrapper(random, new RAMDirectory(dir));
      assertTrue(!dir2.fileExists("myrandomfile"));
      dir2.close();
    } finally {
      dir.close();
    }
  }

  public void testDeadlock() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
    Document doc = new Document();
    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
                     Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.commit();
    // index has 2 segments

    Directory dir2 = newDirectory();
    IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer2.addDocument(doc);
    writer2.close();

    IndexReader r1 = IndexReader.open(dir2, true);
    IndexReader r2 = (IndexReader) r1.clone();
    writer.addIndexes(new IndexReader[] {r1, r2});
    writer.close();

    IndexReader r3 = IndexReader.open(dir, true);
    assertEquals(5, r3.numDocs());
    r3.close();

    r1.close();
    r2.close();

    dir2.close();
    dir.close();
  }

  private class IndexerThreadInterrupt extends Thread {
    volatile boolean failed;
    volatile boolean finish;

    volatile boolean allowInterrupt = false;

    @Override
    public void run() {
      // LUCENE-2239: won't work with NIOFS/MMAP
      Directory dir = new MockDirectoryWrapper(random, new RAMDirectory());
      IndexWriter w = null;
      while(!finish) {
        try {

          while(!finish) {
            if (w != null) {
              w.close();
              w = null;
            }
            IndexWriterConfig conf = newIndexWriterConfig(
                TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2);
            w = new IndexWriter(dir, conf);
            w.setInfoStream(VERBOSE ? System.out : null);

            Document doc = new Document();
            doc.add(newField("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED));
            for(int i=0;i<100;i++) {
              w.addDocument(doc);
              if (i%10 == 0) {
                w.commit();
              }
            }
            w.close();
            w = null;
            _TestUtil.checkIndex(dir);
            IndexReader.open(dir, true).close();

            // Strangely, if we interrupt a thread before
            // all classes are loaded, the class loader
            // seems to do scary things with the interrupt
            // status.  In java 1.5, it'll throw an
            // incorrect ClassNotFoundException.  In java
            // 1.6, it'll silently clear the interrupt.
            // So, on first iteration through here we
            // don't open ourselves up for interrupts
            // until we've done the above loop.
            allowInterrupt = true;
          }
        } catch (ThreadInterruptedException re) {
          if (VERBOSE) {
            System.out.println("TEST: got interrupt");
            re.printStackTrace(System.out);
          }
          Throwable e = re.getCause();
          assertTrue(e instanceof InterruptedException);
          if (finish) {
            break;
          }
        } catch (Throwable t) {
          System.out.println("FAILED; unexpected exception");
          t.printStackTrace(System.out);
          failed = true;
          break;
        }
      }

      if (!failed) {
        // clear interrupt state:
        Thread.interrupted();
        if (w != null) {
          try {
            w.rollback();
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }

        try {
          _TestUtil.checkIndex(dir);
        } catch (Exception e) {
          failed = true;
          System.out.println("CheckIndex FAILED: unexpected exception");
          e.printStackTrace(System.out);
        }
        try {
          IndexReader r = IndexReader.open(dir, true);
          //System.out.println("doc count=" + r.numDocs());
          r.close();
        } catch (Exception e) {
          failed = true;
          System.out.println("IndexReader.open FAILED: unexpected exception");
          e.printStackTrace(System.out);
        }
      }
      try {
        dir.close();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
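
  // Repeatedly interrupts the indexer thread above; the index must stay
  // intact and the thread must exit cleanly rather than deadlock.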
  public void testThreadInterruptDeadlock() throws Exception {
    IndexerThreadInterrupt t = new IndexerThreadInterrupt();
    t.setDaemon(true);
    t.start();

    // Force class loader to load ThreadInterruptedException
    // up front... else we can see a false failure if 2nd
    // interrupt arrives while class loader is trying to
    // init this class (in servicing a first interrupt):
    assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException);

    // issue 100 interrupts to child thread
    int i = 0;
    while(true) {
      if (t.allowInterrupt) {
        i++;
        t.interrupt();
        if (i == 100) {
          break;
        }
      }
      Thread.sleep(1);
    }

    t.finish = true;
    t.interrupt();
    t.join();
    assertFalse(t.failed);
  }

  public void testIndexStoreCombos() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    byte[] b = new byte[50];
    for(int i=0;i<50;i++)
      b[i] = (byte) (i+77);

    Document doc = new Document();
    Field f = new Field("binary", b, 10, 17);
    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field1")));
    Field f2 = newField("string", "value", Field.Store.YES,Field.Index.ANALYZED);
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field2")));
    doc.add(f);
    doc.add(f2);
    w.addDocument(doc);

    // add 2 docs to test in-memory merging
    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field1")));
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field2")));
    w.addDocument(doc);

    // force segment flush so we can force a segment merge with doc3 later.
    w.commit();

    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field1")));
    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field2")));
    w.addDocument(doc);
    w.commit();
    w.forceMerge(1); // force segment merge.
    w.close();

    IndexReader ir = IndexReader.open(dir, true);
    doc = ir.document(0);
    f = doc.getField("binary");
    b = f.getBinaryValue();
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);

    assertTrue(ir.document(0).getFieldable("binary").isBinary());
    assertTrue(ir.document(1).getFieldable("binary").isBinary());
    assertTrue(ir.document(2).getFieldable("binary").isBinary());

    assertEquals("value", ir.document(0).get("string"));
    assertEquals("value", ir.document(1).get("string"));
    assertEquals("value", ir.document(2).get("string"));

    // test that the terms were indexed.
    assertTrue(ir.termDocs(new Term("binary","doc1field1")).next());
    assertTrue(ir.termDocs(new Term("binary","doc2field1")).next());
    assertTrue(ir.termDocs(new Term("binary","doc3field1")).next());
    assertTrue(ir.termDocs(new Term("string","doc1field2")).next());
    assertTrue(ir.termDocs(new Term("string","doc2field2")).next());
    assertTrue(ir.termDocs(new Term("string","doc3field2")).next());

    ir.close();
    dir.close();
  }

  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO));
    doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO));
    doc.add(newField("zzz", "1 2 3", Field.Store.YES, Field.Index.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    doc = r.document(0);
    Iterator<Fieldable> it = doc.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }

  public void testNoDocsIndex() throws Throwable {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    writer.setInfoStream(new PrintStream(bos));
    writer.addDocument(new Document());
    writer.close();

    dir.close();
  }

  public void testDeleteUnusedFiles() throws Exception {

    for(int iter=0;iter<2;iter++) {
      Directory dir = newDirectory();

      LogMergePolicy mergePolicy = newLogMergePolicy(true);
      mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS

      IndexWriter w = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
              setMergePolicy(mergePolicy)
      );
      Document doc = new Document();
      doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED));
      w.addDocument(doc);
      IndexReader r;
      if (iter == 0) {
        // use NRT
        r = w.getReader();
      } else {
        // don't use NRT
        w.commit();
        r = IndexReader.open(dir);
      }

      List<String> files = Arrays.asList(dir.listAll());
      assertTrue(files.contains("_0.cfs"));
      w.addDocument(doc);
      w.forceMerge(1);
      if (iter == 1) {
        w.commit();
      }
      IndexReader r2 = IndexReader.openIfChanged(r);
      assertNotNull(r2);
      assertTrue(r != r2);
      files = Arrays.asList(dir.listAll());

      // NOTE: here we rely on "Windows" behavior, ie, even
      // though IW wanted to delete _0.cfs since it was
      // merged away, because we have a reader open
      // against this file, it should still be here:
      assertTrue(files.contains("_0.cfs"));
      // forceMerge created this
      //assertTrue(files.contains("_2.cfs"));
      w.deleteUnusedFiles();

      files = Arrays.asList(dir.listAll());
      // r still holds this file open
      assertTrue(files.contains("_0.cfs"));
      //assertTrue(files.contains("_2.cfs"));

      r.close();
      if (iter == 0) {
        // on closing NRT reader, it calls writer.deleteUnusedFiles
        files = Arrays.asList(dir.listAll());
        assertFalse(files.contains("_0.cfs"));
      } else {
        // now writer can remove it
        w.deleteUnusedFiles();
        files = Arrays.asList(dir.listAll());
        assertFalse(files.contains("_0.cfs"));
      }
      //assertTrue(files.contains("_2.cfs"));

      w.close();
      r2.close();

      dir.close();
    }
  }

  public void testDeleteUnusedFiles2() throws Exception {
    // Validates that iw.deleteUnusedFiles() also deletes unused index commits
    // in case a deletion policy which holds onto commits is used.
    Directory dir = newDirectory();
    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setIndexDeletionPolicy(sdp));

    // First commit
    Document doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    assertEquals(1, IndexReader.listCommits(dir).size());

    // Keep that commit
    sdp.snapshot("id");

    // Second commit - now KeepOnlyLastCommit cannot delete the prev commit.
    doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    assertEquals(2, IndexReader.listCommits(dir).size());

    // Release the snapshot; deleteUnusedFiles should delete the unreferenced commit
    sdp.release("id");
    writer.deleteUnusedFiles();
    assertEquals(1, IndexReader.listCommits(dir).size());

    writer.close();
    dir.close();
  }
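
  /** IndexWriter subclass that counts flushes via the {@link #doAfterFlush} hook. */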
  private static class FlushCountingIndexWriter extends IndexWriter {
    int flushCount;
    public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException {
      super(dir, iwc);
    }
    @Override
    public void doAfterFlush() {
      flushCount++;
    }
  }

  public void testEmptyFSDirWithNoLock() throws Exception {
    // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF),
    // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed
    // when listAll() was called in IndexFileDeleter.
    Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory());
    new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();
    dir.close();
  }

  public void testEmptyDirRollback() throws Exception {
    // Tests that if IW is created over an empty Directory, some documents are
    // indexed, flushed (but not committed) and then IW rolls back, then no
    // files are left in the Directory.
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
    String[] files = dir.listAll();

    writer.setInfoStream(VERBOSE ? System.out : null);

    // Creating over empty dir should not create any files,
    // or, at most the write.lock file
    final int extraFileCount;
    if (files.length == 1) {
      assertTrue(files[0].endsWith("write.lock"));
      extraFileCount = 1;
    } else {
      assertEquals(0, files.length);
      extraFileCount = 0;
    }

    Document doc = new Document();
    // create as many files as possible
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    // Adding just one document does not call flush yet.
    assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);

    doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);

    // The second document should cause a flush.
    assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount);

    // After rollback, IW should remove all files
    writer.rollback();
    assertEquals("no files should exist in the directory after rollback", 0, dir.listAll().length);

    // Since we rolled-back above, that close should be a no-op
    writer.close();
    assertEquals("expected a no-op close after IW.rollback()", 0, dir.listAll().length);
    dir.close();
  }

  public void testNoSegmentFile() throws IOException {
    Directory dir = newDirectory();
    dir.setLockFactory(NoLockFactory.getNoLockFactory());
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));

    Document doc = new Document();
    doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    w.addDocument(doc);
    w.addDocument(doc);
    IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)
        .setOpenMode(OpenMode.CREATE));

    w2.close();
    w.rollback();
    dir.close();
  }

  public void testRandomStoredFields() throws IOException {
    Directory dir = newDirectory();
    Random rand = random;
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
    //w.w.setInfoStream(System.out);
    //w.w.setUseCompoundFile(false);
    if (VERBOSE) {
      w.w.setInfoStream(System.out);
    }
    final int docCount = atLeast(200);
    final int fieldCount = _TestUtil.nextInt(rand, 1, 5);

    final List<Integer> fieldIDs = new ArrayList<Integer>();

    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

    for(int i=0;i<fieldCount;i++) {
      fieldIDs.add(i);
    }

    final Map<String,Document> docs = new HashMap<String,Document>();

    if (VERBOSE) {
      System.out.println("TEST: build index docCount=" + docCount);
    }

    for(int i=0;i<docCount;i++) {
      Document doc = new Document();
      doc.add(idField);
      final String id = ""+i;
      idField.setValue(id);
      docs.put(id, doc);

      for(int field: fieldIDs) {
        final String s;
        if (rand.nextInt(4) != 3) {
          s = _TestUtil.randomUnicodeString(rand, 1000);
          doc.add(newField("f"+field, s, Field.Store.YES, Field.Index.NO));
        } else {
          s = null;
        }
      }
      w.addDocument(doc);
      if (rand.nextInt(50) == 17) {
        // mixup binding of field name -> Number every so often
        Collections.shuffle(fieldIDs);
      }
      if (rand.nextInt(5) == 3 && i > 0) {
        final String delID = ""+rand.nextInt(i);
        if (VERBOSE) {
          System.out.println("TEST: delete doc " + delID);
        }
        w.deleteDocuments(new Term("id", delID));
        docs.remove(delID);
      }
    }

    if (VERBOSE) {
      System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
    }
    if (docs.size() > 0) {
      String[] idsList = docs.keySet().toArray(new String[docs.size()]);

      for(int x=0;x<2;x++) {
        IndexReader r = w.getReader();
        IndexSearcher s = newSearcher(r);

        if (VERBOSE) {
          System.out.println("TEST: cycle x=" + x + " r=" + r);
        }

        int num = atLeast(1000);
        for(int iter=0;iter<num;iter++) {
          String testID = idsList[rand.nextInt(idsList.length)];
          TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
          assertEquals(1, hits.totalHits);
          Document doc = r.document(hits.scoreDocs[0].doc);
          Document docExp = docs.get(testID);
          for(int i=0;i<fieldCount;i++) {
            assertEquals("doc " + testID + ", field f" + i + " is wrong", docExp.get("f"+i), doc.get("f"+i));
          }
        }
        r.close();
        w.forceMerge(1);
      }
    }
    w.close();
    dir.close();
  }

  public void testNoUnwantedTVFiles() throws Exception {

    Directory dir = newDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy()));
    ((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false);

    String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
    BIG=BIG+BIG+BIG+BIG;

    for (int i=0; i<2; i++) {
      Document doc = new Document();
      doc.add(new Field("id", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
      doc.add(new Field("str", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(new Field("str2", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("str3", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
      indexWriter.addDocument(doc);
    }

    indexWriter.close();

    assertNoUnreferencedFiles(dir, "no tv files");
    String[] files = dir.listAll();
    for(String file : files) {
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_FIELDS_EXTENSION));
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_INDEX_EXTENSION));
      assertTrue(!file.endsWith(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    }

    dir.close();
  }
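
  // A trivial analyzer/tokenizer pair that splits incoming text on single spaces.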
  static final class StringSplitAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new StringSplitTokenizer(reader);
    }
  }

  private static class StringSplitTokenizer extends Tokenizer {
    private String[] tokens;
    private int upto = 0;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public StringSplitTokenizer(Reader r) {
      try {
        reset(r);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public final boolean incrementToken() throws IOException {
      clearAttributes();
      if (upto < tokens.length) {
        termAtt.setEmpty();
        termAtt.append(tokens[upto]);
        upto++;
        return true;
      } else {
        return false;
      }
    }

    @Override
    public void reset(Reader input) throws IOException {
      this.upto = 0;
      final StringBuilder b = new StringBuilder();
      final char[] buffer = new char[1024];
      int n;
      while ((n = input.read(buffer)) != -1) {
        b.append(buffer, 0, n);
      }
      this.tokens = b.toString().split(" ");
    }
  }

  public void testEmptyFieldNameTIIOne() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    iwc.setTermIndexInterval(1);
    iwc.setReaderTermsIndexDivisor(1);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    final IndexReader r = IndexReader.open(writer, true);
    writer.close();
    r.terms(new Term("", ""));
    r.terms(new Term("", ""));
    r.terms(new Term("", "a"));
    r.terms(new Term("", ""));
    r.close();
    dir.close();
  }

  public void testDeleteAllNRTLeftoverFiles() throws Exception {

    Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    for(int i = 0; i < 20; i++) {
      for(int j = 0; j < 100; ++j) {
        w.addDocument(doc);
      }
      w.commit();
      IndexReader.open(w, true).close();

      w.deleteAll();
      w.commit();

      // Make sure we accumulate no files except for empty
      // segments_N and segments.gen:
      assertTrue(d.listAll().length <= 2);
    }
    w.close();
    d.close();
  }

  public void testNRTReaderVersion() throws Exception {
    Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("id", "0", Field.Store.YES, Field.Index.ANALYZED));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    long version = r.getVersion();
    r.close();

    w.addDocument(doc);
    r = w.getReader();
    long version2 = r.getVersion();
    r.close();
    assert(version2 > version);

    w.deleteDocuments(new Term("id", "0"));
    r = w.getReader();
    long version3 = r.getVersion();
    r.close();
    assert(version3 > version2);
    w.close();
    d.close();
  }
}