package org.apache.lucene.index;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class TestIndexWriterDelete extends LuceneTestCase {

  // test the simple case
  public void testSimpleCase() throws IOException {
    String[] keywords = { "1", "2" };
    String[] unindexed = { "Netherlands", "Italy" };
    String[] unstored = { "Amsterdam has lots of bridges",
                          "Venice has lots of canals" };
    String[] text = { "Amsterdam", "Venice" };

    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1));
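
    // maxBufferedDeleteTerms(1) flushes pending deletes as soon as a
    // single delete term is buffered, so the delete below only needs a
    // commit() to become visible to a new searcher.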
    for (int i = 0; i < keywords.length; i++) {
      Document doc = new Document();
      doc.add(newField("id", keywords[i], Field.Store.YES,
                       Field.Index.NOT_ANALYZED));
      doc.add(newField("country", unindexed[i], Field.Store.YES,
                       Field.Index.NO));
      doc.add(newField("contents", unstored[i], Field.Store.NO,
                       Field.Index.ANALYZED));
      doc.add(newField("city", text[i], Field.Store.YES,
                       Field.Index.ANALYZED));
      modifier.addDocument(doc);
    }

    modifier.commit();

    Term term = new Term("city", "Amsterdam");
    int hitCount = getHitCount(dir, term);
    assertEquals(1, hitCount);
    modifier.deleteDocuments(term);
    modifier.commit();
    hitCount = getHitCount(dir, term);
    assertEquals(0, hitCount);

    modifier.close();
    dir.close();
  }

  // test when delete terms only apply to disk segments
  public void testNonRAMDelete() throws IOException {

    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
        .setMaxBufferedDeleteTerms(2));
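
    // maxBufferedDocs(2) flushes a new segment every two buffered docs;
    // together with the commit below this guarantees the delete term is
    // applied against on-disk segments only, not the writer's RAM buffer.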
    modifier.setInfoStream(VERBOSE ? System.out : null);
    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    assertEquals(0, modifier.getNumBufferedDocuments());
    assertTrue(0 < modifier.getSegmentCount());

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
    modifier.commit();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.numDocs());
    reader.close();
    modifier.close();
    dir.close();
  }

  public void testMaxBufferedDeletes() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1));

    writer.setInfoStream(VERBOSE ? System.out : null);
    writer.addDocument(new Document());
    writer.deleteDocuments(new Term("foobar", "1"));
    writer.deleteDocuments(new Term("foobar", "1"));
    writer.deleteDocuments(new Term("foobar", "1"));
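    // With maxBufferedDeleteTerms(1) each deleteDocuments call exceeds
    // the buffer limit on its own, so three calls mean three flushes of
    // pending deletes: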
    assertEquals(3, writer.getFlushDeletesCount());
    writer.close();
    dir.close();
  }

  // test when delete terms only apply to ram segments
  public void testRAMDeletes() throws IOException {
    for(int t=0;t<2;t++) {
      if (VERBOSE) {
        System.out.println("TEST: t=" + t);
      }
      Directory dir = newDirectory();
      IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(4)
          .setMaxBufferedDeleteTerms(4));
      modifier.setInfoStream(VERBOSE ? System.out : null);
      int id = 0;
      int value = 100;

      addDoc(modifier, ++id, value);
      if (0 == t)
        modifier.deleteDocuments(new Term("value", String.valueOf(value)));
      else
        modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));
      addDoc(modifier, ++id, value);
      if (0 == t) {
        modifier.deleteDocuments(new Term("value", String.valueOf(value)));
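        // two delete operations are buffered, but they share one unique term: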
        assertEquals(2, modifier.getNumBufferedDeleteTerms());
        assertEquals(1, modifier.getBufferedDeleteTermsSize());
      } else {
        modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value))));
      }

      addDoc(modifier, ++id, value);
      assertEquals(0, modifier.getSegmentCount());
      modifier.commit();

      IndexReader reader = IndexReader.open(dir, true);
      assertEquals(1, reader.numDocs());

      int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
      assertEquals(1, hitCount);
      reader.close();
      modifier.close();
      dir.close();
    }
  }

  // test when delete terms apply to both disk and ram segments
  public void testBothDeletes() throws IOException {
    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(100)
        .setMaxBufferedDeleteTerms(100));
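
    // The buffer limits (100) are high enough that nothing auto-flushes;
    // only the explicit commits below decide what reaches disk.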

    int id = 0;
    int value = 100;

    for (int i = 0; i < 5; i++) {
      addDoc(modifier, ++id, value);
    }

    value = 200;
    for (int i = 0; i < 5; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    for (int i = 0; i < 5; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.deleteDocuments(new Term("value", String.valueOf(value)));
    modifier.commit();

    // the delete removed the 5 committed and 5 buffered docs with
    // value=200, leaving the 5 docs with value=100:
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(5, reader.numDocs());
    modifier.close();
    reader.close();
    dir.close();
  }

  // test that batched delete terms are flushed together
  public void testBatchDeletes() throws IOException {
    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
        .setMaxBufferedDeleteTerms(2));

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    id = 0;
    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
    modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
    modifier.commit();

    reader = IndexReader.open(dir, true);
    assertEquals(5, reader.numDocs());
    reader.close();
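
    // deleteDocuments(Term[]) buffers all terms in the batch and flushes
    // them together: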
    Term[] terms = new Term[3];
    for (int i = 0; i < terms.length; i++) {
      terms[i] = new Term("id", String.valueOf(++id));
    }
    modifier.deleteDocuments(terms);
    modifier.commit();
    reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();

    modifier.close();
    dir.close();
  }

  public void testDeleteAll() throws IOException {
    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
        .setMaxBufferedDeleteTerms(2));

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Add 1 doc (so we will have something buffered)
    addDoc(modifier, 99, value);

    // Delete all
    modifier.deleteAll();

    // Delete all shouldn't be on disk yet
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Add a doc and update a doc (after the deleteAll, before the commit)
    addDoc(modifier, 101, value);
    updateDoc(modifier, 102, value);

    // commit the delete all
    modifier.commit();

    // Validate that only the two docs added after deleteAll() survive
    reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();

    modifier.close();
    dir.close();
  }

  // test rollback of deleteAll()
  public void testDeleteAllRollback() throws IOException {
    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
        .setMaxBufferedDeleteTerms(2));

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    addDoc(modifier, ++id, value);

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    // Delete all
    modifier.deleteAll();

    // Roll it back (rollback also closes the writer)
    modifier.rollback();

    // Validate that the docs are still there
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    dir.close();
  }

  // test deleteAll() w/ near real-time reader
  public void testDeleteAllNRT() throws IOException {
    Directory dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
        .setMaxBufferedDeleteTerms(2));

    int id = 0;
    int value = 100;

    for (int i = 0; i < 7; i++) {
      addDoc(modifier, ++id, value);
    }
    modifier.commit();

    IndexReader reader = modifier.getReader();
    assertEquals(7, reader.numDocs());
    reader.close();

    addDoc(modifier, ++id, value);
    addDoc(modifier, ++id, value);

    // Delete all
    modifier.deleteAll();

    reader = modifier.getReader();
    assertEquals(0, reader.numDocs());
    reader.close();
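
    // The deleteAll() was never committed, so rolling back should
    // restore the last commit point (the 7 docs):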
    // Roll it back
    modifier.rollback();

    // Validate that the docs are still there
    reader = IndexReader.open(dir, true);
    assertEquals(7, reader.numDocs());
    reader.close();

    dir.close();
  }

  private void updateDoc(IndexWriter modifier, int id, int value)
      throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(newField("id", String.valueOf(id), Field.Store.YES,
                     Field.Index.NOT_ANALYZED));
    doc.add(newField("value", String.valueOf(value), Field.Store.NO,
                     Field.Index.NOT_ANALYZED));
    modifier.updateDocument(new Term("id", String.valueOf(id)), doc);
  }

  private void addDoc(IndexWriter modifier, int id, int value)
      throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(newField("id", String.valueOf(id), Field.Store.YES,
                     Field.Index.NOT_ANALYZED));
    doc.add(newField("value", String.valueOf(value), Field.Store.NO,
                     Field.Index.NOT_ANALYZED));
    modifier.addDocument(doc);
  }
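
  // Opens a brand-new searcher on the Directory, so only committed
  // changes are counted; adds/deletes still buffered in a writer are
  // not visible here.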
  private int getHitCount(Directory dir, Term term) throws IOException {
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits;
    searcher.close();
    return hitCount;
  }

  public void testDeletesOnDiskFull() throws IOException {
    doTestOperationsOnDiskFull(false);
  }

  public void testUpdatesOnDiskFull() throws IOException {
    doTestOperationsOnDiskFull(true);
  }

  /**
   * Make sure if modifier tries to commit but hits disk full that modifier
   * remains consistent and usable.  Similar to TestIndexReader.testDiskFull().
   */
  private void doTestOperationsOnDiskFull(boolean updates) throws IOException {

    Term searchTerm = new Term("content", "aaa");
    int START_COUNT = 157;
    int END_COUNT = 144;
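
    // The index starts with 157 docs matching "aaa"; the loop below
    // deletes or updates 13 of them (every 12th id), so a successful
    // run ends with 144 matches.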

    // First build up a starting index:
    MockDirectoryWrapper startDir = newDirectory();
    // TODO: find the resource leak that only occurs sometimes here.
    startDir.setNoDeleteOpenFile(false);
    IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    for (int i = 0; i < 157; i++) {
      Document d = new Document();
      d.add(newField("id", Integer.toString(i), Field.Store.YES,
                     Field.Index.NOT_ANALYZED));
      d.add(newField("content", "aaa " + i, Field.Store.NO,
                     Field.Index.ANALYZED));
      writer.addDocument(d);
    }
    writer.close();

    long diskUsage = startDir.sizeInBytes();
    long diskFree = diskUsage + 10;

    IOException err = null;

    boolean done = false;

    // Iterate w/ ever increasing free disk space:
    while (!done) {
      if (VERBOSE) {
        System.out.println("TEST: cycle");
      }
      MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
      dir.setPreventDoubleWrite(false);
      IndexWriter modifier = new IndexWriter(dir,
                                             newIndexWriterConfig(
                                                 TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
                                             .setMaxBufferedDocs(1000)
                                             .setMaxBufferedDeleteTerms(1000)
                                             .setMergeScheduler(new ConcurrentMergeScheduler()));
      ((ConcurrentMergeScheduler) modifier.getConfig().getMergeScheduler()).setSuppressExceptions();
      modifier.setInfoStream(VERBOSE ? System.out : null);

      // For each disk size, first try to commit against
      // dir that will hit random IOExceptions & disk
      // full; after, give it infinite disk space & turn
      // off random IOExceptions & retry w/ same writer:
      boolean success = false;

      for (int x = 0; x < 2; x++) {
        if (VERBOSE) {
          System.out.println("TEST: x=" + x);
        }

        double rate = 0.1;
        double diskRatio = ((double)diskFree) / diskUsage;
        long thisDiskFree;
        String testName;

        if (0 == x) {
          thisDiskFree = diskFree;
          if (diskRatio >= 2.0) {
            rate /= 2;
          }
          if (diskRatio >= 4.0) {
            rate /= 2;
          }
          if (diskRatio >= 6.0) {
            rate = 0.0;
          }
          if (VERBOSE) {
            System.out.println("\ncycle: " + diskFree + " bytes");
          }
          testName = "disk full during reader.close() @ " + thisDiskFree
              + " bytes";
        } else {
          thisDiskFree = 0;
          rate = 0.0;
          if (VERBOSE) {
            System.out.println("\ncycle: same writer: unlimited disk space");
          }
          testName = "reader re-use after disk full";
        }

        dir.setMaxSizeInBytes(thisDiskFree);
        dir.setRandomIOExceptionRate(rate);

        try {
          if (0 == x) {
            int docId = 12;
            for (int i = 0; i < 13; i++) {
              if (updates) {
                Document d = new Document();
                d.add(newField("id", Integer.toString(i), Field.Store.YES,
                               Field.Index.NOT_ANALYZED));
                d.add(newField("content", "bbb " + i, Field.Store.NO,
                               Field.Index.ANALYZED));
                modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
              } else { // deletes
                modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
                // modifier.setNorm(docId, "contents", (float)2.0);
              }
              docId += 12;
            }
          }
          modifier.close();
          success = true;
          if (0 == x) {
            done = true;
          }
        }
        catch (IOException e) {
          if (VERBOSE) {
            System.out.println("  hit IOException: " + e);
            e.printStackTrace(System.out);
          }
          err = e;
          if (1 == x) {
            e.printStackTrace();
            fail(testName + " hit IOException after disk space was freed up");
          }
        }

        // Must force the close else the writer can have
        // open files which cause exc in MockRAMDir.close
        if (!success) {
          modifier.rollback();
        }

        // If the close() succeeded, make sure there are
        // no unreferenced files.
        if (success) {
          _TestUtil.checkIndex(dir);
          TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close");
        }

        // Finally, verify index is not corrupt, and, if
        // we succeeded, we see all docs changed, and if
        // we failed, we see either all docs or no docs
        // changed (transactional semantics):
        IndexReader newReader = null;
        try {
          newReader = IndexReader.open(dir, true);
        }
        catch (IOException e) {
          e.printStackTrace();
          fail(testName
               + ":exception when creating IndexReader after disk full during close: "
               + e);
        }

        IndexSearcher searcher = newSearcher(newReader);
        ScoreDoc[] hits = null;
        try {
          hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
        }
        catch (IOException e) {
          e.printStackTrace();
          fail(testName + ": exception when searching: " + e);
        }
        int result2 = hits.length;
        if (success) {
          if (x == 0 && result2 != END_COUNT) {
            fail(testName
                 + ": method did not throw exception but hits.length for search on term 'aaa' is "
                 + result2 + " instead of expected " + END_COUNT);
          } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) {
            // It's possible that the first exception was
            // "recoverable" wrt pending deletes, in which
            // case the pending deletes are retained and
            // then re-flushing (with plenty of disk
            // space) will succeed in flushing the
            // deletes:
            fail(testName
                 + ": method did not throw exception but hits.length for search on term 'aaa' is "
                 + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
          }
        } else {
          // On hitting exception we still may have added
          // all docs:
          if (result2 != START_COUNT && result2 != END_COUNT) {
            err.printStackTrace();
            fail(testName
                 + ": method did throw exception but hits.length for search on term 'aaa' is "
                 + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
          }
        }

        searcher.close();
        newReader.close();
      }

      dir.close();

      // Try again with 10 more bytes of free space:
      diskFree += 10;
    }
    startDir.close();
  }

  // This test verifies that buffered deletes are cleared when
  // an Exception is hit during flush.
  public void testErrorAfterApplyDeletes() throws IOException {
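
    // The Failure below arms itself when it first sees applyDeletes on
    // the stack, then throws an IOException on the first write that
    // happens after applyDeletes has finished, i.e. right after the new
    // .del file has been created.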
    MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() {
        boolean sawMaybe = false;
        boolean failed = false;
        Thread thread;
        @Override
        public MockDirectoryWrapper.Failure reset() {
          thread = Thread.currentThread();
          sawMaybe = false;
          failed = false;
          return this;
        }
        @Override
        public void eval(MockDirectoryWrapper dir) throws IOException {
          if (Thread.currentThread() != thread) {
            // don't fail during merging
            return;
          }
          if (sawMaybe && !failed) {
            boolean seen = false;
            StackTraceElement[] trace = new Exception().getStackTrace();
            for (int i = 0; i < trace.length; i++) {
              if ("applyDeletes".equals(trace[i].getMethodName())) {
                seen = true;
                break;
              }
            }
            if (!seen) {
              // Only fail once we are no longer in applyDeletes
              failed = true;
              if (VERBOSE) {
                System.out.println("TEST: mock failure: now fail");
                new Throwable().printStackTrace(System.out);
              }
              throw new IOException("fail after applyDeletes");
            }
          }
          if (!failed) {
            StackTraceElement[] trace = new Exception().getStackTrace();
            for (int i = 0; i < trace.length; i++) {
              if ("applyDeletes".equals(trace[i].getMethodName())) {
                if (VERBOSE) {
                  System.out.println("TEST: mock failure: saw applyDeletes");
                  new Throwable().printStackTrace(System.out);
                }
                sawMaybe = true;
                break;
              }
            }
          }
        }
      };

    // create a couple of files

    String[] keywords = { "1", "2" };
    String[] unindexed = { "Netherlands", "Italy" };
    String[] unstored = { "Amsterdam has lots of bridges",
                          "Venice has lots of canals" };
    String[] text = { "Amsterdam", "Venice" };

    MockDirectoryWrapper dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2).setReaderPooling(false).setMergePolicy(newLogMergePolicy()));
    modifier.setInfoStream(VERBOSE ? System.out : null);

    LogMergePolicy lmp = (LogMergePolicy) modifier.getConfig().getMergePolicy();
    lmp.setUseCompoundFile(true);

    dir.failOn(failure.reset());

    for (int i = 0; i < keywords.length; i++) {
      Document doc = new Document();
      doc.add(newField("id", keywords[i], Field.Store.YES,
                       Field.Index.NOT_ANALYZED));
      doc.add(newField("country", unindexed[i], Field.Store.YES,
                       Field.Index.NO));
      doc.add(newField("contents", unstored[i], Field.Store.NO,
                       Field.Index.ANALYZED));
      doc.add(newField("city", text[i], Field.Store.YES,
                       Field.Index.ANALYZED));
      modifier.addDocument(doc);
    }
    // flush (and commit if ac)

    if (VERBOSE) {
      System.out.println("TEST: now optimize");
    }
    modifier.optimize();

    if (VERBOSE) {
      System.out.println("TEST: now commit");
    }
    modifier.commit();

    // one of the two docs matches city:Amsterdam

    Term term = new Term("city", "Amsterdam");
    int hitCount = getHitCount(dir, term);
    assertEquals(1, hitCount);

    // the writer stays open; the commit above already made the two
    // docs searchable

    // max buf del terms is two, so this is buffered
    if (VERBOSE) {
      System.out.println("TEST: delete term=" + term);
    }
    modifier.deleteDocuments(term);

    // add a doc (needed for the !ac case; see below)
    // doc remains buffered
    if (VERBOSE) {
      System.out.println("TEST: add empty doc");
    }
    Document doc = new Document();
    modifier.addDocument(doc);

    // commit the changes, the buffered deletes, and the new doc

    // The failure object will fail on the first write after the del
    // file gets created when processing the buffered delete

    // in the ac case, this will be when writing the new segments
    // files so we really don't need the new doc, but it's harmless

    // in the !ac case, a new segments file won't be created but in this
    // case, creation of the cfs file happens next so we
    // need the doc (to test that it's okay that we don't
    // lose deletes if failing while creating the cfs file)
    boolean failed = false;
    try {
      if (VERBOSE) {
        System.out.println("TEST: now commit for failure");
      }
      modifier.commit();
    } catch (IOException ioe) {
      // expected
      failed = true;
    }
    assertTrue(failed);

    // The commit above failed, so we need to retry it (which will
    // succeed, because the failure is a one-shot)
    modifier.commit();
    modifier.close();

    hitCount = getHitCount(dir, term);

    // Make sure the delete was successfully flushed:
    assertEquals(0, hitCount);

    dir.close();
  }

  // This test verifies that the files created by the docs writer before
  // a segment is written are cleaned up if there's an i/o error

  public void testErrorInDocsWriterAdd() throws IOException {

    MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() {
        boolean failed = false;
        @Override
        public MockDirectoryWrapper.Failure reset() {
          failed = false;
          return this;
        }
        @Override
        public void eval(MockDirectoryWrapper dir) throws IOException {
          if (!failed) {
            // fail exactly once, on the first write:
            failed = true;
            throw new IOException("fail in add doc");
          }
        }
      };

    // create a couple of files

    String[] keywords = { "1", "2" };
    String[] unindexed = { "Netherlands", "Italy" };
    String[] unstored = { "Amsterdam has lots of bridges",
                          "Venice has lots of canals" };
    String[] text = { "Amsterdam", "Venice" };

    MockDirectoryWrapper dir = newDirectory();
    IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));

    dir.failOn(failure.reset());

    for (int i = 0; i < keywords.length; i++) {
      Document doc = new Document();
      doc.add(newField("id", keywords[i], Field.Store.YES,
                       Field.Index.NOT_ANALYZED));
      doc.add(newField("country", unindexed[i], Field.Store.YES,
                       Field.Index.NO));
      doc.add(newField("contents", unstored[i], Field.Store.NO,
                       Field.Index.ANALYZED));
      doc.add(newField("city", text[i], Field.Store.YES,
                       Field.Index.ANALYZED));
      try {
        modifier.addDocument(doc);
      } catch (IOException io) {
        if (VERBOSE) {
          System.out.println("TEST: got expected exc:");
          io.printStackTrace(System.out);
        }
        break;
      }
    }

    modifier.close();
    TestIndexWriter.assertNoUnreferencedFiles(dir, "docswriter abort() failed to delete unreferenced files");
    dir.close();
  }

  private String arrayToString(String[] l) {
    String s = "";
    for (int i = 0; i < l.length; i++) {
      if (i > 0) {
        s += "\n    ";
      }
      s += l[i];
    }
    return s;
  }

  public void testDeleteAllSlowly() throws Exception {
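    // Index NUM_DOCS docs with unique ids, then delete them in random
    // batches of 1-20 terms, checking numDocs against an NRT reader
    // after every batch.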
    final Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    final int NUM_DOCS = atLeast(1000);
    final List<Integer> ids = new ArrayList<Integer>(NUM_DOCS);
    for(int id=0;id<NUM_DOCS;id++) {
      ids.add(id);
    }
    Collections.shuffle(ids, random);
    for(int id : ids) {
      Document doc = new Document();
      doc.add(newField("id", ""+id, Field.Index.NOT_ANALYZED));
      w.addDocument(doc);
    }
    Collections.shuffle(ids, random);
    int upto = 0;
    while(upto < ids.size()) {
      final int left = ids.size() - upto;
      final int inc = Math.min(left, _TestUtil.nextInt(random, 1, 20));
      final int limit = upto + inc;
      while(upto < limit) {
        w.deleteDocuments(new Term("id", ""+ids.get(upto++)));
      }
      final IndexReader r = w.getReader();
      assertEquals(NUM_DOCS - upto, r.numDocs());
      r.close();
    }

    w.close();
    dir.close();
  }

  public void testIndexingThenDeleting() throws Exception {
    final Random r = random;
    Directory dir = newDirectory();
    // note this test explicitly disables payloads
    final Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
      }
    };
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
    w.setInfoStream(VERBOSE ? System.out : null);
    Document doc = new Document();
    doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED));
    int num = atLeast(3);
    for (int iter = 0; iter < num; iter++) {
      int count = 0;
      final boolean doIndexing = r.nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: iter doIndexing=" + doIndexing);
      }
      if (doIndexing) {
        // Add docs until a flush is triggered
        final int startFlushCount = w.getFlushCount();
        while(w.getFlushCount() == startFlushCount) {
          w.addDocument(doc);
          count++;
        }
      } else {
        // Delete docs until a flush is triggered
        final int startFlushCount = w.getFlushCount();
        while(w.getFlushCount() == startFlushCount) {
          // the delete terms target a field that doesn't exist in the
          // index, so they only exercise the buffered-deletes accounting:
          w.deleteDocuments(new Term("foo", ""+count));
          count++;
        }
      }
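      // Both paths flush on RAM usage; a 1 MB buffer should absorb
      // thousands of tiny docs or delete terms, so a count in the low
      // thousands would mean buffered deletes are charged far too much
      // RAM: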
      assertTrue("flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + count, count > 3000);
    }
    w.close();
    dir.close();
  }

  // LUCENE-3340: make sure deletes that we don't apply
  // during flush (ie are just pushed into the stream) are
  // in fact later flushed due to their RAM usage:
  public void testFlushPushedDeletesByRAM() throws Exception {
    Directory dir = newDirectory();
    // Cannot use RandomIndexWriter because we don't want to
    // ever call commit() for this test:
    // note: tiny rambuffer used, as with a 1MB buffer the test is too slow (flush @ 128,999)
    IndexWriter w = new IndexWriter(dir,
                                    newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
                                    .setRAMBufferSizeMB(0.2f).setMaxBufferedDocs(1000).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
    w.setInfoStream(VERBOSE ? System.out : null);
    int count = 0;
    while(true) {
      Document doc = new Document();
      doc.add(new Field("id", count+"", Field.Store.NO, Field.Index.NOT_ANALYZED));
      final Term delTerm;
      if (count == 1010) {
        // This is the only delete that applies
        delTerm = new Term("id", ""+0);
      } else {
        // These get buffered, taking up RAM, but delete
        // nothing when applied:
        delTerm = new Term("id", "x" + count);
      }
      w.updateDocument(delTerm, doc);
      // Eventually segment 0 should get a del docs:
      if (dir.fileExists("_0_1.del")) {
        if (VERBOSE) {
          System.out.println("TEST: deletes created @ count=" + count);
        }
        break;
      }
      count++;

      // Today we applyDeletes @ count=21553; even if we make
      // sizable improvements to RAM efficiency of buffered
      // del term we're unlikely to go over 100K:
      if (count > 100000) {
        fail("deletes were not applied");
      }
    }
    w.close();
    dir.close();
  }

  // LUCENE-3340: make sure deletes that we don't apply
  // during flush (ie are just pushed into the stream) are
  // in fact later flushed due to the count of buffered delete terms:
  public void testFlushPushedDeletesByCount() throws Exception {
    Directory dir = newDirectory();
    // Cannot use RandomIndexWriter because we don't want to
    // ever call commit() for this test:
    final int flushAtDelCount = atLeast(1020);
    IndexWriter w = new IndexWriter(dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
        setMaxBufferedDeleteTerms(flushAtDelCount).setMaxBufferedDocs(1000).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
    w.setInfoStream(VERBOSE ? System.out : null);
    if (VERBOSE) {
      System.out.println("TEST: flush @ " + flushAtDelCount + " buffered delete terms");
    }
    int count = 0;
    while(true) {
      Document doc = new Document();
      doc.add(new Field("id", count+"", Field.Store.NO, Field.Index.NOT_ANALYZED));
      final Term delTerm;
      if (count == 1010) {
        // This is the only delete that applies
        delTerm = new Term("id", ""+0);
      } else {
        // These get buffered, taking up RAM, but delete
        // nothing when applied:
        delTerm = new Term("id", "x" + count);
      }
      w.updateDocument(delTerm, doc);
      // Eventually segment 0 should get a del docs:
      if (dir.fileExists("_0_1.del")) {
        break;
      }
      count++;
      if (count > flushAtDelCount) {
        fail("deletes were not applied at count=" + flushAtDelCount);
      }
    }
    w.close();
    dir.close();
  }

  // Make sure buffered (pushed) deletes don't use up so
  // much RAM that it forces long tail of tiny segments:
  public void testApplyDeletesOnFlush() throws Exception {
    Directory dir = newDirectory();
    // Cannot use RandomIndexWriter because we don't want to
    // ever call commit() for this test:
    final AtomicInteger docsInSegment = new AtomicInteger();
    final AtomicBoolean closing = new AtomicBoolean();
    final AtomicBoolean sawAfterFlush = new AtomicBoolean();
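
    // The anonymous subclass below hooks doAfterFlush() to check that
    // every flushed segment holds a reasonable number of docs (at least
    // 7, except during close), i.e. buffered deletes did not eat the
    // RAM budget and force a long tail of tiny segments.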
    IndexWriter w = new IndexWriter(dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
        setRAMBufferSizeMB(0.5).setMaxBufferedDocs(-1).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false)) {
      @Override
      public void doAfterFlush() {
        assertTrue("only " + docsInSegment.get() + " in segment", closing.get() || docsInSegment.get() >= 7);
        docsInSegment.set(0);
        sawAfterFlush.set(true);
      }
    };
    w.setInfoStream(VERBOSE ? System.out : null);
    int id = 0;
    while(true) {
      StringBuilder sb = new StringBuilder();
      for(int termIDX=0;termIDX<100;termIDX++) {
        sb.append(' ').append(_TestUtil.randomRealisticUnicodeString(random));
      }
      if (id == 500) {
        w.deleteDocuments(new Term("id", "0"));
      }
      Document doc = new Document();
      doc.add(newField("id", ""+id, Field.Index.NOT_ANALYZED));
      doc.add(newField("body", sb.toString(), Field.Index.ANALYZED));
      w.updateDocument(new Term("id", ""+id), doc);
      docsInSegment.incrementAndGet();
      if (dir.fileExists("_0_1.del")) {
        if (VERBOSE) {
          System.out.println("TEST: deletes created @ id=" + id);
        }
        break;
      }
      id++;
    }
    closing.set(true);
    w.close();
    assertTrue(sawAfterFlush.get());
    dir.close();
  }
}