package org.apache.lucene.index;

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import junit.framework.Assert;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util._TestUtil;
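
/**
 * Stress test for IndexWriter: several IndexingThreads concurrently add,
 * update and delete documents (each thread over its own id range), the same
 * documents are then indexed serially into a second directory, and the two
 * resulting indexes are verified to be equivalent: same stored fields, term
 * vectors and postings for every id.
 */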
public class TestStressIndexing2 extends LuceneTestCase {
  static int maxFields=4;
  static int bigFieldSize=10;
  static boolean sameFieldOrder=false;
  static int mergeFactor=3;
  static int maxBufferedDocs=3;
  static int seed=0;
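
  // IndexWriter that randomly yields inside IndexWriter's internal test
  // points, shaking up thread scheduling to expose concurrency bugs.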
  public class MockIndexWriter extends IndexWriter {

    public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
      super(dir, conf);
    }

    @Override
    boolean testPoint(String name) {
      //      if (name.equals("startCommit")) {
      if (random.nextInt(4) == 2)
        Thread.yield();
      return true;
    }
  }

  public void testRandomIWReader() throws Throwable {
    Directory dir = newDirectory();

    // TODO: verify equals using IW.getReader
    DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
    IndexReader reader = dw.writer.getReader();
    dw.writer.commit();
    verifyEquals(random, reader, dir, "id");
    reader.close();
    dw.writer.close();
    dir.close();
  }

  public void testRandom() throws Throwable {
    Directory dir1 = newDirectory();
    Directory dir2 = newDirectory();
    // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
    int maxThreadStates = 1+random.nextInt(10);
    boolean doReaderPooling = random.nextBoolean();
    Map<String,Document> docs = indexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling);
    indexSerial(random, docs, dir2);

    // verifyEquals(dir1, dir1, "id");
    // verifyEquals(dir2, dir2, "id");

    verifyEquals(dir1, dir2, "id");
    dir1.close();
    dir2.close();
  }

  public void testMultiConfig() throws Throwable {
    // test lots of smaller different params together
    int num = 3 * RANDOM_MULTIPLIER;
    for (int i = 0; i < num; i++) { // increase iterations for better testing
      if (VERBOSE) {
        System.out.println("\n\nTEST: top iter=" + i);
      }
      sameFieldOrder=random.nextBoolean();
      mergeFactor=random.nextInt(3)+2;
      maxBufferedDocs=random.nextInt(3)+2;
      int maxThreadStates = 1+random.nextInt(10);
      boolean doReaderPooling = random.nextBoolean();
      seed++;

      int nThreads=random.nextInt(5)+1;
      int iter=random.nextInt(5)+1;
      int range=random.nextInt(20)+1;
      Directory dir1 = newDirectory();
      Directory dir2 = newDirectory();
      if (VERBOSE) {
        System.out.println("  nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor);
      }
      Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling);
      if (VERBOSE) {
        System.out.println("TEST: index serial");
      }
      indexSerial(random, docs, dir2);
      if (VERBOSE) {
        System.out.println("TEST: verify");
      }
      verifyEquals(dir1, dir2, "id");
      dir1.close();
      dir2.close();
    }
  }

  static Term idTerm = new Term("id","");
  IndexingThread[] threads;
  static Comparator<Fieldable> fieldNameComparator = new Comparator<Fieldable>() {
    public int compare(Fieldable o1, Fieldable o2) {
      return o1.name().compareTo(o2.name());
    }
  };

  // This test avoids using any extra synchronization in the multiple
  // indexing threads to test that IndexWriter does correctly synchronize
  // everything.

  public static class DocsAndWriter {
    Map<String,Document> docs;
    IndexWriter writer;
  }

  public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
    IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB(
        0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()));
    w.setInfoStream(VERBOSE ? System.out : null);
    w.commit();
    setUseCompoundFile(w.getConfig().getMergePolicy(), false);
    setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);
    /***
    w.setMaxMergeDocs(Integer.MAX_VALUE);
    w.setMaxFieldLength(10000);
    w.setRAMBufferSizeMB(1);
    w.setMergeFactor(10);
    ***/

    threads = new IndexingThread[nThreads];
    for (int i=0; i<threads.length; i++) {
      IndexingThread th = new IndexingThread();
      th.w = w;
      th.base = 1000000+i*range;
      th.range = range;
      th.iterations = iterations;
      threads[i] = th;
    }

    for (int i=0; i<threads.length; i++) {
      threads[i].start();
    }
    for (int i=0; i<threads.length; i++) {
      threads[i].join();
    }

    for (int i=0; i<threads.length; i++) {
      IndexingThread th = threads[i];
      synchronized(th) {
        docs.putAll(th.docs);
      }
    }

    _TestUtil.checkIndex(dir);
    DocsAndWriter dw = new DocsAndWriter();
    dw.docs = docs;
    dw.writer = w;
    return dw;
  }

  public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates,
                                          boolean doReaderPooling) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
    for(int iter=0;iter<3;iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }
      IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)
          .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
          .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
      w.setInfoStream(VERBOSE ? System.out : null);
      setUseCompoundFile(w.getConfig().getMergePolicy(), false);
      setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);

      threads = new IndexingThread[nThreads];
      for (int i=0; i<threads.length; i++) {
        IndexingThread th = new IndexingThread();
        th.w = w;
        th.base = 1000000+i*range;
        th.range = range;
        th.iterations = iterations;
        threads[i] = th;
      }

      for (int i=0; i<threads.length; i++) {
        threads[i].start();
      }
      for (int i=0; i<threads.length; i++) {
        threads[i].join();
      }

      w.close();

      for (int i=0; i<threads.length; i++) {
        IndexingThread th = threads[i];
        synchronized(th) {
          docs.putAll(th.docs);
        }
      }
    }

    _TestUtil.checkIndex(dir);

    return docs;
  }

  public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
    IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy()));

    // index all docs in a single thread
    Iterator<Document> iter = docs.values().iterator();
    while (iter.hasNext()) {
      Document d = iter.next();
      ArrayList<Fieldable> fields = new ArrayList<Fieldable>();
      fields.addAll(d.getFields());
      // put fields in same order each time
      Collections.sort(fields, fieldNameComparator);

      Document d1 = new Document();
      d1.setBoost(d.getBoost());
      for (int i=0; i<fields.size(); i++) {
        d1.add(fields.get(i));
      }
      w.addDocument(d1);
      // System.out.println("indexing "+d1);
    }

    w.close();
  }

  public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
    IndexReader r2 = IndexReader.open(dir2);
    verifyEquals(r1, r2, idField);
    r2.close();
  }

  public static void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
    IndexReader r1 = IndexReader.open(dir1, true);
    IndexReader r2 = IndexReader.open(dir2, true);
    verifyEquals(r1, r2, idField);
    r1.close();
    r2.close();
  }

  private static void printDocs(IndexReader r) throws Throwable {
    IndexReader[] subs = r.getSequentialSubReaders();
    for(IndexReader sub : subs) {
      System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
      for(int docID=0;docID<sub.maxDoc();docID++) {
        Document doc = sub.document(docID);
        if (!sub.isDeleted(docID)) {
          System.out.println("    docID=" + docID + " id:" + doc.get("id"));
        } else {
          System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
        }
      }
    }
  }
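
  // Verification strategy: the two indexes may assign different internal
  // docids, so first walk the id terms to build an r2-docid -> r1-docid
  // mapping, comparing stored fields and term vectors per id along the way,
  // then compare every term's postings with r2 docids remapped into r1 space.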
  public static void verifyEquals(IndexReader r1, IndexReader r2, String idField) throws Throwable {
    if (VERBOSE) {
      System.out.println("\nr1 docs:");
      printDocs(r1);
      System.out.println("\nr2 docs:");
      printDocs(r2);
    }
    if (r1.numDocs() != r2.numDocs()) {
      assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs();
    }
    boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc());

    int[] r2r1 = new int[r2.maxDoc()];   // r2 id to r1 id mapping

    TermDocs termDocs1 = r1.termDocs();
    TermDocs termDocs2 = r2.termDocs();

    // create mapping from id2 space to id1 based on idField
    idField = StringHelper.intern(idField);
    TermEnum termEnum = r1.terms (new Term (idField, ""));
    do {
      Term term = termEnum.term();
      if (term==null || term.field() != idField) break;

      termDocs1.seek (termEnum);
      if (!termDocs1.next()) {
        // This doc is deleted and wasn't replaced
        termDocs2.seek(termEnum);
        assertFalse(termDocs2.next());
        continue;
      }

      int id1 = termDocs1.doc();
      assertFalse(termDocs1.next());

      termDocs2.seek(termEnum);
      assertTrue(termDocs2.next());
      int id2 = termDocs2.doc();
      assertFalse(termDocs2.next());

      r2r1[id2] = id1;

      // verify stored fields are equivalent
      try {
        verifyEquals(r1.document(id1), r2.document(id2));
      } catch (Throwable t) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term);
        System.out.println("  d1=" + r1.document(id1));
        System.out.println("  d2=" + r2.document(id2));
        throw t;
      }

      try {
        // verify term vectors are equivalent
        verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
      } catch (Throwable e) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
        TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
        System.out.println("  d1=" + tv1);
        if (tv1 != null)
          for(int i=0;i<tv1.length;i++)
            System.out.println("    " + i + ": " + tv1[i]);

        TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
        System.out.println("  d2=" + tv2);
        if (tv2 != null)
          for(int i=0;i<tv2.length;i++)
            System.out.println("    " + i + ": " + tv2[i]);

        throw e;
      }

    } while (termEnum.next());

    TermEnum termEnum1 = r1.terms (new Term ("", ""));
    TermEnum termEnum2 = r2.terms (new Term ("", ""));

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.numDocs()];
    long[] info2 = new long[r2.numDocs()];
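
    // Each entry packs the docid into the high 32 bits and the freq into the
    // low 32 bits, so sorting the longs orders postings by docid; after
    // remapping r2 docids into r1 space the two arrays can be compared
    // element by element.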
    for(;;) {
      Term term1, term2;

      // iterate until we get some docs
      int len1;
      for(;;) {
        len1 = 0;
        term1 = termEnum1.term();
        if (term1==null) break;
        termDocs1.seek(termEnum1);
        while (termDocs1.next()) {
          int d1 = termDocs1.doc();
          int f1 = termDocs1.freq();
          info1[len1] = (((long)d1)<<32) | f1;
          len1++;
        }
        if (len1 > 0) break;
        if (!termEnum1.next()) break;
      }

      // iterate until we get some docs
      int len2;
      for(;;) {
        len2 = 0;
        term2 = termEnum2.term();
        if (term2==null) break;
        termDocs2.seek(termEnum2);
        while (termDocs2.next()) {
          int d2 = termDocs2.doc();
          int f2 = termDocs2.freq();
          info2[len2] = (((long)r2r1[d2])<<32) | f2;
          len2++;
        }
        if (len2 > 0) break;
        if (!termEnum2.next()) break;
      }

      // docFreq counts deleted docs too, so only compare it when neither
      // index has deletes
      if (!hasDeletes)
        assertEquals(termEnum1.docFreq(), termEnum2.docFreq());

      assertEquals(len1, len2);
      if (len1==0) break;  // no more terms

      assertEquals(term1, term2);

      // sort info2 to get it into ascending docid
      Arrays.sort(info2, 0, len2);

      for (int i=0; i<len1; i++) {
        assertEquals(info1[i], info2[i]);
      }

      termEnum1.next();
      termEnum2.next();
    }
  }

  public static void verifyEquals(Document d1, Document d2) {
    List<Fieldable> ff1 = d1.getFields();
    List<Fieldable> ff2 = d2.getFields();

    Collections.sort(ff1, fieldNameComparator);
    Collections.sort(ff2, fieldNameComparator);

    assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());

    for (int i=0; i<ff1.size(); i++) {
      Fieldable f1 = ff1.get(i);
      Fieldable f2 = ff2.get(i);
      if (f1.isBinary()) {
        assert(f2.isBinary());
      } else {
        String s1 = f1.stringValue();
        String s2 = f2.stringValue();
        assertEquals(ff1 + " : " + ff2, s1, s2);
      }
    }
  }

  public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
    if (d1 == null) {
      assertTrue(d2 == null);
      return;
    }
    assertTrue(d2 != null);

    assertEquals(d1.length, d2.length);
    for(int i=0;i<d1.length;i++) {
      TermFreqVector v1 = d1[i];
      TermFreqVector v2 = d2[i];
      if (v1 == null || v2 == null)
        System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
      assertEquals(v1.size(), v2.size());
      int numTerms = v1.size();
      String[] terms1 = v1.getTerms();
      String[] terms2 = v2.getTerms();
      int[] freq1 = v1.getTermFrequencies();
      int[] freq2 = v2.getTermFrequencies();
      for(int j=0;j<numTerms;j++) {
        if (!terms1[j].equals(terms2[j]))
          assertEquals(terms1[j], terms2[j]);
        assertEquals(freq1[j], freq2[j]);
      }
      if (v1 instanceof TermPositionVector) {
        assertTrue(v2 instanceof TermPositionVector);
        TermPositionVector tpv1 = (TermPositionVector) v1;
        TermPositionVector tpv2 = (TermPositionVector) v2;
        for(int j=0;j<numTerms;j++) {
          int[] pos1 = tpv1.getTermPositions(j);
          int[] pos2 = tpv2.getTermPositions(j);
          if (pos1 == null) {
            assertNull(pos2);
          } else {
            assertNotNull(pos2);
            assertEquals(pos1.length, pos2.length);
            TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
            TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
            if (offsets1 == null)
              assertTrue(offsets2 == null);
            else
              assertTrue(offsets2 != null);
            for(int k=0;k<pos1.length;k++) {
              assertEquals(pos1[k], pos2[k]);
              if (offsets1 != null) {
                assertEquals(offsets1[k].getStartOffset(),
                             offsets2[k].getStartOffset());
                assertEquals(offsets1[k].getEndOffset(),
                             offsets2[k].getEndOffset());
              }
            }
          }
        }
      }
    }
  }

  private class IndexingThread extends Thread {
    IndexWriter w;
    int base;
    int range;
    int iterations;
    Map<String,Document> docs = new HashMap<String,Document>();
    Random r;

    public int nextInt(int lim) {
      return r.nextInt(lim);
    }

    // start is inclusive and end is exclusive
    public int nextInt(int start, int end) {
      return start + r.nextInt(end-start);
    }

    char[] buffer = new char[100];
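
    // Appends one random token to buffer starting at 'start' and returns the
    // next start offset.  Code units are drawn from all UTF-16 ranges,
    // including surrogate pairs and deliberately unpaired surrogates, to
    // stress UTF-8 handling during indexing; the token ends with a space so
    // WhitespaceAnalyzer splits it.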
    private int addUTF8Token(int start) {
      final int end = start + nextInt(20);
      if (buffer.length < 1+end) {
        char[] newBuffer = new char[(int) ((1+end)*1.25)];
        System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
        buffer = newBuffer;
      }

      for(int i=start;i<end;i++) {
        final int t = nextInt(6);
        if (0 == t && i < end-1) {
          // Make a surrogate pair
          // High surrogate
          buffer[i++] = (char) nextInt(0xd800, 0xdc00);
          // Low surrogate
          buffer[i] = (char) nextInt(0xdc00, 0xe000);
        } else if (t <= 1)
          buffer[i] = (char) nextInt(0x80);
        else if (2 == t)
          buffer[i] = (char) nextInt(0x80, 0x800);
        else if (3 == t)
          buffer[i] = (char) nextInt(0x800, 0xd800);
        else if (4 == t)
          buffer[i] = (char) nextInt(0xe000, 0xffff);
        else {
          // Illegal unpaired surrogate
          if (r.nextBoolean())
            buffer[i] = (char) nextInt(0xd800, 0xdc00);
          else
            buffer[i] = (char) nextInt(0xdc00, 0xe000);
        }
      }

      buffer[end] = ' ';
      return 1+end;
    }

    public String getString(int nTokens) {
      nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1;

      // Half the time make a random UTF8 string
      if (r.nextBoolean())
        return getUTF8String(nTokens);

      // avoid StringBuffer because it adds extra synchronization.
      char[] arr = new char[nTokens*2];
      for (int i=0; i<nTokens; i++) {
        arr[i*2] = (char)('A' + r.nextInt(10));
        arr[i*2+1] = ' ';
      }
      return new String(arr);
    }

    public String getUTF8String(int nTokens) {
      int upto = 0;
      Arrays.fill(buffer, (char) 0);
      for(int i=0;i<nTokens;i++)
        upto = addUTF8Token(upto);
      return new String(buffer, 0, upto);
    }

    public String getIdString() {
      return Integer.toString(base + nextInt(range));
    }
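
    // Builds a document with a random id plus a random number of random
    // fields (varying store/index/term-vector options), updates it in the
    // writer by id term, and records it in 'docs' so indexSerial can replay
    // the same final state.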
    public void indexDoc() throws IOException {
      Document d = new Document();

      ArrayList<Field> fields = new ArrayList<Field>();
      String idString = getIdString();
      Field idField = newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
      fields.add(idField);

      int nFields = nextInt(maxFields);
      for (int i=0; i<nFields; i++) {

        Field.TermVector tvVal = Field.TermVector.NO;
        switch (nextInt(4)) {
        case 0:
          tvVal = Field.TermVector.NO;
          break;
        case 1:
          tvVal = Field.TermVector.YES;
          break;
        case 2:
          tvVal = Field.TermVector.WITH_POSITIONS;
          break;
        case 3:
          tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
          break;
        }

        switch (nextInt(4)) {
        case 0:
          fields.add(newField("f" + nextInt(100), getString(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, tvVal));
          break;
        case 1:
          fields.add(newField("f" + nextInt(100), getString(0), Field.Store.NO, Field.Index.ANALYZED, tvVal));
          break;
        case 2:
          fields.add(newField("f" + nextInt(100), getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
          break;
        case 3:
          fields.add(newField("f" + nextInt(100), getString(bigFieldSize), Field.Store.YES, Field.Index.ANALYZED, tvVal));
          break;
        }
      }

      if (sameFieldOrder) {
        Collections.sort(fields, fieldNameComparator);
      } else {
        // random placement of id field also
        Collections.swap(fields, nextInt(fields.size()), 0);
      }

      for (int i=0; i<fields.size(); i++) {
        d.add(fields.get(i));
      }
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
      }
      w.updateDocument(idTerm.createTerm(idString), d);
      //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
      docs.put(idString, d);
    }

    public void deleteDoc() throws IOException {
      String idString = getIdString();
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": del id:" + idString);
      }
      w.deleteDocuments(idTerm.createTerm(idString));
      docs.remove(idString);
    }

    public void deleteByQuery() throws IOException {
      String idString = getIdString();
      if (VERBOSE) {
        System.out.println(Thread.currentThread().getName() + ": del query id:" + idString);
      }
      w.deleteDocuments(new TermQuery(idTerm.createTerm(idString)));
      docs.remove(idString);
    }

    @Override
    public void run() {
      try {
        r = new Random(base+range+seed);
        for (int i=0; i<iterations; i++) {
          int what = nextInt(100);
          if (what < 5) {
            deleteDoc();
          } else if (what < 10) {
            deleteByQuery();
          } else {
            indexDoc();
          }
        }
      } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.toString());
      }

      // Empty synchronized block: pairs with the synchronized(th) in
      // indexRandom/indexRandomIWReader so the collecting thread sees this
      // thread's writes to docs.
      synchronized (this) {
      }
    }
  }
}