+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import junit.framework.Assert;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util._TestUtil;
-
-/**
- * Stress test that indexes randomly generated documents from several threads
- * (indexRandom / indexRandomIWReader), re-indexes the resulting documents from
- * a single thread (indexSerial) and checks that both indexes hold equivalent
- * content (verifyEquals).
- */
-public class TestStressIndexing2 extends LuceneTestCase {
- // Exclusive upper bound for the number of random "f*" fields added to each
- // document by IndexingThread.indexDoc (the id field is always added).
- static int maxFields=4;
- // Token count passed to getString for the large analyzed field variant.
- static int bigFieldSize=10;
- // When true, indexDoc sorts fields by name before adding them; otherwise the
- // id field is swapped to a random position.
- static boolean sameFieldOrder=false;
- // Merge factor applied to the writer's merge policy via setMergeFactor.
- static int mergeFactor=3;
- // Value handed to IndexWriterConfig.setMaxBufferedDocs.
- static int maxBufferedDocs=3;
- // Added to every IndexingThread's Random seed; incremented once per
- // testMultiConfig iteration.
- static int seed=0;
-
- /**
-  * IndexWriter subclass used by the threaded indexing helpers: at each test
-  * point it yields the calling thread whenever the test's random draws 2 out
-  * of [0,4).
-  */
- public class MockIndexWriter extends IndexWriter {
-
-   public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
-     super(dir, conf);
-   }
-
-   /**
-    * Applies to every test point; the name is not inspected. Always returns true.
-    */
-   @Override
-   boolean testPoint(String name) {
-     final boolean yieldNow = random.nextInt(4) == 2;
-     if (yieldNow) {
-       Thread.yield();
-     }
-     return true;
-   }
- }
-
- /**
-  * Indexes random documents with five threads, then compares a reader obtained
-  * from the still-open writer against the committed index in the same directory.
-  */
- public void testRandomIWReader() throws Throwable {
-   final Directory dir = newDirectory();
-
-   // TODO: verify equals using IW.getReader
-   final DocsAndWriter indexed = indexRandomIWReader(5, 3, 100, dir);
-   final IndexWriter writer = indexed.writer;
-   // The reader is obtained before the commit, exactly as in the comparison below.
-   final IndexReader writerReader = writer.getReader();
-   writer.commit();
-   verifyEquals(random, writerReader, dir, "id");
-   writerReader.close();
-   writer.close();
-   dir.close();
- }
-
- /**
-  * Indexes random documents with five threads into one directory, replays the
-  * resulting documents serially into a second directory, and verifies that the
-  * two indexes are equivalent.
-  */
- public void testRandom() throws Throwable {
-   final Directory threadedDir = newDirectory();
-   final Directory serialDir = newDirectory();
-   final int maxThreadStates = random.nextInt(10) + 1;
-   final boolean doReaderPooling = random.nextBoolean();
-   final Map<String,Document> indexedDocs =
-       indexRandom(5, 3, 100, threadedDir, maxThreadStates, doReaderPooling);
-   indexSerial(random, indexedDocs, serialDir);
-
-   // To sanity-check the verifier itself, compare each directory with itself:
-   // verifyEquals(dir1, dir1, "id");
-   // verifyEquals(dir2, dir2, "id");
-
-   verifyEquals(threadedDir, serialDir, "id");
-   threadedDir.close();
-   serialDir.close();
- }
-
- /**
-  * Repeats the threaded-vs-serial comparison several times, each round with
-  * freshly randomized settings (field order, merge factor, buffered docs,
-  * thread states, reader pooling, thread count, iterations, id range).
-  */
- public void testMultiConfig() throws Throwable {
-   // Raise the round count for more thorough testing.
-   final int rounds = atLeast(3);
-   int round = 0;
-   while (round < rounds) {
-     if (VERBOSE) {
-       System.out.println("\n\nTEST: top iter=" + round);
-     }
-     // The order of the random draws below is kept fixed so a given seed
-     // reproduces the same configuration.
-     sameFieldOrder = random.nextBoolean();
-     mergeFactor = 2 + random.nextInt(3);
-     maxBufferedDocs = 2 + random.nextInt(3);
-     final int maxThreadStates = 1 + random.nextInt(10);
-     final boolean doReaderPooling = random.nextBoolean();
-     seed++;
-
-     final int nThreads = 1 + random.nextInt(5);
-     final int iter = 1 + random.nextInt(5);
-     final int range = 1 + random.nextInt(20);
-     final Directory dir1 = newDirectory();
-     final Directory dir2 = newDirectory();
-     if (VERBOSE) {
-       System.out.println(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor);
-     }
-     final Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling);
-     if (VERBOSE) {
-       System.out.println("TEST: index serial");
-     }
-     indexSerial(random, docs, dir2);
-     if (VERBOSE) {
-       System.out.println("TEST: verify");
-     }
-     verifyEquals(dir1, dir2, "id");
-     dir1.close();
-     dir2.close();
-     round++;
-   }
- }
-
-
- /** Template term on the "id" field; concrete id terms are derived from it via createTerm. */
- static Term idTerm = new Term("id","");
- /** Worker threads of the most recent indexRandom / indexRandomIWReader run. */
- IndexingThread[] threads;
- /** Orders fields by their name only. */
- static Comparator<Fieldable> fieldNameComparator = new Comparator<Fieldable>() {
-   public int compare(Fieldable o1, Fieldable o2) {
-     final String leftName = o1.name();
-     final String rightName = o2.name();
-     return leftName.compareTo(rightName);
-   }
- };
-
- // This test avoids using any extra synchronization in the multiple
- // indexing threads to test that IndexWriter does correctly synchronize
- // everything.
-
- /**
-  * Result holder returned by indexRandomIWReader: the documents collected from
-  * the indexing threads plus the writer, which that method leaves open.
-  */
- public static class DocsAndWriter {
- // Documents keyed by their id string, merged from every IndexingThread.
- Map<String,Document> docs;
- // The writer used for indexing; the caller is expected to close it.
- IndexWriter writer;
- }
-
- /**
-  * Runs {@code nThreads} IndexingThreads against a newly created index in
-  * {@code dir} and returns the documents they report together with the writer.
-  * The writer is NOT closed here; the caller owns it.
-  *
-  * @param nThreads   number of indexing threads to start
-  * @param iterations operations each thread performs
-  * @param range      size of each thread's id range
-  * @param dir        directory to index into
-  */
- public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
-   final IndexWriterConfig conf = newIndexWriterConfig(
-       TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
-       .setOpenMode(OpenMode.CREATE)
-       .setRAMBufferSizeMB(0.1)
-       .setMaxBufferedDocs(maxBufferedDocs)
-       .setMergePolicy(newLogMergePolicy());
-   final IndexWriter writer = new MockIndexWriter(dir, conf);
-   writer.setInfoStream(VERBOSE ? System.out : null);
-   writer.commit();
-   setUseCompoundFile(writer.getConfig().getMergePolicy(), false);
-   setMergeFactor(writer.getConfig().getMergePolicy(), mergeFactor);
-
-   // Each worker gets its own, non-overlapping id base.
-   threads = new IndexingThread[nThreads];
-   for (int slot = 0; slot < nThreads; slot++) {
-     final IndexingThread worker = new IndexingThread();
-     worker.w = writer;
-     worker.base = 1000000*slot;
-     worker.range = range;
-     worker.iterations = iterations;
-     threads[slot] = worker;
-   }
-
-   for (final IndexingThread worker : threads) {
-     worker.start();
-   }
-   for (final IndexingThread worker : threads) {
-     worker.join();
-   }
-
-   // Collect each worker's documents while holding that worker's monitor.
-   final Map<String,Document> merged = new HashMap<String,Document>();
-   for (final IndexingThread worker : threads) {
-     synchronized (worker) {
-       merged.putAll(worker.docs);
-     }
-   }
-
-   _TestUtil.checkIndex(dir);
-
-   final DocsAndWriter result = new DocsAndWriter();
-   result.docs = merged;
-   result.writer = writer;
-   return result;
- }
-
- public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates,
- boolean doReaderPooling) throws IOException, InterruptedException {
- Map<String,Document> docs = new HashMap<String,Document>();
- for(int iter=0;iter<3;iter++) {
- if (VERBOSE) {
- System.out.println("TEST: iter=" + iter);
- }
- IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)
- .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
- .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
- w.setInfoStream(VERBOSE ? System.out : null);
- setUseCompoundFile(w.getConfig().getMergePolicy(), false);
- setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);
-
- threads = new IndexingThread[nThreads];
- for (int i=0; i<threads.length; i++) {
- IndexingThread th = new IndexingThread();
- th.w = w;
- th.base = 1000000*i;
- th.range = range;
- th.iterations = iterations;
- threads[i] = th;
- }
-
- for (int i=0; i<threads.length; i++) {
- threads[i].start();
- }
- for (int i=0; i<threads.length; i++) {
- threads[i].join();
- }
-
- // w.optimize();
- w.close();
-
- for (int i=0; i<threads.length; i++) {
- IndexingThread th = threads[i];
- synchronized(th) {
- docs.putAll(th.docs);
- }
- }
- }
-
- _TestUtil.checkIndex(dir);
-
- return docs;
- }
-
-
- /**
-  * Indexes every document of {@code docs} into {@code dir} from the calling
-  * thread. Each document is rebuilt with its fields sorted by name (and the
-  * same boost) before it is added.
-  *
-  * @param random source of randomness for the writer configuration
-  * @param docs   documents to index, keyed by id string
-  * @param dir    directory to index into
-  */
- public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
-   final IndexWriter writer = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy()));
-
-   for (final Document source : docs.values()) {
-     // Work on a copy so the fields end up in the same order every time.
-     final List<Fieldable> ordered = new ArrayList<Fieldable>(source.getFields());
-     Collections.sort(ordered, fieldNameComparator);
-
-     final Document rebuilt = new Document();
-     rebuilt.setBoost(source.getBoost());
-     for (final Fieldable field : ordered) {
-       rebuilt.add(field);
-     }
-     writer.addDocument(rebuilt);
-   }
-
-   writer.close();
- }
-
- /**
-  * Compares {@code r1} against a reader opened on {@code dir2}.
-  * The reader opened here is closed even when verification fails, so a failed
-  * comparison does not additionally leave an open reader behind.
-  *
-  * @param r       unused
-  * @param r1      reader to verify; not closed by this method
-  * @param dir2    directory holding the index to compare against
-  * @param idField name of the field that uniquely identifies documents
-  * @throws Throwable whatever the underlying comparison throws
-  */
- public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
-   final IndexReader r2 = IndexReader.open(dir2);
-   try {
-     verifyEquals(r1, r2, idField);
-   } finally {
-     r2.close();
-   }
- }
-
- /**
-  * Opens a read-only reader on each directory and compares the two indexes.
-  * Both readers are closed even when opening the second reader or the
-  * comparison itself throws.
-  *
-  * @param dir1    first index
-  * @param dir2    second index
-  * @param idField name of the field that uniquely identifies documents
-  * @throws Throwable whatever the underlying comparison throws
-  */
- public static void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
-   final IndexReader r1 = IndexReader.open(dir1, true);
-   try {
-     final IndexReader r2 = IndexReader.open(dir2, true);
-     try {
-       verifyEquals(r1, r2, idField);
-     } finally {
-       r2.close();
-     }
-   } finally {
-     r1.close();
-   }
- }
-
- private static void printDocs(IndexReader r) throws Throwable {
- IndexReader[] subs = r.getSequentialSubReaders();
- for(IndexReader sub : subs) {
- System.out.println(" " + ((SegmentReader) sub).getSegmentInfo());
- for(int docID=0;docID<sub.maxDoc();docID++) {
- Document doc = sub.document(docID);
- if (!sub.isDeleted(docID)) {
- System.out.println(" docID=" + docID + " id:" + doc.get("id"));
- } else {
- System.out.println(" DEL docID=" + docID + " id:" + doc.get("id"));
- }
- }
- }
- }
-
-
- /**
-  * Verifies that two readers expose equivalent indexes. Documents are paired
-  * through the unique term in {@code idField}; for each pair the stored fields
-  * and term vectors are compared, and afterwards the postings (doc and freq)
-  * of every term are compared with r2's doc ids translated into r1's.
-  *
-  * @param r1      first reader
-  * @param r2      second reader
-  * @param idField name of the field whose terms uniquely identify documents
-  * @throws Throwable any assertion failure or reader error
-  */
- public static void verifyEquals(IndexReader r1, IndexReader r2, String idField) throws Throwable {
- if (VERBOSE) {
- System.out.println("\nr1 docs:");
- printDocs(r1);
- System.out.println("\nr2 docs:");
- printDocs(r2);
- }
- // NOTE(review): this uses the Java assert keyword, so the check only fires
- // when assertions are enabled (-ea).
- if (r1.numDocs() != r2.numDocs()) {
- assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs();
- }
- // "No deletes" requires equal maxDoc and r1 having no gaps (numDocs == maxDoc).
- boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc());
-
- int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping
-
- TermDocs termDocs1 = r1.termDocs();
- TermDocs termDocs2 = r2.termDocs();
-
- // create mapping from r2's doc id space to r1's doc id space based on idField
- // The field name is interned because the loop compares it by reference (!=).
- idField = StringHelper.intern(idField);
- TermEnum termEnum = r1.terms (new Term (idField, ""));
- do {
- Term term = termEnum.term();
- // Stop once the enum is exhausted or has moved past the id field.
- if (term==null || term.field() != idField) break;
-
- termDocs1.seek (termEnum);
- if (!termDocs1.next()) {
- // This doc is deleted and wasn't replaced
- termDocs2.seek(termEnum);
- assertFalse(termDocs2.next());
- continue;
- }
-
- // Each id term must match exactly one live doc in each reader.
- int id1 = termDocs1.doc();
- assertFalse(termDocs1.next());
-
- termDocs2.seek(termEnum);
- assertTrue(termDocs2.next());
- int id2 = termDocs2.doc();
- assertFalse(termDocs2.next());
-
- r2r1[id2] = id1;
-
- // verify stored fields are equivalent
- try {
- verifyEquals(r1.document(id1), r2.document(id2));
- } catch (Throwable t) {
- // Print both documents for diagnosis, then rethrow the original failure.
- System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term);
- System.out.println(" d1=" + r1.document(id1));
- System.out.println(" d2=" + r2.document(id2));
- throw t;
- }
-
- try {
- // verify term vectors are equivalent
- verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
- } catch (Throwable e) {
- // Print both sets of term vectors for diagnosis, then rethrow.
- System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
- TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
- System.out.println(" d1=" + tv1);
- if (tv1 != null)
- for(int i=0;i<tv1.length;i++)
- System.out.println(" " + i + ": " + tv1[i]);
-
- TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
- System.out.println(" d2=" + tv2);
- if (tv2 != null)
- for(int i=0;i<tv2.length;i++)
- System.out.println(" " + i + ": " + tv2[i]);
-
- throw e;
- }
-
- } while (termEnum.next());
-
- termEnum.close();
-
- // Verify postings
- // Both enums are positioned using a term with empty field and empty text.
- TermEnum termEnum1 = r1.terms (new Term ("", ""));
- TermEnum termEnum2 = r2.terms (new Term ("", ""));
-
- // pack both doc and freq into single element for easy sorting
- // (doc id in the upper 32 bits, freq in the lower bits)
- long[] info1 = new long[r1.numDocs()];
- long[] info2 = new long[r2.numDocs()];
-
- for(;;) {
- Term term1,term2;
-
- // iterate until we get some docs
- // (terms for which termDocs1 returns no docs are skipped)
- int len1;
- for(;;) {
- len1=0;
- term1 = termEnum1.term();
- if (term1==null) break;
- termDocs1.seek(termEnum1);
- while (termDocs1.next()) {
- int d1 = termDocs1.doc();
- int f1 = termDocs1.freq();
- info1[len1] = (((long)d1)<<32) | f1;
- len1++;
- }
- if (len1>0) break;
- if (!termEnum1.next()) break;
- }
-
- // iterate until we get some docs
- // (same skipping for r2; doc ids are translated to r1's via r2r1)
- int len2;
- for(;;) {
- len2=0;
- term2 = termEnum2.term();
- if (term2==null) break;
- termDocs2.seek(termEnum2);
- while (termDocs2.next()) {
- int d2 = termDocs2.doc();
- int f2 = termDocs2.freq();
- info2[len2] = (((long)r2r1[d2])<<32) | f2;
- len2++;
- }
- if (len2>0) break;
- if (!termEnum2.next()) break;
- }
-
- // docFreq is only compared when neither index has deletions.
- if (!hasDeletes)
- assertEquals(termEnum1.docFreq(), termEnum2.docFreq());
-
- assertEquals(len1, len2);
- if (len1==0) break; // no more terms
-
- assertEquals(term1, term2);
-
- // sort info2 to get it into ascending docid
- // (after translation its entries are no longer ordered by r1 doc id)
- Arrays.sort(info2, 0, len2);
-
- // now compare
- for (int i=0; i<len1; i++) {
- assertEquals(info1[i], info2[i]);
- }
-
- termEnum1.next();
- termEnum2.next();
- }
- }
-
- /**
-  * Asserts that two documents carry equivalent stored fields: the same number
-  * of fields and, after ordering both field lists by name, pairwise either both
-  * binary or equal string values.
-  *
-  * Both field lists are sorted in place, i.e. the lists returned by
-  * {@code getFields()} are reordered.
-  *
-  * @param d1 first document
-  * @param d2 second document
-  */
- public static void verifyEquals(Document d1, Document d2) {
-   List<Fieldable> ff1 = d1.getFields();
-   List<Fieldable> ff2 = d2.getFields();
-
-   Collections.sort(ff1, fieldNameComparator);
-   Collections.sort(ff2, fieldNameComparator);
-
-   // Built after sorting so failure messages show the order being compared.
-   final String context = ff1 + " : " + ff2;
-   assertEquals(context, ff1.size(), ff2.size());
-
-   for (int i=0; i<ff1.size(); i++) {
-     Fieldable f1 = ff1.get(i);
-     Fieldable f2 = ff2.get(i);
-     if (f1.isBinary()) {
-       // A real test assertion instead of the Java assert keyword, so the
-       // check also runs when the JVM was started without -ea.
-       assertTrue(context, f2.isBinary());
-     } else {
-       assertEquals(context, f1.stringValue(), f2.stringValue());
-     }
-   }
- }
-
- public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
- if (d1 == null) {
- assertTrue(d2 == null);
- return;
- }
- assertTrue(d2 != null);
-
- assertEquals(d1.length, d2.length);
- for(int i=0;i<d1.length;i++) {
- TermFreqVector v1 = d1[i];
- TermFreqVector v2 = d2[i];
- if (v1 == null || v2 == null)
- System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
- assertEquals(v1.size(), v2.size());
- int numTerms = v1.size();
- String[] terms1 = v1.getTerms();
- String[] terms2 = v2.getTerms();
- int[] freq1 = v1.getTermFrequencies();
- int[] freq2 = v2.getTermFrequencies();
- for(int j=0;j<numTerms;j++) {
- if (!terms1[j].equals(terms2[j]))
- assertEquals(terms1[j], terms2[j]);
- assertEquals(freq1[j], freq2[j]);
- }
- if (v1 instanceof TermPositionVector) {
- assertTrue(v2 instanceof TermPositionVector);
- TermPositionVector tpv1 = (TermPositionVector) v1;
- TermPositionVector tpv2 = (TermPositionVector) v2;
- for(int j=0;j<numTerms;j++) {
- int[] pos1 = tpv1.getTermPositions(j);
- int[] pos2 = tpv2.getTermPositions(j);
- if (pos1 == null) {
- assertNull(pos2);
- } else {
- assertNotNull(pos1);
- assertNotNull(pos2);
- assertEquals(pos1.length, pos2.length);
- TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
- TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
- if (offsets1 == null)
- assertTrue(offsets2 == null);
- else
- assertTrue(offsets2 != null);
- for(int k=0;k<pos1.length;k++) {
- assertEquals(pos1[k], pos2[k]);
- if (offsets1 != null) {
- assertEquals(offsets1[k].getStartOffset(),
- offsets2[k].getStartOffset());
- assertEquals(offsets1[k].getEndOffset(),
- offsets2[k].getEndOffset());
- }
- }
- }
- }
- }
- }
- }
-
- /**
-  * Worker thread that performs a random mix of document updates, deletes by
-  * term and deletes by query against the shared writer, using ids drawn from
-  * [base, base+range). It records in {@link #docs} the documents it indexed
-  * and did not delete afterwards. All randomness comes from a Random seeded
-  * in run() with base+range+seed.
-  */
- private class IndexingThread extends Thread {
- // Writer shared with the other threads; assigned by the creator before start().
- IndexWriter w;
- // First id of this thread's id range.
- int base;
- // Number of distinct ids this thread may use.
- int range;
- // Number of operations performed by run().
- int iterations;
- // id string -> document most recently indexed under that id by this thread;
- // entries are removed when this thread deletes the id.
- Map<String,Document> docs = new HashMap<String,Document>();
- // Created at the start of run().
- Random r;
-
- // Returns a random int in [0, lim).
- public int nextInt(int lim) {
- return r.nextInt(lim);
- }
-
- // start is inclusive and end is exclusive
- public int nextInt(int start, int end) {
- return start + r.nextInt(end-start);
- }
-
- // Scratch buffer for getUTF8String; grown on demand by addUTF8Token.
- char[] buffer = new char[100];
-
- // Writes one random token of 0..19 chars into buffer at position start,
- // followed by a single space, and returns the index just past that space.
- private int addUTF8Token(int start) {
- final int end = start + nextInt(20);
- // Need room for the token plus the trailing space at index end.
- if (buffer.length < 1+end) {
- char[] newBuffer = new char[(int) ((1+end)*1.25)];
- System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
- buffer = newBuffer;
- }
-
- for(int i=start;i<end;i++) {
- int t = nextInt(6);
- // A pair needs two slots, so t==0 at the last slot falls through to the
- // (t <= 1) branch below.
- if (0 == t && i < end-1) {
- // Make a surrogate pair
- // High surrogate
- buffer[i++] = (char) nextInt(0xd800, 0xdc00);
- // Low surrogate
- buffer[i] = (char) nextInt(0xdc00, 0xe000);
- } else if (t <= 1)
- buffer[i] = (char) nextInt(0x80);
- else if (2 == t)
- buffer[i] = (char) nextInt(0x80, 0x800);
- else if (3 == t)
- buffer[i] = (char) nextInt(0x800, 0xd800);
- else if (4 == t)
- buffer[i] = (char) nextInt(0xe000, 0xffff);
- else if (5 == t) {
- // Illegal unpaired surrogate
- if (r.nextBoolean())
- buffer[i] = (char) nextInt(0xd800, 0xdc00);
- else
- buffer[i] = (char) nextInt(0xdc00, 0xe000);
- }
- }
- buffer[end] = ' ';
- return 1+end;
- }
-
- // Returns a random string of nTokens space-terminated tokens; nTokens == 0
- // means "pick a count in 1..4".
- public String getString(int nTokens) {
- nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1;
-
- // Half the time make a random UTF8 string
- if (r.nextBoolean())
- return getUTF8String(nTokens);
-
- // avoid StringBuffer because it adds extra synchronization.
- // Otherwise each token is a single letter in 'A'..'J' followed by a space.
- char[] arr = new char[nTokens*2];
- for (int i=0; i<nTokens; i++) {
- arr[i*2] = (char)('A' + r.nextInt(10));
- arr[i*2+1] = ' ';
- }
- return new String(arr);
- }
-
- // Builds a string of nTokens random tokens (see addUTF8Token) in the scratch
- // buffer, which is zero-filled first.
- public String getUTF8String(int nTokens) {
- int upto = 0;
- Arrays.fill(buffer, (char) 0);
- for(int i=0;i<nTokens;i++)
- upto = addUTF8Token(upto);
- return new String(buffer, 0, upto);
- }
-
- // Returns a random id from this thread's range [base, base+range) as a string.
- public String getIdString() {
- return Integer.toString(base + nextInt(range));
- }
-
- // Builds a document with an id field plus 0..maxFields-1 random fields and
- // adds it through updateDocument keyed on the id term, then records it in docs.
- public void indexDoc() throws IOException {
- Document d = new Document();
-
- ArrayList<Field> fields = new ArrayList<Field>();
- String idString = getIdString();
- Field idField = newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
- fields.add(idField);
-
- int nFields = nextInt(maxFields);
- for (int i=0; i<nFields; i++) {
-
- // Pick one of the four term vector settings at random.
- Field.TermVector tvVal = Field.TermVector.NO;
- switch (nextInt(4)) {
- case 0:
- tvVal = Field.TermVector.NO;
- break;
- case 1:
- tvVal = Field.TermVector.YES;
- break;
- case 2:
- tvVal = Field.TermVector.WITH_POSITIONS;
- break;
- case 3:
- tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
- break;
- }
-
- // Pick the field flavor: stored+untokenized single token, unstored analyzed,
- // stored-only (never term vectors), or stored analyzed "big" field.
- // Field names are "f0".."f99".
- switch (nextInt(4)) {
- case 0:
- fields.add(newField("f" + nextInt(100), getString(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, tvVal));
- break;
- case 1:
- fields.add(newField("f" + nextInt(100), getString(0), Field.Store.NO, Field.Index.ANALYZED, tvVal));
- break;
- case 2:
- fields.add(newField("f" + nextInt(100), getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
- break;
- case 3:
- fields.add(newField("f" + nextInt(100), getString(bigFieldSize), Field.Store.YES, Field.Index.ANALYZED, tvVal));
- break;
- }
- }
-
- if (sameFieldOrder) {
- Collections.sort(fields, fieldNameComparator);
- } else {
- // random placement of id field also
- Collections.swap(fields,nextInt(fields.size()), 0);
- }
-
- for (int i=0; i<fields.size(); i++) {
- d.add(fields.get(i));
- }
- if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
- }
- w.updateDocument(idTerm.createTerm(idString), d);
- //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
- docs.put(idString, d);
- }
-
- // Deletes a random id of this thread by term and forgets it in docs.
- public void deleteDoc() throws IOException {
- String idString = getIdString();
- if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": del id:" + idString);
- }
- w.deleteDocuments(idTerm.createTerm(idString));
- docs.remove(idString);
- }
-
- // Deletes a random id of this thread through a TermQuery and forgets it in docs.
- public void deleteByQuery() throws IOException {
- String idString = getIdString();
- if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": del query id:" + idString);
- }
- w.deleteDocuments(new TermQuery(idTerm.createTerm(idString)));
- docs.remove(idString);
- }
-
- // Performs `iterations` operations: what in [0,5) deletes by term, [5,10)
- // deletes by query, everything else indexes a document.
- @Override
- public void run() {
- try {
- r = new Random(base+range+seed);
- for (int i=0; i<iterations; i++) {
- int what = nextInt(100);
- if (what < 5) {
- deleteDoc();
- } else if (what < 10) {
- deleteByQuery();
- } else {
- indexDoc();
- }
- }
- } catch (Throwable e) {
- // Any failure is printed and turned into a JUnit failure on this thread.
- e.printStackTrace();
- Assert.fail(e.toString());
- }
-
- // NOTE(review): presumably pairs with the synchronized(th) block of the
- // collecting thread so that docs is visible to it - confirm.
- synchronized (this) {
- docs.size();
- }
- }
- }
-
-}