1 package org.apache.lucene.store.instantiated;
3 * Copyright 2006 The Apache Software Foundation
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 import java.io.IOException;
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.Comparator;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.Random;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.analysis.Token;
28 import org.apache.lucene.analysis.TokenStream;
29 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
30 import org.apache.lucene.document.Document;
31 import org.apache.lucene.document.Field;
32 import org.apache.lucene.index.IndexReader;
33 import org.apache.lucene.index.IndexWriter;
34 import org.apache.lucene.index.Payload;
35 import org.apache.lucene.index.Term;
36 import org.apache.lucene.index.TermDocs;
37 import org.apache.lucene.index.TermEnum;
38 import org.apache.lucene.index.TermFreqVector;
39 import org.apache.lucene.index.TermPositionVector;
40 import org.apache.lucene.index.TermPositions;
41 import org.apache.lucene.store.Directory;
42 import org.apache.lucene.util.AttributeImpl;
43 import org.apache.lucene.util.LuceneTestCase;
46 * Asserts equality of content and behaviour of two index readers.
48 public class TestIndicesEquals extends LuceneTestCase {
50 // public void test2() throws Exception {
51 // FSDirectory fsdir = FSDirectory.open(new File("/tmp/fatcorpus"));
52 // IndexReader ir = IndexReader.open(fsdir, false);
53 // InstantiatedIndex ii = new InstantiatedIndex(ir);
55 // testEquals(fsdir, ii);
// Builds a 20-document index in a fresh Directory through IndexWriter, loads an
// InstantiatedIndex from an IndexReader opened on that directory, and passes the
// directory and the instantiated index to testEqualBehaviour for comparison.
// NOTE(review): this listing does not show every line of the method (closing braces
// and any short statements are absent); confirm in the full file that the writer,
// the reader and the directory are closed.
59 public void testLoadIndexReader() throws Exception {
60 Directory dir = newDirectory();
// The writer's analyzer is a MockAnalyzer constructed from `random`.
63 IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
64 TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
// Populate the a-priori index; assembleDocument fills in the fields of document i.
66 for (int i = 0; i < 20; i++) {
67 Document document = new Document();
68 assembleDocument(document, i);
69 indexWriter.addDocument(document);
73 // test load ii from index reader
74 IndexReader ir = IndexReader.open(dir, false);
75 InstantiatedIndex ii = new InstantiatedIndex(ir);
// Compare the directory-backed index with the instantiated copy built from it.
78 testEqualBehaviour(dir, ii);
// Indexes the same 500 documents twice: once through IndexWriter into a Directory and
// once through the InstantiatedIndexWriter obtained from ii.indexWriterFactory(...).
// The two resulting indices are then compared with testEqualBehaviour.
// NOTE(review): this listing does not show every line of the method; confirm in the
// full file that indexWriter and dir are closed.
83 public void testInstantiatedIndexWriter() throws Exception {
85 Directory dir = newDirectory();
86 InstantiatedIndex ii = new InstantiatedIndex();
88 // we need to pass the "same" random to both, so they surely index the same payload data.
89 long seed = random.nextLong();
// First analyzer seeded with `seed`; the instantiated writer below gets its own
// MockAnalyzer built from a new Random with the same seed.
92 IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
93 TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))).setMergePolicy(newLogMergePolicy()));
// Writer diagnostics go to System.out only when VERBOSE is set.
94 indexWriter.setInfoStream(VERBOSE ? System.out : null);
// NOTE(review): as shown here this println is unconditional; the neighbouring lines
// are not visible, so confirm whether it is guarded by VERBOSE in the full file.
96 System.out.println("TEST: make test index");
// Fill the a-priori (directory) index.
98 for (int i = 0; i < 500; i++) {
99 Document document = new Document();
100 assembleDocument(document, i);
101 indexWriter.addDocument(document);
// Fill the instantiated index with documents assembled the same way.
106 InstantiatedIndexWriter instantiatedIndexWriter = ii.indexWriterFactory(new MockAnalyzer(new Random(seed)), true);
107 for (int i = 0; i < 500; i++) {
108 Document document = new Document();
109 assembleDocument(document, i);
110 instantiatedIndexWriter.addDocument(document);
112 instantiatedIndexWriter.close();
115 testEqualBehaviour(dir, ii);
// Drives a fixed sequence of TermEnum/TermDocs calls against a reader opened on
// aprioriIndex and a reader obtained from testIndex.indexReaderFactory(), asserting
// after each call that both report the same boolean result and, when positioned,
// the same freq() and doc().
// NOTE(review): the listing omits short lines (braces / else lines), so each
// assertFalse below is presumably the else-branch of the preceding `if` — confirm.
122 private void testTermDocsSomeMore(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
124 IndexReader aprioriReader = IndexReader.open(aprioriIndex, false);
125 IndexReader testReader = testIndex.indexReaderFactory();
// Open a TermEnum at Term("c", "danny") on both readers; they must report the same
// current term.
129 Term t = new Term("c", "danny");
130 TermEnum aprioriTermEnum = aprioriReader.terms(t);
131 TermEnum testTermEnum = testReader.terms(t);
133 assertEquals(aprioriTermEnum.term(), testTermEnum.term());
// Continue with whichever term the a-priori enum reported.
135 t = aprioriTermEnum.term();
137 aprioriTermEnum.close();
138 testTermEnum.close();
140 TermDocs aprioriTermDocs = aprioriReader.termDocs(t);
141 TermDocs testTermDocs = testReader.termDocs(t);
// First posting: same next() result, same freq and doc.
143 assertEquals(aprioriTermDocs.next(), testTermDocs.next());
144 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
145 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
// skipTo(4) on both.
147 if (aprioriTermDocs.skipTo(4)) {
148 assertTrue(testTermDocs.skipTo(4));
149 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
150 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
152 assertFalse(testTermDocs.skipTo(4));
// next() after the skip.
155 if (aprioriTermDocs.next()) {
156 assertTrue(testTermDocs.next());
157 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
158 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
160 assertFalse(testTermDocs.next());
164 // beyond this point all next and skipto will return false
166 if (aprioriTermDocs.skipTo(100)) {
167 assertTrue(testTermDocs.skipTo(100));
168 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
169 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
171 assertFalse(testTermDocs.skipTo(100));
175 if (aprioriTermDocs.next()) {
176 assertTrue(testTermDocs.next());
177 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
178 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
180 assertFalse(testTermDocs.next());
183 if (aprioriTermDocs.skipTo(110)) {
184 assertTrue(testTermDocs.skipTo(110));
185 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
186 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
188 assertFalse(testTermDocs.skipTo(110));
// The target 10 is lower than the previous targets (100, 110).
// NOTE(review): presumably deliberate, to compare behaviour on a backwards skipTo — confirm.
191 if (aprioriTermDocs.skipTo(10)) {
192 assertTrue(testTermDocs.skipTo(10));
193 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
194 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
196 assertFalse(testTermDocs.skipTo(10));
200 if (aprioriTermDocs.skipTo(210)) {
201 assertTrue(testTermDocs.skipTo(210));
202 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
203 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
205 assertFalse(testTermDocs.skipTo(210));
208 aprioriTermDocs.close();
209 testTermDocs.close();
213 // test seek null (AllTermDocs)
214 aprioriTermDocs = aprioriReader.termDocs(null);
215 testTermDocs = testReader.termDocs(null);
// Walk both enumerations in lockstep; the test side must end when the a-priori side does.
217 while (aprioriTermDocs.next()) {
218 assertTrue(testTermDocs.next());
219 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
220 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
222 assertFalse(testTermDocs.next());
225 aprioriTermDocs.close();
226 testTermDocs.close();
// TermDocs obtained without a term; they are positioned with seek(t) further down.
230 aprioriTermDocs = aprioriReader.termDocs();
231 testTermDocs = testReader.termDocs();
233 // this is invalid use of the API,
234 // but if the response differs then it's an indication that something might have changed.
235 // in 2.9 and 3.0 the two TermDocs-implementations returned different values at this point.
236 // assertEquals("Descripency during invalid use of the TermDocs API, see comments in test code for details.",
237 // aprioriTermDocs.next(), testTermDocs.next());
239 // start using the API the way one is supposed to use it
// Seek both to a Term with empty field and empty text, then compare the postings in lockstep.
241 t = new Term("", "");
242 aprioriTermDocs.seek(t);
243 testTermDocs.seek(t);
245 while (aprioriTermDocs.next()) {
246 assertTrue(testTermDocs.next());
247 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
248 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
250 assertFalse(testTermDocs.next());
252 aprioriTermDocs.close();
253 testTermDocs.close();
// NOTE(review): only the a-priori reader's close is visible in this listing; confirm
// that testReader is closed as well in the full file.
257 aprioriReader.close();
// Adds the test fields to `document`. Fields "a" through "e" carry a text value
// prefixed with the number i; field "f" is fed from a hand-built token stream.
// NOTE(review): this listing omits short lines; the statements that add the local
// Field `f` to the document and the first two tokens to `tokens` are not visible —
// confirm they exist in the full file.
263 private void assembleDocument(Document document, int i) {
264 document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
// b0..b3 share the same text but differ in their Store / Index / TermVector options.
266 document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
267 document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
268 document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
269 document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
271 document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
273 document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
// Field "e" is analyzed like the others, with norms switched off on the Field itself.
275 Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
276 f.setOmitNorms(true);
// Three hand-made tokens: "the" and "end" get type "text" and a payload, "fin" has neither.
279 final List<Token> tokens = new ArrayList<Token>(2);
280 Token t = createToken("the", 0, 2, "text");
281 t.setPayload(new Payload(new byte[]{1, 2, 3}));
283 t = createToken("end", 3, 5, "text");
284 t.setPayload(new Payload(new byte[]{2}));
286 tokens.add(createToken("fin", 7, 9));
// Anonymous TokenStream that replays `tokens`: each step copies the next Token into
// the attribute instance registered for CharTermAttribute.
287 TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
288 final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
289 Iterator<Token> it = tokens.iterator();
292 public final boolean incrementToken() throws IOException {
297 it.next().copyTo(reusableToken);
// reset() rewinds to the first token by taking a fresh iterator over the list.
302 public void reset() throws IOException {
303 it = tokens.iterator();
// Field "f" takes its terms directly from the token stream above.
307 document.add(new Field("f", ts));
317 * Asserts that the content of two index readers equal each other.
319 * @param aprioriIndex the index that is known to be correct
320 * @param testIndex the index that is supposed to equal the apriori index.
// Compares the two indices with testEquals, then deletes documents 3 and 8 through a
// reader on each side, checking after every step that isCurrent, hasDeletions, maxDoc,
// numDocs and numDeletedDocs agree. Fresh readers are then opened, the same five
// values and the per-document isDeleted flags are compared, and testEquals runs again.
323 protected void testEqualBehaviour(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
326 testEquals(aprioriIndex, testIndex);
328 // delete a few documents
329 IndexReader air = IndexReader.open(aprioriIndex, false);
330 InstantiatedIndexReader tir = testIndex.indexReaderFactory();
// Baseline before any deletion.
332 assertEquals(air.isCurrent(), tir.isCurrent());
333 assertEquals(air.hasDeletions(), tir.hasDeletions());
334 assertEquals(air.maxDoc(), tir.maxDoc());
335 assertEquals(air.numDocs(), tir.numDocs());
336 assertEquals(air.numDeletedDocs(), tir.numDeletedDocs());
338 air.deleteDocument(3);
339 tir.deleteDocument(3);
// State after the first deletion.
341 assertEquals(air.isCurrent(), tir.isCurrent());
342 assertEquals(air.hasDeletions(), tir.hasDeletions());
343 assertEquals(air.maxDoc(), tir.maxDoc());
344 assertEquals(air.numDocs(), tir.numDocs());
345 assertEquals(air.numDeletedDocs(), tir.numDeletedDocs());
347 air.deleteDocument(8);
348 tir.deleteDocument(8);
// State after the second deletion.
350 assertEquals(air.isCurrent(), tir.isCurrent());
351 assertEquals(air.hasDeletions(), tir.hasDeletions());
352 assertEquals(air.maxDoc(), tir.maxDoc());
353 assertEquals(air.numDocs(), tir.numDocs());
354 assertEquals(air.numDeletedDocs(), tir.numDeletedDocs());
// NOTE(review): the statements the next comment refers to are not visible in this
// listing (presumably the two readers are closed here) — confirm in the full file.
356 // this (in 3.0) commits the deletions
// Re-open both sides and compare again through the new readers.
360 air = IndexReader.open(aprioriIndex, false);
361 tir = testIndex.indexReaderFactory();
363 assertEquals(air.isCurrent(), tir.isCurrent());
364 assertEquals(air.hasDeletions(), tir.hasDeletions());
365 assertEquals(air.maxDoc(), tir.maxDoc());
366 assertEquals(air.numDocs(), tir.numDocs());
367 assertEquals(air.numDeletedDocs(), tir.numDeletedDocs());
// Every document slot up to maxDoc must carry the same deleted flag on both sides.
369 for (int d =0; d<air.maxDoc(); d++) {
370 assertEquals(air.isDeleted(d), tir.isDeleted(d));
377 // make sure they still equal
378 testEquals(aprioriIndex, testIndex);
// Compares a reader opened on aprioriIndex with a reader from
// testIndex.indexReaderFactory(): document count, field names per FieldOption, norms,
// deleted flags, term enumeration, term docs, term positions with payloads, and
// term (position) vectors. testTermDocsSomeMore is run first.
// NOTE(review): this listing omits short lines (loop headers, else lines, braces), so
// the exact nesting of the statements below cannot be read off it — confirm in the full file.
381 protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
383 testTermDocsSomeMore(aprioriIndex, testIndex);
385 IndexReader aprioriReader = IndexReader.open(aprioriIndex, false);
386 IndexReader testReader = testIndex.indexReaderFactory();
388 assertEquals(aprioriReader.numDocs(), testReader.numDocs());
390 // assert field options
391 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED));
392 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR));
393 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR));
394 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS));
395 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR));
396 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET));
397 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION));
398 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET));
399 assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED));
// Norms are compared per field name, three ways: the array returned by norms(field),
// norms copied into a caller-supplied array at offset 0, and again at offset 10.
401 for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {
403 // test norms as used by normal use
405 byte[] aprioriNorms = aprioriReader.norms((String) field);
406 byte[] testNorms = testReader.norms((String) field);
408 if (aprioriNorms != null) {
409 assertEquals(aprioriNorms.length, testNorms.length);
411 for (int i = 0; i < aprioriNorms.length; i++) {
412 assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
415 // test norms as used by multireader
417 aprioriNorms = new byte[aprioriReader.maxDoc()];
418 aprioriReader.norms((String) field, aprioriNorms, 0);
420 testNorms = new byte[testReader.maxDoc()];
421 testReader.norms((String) field, testNorms, 0);
423 assertEquals(aprioriNorms.length, testNorms.length);
425 for (int i = 0; i < aprioriNorms.length; i++) {
426 assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
// Same check as above, but the arrays are 10 bytes longer and filled from offset 10,
// so index i in the failure message is an array index rather than a document number.
430 // test norms as used by multireader
432 aprioriNorms = new byte[aprioriReader.maxDoc() + 10];
433 aprioriReader.norms((String) field, aprioriNorms, 10);
435 testNorms = new byte[testReader.maxDoc() + 10];
436 testReader.norms((String) field, testNorms, 10);
438 assertEquals(aprioriNorms.length, testNorms.length);
440 for (int i = 0; i < aprioriNorms.length; i++) {
441 assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
// NOTE(review): this loop stops at numDocs(), whereas testEqualBehaviour iterates
// isDeleted up to maxDoc(); once documents are deleted numDocs() is presumably smaller
// than maxDoc(), leaving the highest slots unchecked — confirm and consider maxDoc().
447 for (int docIndex = 0; docIndex < aprioriReader.numDocs(); docIndex++) {
448 assertEquals(aprioriReader.isDeleted(docIndex), testReader.isDeleted(docIndex));
451 // compare term enumeration stepping
453 TermEnum aprioriTermEnum = aprioriReader.terms();
454 TermEnum testTermEnum = testReader.terms();
// Both enums are advanced together; when the a-priori enum is exhausted the test enum
// must be exhausted too. (The enclosing loop header is not visible in this listing.)
459 if (!aprioriTermEnum.next()) {
460 assertFalse(testTermEnum.next());
463 assertTrue(testTermEnum.next());
465 assertEquals(aprioriTermEnum.term(), testTermEnum.term());
// NOTE(review): assertEquals would report both values on failure; assertTrue(a == b) does not.
466 assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
468 // compare termDocs seeking
470 TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
471 TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
// For every document the a-priori side reaches with next(), the test side must reach
// the same document with skipTo().
473 while (aprioriTermDocsSeeker.next()) {
474 assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
475 assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
478 aprioriTermDocsSeeker.close();
479 testTermDocsSeeker.close();
481 // compare documents per term
483 assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));
485 TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
486 TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());
// Lockstep next() on both; doc and freq must match for every posting.
489 if (!aprioriTermDocs.next()) {
490 assertFalse(testTermDocs.next());
493 assertTrue(testTermDocs.next());
495 assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
496 assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
499 aprioriTermDocs.close();
500 testTermDocs.close();
502 // compare term positions
504 TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
505 TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());
507 if (aprioriTermPositions != null) {
// At most maxDoc() iterations; hasNext records whether the a-priori side advanced.
// (How hasNext gates the assertions below is on lines not visible in this listing.)
509 for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
510 boolean hasNext = aprioriTermPositions.next();
512 assertTrue(testTermPositions.next());
514 assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
// Read freq() positions from both sides and compare each one.
517 for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
518 int aprioriPos = aprioriTermPositions.nextPosition();
519 int testPos = testTermPositions.nextPosition();
// NOTE(review): the guard is redundant — assertEquals alone fails exactly when the values differ.
521 if (aprioriPos != testPos) {
522 assertEquals(aprioriPos, testPos);
// Payload availability, length and bytes must match at this position.
526 assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
527 if (aprioriTermPositions.isPayloadAvailable()) {
528 assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
529 byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
530 byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
531 for (int i = 0; i < aprioriPayloads.length; i++) {
532 assertEquals(aprioriPayloads[i], testPayloads[i]);
540 aprioriTermPositions.close();
541 testTermPositions.close();
546 // compare term vectors and position vectors
// NOTE(review): bounded by numDocs() rather than maxDoc(); see the note on the
// isDeleted loop above — confirm which bound is intended.
548 for (int documentNumber = 0; documentNumber < aprioriReader.numDocs(); documentNumber++) {
// Field "b0" must have a term vector and "b1" must not, on both readers.
// NOTE(review): document 0 is excluded from this check; the reason is not visible here.
550 if (documentNumber > 0) {
551 assertNotNull(aprioriReader.getTermFreqVector(documentNumber, "b0"));
552 assertNull(aprioriReader.getTermFreqVector(documentNumber, "b1"));
554 assertNotNull(testReader.getTermFreqVector(documentNumber, "b0"));
555 assertNull(testReader.getTermFreqVector(documentNumber, "b1"));
559 TermFreqVector[] aprioriFreqVectors = aprioriReader.getTermFreqVectors(documentNumber);
560 TermFreqVector[] testFreqVectors = testReader.getTermFreqVectors(documentNumber);
// Vectors are only compared when both sides returned a non-null array.
562 if (aprioriFreqVectors != null && testFreqVectors != null) {
// Sort both arrays by field name so that they can be compared index by index.
564 Arrays.sort(aprioriFreqVectors, new Comparator<TermFreqVector>() {
565 public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
566 return termFreqVector.getField().compareTo(termFreqVector1.getField());
569 Arrays.sort(testFreqVectors, new Comparator<TermFreqVector>() {
570 public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
571 return termFreqVector.getField().compareTo(termFreqVector1.getField());
575 assertEquals("document " + documentNumber + " vectors does not match", aprioriFreqVectors.length, testFreqVectors.length);
577 for (int freqVectorIndex = 0; freqVectorIndex < aprioriFreqVectors.length; freqVectorIndex++) {
578 assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTermFrequencies(), testFreqVectors[freqVectorIndex].getTermFrequencies()));
579 assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTerms(), testFreqVectors[freqVectorIndex].getTerms()));
// When the a-priori vector also carries positions/offsets, the test vector is cast to
// TermPositionVector as well and compared term by term.
581 if (aprioriFreqVectors[freqVectorIndex] instanceof TermPositionVector) {
582 TermPositionVector aprioriTermPositionVector = (TermPositionVector) aprioriFreqVectors[freqVectorIndex];
583 TermPositionVector testTermPositionVector = (TermPositionVector) testFreqVectors[freqVectorIndex];
585 for (int positionVectorIndex = 0; positionVectorIndex < aprioriFreqVectors[freqVectorIndex].getTerms().length; positionVectorIndex++)
587 if (aprioriTermPositionVector.getOffsets(positionVectorIndex) != null) {
588 assertTrue(Arrays.equals(aprioriTermPositionVector.getOffsets(positionVectorIndex), testTermPositionVector.getOffsets(positionVectorIndex)));
591 if (aprioriTermPositionVector.getTermPositions(positionVectorIndex) != null) {
592 assertTrue(Arrays.equals(aprioriTermPositionVector.getTermPositions(positionVectorIndex), testTermPositionVector.getTermPositions(positionVectorIndex)));
602 aprioriTermEnum.close();
603 testTermEnum.close();
// NOTE(review): only the a-priori reader's close is visible in this listing; confirm
// that testReader is closed as well in the full file.
605 aprioriReader.close();
// Convenience factory: forwards term, start and offset unchanged to the three-argument
// Token constructor.
// NOTE(review): call sites in this file pass values such as (0, 2) and (7, 9), so
// `offset` presumably denotes the token's end offset rather than a length — confirm
// against the Token constructor.
609 private static Token createToken(String term, int start, int offset)
611 return new Token(term, start, offset);
// Convenience factory: forwards term, start, offset and type unchanged to the
// four-argument Token constructor.
// NOTE(review): as with the three-argument overload, `offset` presumably denotes the
// token's end offset — confirm against the Token constructor.
614 private static Token createToken(String term, int start, int offset, String type)
616 return new Token(term, start, offset, type);