lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.util.LuceneTestCase;
  21 import org.apache.lucene.analysis.KeywordAnalyzer;
  22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  23 import org.apache.lucene.document.Document;
  24 import org.apache.lucene.document.Field;
  25 import org.apache.lucene.document.SetBasedFieldSelector;
  26
  27 import org.apache.lucene.index.FieldInvertState;
  28 import org.apache.lucene.index.IndexReader;
  29 import org.apache.lucene.index.IndexWriter;
  30 import org.apache.lucene.index.IndexWriterConfig;
  31 import org.apache.lucene.index.Term;
  32 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  33 import org.apache.lucene.queryParser.QueryParser;
  34 import org.apache.lucene.store.Directory;
  35 import java.io.IOException;
  36 import java.util.Collections;
  37 import java.util.HashSet;
  38 import java.util.Map;
  39 import java.util.Random;
  40 import java.util.Set;
  41
  42 /**
  43  * Tests {@link MultiSearcher} class.
  44  */
  45 public class TestMultiSearcher extends LuceneTestCase
  46 {
  47
  48         /**
  49          * ReturnS a new instance of the concrete MultiSearcher class
  50          * used in this test.
  51          */
  52         protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
  53                 return new MultiSearcher(searchers);
  54         }
  55
  56     public void testEmptyIndex() throws Exception {
  57         // creating two directories for indices
  58         Directory indexStoreA = newDirectory();
  59         Directory indexStoreB = newDirectory();
  60
  61         // creating a document to store
  62         Document lDoc = new Document();
  63         lDoc.add(newField("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
  64         lDoc.add(newField("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  65         lDoc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  66
  67         // creating a document to store
  68         Document lDoc2 = new Document();
  69         lDoc2.add(newField("fulltext", "in a galaxy far far away.....",
  70             Field.Store.YES, Field.Index.ANALYZED));
  71         lDoc2.add(newField("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
  72         lDoc2.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  73
  74         // creating a document to store
  75         Document lDoc3 = new Document();
  76         lDoc3.add(newField("fulltext", "a bizarre bug manifested itself....",
  77             Field.Store.YES, Field.Index.ANALYZED));
  78         lDoc3.add(newField("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
  79         lDoc3.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
  80
  81         // creating an index writer for the first index
  82         IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
  83         // creating an index writer for the second index, but writing nothing
  84         IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
  85
  86         //--------------------------------------------------------------------
  87         // scenario 1
  88         //--------------------------------------------------------------------
  89
  90         // writing the documents to the first index
  91         writerA.addDocument(lDoc);
  92         writerA.addDocument(lDoc2);
  93         writerA.addDocument(lDoc3);
  94         writerA.optimize();
  95         writerA.close();
  96
  97         // closing the second index
  98         writerB.close();
  99
 100         // creating the query
 101         QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT));
 102         Query query = parser.parse("handle:1");
 103
 104         // building the searchables
 105         Searcher[] searchers = new Searcher[2];
 106         // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
 107         searchers[0] = new IndexSearcher(indexStoreB, true);
 108         searchers[1] = new IndexSearcher(indexStoreA, true);
 109         // creating the multiSearcher
 110         Searcher mSearcher = getMultiSearcherInstance(searchers);
 111         // performing the search
 112         ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;
 113
 114         assertEquals(3, hits.length);
 115
 116         // iterating over the hit documents
 117         for (int i = 0; i < hits.length; i++) {
 118           mSearcher.doc(hits[i].doc);
 119         }
 120         mSearcher.close();
 121
 122
 123         //--------------------------------------------------------------------
 124         // scenario 2
 125         //--------------------------------------------------------------------
 126
 127         // adding one document to the empty index
 128         writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
 129             TEST_VERSION_CURRENT,
 130                 new StandardAnalyzer(TEST_VERSION_CURRENT))
 131                 .setOpenMode(OpenMode.APPEND));
 132         writerB.addDocument(lDoc);
 133         writerB.optimize();
 134         writerB.close();
 135
 136         // building the searchables
 137         Searcher[] searchers2 = new Searcher[2];
 138         // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
 139         searchers2[0] = new IndexSearcher(indexStoreB, true);
 140         searchers2[1] = new IndexSearcher(indexStoreA, true);
 141         // creating the mulitSearcher
 142         MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
 143         // performing the same search
 144         ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;
 145
 146         assertEquals(4, hits2.length);
 147
 148         // iterating over the hit documents
 149         for (int i = 0; i < hits2.length; i++) {
 150           // no exception should happen at this point
 151           mSearcher2.doc(hits2[i].doc);
 152         }
 153
 154         // test the subSearcher() method:
 155         Query subSearcherQuery = parser.parse("id:doc1");
 156         hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
 157         assertEquals(2, hits2.length);
 158         assertEquals(0, mSearcher2.subSearcher(hits2[0].doc));   // hit from searchers2[0]
 159         assertEquals(1, mSearcher2.subSearcher(hits2[1].doc));   // hit from searchers2[1]
 160         subSearcherQuery = parser.parse("id:doc2");
 161         hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
 162         assertEquals(1, hits2.length);
 163         assertEquals(1, mSearcher2.subSearcher(hits2[0].doc));   // hit from searchers2[1]
 164         mSearcher2.close();
 165
 166         //--------------------------------------------------------------------
 167         // scenario 3
 168         //--------------------------------------------------------------------
 169
 170         // deleting the document just added, this will cause a different exception to take place
 171         Term term = new Term("id", "doc1");
 172         IndexReader readerB = IndexReader.open(indexStoreB, false);
 173         readerB.deleteDocuments(term);
 174         readerB.close();
 175
 176         // optimizing the index with the writer
 177         writerB = new IndexWriter(indexStoreB, new IndexWriterConfig(
 178             TEST_VERSION_CURRENT,
 179                 new StandardAnalyzer(TEST_VERSION_CURRENT))
 180                 .setOpenMode(OpenMode.APPEND));
 181         writerB.optimize();
 182         writerB.close();
 183
 184         // building the searchables
 185         Searcher[] searchers3 = new Searcher[2];
 186
 187         searchers3[0] = new IndexSearcher(indexStoreB, true);
 188         searchers3[1] = new IndexSearcher(indexStoreA, true);
 189         // creating the mulitSearcher
 190         Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
 191         // performing the same search
 192         ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;
 193
 194         assertEquals(3, hits3.length);
 195
 196         // iterating over the hit documents
 197         for (int i = 0; i < hits3.length; i++) {
 198           mSearcher3.doc(hits3[i].doc);
 199         }
 200         mSearcher3.close();
 201         indexStoreA.close();
 202         indexStoreB.close();
 203     }
 204
 205     private Document createDocument(String contents1, String contents2) {
 206         Document document=new Document();
 207
 208         document.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
 209       document.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));
 210         if (contents2!=null) {
 211             document.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
 212         }
 213
 214         return document;
 215     }
 216
 217     private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException {
 218         IndexWriter indexWriter=null;
 219
 220         try {
 221           indexWriter = new IndexWriter(directory, LuceneTestCase.newIndexWriterConfig(random,
 222               TEST_VERSION_CURRENT, new KeywordAnalyzer()).setOpenMode(
 223                   create ? OpenMode.CREATE : OpenMode.APPEND));
 224
 225             for (int i=0; i<nDocs; i++) {
 226                 indexWriter.addDocument(createDocument("doc" + i, contents2));
 227             }
 228         } finally {
 229             if (indexWriter!=null) {
 230                 indexWriter.close();
 231             }
 232         }
 233     }
 234
 235   public void testFieldSelector() throws Exception {
 236     Directory ramDirectory1, ramDirectory2;
 237     IndexSearcher indexSearcher1, indexSearcher2;
 238
 239     ramDirectory1 = newDirectory();
 240     ramDirectory2 = newDirectory();
 241     Query query = new TermQuery(new Term("contents", "doc0"));
 242
 243     // Now put the documents in a different index
 244     initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
 245     initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 246
 247     indexSearcher1 = new IndexSearcher(ramDirectory1, true);
 248     indexSearcher2 = new IndexSearcher(ramDirectory2, true);
 249
 250     MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
 251     assertTrue("searcher is null and it shouldn't be", searcher != null);
 252     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 253     assertTrue("hits is null and it shouldn't be", hits != null);
 254     assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
 255     Document document = searcher.doc(hits[0].doc);
 256     assertTrue("document is null and it shouldn't be", document != null);
 257     assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
 258     //Should be one document from each directory
 259     //they both have two fields, contents and other
 260     Set<String> ftl = new HashSet<String>();
 261     ftl.add("other");
 262     SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
 263     document = searcher.doc(hits[0].doc, fs);
 264     assertTrue("document is null and it shouldn't be", document != null);
 265     assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
 266     String value = document.get("contents");
 267     assertTrue("value is not null and it should be", value == null);
 268     value = document.get("other");
 269     assertTrue("value is null and it shouldn't be", value != null);
 270     ftl.clear();
 271     ftl.add("contents");
 272     fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
 273     document = searcher.doc(hits[1].doc, fs);
 274     value = document.get("contents");
 275     assertTrue("value is null and it shouldn't be", value != null);
 276     value = document.get("other");
 277     assertTrue("value is not null and it should be", value == null);
 278     indexSearcher1.close();
 279     indexSearcher2.close();
 280     ramDirectory1.close();
 281     ramDirectory2.close();
 282     searcher.close();
 283   }
 284
 285   /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
 286  public void testNormalization1() throws IOException {
 287      testNormalization(1, "Using 1 document per index:");
 288  }
 289   */
 290
 291     public void testNormalization10() throws IOException {
 292         testNormalization(10, "Using 10 documents per index:");
 293     }
 294
 295     private void testNormalization(int nDocs, String message) throws IOException {
 296         Query query=new TermQuery(new Term("contents", "doc0"));
 297
 298         Directory ramDirectory1;
 299         IndexSearcher indexSearcher1;
 300         ScoreDoc[] hits;
 301
 302         ramDirectory1=newDirectory();
 303
 304         // First put the documents in the same index
 305         initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
 306         initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 307
 308         indexSearcher1=new IndexSearcher(ramDirectory1, true);
 309         indexSearcher1.setDefaultFieldSortScoring(true, true);
 310
 311         hits=indexSearcher1.search(query, null, 1000).scoreDocs;
 312
 313         assertEquals(message, 2, hits.length);
 314
 315         // Store the scores for use later
 316         float[] scores={ hits[0].score, hits[1].score };
 317
 318         assertTrue(message, scores[0] > scores[1]);
 319
 320         indexSearcher1.close();
 321         ramDirectory1.close();
 322         hits=null;
 323
 324
 325
 326         Directory ramDirectory2;
 327         IndexSearcher indexSearcher2;
 328
 329         ramDirectory1=newDirectory();
 330         ramDirectory2=newDirectory();
 331
 332         // Now put the documents in a different index
 333         initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
 334         initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 335
 336         indexSearcher1=new IndexSearcher(ramDirectory1, true);
 337         indexSearcher1.setDefaultFieldSortScoring(true, true);
 338         indexSearcher2=new IndexSearcher(ramDirectory2, true);
 339         indexSearcher2.setDefaultFieldSortScoring(true, true);
 340
 341         Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
 342
 343         hits=searcher.search(query, null, 1000).scoreDocs;
 344
 345         assertEquals(message, 2, hits.length);
 346
 347         // The scores should be the same (within reason)
 348         assertEquals(message, scores[0], hits[0].score, 1e-6); // This will a document from ramDirectory1
 349         assertEquals(message, scores[1], hits[1].score, 1e-6); // This will a document from ramDirectory2
 350
 351
 352
 353         // Adding a Sort.RELEVANCE object should not change anything
 354         hits=searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs;
 355
 356         assertEquals(message, 2, hits.length);
 357
 358         assertEquals(message, scores[0], hits[0].score, 1e-6); // This will a document from ramDirectory1
 359         assertEquals(message, scores[1], hits[1].score, 1e-6); // This will a document from ramDirectory2
 360
 361         searcher.close();
 362
 363         ramDirectory1.close();
 364         ramDirectory2.close();
 365     }
 366
 367     /**
 368      * test that custom similarity is in effect when using MultiSearcher (LUCENE-789).
 369      * @throws IOException
 370      */
 371     public void testCustomSimilarity () throws IOException {
 372         Directory dir = newDirectory();
 373         initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 374         IndexSearcher srchr = new IndexSearcher(dir, true);
 375         MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr});
 376
 377         Similarity customSimilarity = new DefaultSimilarity() {
 378             // overide all
 379             @Override
 380             public float idf(int docFreq, int numDocs) { return 100.0f; }
 381             @Override
 382             public float coord(int overlap, int maxOverlap) { return 1.0f; }
 383             @Override
 384             public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); }
 385             @Override
 386             public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
 387             @Override
 388             public float sloppyFreq(int distance) { return 1.0f; }
 389             @Override
 390             public float tf(float freq) { return 1.0f; }
 391         };
 392
 393         srchr.setSimilarity(customSimilarity);
 394         msrchr.setSimilarity(customSimilarity);
 395
 396         Query query=new TermQuery(new Term("contents", "doc0"));
 397
 398         // Get a score from IndexSearcher
 399         TopDocs topDocs = srchr.search(query, null, 1);
 400         float score1 = topDocs.getMaxScore();
 401
 402         // Get the score from MultiSearcher
 403         topDocs = msrchr.search(query, null, 1);
 404         float scoreN = topDocs.getMaxScore();
 405
 406         // The scores from the IndexSearcher and Multisearcher should be the same
 407         // if the same similarity is used.
 408         assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
 409         msrchr.close();
 410         srchr.close();
 411         dir.close();
 412     }
 413
 414     public void testDocFreq() throws IOException{
 415       Directory dir1 = newDirectory();
 416       Directory dir2 = newDirectory();
 417
 418       initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 419       initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
 420       IndexSearcher searcher1 = new IndexSearcher(dir1, true);
 421       IndexSearcher searcher2 = new IndexSearcher(dir2, true);
 422
 423       MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
 424       assertEquals(15, multiSearcher.docFreq(new Term("contents","x")));
 425       multiSearcher.close();
 426       searcher1.close();
 427       searcher2.close();
 428       dir1.close();
 429       dir2.close();
 430     }
 431
 432     public void testCreateDocFrequencyMap() throws IOException{
 433       Directory dir1 = newDirectory();
 434       Directory dir2 = newDirectory();
 435       Term template = new Term("contents") ;
 436       String[] contents  = {"a", "b", "c"};
 437       HashSet<Term> termsSet = new HashSet<Term>();
 438       for (int i = 0; i < contents.length; i++) {
 439         initIndex(random, dir1, i+10, i==0, contents[i]);
 440         initIndex(random, dir2, i+5, i==0, contents[i]);
 441         termsSet.add(template.createTerm(contents[i]));
 442       }
 443       IndexSearcher searcher1 = new IndexSearcher(dir1, true);
 444       IndexSearcher searcher2 = new IndexSearcher(dir2, true);
 445       MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
 446       Map<Term,Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
 447       assertEquals(3, docFrequencyMap.size());
 448       for (int i = 0; i < contents.length; i++) {
 449         assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i])));
 450       }
 451       multiSearcher.close();
 452       searcher1.close();
 453       searcher2.close();
 454       dir1.close();
 455       dir2.close();
 456     }
 457 }