1 package org.apache.lucene.search;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.util.LuceneTestCase;
21 import org.apache.lucene.analysis.KeywordAnalyzer;
22 import org.apache.lucene.analysis.standard.StandardAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.document.SetBasedFieldSelector;
27 import org.apache.lucene.index.FieldInvertState;
28 import org.apache.lucene.index.IndexReader;
29 import org.apache.lucene.index.IndexWriter;
30 import org.apache.lucene.index.IndexWriterConfig;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
33 import org.apache.lucene.queryParser.QueryParser;
34 import org.apache.lucene.store.Directory;
35 import java.io.IOException;
36 import java.util.Collections;
37 import java.util.HashSet;
39 import java.util.Random;
/** Tests the {@link MultiSearcher} class. */
45 public class TestMultiSearcher extends LuceneTestCase
/**
 * Returns a new instance of the concrete MultiSearcher class to be tested.
 * Subclasses can override this so the same test suite exercises a different
 * MultiSearcher implementation (e.g. a parallel one).
 */
protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
return new MultiSearcher(searchers);
/**
 * Verifies that a MultiSearcher whose FIRST sub-searcher wraps an empty index
 * still searches, retrieves documents, and maps hits back with subSearcher()
 * correctly, across three phases: (1) second index empty, (2) second index
 * holding one document, (3) second index after that document is deleted.
 *
 * NOTE(review): several lines appear to be missing from this view of the file
 * (writer optimize/close calls, reader close, and the closing braces of the
 * for-loops) — confirm against the full source before relying on this text.
 */
public void testEmptyIndex() throws Exception {
// creating two directories for indices
Directory indexStoreA = newDirectory();
Directory indexStoreB = newDirectory();
// creating a document to store
Document lDoc = new Document();
lDoc.add(newField("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
lDoc.add(newField("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
lDoc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
// creating a second document to store
Document lDoc2 = new Document();
lDoc2.add(newField("fulltext", "in a galaxy far far away.....",
Field.Store.YES, Field.Index.ANALYZED));
lDoc2.add(newField("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
lDoc2.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
// creating a third document to store
Document lDoc3 = new Document();
lDoc3.add(newField("fulltext", "a bizarre bug manifested itself....",
Field.Store.YES, Field.Index.ANALYZED));
lDoc3.add(newField("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
lDoc3.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
// creating an index writer for the first index
IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
// creating an index writer for the second index, but writing nothing
IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
//--------------------------------------------------------------------
// phase 1: search with the second index still empty
//--------------------------------------------------------------------
// writing the documents to the first index
writerA.addDocument(lDoc);
writerA.addDocument(lDoc2);
writerA.addDocument(lDoc3);
// closing the second index
// creating the query: all three docs share handle:1
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT));
Query query = parser.parse("handle:1");
// building the searchables
Searcher[] searchers = new Searcher[2];
// VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
searchers[0] = new IndexSearcher(indexStoreB, true);
searchers[1] = new IndexSearcher(indexStoreA, true);
// creating the multiSearcher
Searcher mSearcher = getMultiSearcherInstance(searchers);
// performing the search
ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;
assertEquals(3, hits.length);
// iterating over the hit documents; doc() must not throw for any hit
for (int i = 0; i < hits.length; i++) {
mSearcher.doc(hits[i].doc);
//--------------------------------------------------------------------
// phase 2: the previously-empty index now holds one document
//--------------------------------------------------------------------
// adding one document to the empty index
writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
TEST_VERSION_CURRENT,
new StandardAnalyzer(TEST_VERSION_CURRENT))
.setOpenMode(OpenMode.APPEND));
writerB.addDocument(lDoc);
// building the searchables
Searcher[] searchers2 = new Searcher[2];
// VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
searchers2[0] = new IndexSearcher(indexStoreB, true);
searchers2[1] = new IndexSearcher(indexStoreA, true);
// creating the multiSearcher
MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
// performing the same search; now 4 hits (3 in A + the new one in B)
ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;
assertEquals(4, hits2.length);
// iterating over the hit documents
for (int i = 0; i < hits2.length; i++) {
// no exception should happen at this point
mSearcher2.doc(hits2[i].doc);
// test the subSearcher() method: doc1 now exists in both indexes
Query subSearcherQuery = parser.parse("id:doc1");
hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
assertEquals(2, hits2.length);
assertEquals(0, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[0]
assertEquals(1, mSearcher2.subSearcher(hits2[1].doc)); // hit from searchers2[1]
subSearcherQuery = parser.parse("id:doc2");
hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
assertEquals(1, hits2.length);
assertEquals(1, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[1]
//--------------------------------------------------------------------
// phase 3: delete the document just added to the second index again
//--------------------------------------------------------------------
// deleting the document just added, this will cause a different exception to take place
Term term = new Term("id", "doc1");
IndexReader readerB = IndexReader.open(indexStoreB, false);
readerB.deleteDocuments(term);
// optimizing the index with the writer
writerB = new IndexWriter(indexStoreB, new IndexWriterConfig(
TEST_VERSION_CURRENT,
new StandardAnalyzer(TEST_VERSION_CURRENT))
.setOpenMode(OpenMode.APPEND));
// building the searchables
Searcher[] searchers3 = new Searcher[2];
searchers3[0] = new IndexSearcher(indexStoreB, true);
searchers3[1] = new IndexSearcher(indexStoreA, true);
// creating the multiSearcher
Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
// performing the same search; back to the original 3 hits
ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;
assertEquals(3, hits3.length);
// iterating over the hit documents
for (int i = 0; i < hits3.length; i++) {
mSearcher3.doc(hits3[i].doc);
/**
 * Builds a Document with a stored, un-analyzed "contents" field set to
 * {@code contents1}, a fixed "other" field, and — when {@code contents2}
 * is non-null — a second "contents" value.
 *
 * NOTE(review): the {@code return document;} statement and closing braces
 * appear to be missing from this view of the file — confirm against the
 * full source.
 */
private Document createDocument(String contents1, String contents2) {
Document document=new Document();
document.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
document.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));
// only add the second value when requested by the caller
if (contents2!=null) {
document.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
/**
 * Populates {@code directory} with {@code nDocs} documents named
 * "doc0".."doc(nDocs-1)" via {@link #createDocument}, each optionally
 * carrying {@code contents2} as a second "contents" token.
 * {@code create} selects CREATE vs. APPEND open mode, so repeated calls
 * can either rebuild or extend the index.
 */
private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException {
IndexWriter indexWriter=null;
indexWriter = new IndexWriter(directory, LuceneTestCase.newIndexWriterConfig(random,
TEST_VERSION_CURRENT, new KeywordAnalyzer()).setOpenMode(
create ? OpenMode.CREATE : OpenMode.APPEND));
for (int i=0; i<nDocs; i++) {
indexWriter.addDocument(createDocument("doc" + i, contents2));
// null-guard before close (presumably part of a try/finally in the full source)
if (indexWriter!=null) {
/**
 * Verifies that MultiSearcher honors a {@link SetBasedFieldSelector} when
 * loading stored documents: only the selected field(s) of a hit are
 * materialized, for hits coming from either sub-searcher.
 */
public void testFieldSelector() throws Exception {
Directory ramDirectory1, ramDirectory2;
IndexSearcher indexSearcher1, indexSearcher2;
ramDirectory1 = newDirectory();
ramDirectory2 = newDirectory();
Query query = new TermQuery(new Term("contents", "doc0"));
// Now put the documents in a different index
initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
indexSearcher1 = new IndexSearcher(ramDirectory1, true);
indexSearcher2 = new IndexSearcher(ramDirectory2, true);
MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
assertTrue("searcher is null and it shouldn't be", searcher != null);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertTrue("hits is null and it shouldn't be", hits != null);
assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
// without a selector, the full document (both fields) is loaded
Document document = searcher.doc(hits[0].doc);
assertTrue("document is null and it shouldn't be", document != null);
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
//Should be one document from each directory
//they both have two fields, contents and other
Set<String> ftl = new HashSet<String>();
// NOTE(review): the ftl.add(...) call selecting the field to load appears to
// be missing from this view of the file — confirm against the full source.
SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
document = searcher.doc(hits[0].doc, fs);
assertTrue("document is null and it shouldn't be", document != null);
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
String value = document.get("contents");
assertTrue("value is not null and it should be", value == null);
value = document.get("other");
assertTrue("value is null and it shouldn't be", value != null);
// repeat with a selector for the other field, on the hit from the other index
fs = new SetBasedFieldSelector(ftl, Collections. <String> emptySet());
document = searcher.doc(hits[1].doc, fs);
value = document.get("contents");
assertTrue("value is null and it shouldn't be", value != null);
value = document.get("other");
assertTrue("value is not null and it should be", value == null);
indexSearcher1.close();
indexSearcher2.close();
ramDirectory1.close();
ramDirectory2.close();
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
   NOTE(review): the closing delimiter of this block comment appears to be
   missing from this view of the file — as written, everything below it is
   commented out. In the full source the comment presumably ends after
   testNormalization1, leaving testNormalization10 active. Confirm.
public void testNormalization1() throws IOException {
testNormalization(1, "Using 1 document per index:");
public void testNormalization10() throws IOException {
testNormalization(10, "Using 10 documents per index:");
// Shared driver: scores from a MultiSearcher over two single-type indexes
// must match (within 1e-6) the scores obtained from one IndexSearcher over
// a single combined index holding the same documents, with and without an
// explicit Sort.RELEVANCE.
private void testNormalization(int nDocs, String message) throws IOException {
Query query=new TermQuery(new Term("contents", "doc0"));
Directory ramDirectory1;
IndexSearcher indexSearcher1;
ramDirectory1=newDirectory();
// First put the documents in the same index
initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
indexSearcher1=new IndexSearcher(ramDirectory1, true);
indexSearcher1.setDefaultFieldSortScoring(true, true);
// NOTE(review): the declaration of `hits` is not visible in this view of the
// file — presumably declared on a removed line. Confirm against full source.
hits=indexSearcher1.search(query, null, 1000).scoreDocs;
assertEquals(message, 2, hits.length);
// Store the scores for use later as the reference values
float[] scores={ hits[0].score, hits[1].score };
// the single-token doc must outscore the two-token doc (shorter field norm)
assertTrue(message, scores[0] > scores[1]);
indexSearcher1.close();
ramDirectory1.close();
Directory ramDirectory2;
IndexSearcher indexSearcher2;
ramDirectory1=newDirectory();
ramDirectory2=newDirectory();
// Now put the documents in a different index
initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
indexSearcher1=new IndexSearcher(ramDirectory1, true);
indexSearcher1.setDefaultFieldSortScoring(true, true);
indexSearcher2=new IndexSearcher(ramDirectory2, true);
indexSearcher2.setDefaultFieldSortScoring(true, true);
Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
hits=searcher.search(query, null, 1000).scoreDocs;
assertEquals(message, 2, hits.length);
// The scores should be the same (within reason)
assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2
// Adding a Sort.RELEVANCE object should not change anything
hits=searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs;
assertEquals(message, 2, hits.length);
assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2
ramDirectory1.close();
ramDirectory2.close();
// Test that a custom similarity is in effect when using MultiSearcher (LUCENE-789):
// with the same Similarity installed on both, the MultiSearcher score must equal
// the single-IndexSearcher score for the same query.
// @throws IOException
public void testCustomSimilarity () throws IOException {
Directory dir = newDirectory();
initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
IndexSearcher srchr = new IndexSearcher(dir, true);
MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr});
// similarity that pins every factor to a constant (idf=100, everything else 1)
// so the resulting score is fully deterministic
Similarity customSimilarity = new DefaultSimilarity() {
public float idf(int docFreq, int numDocs) { return 100.0f; }
public float coord(int overlap, int maxOverlap) { return 1.0f; }
public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); }
public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
public float sloppyFreq(int distance) { return 1.0f; }
public float tf(float freq) { return 1.0f; }
// install the same similarity on both searchers
srchr.setSimilarity(customSimilarity);
msrchr.setSimilarity(customSimilarity);
Query query=new TermQuery(new Term("contents", "doc0"));
// Get a score from IndexSearcher
TopDocs topDocs = srchr.search(query, null, 1);
float score1 = topDocs.getMaxScore();
// Get the score from MultiSearcher
topDocs = msrchr.search(query, null, 1);
float scoreN = topDocs.getMaxScore();
// The scores from the IndexSearcher and Multisearcher should be the same
// if the same similarity is used.
assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
// MultiSearcher.docFreq must be the SUM of the sub-searchers' document
// frequencies: 10 docs with token "x" in dir1 + 5 in dir2 = 15.
public void testDocFreq() throws IOException{
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
IndexSearcher searcher1 = new IndexSearcher(dir1, true);
IndexSearcher searcher2 = new IndexSearcher(dir2, true);
MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
assertEquals(15, multiSearcher.docFreq(new Term("contents","x")));
multiSearcher.close();
// createDocFrequencyMap must return, per term, the summed docFreq across both
// sub-searchers. Term "contents:a" appears in (i+10)+(i+5) docs for i=0, etc.,
// hence the expected value (i*2)+15 for each content value.
public void testCreateDocFrequencyMap() throws IOException{
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
Term template = new Term("contents") ;
String[] contents = {"a", "b", "c"};
HashSet<Term> termsSet = new HashSet<Term>();
// i==0 creates the index, subsequent iterations append
for (int i = 0; i < contents.length; i++) {
initIndex(random, dir1, i+10, i==0, contents[i]);
initIndex(random, dir2, i+5, i==0, contents[i]);
termsSet.add(template.createTerm(contents[i]));
IndexSearcher searcher1 = new IndexSearcher(dir1, true);
IndexSearcher searcher2 = new IndexSearcher(dir2, true);
MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
Map<Term,Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
assertEquals(3, docFrequencyMap.size());
for (int i = 0; i < contents.length; i++) {
assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i])));
multiSearcher.close();