X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java

diff --git a/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java
new file mode 100644
index 0000000..7c8ac66
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java
@@ -0,0 +1,457 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SetBasedFieldSelector;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.store.Directory;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Tests {@link MultiSearcher} class.
+ */
+public class TestMultiSearcher extends LuceneTestCase
+{
+
+  /**
+   * Returns a new instance of the concrete MultiSearcher class
+   * used in this test.
+   */
+  protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
+    return new MultiSearcher(searchers);
+  }
+
+  public void testEmptyIndex() throws Exception {
+    // creating two directories for indices
+    Directory indexStoreA = newDirectory();
+    Directory indexStoreB = newDirectory();
+
+    // creating a document to store
+    Document lDoc = new Document();
+    lDoc.add(newField("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
+    lDoc.add(newField("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+    lDoc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+    // creating a document to store
+    Document lDoc2 = new Document();
+    lDoc2.add(newField("fulltext", "in a galaxy far far away.....",
+        Field.Store.YES, Field.Index.ANALYZED));
+    lDoc2.add(newField("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
+    lDoc2.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+    // creating a document to store
+    Document lDoc3 = new Document();
+    lDoc3.add(newField("fulltext", "a bizarre bug manifested itself....",
+        Field.Store.YES, Field.Index.ANALYZED));
+    lDoc3.add(newField("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
+    lDoc3.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+    // creating an index writer for the first index
+    IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+    // creating an index writer for the second index, but writing nothing
+    IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+
+    //--------------------------------------------------------------------
+    // scenario 1
+    //--------------------------------------------------------------------
+
+    // writing the documents to the first index
+    writerA.addDocument(lDoc);
+    writerA.addDocument(lDoc2);
+    writerA.addDocument(lDoc3);
+    writerA.forceMerge(1);
+    writerA.close();
+
+    // closing the second index
+    writerB.close();
+
+    // creating the query
+    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT));
+    Query query = parser.parse("handle:1");
+
+    // building the searchables
+    Searcher[] searchers = new Searcher[2];
+    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
+    searchers[0] = new IndexSearcher(indexStoreB, true);
+    searchers[1] = new IndexSearcher(indexStoreA, true);
+    // creating the multiSearcher
+    Searcher mSearcher = getMultiSearcherInstance(searchers);
+    // performing the search
+    ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;
+
+    assertEquals(3, hits.length);
+
+    // iterating over the hit documents
+    for (int i = 0; i < hits.length; i++) {
+      mSearcher.doc(hits[i].doc);
+    }
+    mSearcher.close();
+
+    //--------------------------------------------------------------------
+    // scenario 2
+    //--------------------------------------------------------------------
+
+    // adding one document to the empty index
+    writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
+        TEST_VERSION_CURRENT,
+        new StandardAnalyzer(TEST_VERSION_CURRENT))
+        .setOpenMode(OpenMode.APPEND));
+    writerB.addDocument(lDoc);
+    writerB.forceMerge(1);
+    writerB.close();
+
+    // building the searchables
+    Searcher[] searchers2 = new Searcher[2];
+    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
+    searchers2[0] = new IndexSearcher(indexStoreB, true);
+    searchers2[1] = new IndexSearcher(indexStoreA, true);
+    // creating the multiSearcher
+    MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
+    // performing the same search
+    ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;
+
+    assertEquals(4, hits2.length);
+
+    // iterating over the hit documents
+    for (int i = 0; i < hits2.length; i++) {
+      // no exception should happen at this point
+      mSearcher2.doc(hits2[i].doc);
+    }
+
+    // test the subSearcher() method:
+    Query subSearcherQuery = parser.parse("id:doc1");
+    hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
+    assertEquals(2, hits2.length);
+    assertEquals(0, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[0]
+    assertEquals(1, mSearcher2.subSearcher(hits2[1].doc)); // hit from searchers2[1]
+    subSearcherQuery = parser.parse("id:doc2");
+    hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
+    assertEquals(1, hits2.length);
+    assertEquals(1, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[1]
+    mSearcher2.close();
+
+    //--------------------------------------------------------------------
+    // scenario 3
+    //--------------------------------------------------------------------
+
+    // deleting the document just added; this will cause a different exception to take place
+    Term term = new Term("id", "doc1");
+    IndexReader readerB = IndexReader.open(indexStoreB, false);
+    readerB.deleteDocuments(term);
+    readerB.close();
+
+    // optimizing the index with the writer
+    writerB = new IndexWriter(indexStoreB, new IndexWriterConfig(
+        TEST_VERSION_CURRENT,
+        new StandardAnalyzer(TEST_VERSION_CURRENT))
+        .setOpenMode(OpenMode.APPEND));
+    writerB.forceMerge(1);
+    writerB.close();
+
+    // building the searchables
+    Searcher[] searchers3 = new Searcher[2];
+
+    searchers3[0] = new IndexSearcher(indexStoreB, true);
+    searchers3[1] = new IndexSearcher(indexStoreA, true);
+    // creating the multiSearcher
+    Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
+    // performing the same search
+    ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;
+
+    assertEquals(3, hits3.length);
+
+    // iterating over the hit documents
+    for (int i = 0; i < hits3.length; i++) {
+      mSearcher3.doc(hits3[i].doc);
+    }
+    mSearcher3.close();
+    indexStoreA.close();
+    indexStoreB.close();
+  }
+
+  private Document createDocument(String contents1, String contents2) {
+    Document document = new Document();
+
+    document.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
+    document.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));
+    if (contents2 != null) {
+      document.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
+    }
+
+    return document;
+  }
+
+  private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException {
+    IndexWriter indexWriter = null;
+
+    try {
+      indexWriter = new IndexWriter(directory, LuceneTestCase.newIndexWriterConfig(random,
+          TEST_VERSION_CURRENT, new KeywordAnalyzer()).setOpenMode(
+          create ? OpenMode.CREATE : OpenMode.APPEND));
+
+      for (int i = 0; i < nDocs; i++) {
+        indexWriter.addDocument(createDocument("doc" + i, contents2));
+      }
+    } finally {
+      if (indexWriter != null) {
+        indexWriter.close();
+      }
+    }
+  }
+
+  public void testFieldSelector() throws Exception {
+    Directory ramDirectory1, ramDirectory2;
+    IndexSearcher indexSearcher1, indexSearcher2;
+
+    ramDirectory1 = newDirectory();
+    ramDirectory2 = newDirectory();
+    Query query = new TermQuery(new Term("contents", "doc0"));
+
+    // put the documents in two different indices
+    initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
+    initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+
+    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
+    indexSearcher2 = new IndexSearcher(ramDirectory2, true);
+
+    MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
+    assertTrue("searcher is null and it shouldn't be", searcher != null);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertTrue("hits is null and it shouldn't be", hits != null);
+    assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
+    Document document = searcher.doc(hits[0].doc);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
+    // should be one document from each directory;
+    // they both have two fields, contents and other
+    Set<String> ftl = new HashSet<String>();
+    ftl.add("other");
+    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.<String> emptySet());
+    document = searcher.doc(hits[0].doc, fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
+    String value = document.get("contents");
+    assertTrue("value is not null and it should be", value == null);
+    value = document.get("other");
+    assertTrue("value is null and it shouldn't be", value != null);
+    ftl.clear();
+    ftl.add("contents");
+    fs = new SetBasedFieldSelector(ftl, Collections.<String> emptySet());
+    document = searcher.doc(hits[1].doc, fs);
+    value = document.get("contents");
+    assertTrue("value is null and it shouldn't be", value != null);
+    value = document.get("other");
+    assertTrue("value is not null and it should be", value == null);
+    indexSearcher1.close();
+    indexSearcher2.close();
+    ramDirectory1.close();
+    ramDirectory2.close();
+    searcher.close();
+  }
+
+  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
+  public void testNormalization1() throws IOException {
+    testNormalization(1, "Using 1 document per index:");
+  }
+  */
+
+  public void testNormalization10() throws IOException {
+    testNormalization(10, "Using 10 documents per index:");
+  }
+
+  private void testNormalization(int nDocs, String message) throws IOException {
+    Query query = new TermQuery(new Term("contents", "doc0"));
+
+    Directory ramDirectory1;
+    IndexSearcher indexSearcher1;
+    ScoreDoc[] hits;
+
+    ramDirectory1 = newDirectory();
+
+    // First put the documents in the same index
+    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
+    initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+
+    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
+    indexSearcher1.setDefaultFieldSortScoring(true, true);
+
+    hits = indexSearcher1.search(query, null, 1000).scoreDocs;
+
+    assertEquals(message, 2, hits.length);
+
+    // Store the scores for use later
+    float[] scores = { hits[0].score, hits[1].score };
+
+    assertTrue(message, scores[0] > scores[1]);
+
+    indexSearcher1.close();
+    ramDirectory1.close();
+    hits = null;
+
+    Directory ramDirectory2;
+    IndexSearcher indexSearcher2;
+
+    ramDirectory1 = newDirectory();
+    ramDirectory2 = newDirectory();
+
+    // Now put the documents in a different index
+    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
+    initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+
+    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
+    indexSearcher1.setDefaultFieldSortScoring(true, true);
+    indexSearcher2 = new IndexSearcher(ramDirectory2, true);
+    indexSearcher2.setDefaultFieldSortScoring(true, true);
+
+    Searcher searcher = getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
+
+    hits = searcher.search(query, null, 1000).scoreDocs;
+
+    assertEquals(message, 2, hits.length);
+
+    // The scores should be the same (within reason)
+    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
+    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2
+
+    // Adding a Sort.RELEVANCE object should not change anything
+    hits = searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs;
+
+    assertEquals(message, 2, hits.length);
+
+    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
+    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2
+
+    searcher.close();
+
+    ramDirectory1.close();
+    ramDirectory2.close();
+  }
+
+  /**
+   * Tests that a custom similarity is in effect when using MultiSearcher (LUCENE-789).
+   * @throws IOException
+   */
+  public void testCustomSimilarity() throws IOException {
+    Directory dir = newDirectory();
+    initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+    IndexSearcher srchr = new IndexSearcher(dir, true);
+    MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr});
+
+    Similarity customSimilarity = new DefaultSimilarity() {
+      // override all scoring factors
+      @Override
+      public float idf(int docFreq, int numDocs) { return 100.0f; }
+      @Override
+      public float coord(int overlap, int maxOverlap) { return 1.0f; }
+      @Override
+      public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); }
+      @Override
+      public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
+      @Override
+      public float sloppyFreq(int distance) { return 1.0f; }
+      @Override
+      public float tf(float freq) { return 1.0f; }
+    };
+
+    srchr.setSimilarity(customSimilarity);
+    msrchr.setSimilarity(customSimilarity);
+
+    Query query = new TermQuery(new Term("contents", "doc0"));
+
+    // Get a score from the IndexSearcher
+    TopDocs topDocs = srchr.search(query, null, 1);
+    float score1 = topDocs.getMaxScore();
+
+    // Get the score from the MultiSearcher
+    topDocs = msrchr.search(query, null, 1);
+    float scoreN = topDocs.getMaxScore();
+
+    // The scores from the IndexSearcher and MultiSearcher should be the same
+    // if the same similarity is used.
+    assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
+    msrchr.close();
+    srchr.close();
+    dir.close();
+  }
+
+  public void testDocFreq() throws IOException {
+    Directory dir1 = newDirectory();
+    Directory dir2 = newDirectory();
+
+    initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+    initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+    IndexSearcher searcher1 = new IndexSearcher(dir1, true);
+    IndexSearcher searcher2 = new IndexSearcher(dir2, true);
+
+    MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
+    assertEquals(15, multiSearcher.docFreq(new Term("contents", "x")));
+    multiSearcher.close();
+    searcher1.close();
+    searcher2.close();
+    dir1.close();
+    dir2.close();
+  }
+
+  public void testCreateDocFrequencyMap() throws IOException {
+    Directory dir1 = newDirectory();
+    Directory dir2 = newDirectory();
+    Term template = new Term("contents");
+    String[] contents = {"a", "b", "c"};
+    HashSet<Term> termsSet = new HashSet<Term>();
+    for (int i = 0; i < contents.length; i++) {
+      initIndex(random, dir1, i + 10, i == 0, contents[i]);
+      initIndex(random, dir2, i + 5, i == 0, contents[i]);
+      termsSet.add(template.createTerm(contents[i]));
+    }
+    IndexSearcher searcher1 = new IndexSearcher(dir1, true);
+    IndexSearcher searcher2 = new IndexSearcher(dir2, true);
+    MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
+    Map<Term, Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
+    assertEquals(3, docFrequencyMap.size());
+    for (int i = 0; i < contents.length; i++) {
+      assertEquals(Integer.valueOf((i * 2) + 15), docFrequencyMap.get(template.createTerm(contents[i])));
+    }
+    multiSearcher.close();
+    searcher1.close();
+    searcher2.close();
+    dir1.close();
+    dir2.close();
+  }
+}