X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java diff --git a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java b/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java deleted file mode 100644 index c5b4546..0000000 --- a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java +++ /dev/null @@ -1,457 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.KeywordAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.SetBasedFieldSelector; - -import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.store.Directory; -import java.io.IOException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Map; -import java.util.Random; -import java.util.Set; - -/** - * Tests {@link MultiSearcher} class. - */ -public class TestMultiSearcher extends LuceneTestCase -{ - - /** - * ReturnS a new instance of the concrete MultiSearcher class - * used in this test. - */ - protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException { - return new MultiSearcher(searchers); - } - - public void testEmptyIndex() throws Exception { - // creating two directories for indices - Directory indexStoreA = newDirectory(); - Directory indexStoreB = newDirectory(); - - // creating a document to store - Document lDoc = new Document(); - lDoc.add(newField("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED)); - lDoc.add(newField("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - lDoc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - - // creating a document to store - Document lDoc2 = new Document(); - lDoc2.add(newField("fulltext", "in a galaxy far far away.....", - Field.Store.YES, Field.Index.ANALYZED)); - lDoc2.add(newField("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED)); - lDoc2.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - - // creating a document to store - Document lDoc3 = new Document(); - lDoc3.add(newField("fulltext", "a bizarre bug manifested itself....", - Field.Store.YES, Field.Index.ANALYZED)); - lDoc3.add(newField("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED)); - lDoc3.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - - // creating an index writer for the first index - IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); - // creating an index writer for the second index, but writing nothing - IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); - - //-------------------------------------------------------------------- - // scenario 1 - //-------------------------------------------------------------------- - - // writing the documents to the first index - writerA.addDocument(lDoc); - writerA.addDocument(lDoc2); - writerA.addDocument(lDoc3); - writerA.optimize(); - writerA.close(); - - // closing the second index - writerB.close(); - - // creating the query - QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT)); - Query query = parser.parse("handle:1"); - - // building the searchables - Searcher[] searchers = new Searcher[2]; - // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index - searchers[0] = new IndexSearcher(indexStoreB, true); - searchers[1] = new IndexSearcher(indexStoreA, true); - // creating the multiSearcher - Searcher mSearcher = getMultiSearcherInstance(searchers); - // performing the search - ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs; - - assertEquals(3, hits.length); - - // iterating over the hit documents - for (int i = 0; i < hits.length; i++) { - mSearcher.doc(hits[i].doc); - } - mSearcher.close(); - - - //-------------------------------------------------------------------- - // scenario 2 - //-------------------------------------------------------------------- - - // adding one document to the empty index - writerB = new IndexWriter(indexStoreB, newIndexWriterConfig( - TEST_VERSION_CURRENT, - new StandardAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND)); - writerB.addDocument(lDoc); - writerB.optimize(); - writerB.close(); - - // building the searchables - Searcher[] searchers2 = new Searcher[2]; - // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index - searchers2[0] = new IndexSearcher(indexStoreB, true); - searchers2[1] = new IndexSearcher(indexStoreA, true); - // creating the mulitSearcher - MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2); - // performing the same search - ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs; - - assertEquals(4, hits2.length); - - // iterating over the hit documents - for (int i = 0; i < hits2.length; i++) { - // no exception should happen at this point - mSearcher2.doc(hits2[i].doc); - } - - // test the subSearcher() method: - Query subSearcherQuery = parser.parse("id:doc1"); - hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs; - assertEquals(2, hits2.length); - assertEquals(0, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[0] - assertEquals(1, mSearcher2.subSearcher(hits2[1].doc)); // hit from searchers2[1] - subSearcherQuery = parser.parse("id:doc2"); - hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs; - assertEquals(1, hits2.length); - assertEquals(1, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[1] - mSearcher2.close(); - - //-------------------------------------------------------------------- - // scenario 3 - //-------------------------------------------------------------------- - - // deleting the document just added, this will cause a different exception to take place - Term term = new Term("id", "doc1"); - IndexReader readerB = IndexReader.open(indexStoreB, false); - readerB.deleteDocuments(term); - readerB.close(); - - // optimizing the index with the writer - writerB = new IndexWriter(indexStoreB, new IndexWriterConfig( - TEST_VERSION_CURRENT, - new StandardAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND)); - writerB.optimize(); - writerB.close(); - - // building the searchables - Searcher[] searchers3 = new Searcher[2]; - - searchers3[0] = new IndexSearcher(indexStoreB, true); - searchers3[1] = new IndexSearcher(indexStoreA, true); - // creating the mulitSearcher - Searcher mSearcher3 = getMultiSearcherInstance(searchers3); - // performing the same search - ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs; - - assertEquals(3, hits3.length); - - // iterating over the hit documents - for (int i = 0; i < hits3.length; i++) { - mSearcher3.doc(hits3[i].doc); - } - mSearcher3.close(); - indexStoreA.close(); - indexStoreB.close(); - } - - private Document createDocument(String contents1, String contents2) { - Document document=new Document(); - - document.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED)); - document.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED)); - if (contents2!=null) { - document.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED)); - } - - return document; - } - - private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException { - IndexWriter indexWriter=null; - - try { - indexWriter = new IndexWriter(directory, LuceneTestCase.newIndexWriterConfig(random, - TEST_VERSION_CURRENT, new KeywordAnalyzer()).setOpenMode( - create ? OpenMode.CREATE : OpenMode.APPEND)); - - for (int i=0; i ftl = new HashSet(); - ftl.add("other"); - SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections. emptySet()); - document = searcher.doc(hits[0].doc, fs); - assertTrue("document is null and it shouldn't be", document != null); - assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1); - String value = document.get("contents"); - assertTrue("value is not null and it should be", value == null); - value = document.get("other"); - assertTrue("value is null and it shouldn't be", value != null); - ftl.clear(); - ftl.add("contents"); - fs = new SetBasedFieldSelector(ftl, Collections. emptySet()); - document = searcher.doc(hits[1].doc, fs); - value = document.get("contents"); - assertTrue("value is null and it shouldn't be", value != null); - value = document.get("other"); - assertTrue("value is not null and it should be", value == null); - indexSearcher1.close(); - indexSearcher2.close(); - ramDirectory1.close(); - ramDirectory2.close(); - searcher.close(); - } - - /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0 - public void testNormalization1() throws IOException { - testNormalization(1, "Using 1 document per index:"); - } - */ - - public void testNormalization10() throws IOException { - testNormalization(10, "Using 10 documents per index:"); - } - - private void testNormalization(int nDocs, String message) throws IOException { - Query query=new TermQuery(new Term("contents", "doc0")); - - Directory ramDirectory1; - IndexSearcher indexSearcher1; - ScoreDoc[] hits; - - ramDirectory1=newDirectory(); - - // First put the documents in the same index - initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc... - initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... - - indexSearcher1=new IndexSearcher(ramDirectory1, true); - indexSearcher1.setDefaultFieldSortScoring(true, true); - - hits=indexSearcher1.search(query, null, 1000).scoreDocs; - - assertEquals(message, 2, hits.length); - - // Store the scores for use later - float[] scores={ hits[0].score, hits[1].score }; - - assertTrue(message, scores[0] > scores[1]); - - indexSearcher1.close(); - ramDirectory1.close(); - hits=null; - - - - Directory ramDirectory2; - IndexSearcher indexSearcher2; - - ramDirectory1=newDirectory(); - ramDirectory2=newDirectory(); - - // Now put the documents in a different index - initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc... - initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... - - indexSearcher1=new IndexSearcher(ramDirectory1, true); - indexSearcher1.setDefaultFieldSortScoring(true, true); - indexSearcher2=new IndexSearcher(ramDirectory2, true); - indexSearcher2.setDefaultFieldSortScoring(true, true); - - Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 }); - - hits=searcher.search(query, null, 1000).scoreDocs; - - assertEquals(message, 2, hits.length); - - // The scores should be the same (within reason) - assertEquals(message, scores[0], hits[0].score, 1e-6); // This will a document from ramDirectory1 - assertEquals(message, scores[1], hits[1].score, 1e-6); // This will a document from ramDirectory2 - - - - // Adding a Sort.RELEVANCE object should not change anything - hits=searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs; - - assertEquals(message, 2, hits.length); - - assertEquals(message, scores[0], hits[0].score, 1e-6); // This will a document from ramDirectory1 - assertEquals(message, scores[1], hits[1].score, 1e-6); // This will a document from ramDirectory2 - - searcher.close(); - - ramDirectory1.close(); - ramDirectory2.close(); - } - - /** - * test that custom similarity is in effect when using MultiSearcher (LUCENE-789). - * @throws IOException - */ - public void testCustomSimilarity () throws IOException { - Directory dir = newDirectory(); - initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... - IndexSearcher srchr = new IndexSearcher(dir, true); - MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr}); - - Similarity customSimilarity = new DefaultSimilarity() { - // overide all - @Override - public float idf(int docFreq, int numDocs) { return 100.0f; } - @Override - public float coord(int overlap, int maxOverlap) { return 1.0f; } - @Override - public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); } - @Override - public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } - @Override - public float sloppyFreq(int distance) { return 1.0f; } - @Override - public float tf(float freq) { return 1.0f; } - }; - - srchr.setSimilarity(customSimilarity); - msrchr.setSimilarity(customSimilarity); - - Query query=new TermQuery(new Term("contents", "doc0")); - - // Get a score from IndexSearcher - TopDocs topDocs = srchr.search(query, null, 1); - float score1 = topDocs.getMaxScore(); - - // Get the score from MultiSearcher - topDocs = msrchr.search(query, null, 1); - float scoreN = topDocs.getMaxScore(); - - // The scores from the IndexSearcher and Multisearcher should be the same - // if the same similarity is used. - assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6); - msrchr.close(); - srchr.close(); - dir.close(); - } - - public void testDocFreq() throws IOException{ - Directory dir1 = newDirectory(); - Directory dir2 = newDirectory(); - - initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... - initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... - IndexSearcher searcher1 = new IndexSearcher(dir1, true); - IndexSearcher searcher2 = new IndexSearcher(dir2, true); - - MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2}); - assertEquals(15, multiSearcher.docFreq(new Term("contents","x"))); - multiSearcher.close(); - searcher1.close(); - searcher2.close(); - dir1.close(); - dir2.close(); - } - - public void testCreateDocFrequencyMap() throws IOException{ - Directory dir1 = newDirectory(); - Directory dir2 = newDirectory(); - Term template = new Term("contents") ; - String[] contents = {"a", "b", "c"}; - HashSet termsSet = new HashSet(); - for (int i = 0; i < contents.length; i++) { - initIndex(random, dir1, i+10, i==0, contents[i]); - initIndex(random, dir2, i+5, i==0, contents[i]); - termsSet.add(template.createTerm(contents[i])); - } - IndexSearcher searcher1 = new IndexSearcher(dir1, true); - IndexSearcher searcher2 = new IndexSearcher(dir2, true); - MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2}); - Map docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet); - assertEquals(3, docFrequencyMap.size()); - for (int i = 0; i < contents.length; i++) { - assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i]))); - } - multiSearcher.close(); - searcher1.close(); - searcher2.close(); - dir1.close(); - dir2.close(); - } -}