// X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?ds=sidebyside
// (added as a new file in that diff; blob index 0000000..b471a00)
package org.apache.lucene.index.memory;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util._TestUtil;

/**
 * Verifies that a Lucene MemoryIndex and an index written to a Directory
 * (referred to as the "RAMDirectory" throughout this test) have the same
 * behaviour, returning the same hit counts for queries on some randomish
 * indexes.
 */
public class MemoryIndexTest extends BaseTokenStreamTestCase {
  /** Query strings loaded from the test resource files in {@link #setUp()}. */
  private final Set<String> queries = new HashSet<String>();

  /** Number of random documents checked by {@link #testRandomQueries()}. */
  public static final int ITERATIONS = 100 * RANDOM_MULTIPLIER;

  /**
   * Loads the query strings from both resource files before each test.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    queries.addAll(readQueries("testqueries.txt"));
    queries.addAll(readQueries("testqueries2.txt"));
  }

  /**
   * Reads a set of queries from a resource file, one query per line.
   * Lines are trimmed; blank lines and lines starting with "#" or "//"
   * are skipped.
   *
   * @param resource resource name, resolved relative to this class
   * @return the distinct query strings found in the resource
   * @throws IOException if the resource is missing or cannot be read
   */
  private Set<String> readQueries(String resource) throws IOException {
    InputStream stream = getClass().getResourceAsStream(resource);
    if (stream == null) {
      // getResourceAsStream signals "not found" with null; fail with the name
      // instead of an anonymous NullPointerException further down.
      throw new IOException("query resource not found: " + resource);
    }
    Set<String> result = new HashSet<String>();
    try {
      BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
      String line;
      while ((line = reader.readLine()) != null) {
        line = line.trim();
        if (line.length() > 0 && !line.startsWith("#") && !line.startsWith("//")) {
          result.add(line);
        }
      }
    } finally {
      // Closing the underlying stream releases the resource on every path.
      stream.close();
    }
    return result;
  }

  /**
   * Runs the random comparison {@link #ITERATIONS} times.
   */
  public void testRandomQueries() throws Exception {
    for (int i = 0; i < ITERATIONS; i++) {
      assertAgainstRAMDirectory();
    }
  }

  /**
   * Builds a randomish document for both the RAMDirectory and the MemoryIndex,
   * and runs all the queries against it.
   */
  public void assertAgainstRAMDirectory() throws Exception {
    // Random text for field "foo", then for field "term" (order matters for
    // reproducing a run from a given random seed).
    final String fooText = randomFieldText();
    final String termText = randomFieldText();

    Directory ramdir = newDirectory();
    Analyzer analyzer = randomAnalyzer();
    IndexWriter writer = new IndexWriter(ramdir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    Field field1 = newField("foo", fooText, Field.Store.NO, Field.Index.ANALYZED);
    Field field2 = newField("term", termText, Field.Store.NO, Field.Index.ANALYZED);
    doc.add(field1);
    doc.add(field2);
    writer.addDocument(doc);
    writer.close();

    MemoryIndex memory = new MemoryIndex();
    memory.addField("foo", fooText, analyzer);
    memory.addField("term", termText, analyzer);
    try {
      assertAllQueries(memory, ramdir, analyzer);
    } finally {
      // Close the directory even when a comparison fails.
      ramdir.close();
    }
  }

  /**
   * Builds the text of one field: fewer than 250 * RANDOM_MULTIPLIER random
   * terms (possibly none), each preceded by a single space.
   */
  private String randomFieldText() {
    StringBuilder text = new StringBuilder();
    final int numTerms = random.nextInt(250 * RANDOM_MULTIPLIER);
    for (int i = 0; i < numTerms; i++) {
      text.append(" ");
      text.append(randomTerm());
    }
    return text.toString();
  }

  /**
   * Runs all queries against both the RAMDirectory and the MemoryIndex,
   * ensuring they report the same number of total hits.
   *
   * @param memory   the in-memory index holding the document
   * @param ramdir   the directory holding the same document
   * @param analyzer analyzer used to parse the queries (default field "foo")
   */
  public void assertAllQueries(MemoryIndex memory, Directory ramdir, Analyzer analyzer) throws Exception {
    IndexReader reader = IndexReader.open(ramdir);
    try {
      IndexSearcher ram = new IndexSearcher(reader);
      IndexSearcher mem = memory.createSearcher();
      try {
        QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer);
        for (String query : queries) {
          TopDocs ramDocs = ram.search(qp.parse(query), 1);
          TopDocs memDocs = mem.search(qp.parse(query), 1);
          // Name the query so a mismatch can be diagnosed from the failure alone.
          assertEquals("query=" + query, ramDocs.totalHits, memDocs.totalHits);
        }
      } finally {
        ram.close();
        mem.close();
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Returns a random analyzer to analyze the terms: a MockAnalyzer with the
   * SIMPLE tokenizer, a StopAnalyzer, or a MockAnalyzer with the WHITESPACE
   * tokenizer.
   */
  private Analyzer randomAnalyzer() {
    switch (random.nextInt(3)) {
      case 0: return new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
      case 1: return new StopAnalyzer(TEST_VERSION_CURRENT);
      default: return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
    }
  }

  /**
   * Some terms to be indexed, in addition to random words.
   * These terms are commonly used in the queries.
   */
  private static final String[] TEST_TERMS = {"term", "Term", "tErm", "TERM",
      "telm", "stop", "drop", "roll", "phrase", "a", "c", "bar", "blar",
      "gack", "weltbank", "worlbank", "hello", "on", "the", "apache", "Apache",
      "copyright", "Copyright"};

  /**
   * Half of the time, returns a random term from TEST_TERMS.
   * The other half of the time, returns a random unicode string.
   */
  private String randomTerm() {
    if (random.nextBoolean()) {
      // return a random TEST_TERM
      return TEST_TERMS[random.nextInt(TEST_TERMS.length)];
    } else {
      // return a random unicode term
      return _TestUtil.randomUnicodeString(random);
    }
  }
}