X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?ds=sidebyside

diff --git a/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
deleted file mode 100755
index 42c7e63..0000000
--- a/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
+++ /dev/null
@@ -1,1052 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.benchmark.byTask;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.StringReader;
-import java.text.Collator;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
-import org.apache.lucene.benchmark.byTask.stats.TaskStats;
-import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
-import org.apache.lucene.collation.CollationKeyAnalyzer;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogDocMergePolicy;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.TermFreqVector;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.search.FieldCache.StringIndex;
-import org.apache.lucene.search.FieldCache;
-
-/**
- * Test very simply that perf tasks - simple algorithms - are doing what they should.
- */
-public class TestPerfTasksLogic extends BenchmarkTestCase {
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    copyToWorkDir("reuters.first20.lines.txt");
-  }
-
-  /**
-   * Test index creation logic
-   */
-  public void testIndexAndSearchTasks() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 1000",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader",
-        "{ CountingSearchTest } : 200",
-        "CloseReader",
-        "[ CountingSearchTest > : 70",
-        "[ CountingSearchTest > : 9",
-    };
-
-    // 2. we test this value later
-    CountingSearchTestTask.numSearches = 0;
-
-    // 3. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);
-    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
-    // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
-            .setOpenMode(OpenMode.APPEND));
-    iw.close();
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
-    ir.close();
-  }
-
-  /**
-   * Test timed sequence task.
-   */
-  public void testTimedSearchTask() throws Exception {
-    String algLines[] = {
-        "log.step=100000",
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 100",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader",
-        "{ CountingSearchTest } : .5s",
-        "CloseReader",
-    };
-
-    CountingSearchTestTask.numSearches = 0;
-    execBenchmark(algLines);
-    assertTrue(CountingSearchTestTask.numSearches > 0);
-    long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis;
-    assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
-  }
-
-  // disabled until we fix BG thread prio -- this test
-  // causes build to hang
-  public void testBGSearchTaskThreads() throws Exception {
-    String algLines[] = {
-        "log.time.step.msec = 100",
-        "log.step=100000",
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 1000",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader",
-        "{",
-        " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
-        " Wait(0.5)",
-        "}",
-        "CloseReader",
-        "RepSumByPref X"
-    };
-
-    CountingSearchTestTask.numSearches = 0;
-    execBenchmark(algLines);
-    assertTrue(CountingSearchTestTask.numSearches > 0);
-  }
-
-  public void testHighlighting() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "doc.stored=true",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "query.maker=" + ReutersQueryMaker.class.getName(),
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 100",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader(true)",
-        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
-        "CloseReader",
-    };
-
-    // 2. we test this value later
-    CountingHighlighterTestTask.numHighlightedResults = 0;
-    CountingHighlighterTestTask.numDocsRetrieved = 0;
-    // 3. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
-    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
-    //we probably should use a different doc/query maker, but...
-    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
-
-    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
-    // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
-    iw.close();
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
-    ir.close();
-  }
-
-  public void testHighlightingTV() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "doc.stored=true",//doc storage is required in order to have text to highlight
-        "doc.term.vector.offsets=true",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "query.maker=" + ReutersQueryMaker.class.getName(),
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 1000",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader(false)",
-        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
-        "CloseReader",
-    };
-
-    // 2. we test this value later
-    CountingHighlighterTestTask.numHighlightedResults = 0;
-    CountingHighlighterTestTask.numDocsRetrieved = 0;
-    // 3. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
-    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
-    //we probably should use a different doc/query maker, but...
-    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
-
-    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
-    // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
-    iw.close();
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
-    ir.close();
-  }
-
-  public void testHighlightingNoTvNoStore() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "doc.stored=false",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "query.maker=" + ReutersQueryMaker.class.getName(),
-        "ResetSystemErase",
-        "CreateIndex",
-        "{ AddDoc } : 1000",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader",
-        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
-        "CloseReader",
-    };
-
-    // 2. we test this value later
-    CountingHighlighterTestTask.numHighlightedResults = 0;
-    CountingHighlighterTestTask.numDocsRetrieved = 0;
-    // 3. execute the algorithm (required in every "logic" test)
-    try {
-      Benchmark benchmark = execBenchmark(algLines);
-      assertTrue("CountingHighlighterTest should have thrown an exception", false);
-      assertNotNull(benchmark); // (avoid compile warning on unused variable)
-    } catch (Exception e) {
-      assertTrue(true);
-    }
-  }
-
-  /**
-   * Test Exhausting Doc Maker logic
-   */
-  public void testExhaustContentSource() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
-        "content.source.log.step=1",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "# ----- alg ",
-        "CreateIndex",
-        "{ AddDoc } : * ",
-        "Optimize",
-        "CloseIndex",
-        "OpenReader",
-        "{ CountingSearchTest } : 100",
-        "CloseReader",
-        "[ CountingSearchTest > : 30",
-        "[ CountingSearchTest > : 9",
-    };
-
-    // 2. we test this value later
-    CountingSearchTestTask.numSearches = 0;
-
-    // 3. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
-    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
-    // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
-    iw.close();
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals("1 doc was added to the index, this is what we expect to find!",1,ir.numDocs());
-    ir.close();
-  }
-
-  // LUCENE-1994: test thread safety of SortableSingleDocMaker
-  public void testDocMakerThreadSafety() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
-        "doc.term.vector=false",
-        "log.step.AddDoc=10000",
-        "content.source.forever=true",
-        "directory=RAMDirectory",
-        "doc.reuse.fields=false",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "doc.index.props=true",
-        "# ----- alg ",
-        "CreateIndex",
-        "[ { AddDoc > : 250 ] : 4",
-        "CloseIndex",
-    };
-
-    // 2. we test this value later
-    CountingSearchTestTask.numSearches = 0;
-
-    // 3. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
-    final int maxDoc = r.maxDoc();
-    assertEquals(1000, maxDoc);
-    for(int i=0;i<1000;i++) {
-      assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
-    }
-    r.close();
-  }
-
-  /**
-   * Test Parallel Doc Maker logic (for LUCENE-940)
-   */
-  public void testParallelDocMaker() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=FSDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "# ----- alg ",
-        "CreateIndex",
-        "[ { AddDoc } : * ] : 4 ",
-        "CloseIndex",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  /**
-   * Test WriteLineDoc and LineDocSource.
-   */
-  public void testLineDocFile() throws Exception {
-    File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt");
-
-    // We will call WriteLineDocs this many times
-    final int NUM_TRY_DOCS = 50;
-
-    // Creates a line file with first 50 docs from SingleDocSource
-    String algLines1[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
-        "content.source.forever=true",
-        "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
-        "# ----- alg ",
-        "{WriteLineDoc()}:" + NUM_TRY_DOCS,
-    };
-
-    // Run algo
-    Benchmark benchmark = execBenchmark(algLines1);
-
-    BufferedReader r = new BufferedReader(new FileReader(lineFile));
-    int numLines = 0;
-    String line;
-    while((line = r.readLine()) != null) {
-      if (numLines==0 && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)) {
-        continue; // do not count the header line as a doc
-      }
-      numLines++;
-    }
-    r.close();
-    assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
-
-    // Index the line docs
-    String algLines2[] = {
-        "# ----- properties ",
-        "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
-        "content.source.forever=false",
-        "doc.reuse.fields=false",
-        "ram.flush.mb=4",
-        "# ----- alg ",
-        "ResetSystemErase",
-        "CreateIndex",
-        "{AddDoc}: *",
-        "CloseIndex",
-    };
-
-    // Run algo
-    benchmark = execBenchmark(algLines2);
-
-    // now we should be able to open the index for write.
-    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
-            .setOpenMode(OpenMode.APPEND));
-    iw.close();
-
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
-    ir.close();
-
-    lineFile.delete();
-  }
-
-  /**
-   * Test ReadTokensTask
-   */
-  public void testReadTokens() throws Exception {
-
-    // We will call ReadTokens on this many docs
-    final int NUM_DOCS = 20;
-
-    // Read tokens from first NUM_DOCS docs from Reuters and
-    // then build index from the same docs
-    String algLines1[] = {
-        "# ----- properties ",
-        "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "# ----- alg ",
-        "{ReadTokens}: " + NUM_DOCS,
-        "ResetSystemErase",
-        "CreateIndex",
-        "{AddDoc}: " + NUM_DOCS,
-        "CloseIndex",
-    };
-
-    // Run algo
-    Benchmark benchmark = execBenchmark(algLines1);
-
-    List stats = benchmark.getRunData().getPoints().taskStats();
-
-    // Count how many tokens all ReadTokens saw
-    int totalTokenCount1 = 0;
-    for (final TaskStats stat : stats) {
-      if (stat.getTask().getName().equals("ReadTokens")) {
-        totalTokenCount1 += stat.getCount();
-      }
-    }
-
-    // Separately count how many tokens are actually in the index:
-    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals(NUM_DOCS, reader.numDocs());
-
-    TermEnum terms = reader.terms();
-    TermDocs termDocs = reader.termDocs();
-    int totalTokenCount2 = 0;
-    while(terms.next()) {
-      Term term = terms.term();
-      /* not-tokenized, but indexed field */
-      if (term != null && term.field() != DocMaker.ID_FIELD && term.field() != DocMaker.DATE_MSEC_FIELD && term.field() != DocMaker.TIME_SEC_FIELD) {
-        termDocs.seek(terms.term());
-        while (termDocs.next())
-          totalTokenCount2 += termDocs.freq();
-      }
-    }
-    reader.close();
-
-    // Make sure they are the same
-    assertEquals(totalTokenCount1, totalTokenCount2);
-  }
-
-  /**
-   * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
-   */
-  public void testParallelExhausted() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "task.max.depth.log=1",
-        "# ----- alg ",
-        "CreateIndex",
-        "{ [ AddDoc]: 4} : * ",
-        "ResetInputs ",
-        "{ [ AddDoc]: 4} : * ",
-        "WaitForMerges",
-        "CloseIndex",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 2 * 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-
-  /**
-   * Test that exhaust in loop works as expected (LUCENE-1115).
-   */
-  public void testExhaustedLooped() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "task.max.depth.log=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        " WaitForMerges",
-        " CloseIndex",
-        "} : 2",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  /**
-   * Test that we can close IndexWriter with argument "false".
-   */
-  public void testCloseIndexFalse() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "ram.flush.mb=-1",
-        "max.buffered=2",
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        " CloseIndex(false)",
-        "} : 2",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  public static class MyMergeScheduler extends SerialMergeScheduler {
-    boolean called;
-    public MyMergeScheduler() {
-      super();
-      called = true;
-    }
-  }
-
-  public void testDeleteByPercent() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "ram.flush.mb=-1",
-        "max.buffered=2",
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "CreateIndex",
-        "{ \"AddDocs\" AddDoc > : * ",
-        "CloseIndex()",
-        "OpenReader(false)",
-        "DeleteByPercent(20)",
-        "CloseReader"
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 16; // first 20 reuters docs, minus 20%
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  /**
-   * Test that we can set the merge scheduler.
-   */
-  public void testMergeScheduler() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "merge.scheduler=" + MyMergeScheduler.class.getName(),
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        "} : 2",
-    };
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    assertTrue("did not use the specified MergeScheduler",
-        ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig()
-            .getMergeScheduler()).called);
-    benchmark.getRunData().getIndexWriter().close();
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  public static class MyMergePolicy extends LogDocMergePolicy {
-    boolean called;
-    public MyMergePolicy() {
-      called = true;
-    }
-  }
-
-  /**
-   * Test that we can set the merge policy.
-   */
-  public void testMergePolicy() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "ram.flush.mb=-1",
-        "max.buffered=2",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "merge.policy=" + MyMergePolicy.class.getName(),
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        "} : 2",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-    assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called);
-    benchmark.getRunData().getIndexWriter().close();
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-  }
-
-  /**
-   * Test that IndexWriter settings stick.
-   */
-  public void testIndexWriterSettings() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "ram.flush.mb=-1",
-        "max.buffered=2",
-        "compound=cmpnd:true:false",
-        "doc.term.vector=vector:false:true",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "doc.stored=false",
-        "merge.factor=3",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        " NewRound",
-        "} : 2",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-    final IndexWriter writer = benchmark.getRunData().getIndexWriter();
-    assertEquals(2, writer.getConfig().getMaxBufferedDocs());
-    assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
-    assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
-    assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
-    writer.close();
-    Directory dir = benchmark.getRunData().getDirectory();
-    IndexReader reader = IndexReader.open(dir, true);
-    TermFreqVector [] tfv = reader.getTermFreqVectors(0);
-    assertNotNull(tfv);
-    assertTrue(tfv.length > 0);
-    reader.close();
-  }
-
-  /**
-   * Test that we can call optimize(maxNumSegments).
-   */
-  public void testOptimizeMaxNumSegments() throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "ram.flush.mb=-1",
-        "max.buffered=3",
-        "doc.term.vector=false",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "merge.policy=org.apache.lucene.index.LogDocMergePolicy",
-        "doc.stored=false",
-        "doc.tokenized=false",
-        "debug.level=1",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        " Optimize(3)",
-        " CloseIndex()",
-        "} : 2",
-    };
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test number of docs in the index
-    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // first 20 reuters docs.
-    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
-    ir.close();
-
-    // Make sure we have 3 segments:
-    SegmentInfos infos = new SegmentInfos();
-    infos.read(benchmark.getRunData().getDirectory());
-    assertEquals(3, infos.size());
-  }
-
-  /**
-   * Test disabling task count (LUCENE-1136).
-   */
-  public void testDisableCounting() throws Exception {
-    doTestDisableCounting(true);
-    doTestDisableCounting(false);
-  }
-
-  private void doTestDisableCounting(boolean disable) throws Exception {
-    // 1. alg definition (required in every "logic" test)
-    String algLines[] = disableCountingLines(disable);
-
-    // 2. execute the algorithm (required in every "logic" test)
-    Benchmark benchmark = execBenchmark(algLines);
-
-    // 3. test counters
-    int n = disable ? 0 : 1;
-    int nChecked = 0;
-    for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) {
-      String taskName = stats.getTask().getName();
-      if (taskName.equals("Rounds")) {
-        assertEquals("Wrong total count!",20+2*n,stats.getCount());
-        nChecked++;
-      } else if (taskName.equals("CreateIndex")) {
-        assertEquals("Wrong count for CreateIndex!",n,stats.getCount());
-        nChecked++;
-      } else if (taskName.equals("CloseIndex")) {
-        assertEquals("Wrong count for CloseIndex!",n,stats.getCount());
-        nChecked++;
-      }
-    }
-    assertEquals("Missing some tasks to check!",3,nChecked);
-  }
-
-  private String[] disableCountingLines (boolean disable) {
-    String dis = disable ? "-" : "";
"-" : ""; - return new String[] { - "# ----- properties ", - "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", - "docs.file=" + getReuters20LinesFile(), - "content.source.log.step=30", - "doc.term.vector=false", - "content.source.forever=false", - "directory=RAMDirectory", - "doc.stored=false", - "doc.tokenized=false", - "task.max.depth.log=1", - "# ----- alg ", - "{ \"Rounds\"", - " ResetSystemErase", - " "+dis+"CreateIndex", // optionally disable counting here - " { \"AddDocs\" AddDoc > : * ", - " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks) - "}", - "RepSumByName", - }; - } - - /** - * Test that we can change the Locale in the runData, - * that it is parsed as we expect. - */ - public void testLocale() throws Exception { - // empty Locale: clear it (null) - Benchmark benchmark = execBenchmark(getLocaleConfig("")); - assertNull(benchmark.getRunData().getLocale()); - - // ROOT locale - benchmark = execBenchmark(getLocaleConfig("ROOT")); - assertEquals(new Locale(""), benchmark.getRunData().getLocale()); - - // specify just a language - benchmark = execBenchmark(getLocaleConfig("de")); - assertEquals(new Locale("de"), benchmark.getRunData().getLocale()); - - // specify language + country - benchmark = execBenchmark(getLocaleConfig("en,US")); - assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale()); - - // specify language + country + variant - benchmark = execBenchmark(getLocaleConfig("no,NO,NY")); - assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale()); - } - - private String[] getLocaleConfig(String localeParam) { - String algLines[] = { - "# ----- properties ", - "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", - "docs.file=" + getReuters20LinesFile(), - "content.source.log.step=3", - "content.source.forever=false", - "directory=RAMDirectory", - "# ----- alg ", - "{ \"Rounds\"", - " ResetSystemErase", - " NewLocale(" + localeParam + ")", - " CreateIndex", - " { \"AddDocs\" AddDoc > : * ", - " NewRound", - "} : 1", - }; - return algLines; - } - - /** - * Test that we can create CollationAnalyzers. 
-   */
-  public void testCollator() throws Exception {
-    // ROOT locale
-    Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
-    CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
-        .getInstance(new Locale("")));
-    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
-    // specify just a language
-    benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
-    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
-    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
-    // specify language + country
-    benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
-    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
-        "US")));
-    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
-    // specify language + country + variant
-    benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
-    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
-        "NO", "NY")));
-    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-  }
-
-  private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
-      throws Exception {
-    TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
-    TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
-    ts1.reset();
-    ts2.reset();
-    CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
-    CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
-    assertTrue(ts1.incrementToken());
-    assertTrue(ts2.incrementToken());
-    assertEquals(termAtt1.toString(), termAtt2.toString());
-    assertFalse(ts1.incrementToken());
-    assertFalse(ts2.incrementToken());
-    ts1.close();
-    ts2.close();
-  }
-
-  private String[] getCollatorConfig(String localeParam,
-      String collationParam) {
-    String algLines[] = {
-        "# ----- properties ",
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.log.step=3",
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "# ----- alg ",
-        "{ \"Rounds\"",
-        " ResetSystemErase",
-        " NewLocale(" + localeParam + ")",
-        " NewCollationAnalyzer(" + collationParam + ")",
-        " CreateIndex",
-        " { \"AddDocs\" AddDoc > : * ",
-        " NewRound",
-        "} : 1",
-    };
-    return algLines;
-  }
-
-  /**
-   * Test that we can create ShingleAnalyzerWrappers.
-   */
-  public void testShingleAnalyzer() throws Exception {
-    String text = "one,two,three, four five six";
-
-    // Default analyzer, maxShingleSize, and outputUnigrams
-    Benchmark benchmark = execBenchmark(getShingleConfig(""));
-    benchmark.getRunData().getAnalyzer().tokenStream
-        ("bogus", new StringReader(text)).close();
-    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
-        new String[] {"one", "one two", "two", "two three",
-                      "three", "three four", "four", "four five",
-                      "five", "five six", "six"});
-    // Default analyzer, maxShingleSize = 3, and outputUnigrams = false
-    benchmark = execBenchmark
-        (getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
-    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
-        new String[] { "one two", "one two three", "two three",
-                       "two three four", "three four",
-                       "three four five", "four five",
-                       "four five six", "five six" });
-    // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
-    benchmark = execBenchmark
-        (getShingleConfig("analyzer:WhitespaceAnalyzer"));
-    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
-        new String[] { "one,two,three,", "one,two,three, four",
-                       "four", "four five", "five", "five six",
-                       "six" });
-
-    // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
-    benchmark = execBenchmark
-        (getShingleConfig
-            ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
-    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
-        new String[] { "one,two,three, four",
-                       "one,two,three, four five",
-                       "four five", "four five six",
-                       "five six" });
-  }
-
-  private void assertEqualShingle
-      (Analyzer analyzer, String text, String[] expected) throws Exception {
-    BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
-  }
-
-  private String[] getShingleConfig(String params) {
-    String algLines[] = {
-        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-        "docs.file=" + getReuters20LinesFile(),
-        "content.source.forever=false",
-        "directory=RAMDirectory",
-        "NewShingleAnalyzer(" + params + ")",
-        "CreateIndex",
-        "{ \"AddDocs\" AddDoc > : * "
-    };
-    return algLines;
-  }
-
-  private String getReuters20LinesFile() {
-    return getWorkDirResourcePath("reuters.first20.lines.txt");
-  }
-}
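For context, every test in the deleted file follows the same pattern: build an
array of algorithm lines, hand it to execBenchmark(...), then inspect the
resulting index. execBenchmark is a helper on the BenchmarkTestCase base class
imported above; outside the test harness the same .alg text is driven by
org.apache.lucene.benchmark.byTask.Benchmark. A minimal standalone sketch
against the 3.4 contrib/benchmark API (the class name, temp-file handling, and
property choices here are illustrative assumptions, not part of the deleted
test):

    import java.io.File;
    import java.io.FileReader;
    import java.io.FileWriter;

    import org.apache.lucene.benchmark.byTask.Benchmark;

    public class RunAlgSketch {
      public static void main(String[] args) throws Exception {
        // One property or task per line, exactly like the algLines arrays above.
        File alg = File.createTempFile("demo", ".alg");
        FileWriter w = new FileWriter(alg);
        w.write("content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource\n");
        w.write("directory=RAMDirectory\n");
        w.write("CreateIndex\n");
        w.write("{ AddDoc } : 100\n"); // repeat the AddDoc task 100 times
        w.write("CloseIndex\n");
        w.close();

        // Parse the algorithm and run it; the run data exposes the index directory afterwards.
        Benchmark benchmark = new Benchmark(new FileReader(alg));
        benchmark.execute();
        benchmark.getRunData().getDirectory().close();
      }
    }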