+++ /dev/null
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.benchmark.byTask;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.StringReader;
-import java.text.Collator;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
-import org.apache.lucene.benchmark.byTask.stats.TaskStats;
-import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
-import org.apache.lucene.collation.CollationKeyAnalyzer;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogDocMergePolicy;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.TermFreqVector;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.search.FieldCache.StringIndex;
-import org.apache.lucene.search.FieldCache;
-
-/**
- * Test very simply that perf tasks - simple algorithms - are doing what they should.
- */
-public class TestPerfTasksLogic extends BenchmarkTestCase {
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- copyToWorkDir("reuters.first20.lines.txt");
- }
-
- /**
- * Test index creation logic
- */
- public void testIndexAndSearchTasks() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 1000",
- "Optimize",
- "CloseIndex",
- "OpenReader",
- "{ CountingSearchTest } : 200",
- "CloseReader",
- "[ CountingSearchTest > : 70",
- "[ CountingSearchTest > : 9",
- };
-
- // 2. we test this value later
- CountingSearchTestTask.numSearches = 0;
-
- // 3. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 4. test specific checks after the benchmark run completed.
- assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);
- assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
- // now we should be able to open the index for write.
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
- new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
- .setOpenMode(OpenMode.APPEND));
- iw.close();
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
- ir.close();
- }
-
- /**
- * Test timed sequence task.
- */
- public void testTimedSearchTask() throws Exception {
- String algLines[] = {
- "log.step=100000",
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 100",
- "Optimize",
- "CloseIndex",
- "OpenReader",
- "{ CountingSearchTest } : .5s",
- "CloseReader",
- };
-
- CountingSearchTestTask.numSearches = 0;
- execBenchmark(algLines);
- assertTrue(CountingSearchTestTask.numSearches > 0);
- long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis;
- assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
- }
-
- // disabled until we fix BG thread prio -- this test
- // causes build to hang
- public void testBGSearchTaskThreads() throws Exception {
- String algLines[] = {
- "log.time.step.msec = 100",
- "log.step=100000",
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 1000",
- "Optimize",
- "CloseIndex",
- "OpenReader",
- "{",
- " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
- " Wait(0.5)",
- "}",
- "CloseReader",
- "RepSumByPref X"
- };
-
- CountingSearchTestTask.numSearches = 0;
- execBenchmark(algLines);
- assertTrue(CountingSearchTestTask.numSearches > 0);
- }
-
- public void testHighlighting() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "doc.stored=true",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "query.maker=" + ReutersQueryMaker.class.getName(),
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 100",
- "Optimize",
- "CloseIndex",
- "OpenReader(true)",
- "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
- "CloseReader",
- };
-
- // 2. we test this value later
- CountingHighlighterTestTask.numHighlightedResults = 0;
- CountingHighlighterTestTask.numDocsRetrieved = 0;
- // 3. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 4. test specific checks after the benchmark run completed.
- assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
- //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
- //we probably should use a different doc/query maker, but...
- assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
-
- assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
- // now we should be able to open the index for write.
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
- iw.close();
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
- ir.close();
- }
-
- public void testHighlightingTV() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "doc.stored=true",//doc storage is required in order to have text to highlight
- "doc.term.vector.offsets=true",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "query.maker=" + ReutersQueryMaker.class.getName(),
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 1000",
- "Optimize",
- "CloseIndex",
- "OpenReader(false)",
- "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
- "CloseReader",
- };
-
- // 2. we test this value later
- CountingHighlighterTestTask.numHighlightedResults = 0;
- CountingHighlighterTestTask.numDocsRetrieved = 0;
- // 3. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 4. test specific checks after the benchmark run completed.
- assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
- //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
- //we probably should use a different doc/query maker, but...
- assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
-
- assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
- // now we should be able to open the index for write.
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
- iw.close();
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
- ir.close();
- }
-
- public void testHighlightingNoTvNoStore() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "doc.stored=false",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "query.maker=" + ReutersQueryMaker.class.getName(),
- "ResetSystemErase",
- "CreateIndex",
- "{ AddDoc } : 1000",
- "Optimize",
- "CloseIndex",
- "OpenReader",
- "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
- "CloseReader",
- };
-
- // 2. we test this value later
- CountingHighlighterTestTask.numHighlightedResults = 0;
- CountingHighlighterTestTask.numDocsRetrieved = 0;
- // 3. execute the algorithm (required in every "logic" test)
- try {
- Benchmark benchmark = execBenchmark(algLines);
- assertTrue("CountingHighlighterTest should have thrown an exception", false);
- assertNotNull(benchmark); // (avoid compile warning on unused variable)
- } catch (Exception e) {
- assertTrue(true);
- }
- }
-
- /**
- * Test Exhasting Doc Maker logic
- */
- public void testExhaustContentSource() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
- "content.source.log.step=1",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "# ----- alg ",
- "CreateIndex",
- "{ AddDoc } : * ",
- "Optimize",
- "CloseIndex",
- "OpenReader",
- "{ CountingSearchTest } : 100",
- "CloseReader",
- "[ CountingSearchTest > : 30",
- "[ CountingSearchTest > : 9",
- };
-
- // 2. we test this value later
- CountingSearchTestTask.numSearches = 0;
-
- // 3. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 4. test specific checks after the benchmark run completed.
- assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
- assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
- // now we should be able to open the index for write.
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
- iw.close();
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
- ir.close();
- }
-
- // LUCENE-1994: test thread safety of SortableSingleDocMaker
- public void testDocMakerThreadSafety() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
- "doc.term.vector=false",
- "log.step.AddDoc=10000",
- "content.source.forever=true",
- "directory=RAMDirectory",
- "doc.reuse.fields=false",
- "doc.stored=false",
- "doc.tokenized=false",
- "doc.index.props=true",
- "# ----- alg ",
- "CreateIndex",
- "[ { AddDoc > : 250 ] : 4",
- "CloseIndex",
- };
-
- // 2. we test this value later
- CountingSearchTestTask.numSearches = 0;
-
- // 3. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
- final int maxDoc = r.maxDoc();
- assertEquals(1000, maxDoc);
- for(int i=0;i<1000;i++) {
- assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
- }
- r.close();
- }
-
- /**
- * Test Parallel Doc Maker logic (for LUCENE-940)
- */
- public void testParallelDocMaker() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=FSDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "# ----- alg ",
- "CreateIndex",
- "[ { AddDoc } : * ] : 4 ",
- "CloseIndex",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- /**
- * Test WriteLineDoc and LineDocSource.
- */
- public void testLineDocFile() throws Exception {
- File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt");
-
- // We will call WriteLineDocs this many times
- final int NUM_TRY_DOCS = 50;
-
- // Creates a line file with first 50 docs from SingleDocSource
- String algLines1[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
- "content.source.forever=true",
- "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
- "# ----- alg ",
- "{WriteLineDoc()}:" + NUM_TRY_DOCS,
- };
-
- // Run algo
- Benchmark benchmark = execBenchmark(algLines1);
-
- BufferedReader r = new BufferedReader(new FileReader(lineFile));
- int numLines = 0;
- String line;
- while((line = r.readLine()) != null) {
- if (numLines==0 && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)) {
- continue; // do not count the header line as a doc
- }
- numLines++;
- }
- r.close();
- assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
-
- // Index the line docs
- String algLines2[] = {
- "# ----- properties ",
- "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
- "content.source.forever=false",
- "doc.reuse.fields=false",
- "ram.flush.mb=4",
- "# ----- alg ",
- "ResetSystemErase",
- "CreateIndex",
- "{AddDoc}: *",
- "CloseIndex",
- };
-
- // Run algo
- benchmark = execBenchmark(algLines2);
-
- // now we should be able to open the index for write.
- IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
- new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
- .setOpenMode(OpenMode.APPEND));
- iw.close();
-
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
- ir.close();
-
- lineFile.delete();
- }
-
- /**
- * Test ReadTokensTask
- */
- public void testReadTokens() throws Exception {
-
- // We will call ReadTokens on this many docs
- final int NUM_DOCS = 20;
-
- // Read tokens from first NUM_DOCS docs from Reuters and
- // then build index from the same docs
- String algLines1[] = {
- "# ----- properties ",
- "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "# ----- alg ",
- "{ReadTokens}: " + NUM_DOCS,
- "ResetSystemErase",
- "CreateIndex",
- "{AddDoc}: " + NUM_DOCS,
- "CloseIndex",
- };
-
- // Run algo
- Benchmark benchmark = execBenchmark(algLines1);
-
- List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
-
- // Count how many tokens all ReadTokens saw
- int totalTokenCount1 = 0;
- for (final TaskStats stat : stats) {
- if (stat.getTask().getName().equals("ReadTokens")) {
- totalTokenCount1 += stat.getCount();
- }
- }
-
- // Separately count how many tokens are actually in the index:
- IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- assertEquals(NUM_DOCS, reader.numDocs());
-
- TermEnum terms = reader.terms();
- TermDocs termDocs = reader.termDocs();
- int totalTokenCount2 = 0;
- while(terms.next()) {
- Term term = terms.term();
- /* not-tokenized, but indexed field */
- if (term != null && term.field() != DocMaker.ID_FIELD && term.field() != DocMaker.DATE_MSEC_FIELD && term.field() != DocMaker.TIME_SEC_FIELD) {
- termDocs.seek(terms.term());
- while (termDocs.next())
- totalTokenCount2 += termDocs.freq();
- }
- }
- reader.close();
-
- // Make sure they are the same
- assertEquals(totalTokenCount1, totalTokenCount2);
- }
-
- /**
- * Test that " {[AddDoc(4000)]: 4} : * " works corrcetly (for LUCENE-941)
- */
- public void testParallelExhausted() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "task.max.depth.log=1",
- "# ----- alg ",
- "CreateIndex",
- "{ [ AddDoc]: 4} : * ",
- "ResetInputs ",
- "{ [ AddDoc]: 4} : * ",
- "WaitForMerges",
- "CloseIndex",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 2 * 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
-
- /**
- * Test that exhaust in loop works as expected (LUCENE-1115).
- */
- public void testExhaustedLooped() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "task.max.depth.log=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- " WaitForMerges",
- " CloseIndex",
- "} : 2",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- /**
- * Test that we can close IndexWriter with argument "false".
- */
- public void testCloseIndexFalse() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "ram.flush.mb=-1",
- "max.buffered=2",
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- " CloseIndex(false)",
- "} : 2",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- public static class MyMergeScheduler extends SerialMergeScheduler {
- boolean called;
- public MyMergeScheduler() {
- super();
- called = true;
- }
- }
-
- public void testDeleteByPercent() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "ram.flush.mb=-1",
- "max.buffered=2",
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "CreateIndex",
- "{ \"AddDocs\" AddDoc > : * ",
- "CloseIndex()",
- "OpenReader(false)",
- "DeleteByPercent(20)",
- "CloseReader"
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 16; // first 20 reuters docs, minus 20%
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- /**
- * Test that we can set merge scheduler".
- */
- public void testMergeScheduler() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "merge.scheduler=" + MyMergeScheduler.class.getName(),
- "doc.stored=false",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- "} : 2",
- };
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- assertTrue("did not use the specified MergeScheduler",
- ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig()
- .getMergeScheduler()).called);
- benchmark.getRunData().getIndexWriter().close();
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- public static class MyMergePolicy extends LogDocMergePolicy {
- boolean called;
- public MyMergePolicy() {
- called = true;
- }
- }
-
- /**
- * Test that we can set merge policy".
- */
- public void testMergePolicy() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "ram.flush.mb=-1",
- "max.buffered=2",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "merge.policy=" + MyMergePolicy.class.getName(),
- "doc.stored=false",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- "} : 2",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
- assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called);
- benchmark.getRunData().getIndexWriter().close();
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
- }
-
- /**
- * Test that IndexWriter settings stick.
- */
- public void testIndexWriterSettings() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "ram.flush.mb=-1",
- "max.buffered=2",
- "compound=cmpnd:true:false",
- "doc.term.vector=vector:false:true",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "merge.factor=3",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- " NewRound",
- "} : 2",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
- final IndexWriter writer = benchmark.getRunData().getIndexWriter();
- assertEquals(2, writer.getConfig().getMaxBufferedDocs());
- assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
- assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
- assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
- writer.close();
- Directory dir = benchmark.getRunData().getDirectory();
- IndexReader reader = IndexReader.open(dir, true);
- TermFreqVector [] tfv = reader.getTermFreqVectors(0);
- assertNotNull(tfv);
- assertTrue(tfv.length > 0);
- reader.close();
- }
-
- /**
- * Test that we can call optimize(maxNumSegments).
- */
- public void testOptimizeMaxNumSegments() throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "ram.flush.mb=-1",
- "max.buffered=3",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "merge.policy=org.apache.lucene.index.LogDocMergePolicy",
- "doc.stored=false",
- "doc.tokenized=false",
- "debug.level=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- " Optimize(3)",
- " CloseIndex()",
- "} : 2",
- };
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test number of docs in the index
- IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
- int ndocsExpected = 20; // first 20 reuters docs.
- assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
- ir.close();
-
- // Make sure we have 3 segments:
- SegmentInfos infos = new SegmentInfos();
- infos.read(benchmark.getRunData().getDirectory());
- assertEquals(3, infos.size());
- }
-
- /**
- * Test disabling task count (LUCENE-1136).
- */
- public void testDisableCounting() throws Exception {
- doTestDisableCounting(true);
- doTestDisableCounting(false);
- }
-
- private void doTestDisableCounting(boolean disable) throws Exception {
- // 1. alg definition (required in every "logic" test)
- String algLines[] = disableCountingLines(disable);
-
- // 2. execute the algorithm (required in every "logic" test)
- Benchmark benchmark = execBenchmark(algLines);
-
- // 3. test counters
- int n = disable ? 0 : 1;
- int nChecked = 0;
- for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) {
- String taskName = stats.getTask().getName();
- if (taskName.equals("Rounds")) {
- assertEquals("Wrong total count!",20+2*n,stats.getCount());
- nChecked++;
- } else if (taskName.equals("CreateIndex")) {
- assertEquals("Wrong count for CreateIndex!",n,stats.getCount());
- nChecked++;
- } else if (taskName.equals("CloseIndex")) {
- assertEquals("Wrong count for CloseIndex!",n,stats.getCount());
- nChecked++;
- }
- }
- assertEquals("Missing some tasks to check!",3,nChecked);
- }
-
- private String[] disableCountingLines (boolean disable) {
- String dis = disable ? "-" : "";
- return new String[] {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=30",
- "doc.term.vector=false",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "doc.stored=false",
- "doc.tokenized=false",
- "task.max.depth.log=1",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " "+dis+"CreateIndex", // optionally disable counting here
- " { \"AddDocs\" AddDoc > : * ",
- " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks)
- "}",
- "RepSumByName",
- };
- }
-
- /**
- * Test that we can change the Locale in the runData,
- * that it is parsed as we expect.
- */
- public void testLocale() throws Exception {
- // empty Locale: clear it (null)
- Benchmark benchmark = execBenchmark(getLocaleConfig(""));
- assertNull(benchmark.getRunData().getLocale());
-
- // ROOT locale
- benchmark = execBenchmark(getLocaleConfig("ROOT"));
- assertEquals(new Locale(""), benchmark.getRunData().getLocale());
-
- // specify just a language
- benchmark = execBenchmark(getLocaleConfig("de"));
- assertEquals(new Locale("de"), benchmark.getRunData().getLocale());
-
- // specify language + country
- benchmark = execBenchmark(getLocaleConfig("en,US"));
- assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());
-
- // specify language + country + variant
- benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
- assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
- }
-
- private String[] getLocaleConfig(String localeParam) {
- String algLines[] = {
- "# ----- properties ",
- "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
- "docs.file=" + getReuters20LinesFile(),
- "content.source.log.step=3",
- "content.source.forever=false",
- "directory=RAMDirectory",
- "# ----- alg ",
- "{ \"Rounds\"",
- " ResetSystemErase",
- " NewLocale(" + localeParam + ")",
- " CreateIndex",
- " { \"AddDocs\" AddDoc > : * ",
- " NewRound",
- "} : 1",
- };
- return algLines;
- }
-
- /**
- * Test that we can create CollationAnalyzers.
- */
- public void testCollator() throws Exception {
- // ROOT locale
- Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
- CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
- .getInstance(new Locale("")));
- assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
- // specify just a language
- benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
- expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
- assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
- // specify language + country
- benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
- expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
- "US")));
- assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
-
- // specify language + country + variant
- benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
- expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
- "NO", "NY")));
- assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
- }
-
- /**
-  * Asserts that both analyzers emit exactly one token for {@code text} and that the
-  * two tokens have the same term text.
-  * <p>
-  * Each stream is consumed in the order reset, incrementToken until exhausted, end,
-  * close. Closing happens in {@code finally} blocks so that a failed assertion or an
-  * exception does not leave a token stream open.
-  *
-  * @param a1 analyzer producing the expected term
-  * @param a2 analyzer whose term is compared against {@code a1}'s
-  * @param text input analyzed by both analyzers
-  * @throws Exception if creating or consuming a token stream fails
-  */
- private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
-     throws Exception {
-   TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
-   try {
-     TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
-     try {
-       ts1.reset();
-       ts2.reset();
-       CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
-       CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
-       assertTrue(ts1.incrementToken());
-       assertTrue(ts2.incrementToken());
-       assertEquals(termAtt1.toString(), termAtt2.toString());
-       // Each stream must be exhausted after its single token.
-       assertFalse(ts1.incrementToken());
-       assertFalse(ts2.incrementToken());
-       // Signal end-of-stream before closing, as the consumer workflow requires.
-       ts1.end();
-       ts2.end();
-     } finally {
-       ts2.close();
-     }
-   } finally {
-     ts1.close();
-   }
- }
-
- /**
-  * Builds the benchmark configuration used by the collation test: property lines
-  * followed by a single-round algorithm that runs {@code NewLocale(localeParam)} and
-  * {@code NewCollationAnalyzer(collationParam)} before creating the index and adding
-  * documents.
-  *
-  * @param localeParam text placed verbatim inside {@code NewLocale(...)}
-  * @param collationParam text placed verbatim inside {@code NewCollationAnalyzer(...)}
-  * @return the property lines followed by the algorithm lines
-  */
- private String[] getCollatorConfig(String localeParam, String collationParam) {
-   String docsFile = getReuters20LinesFile();
-   return new String[] {
-     "# ----- properties ",
-     "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-     "docs.file=" + docsFile,
-     "content.source.log.step=3",
-     "content.source.forever=false",
-     "directory=RAMDirectory",
-     "# ----- alg ",
-     "{ \"Rounds\"",
-     " ResetSystemErase",
-     " NewLocale(" + localeParam + ")",
-     " NewCollationAnalyzer(" + collationParam + ")",
-     " CreateIndex",
-     " { \"AddDocs\" AddDoc > : * ",
-     " NewRound",
-     "} : 1",
-   };
- }
-
- /**
- * Test that we can create ShingleAnalyzerWrappers.
- */
- public void testShingleAnalyzer() throws Exception {
- String text = "one,two,three, four five six";
-
- // Default analyzer, maxShingleSize, and outputUnigrams
- Benchmark benchmark = execBenchmark(getShingleConfig(""));
- benchmark.getRunData().getAnalyzer().tokenStream
- ("bogus", new StringReader(text)).close();
- assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
- new String[] {"one", "one two", "two", "two three",
- "three", "three four", "four", "four five",
- "five", "five six", "six"});
- // Default analyzer, maxShingleSize = 3, and outputUnigrams = false
- benchmark = execBenchmark
- (getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
- assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
- new String[] { "one two", "one two three", "two three",
- "two three four", "three four",
- "three four five", "four five",
- "four five six", "five six" });
- // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
- benchmark = execBenchmark
- (getShingleConfig("analyzer:WhitespaceAnalyzer"));
- assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
- new String[] { "one,two,three,", "one,two,three, four",
- "four", "four five", "five", "five six",
- "six" });
-
- // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
- benchmark = execBenchmark
- (getShingleConfig
- ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
- assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
- new String[] { "one,two,three, four",
- "one,two,three, four five",
- "four five", "four five six",
- "five six" });
- }
-
- /**
-  * Compares the output of {@code analyzer} for {@code text} against {@code expected}
-  * by delegating to {@code BaseTokenStreamTestCase.assertAnalyzesTo}.
-  *
-  * @param analyzer analyzer under test
-  * @param text input to analyze
-  * @param expected expected terms, passed through unchanged
-  * @throws Exception propagated from the delegate
-  */
- private void assertEqualShingle(Analyzer analyzer, String text, String[] expected)
-     throws Exception {
-   BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
- }
-
- /**
-  * Builds the benchmark configuration used by the shingle test: property lines followed
-  * by an algorithm that runs {@code NewShingleAnalyzer(params)}, creates the index and
-  * adds documents.
-  *
-  * @param params text placed verbatim inside {@code NewShingleAnalyzer(...)}; may be empty
-  * @return the property lines followed by the algorithm lines
-  */
- private String[] getShingleConfig(String params) {
-   String docsFile = getReuters20LinesFile();
-   return new String[] {
-     "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
-     "docs.file=" + docsFile,
-     "content.source.forever=false",
-     "directory=RAMDirectory",
-     "NewShingleAnalyzer(" + params + ")",
-     "CreateIndex",
-     "{ \"AddDocs\" AddDoc > : * "
-   };
- }
-
- private String getReuters20LinesFile() {
- return getWorkDirResourcePath("reuters.first20.lines.txt");
- }
-}