--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.benchmark.byTask;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.StringReader;
+import java.text.Collator;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
+import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
+import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
+import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SerialMergeScheduler;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.StringIndex;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Test very simply that perf tasks (simple algorithms) are doing what they should.
+ */
+public class TestPerfTasksLogic extends BenchmarkTestCase {
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ copyToWorkDir("reuters.first20.lines.txt");
+ }
+
+ /**
+ * Test index creation logic
+ */
+ public void testIndexAndSearchTasks() throws Exception {
+ // 1. alg definition (required in every "logic" test)
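+ // Alg-file syntax used below: "{ task } : N" repeats a serial sequence N
+ // times; "[ task ] : N" runs N parallel threads; closing a sequence with
+ // '>' instead of '}'/']' suppresses the per-child-task stats reporting.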
+ String algLines[] = {
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 1000",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader",
+ "{ CountingSearchTest } : 200",
+ "CloseReader",
+ "[ CountingSearchTest > : 70",
+ "[ CountingSearchTest > : 9",
+ };
+
+ // 2. we test this value later
+ CountingSearchTestTask.numSearches = 0;
+
+ // 3. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 4. test specific checks after the benchmark run completed.
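+ // 200 serial searches plus the 70 + 9 parallel ones = 279.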
+ assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);
+ assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+ // now we should be able to open the index for write.
+ IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setOpenMode(OpenMode.APPEND));
+ iw.close();
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
+ ir.close();
+ }
+
+ /**
+ * Test timed sequence task.
+ */
+ public void testTimedSearchTask() throws Exception {
+ String algLines[] = {
+ "log.step=100000",
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 100",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader",
+ "{ CountingSearchTest } : .5s",
+ "CloseReader",
+ };
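+ // ": .5s" repeats the search sequence for roughly half a second rather
+ // than for a fixed count, hence the elapsed-time assertion below.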
+
+ CountingSearchTestTask.numSearches = 0;
+ execBenchmark(algLines);
+ assertTrue(CountingSearchTestTask.numSearches > 0);
+ long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis;
+ assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
+ }
+
+ // disabled until we fix BG thread prio -- this test
+ // causes build to hang
+ public void testBGSearchTaskThreads() throws Exception {
+ String algLines[] = {
+ "log.time.step.msec = 100",
+ "log.step=100000",
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 1000",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader",
+ "{",
+ " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
+ " Wait(0.5)",
+ "}",
+ "CloseReader",
+ "RepSumByPref X"
+ };
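+ // The trailing "&-1" should run the two "XSearch" threads in the
+ // background at a priority lowered by one, while Wait(0.5) keeps the main
+ // sequence alive (see the BG-thread-priority caveat above).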
+
+ CountingSearchTestTask.numSearches = 0;
+ execBenchmark(algLines);
+ assertTrue(CountingSearchTestTask.numSearches > 0);
+ }
+
+ public void testHighlighting() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "doc.stored=true",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "query.maker=" + ReutersQueryMaker.class.getName(),
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 100",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader(true)",
+ "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+ "CloseReader",
+ };
+
+ // 2. we test this value later
+ CountingHighlighterTestTask.numHighlightedResults = 0;
+ CountingHighlighterTestTask.numDocsRetrieved = 0;
+ // 3. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 4. test specific checks after the benchmark run completed.
+ assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
+ //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
+ //we probably should use a different doc/query maker, but...
+ assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
+
+ assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+ // now we should be able to open the index for write.
+ IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
+ iw.close();
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
+ ir.close();
+ }
+
+ public void testHighlightingTV() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "doc.stored=true",//doc storage is required in order to have text to highlight
+ "doc.term.vector.offsets=true",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "query.maker=" + ReutersQueryMaker.class.getName(),
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 1000",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader(false)",
+ "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+ "CloseReader",
+ };
+
+ // 2. we test this value later
+ CountingHighlighterTestTask.numHighlightedResults = 0;
+ CountingHighlighterTestTask.numDocsRetrieved = 0;
+ // 3. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 4. test specific checks after the benchmark run completed.
+ assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
+ //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
+ //we probably should use a different doc/query maker, but...
+ assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
+
+ assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+ // now we should be able to open the index for write.
+ IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
+ iw.close();
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
+ ir.close();
+ }
+
+ public void testHighlightingNoTvNoStore() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "doc.stored=false",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "query.maker=" + ReutersQueryMaker.class.getName(),
+ "ResetSystemErase",
+ "CreateIndex",
+ "{ AddDoc } : 1000",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader",
+ "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+ "CloseReader",
+ };
+
+ // 2. we test this value later
+ CountingHighlighterTestTask.numHighlightedResults = 0;
+ CountingHighlighterTestTask.numDocsRetrieved = 0;
+ // 3. execute the algorithm (required in every "logic" test)
+ try {
+ execBenchmark(algLines);
+ fail("CountingHighlighterTest should have thrown an exception: no stored text or term vectors to highlight");
+ } catch (Exception e) {
+ // expected - highlighting needs stored fields or term vectors
+ }
+ }
+
+ /**
+ * Test exhausting Doc Maker logic.
+ */
+ public void testExhaustContentSource() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
+ "content.source.log.step=1",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "# ----- alg ",
+ "CreateIndex",
+ "{ AddDoc } : * ",
+ "ForceMerge(1)",
+ "CloseIndex",
+ "OpenReader",
+ "{ CountingSearchTest } : 100",
+ "CloseReader",
+ "[ CountingSearchTest > : 30",
+ "[ CountingSearchTest > : 9",
+ };
+
+ // 2. we test this value later
+ CountingSearchTestTask.numSearches = 0;
+
+ // 3. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 4. test specific checks after the benchmark run completed.
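+ // 100 serial searches plus the 30 + 9 parallel ones = 139.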
+ assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
+ assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+ // now we should be able to open the index for write.
+ IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
+ iw.close();
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
+ ir.close();
+ }
+
+ // LUCENE-1994: test thread safety of SortableSingleDocMaker
+ public void testDocMakerThreadSafety() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
+ "doc.term.vector=false",
+ "log.step.AddDoc=10000",
+ "content.source.forever=true",
+ "directory=RAMDirectory",
+ "doc.reuse.fields=false",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "doc.index.props=true",
+ "# ----- alg ",
+ "CreateIndex",
+ "[ { AddDoc > : 250 ] : 4",
+ "CloseIndex",
+ };
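+ // 4 threads x 250 AddDoc calls = 1000 docs, checked against maxDoc below.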
+
+ // 2. reset the static counter (not asserted in this test)
+ CountingSearchTestTask.numSearches = 0;
+
+ // 3. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
+ final int maxDoc = r.maxDoc();
+ assertEquals(1000, maxDoc);
+ for(int i=0;i<1000;i++) {
+ assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
+ }
+ r.close();
+ }
+
+ /**
+ * Test Parallel Doc Maker logic (for LUCENE-940)
+ */
+ public void testParallelDocMaker() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=FSDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "# ----- alg ",
+ "CreateIndex",
+ "[ { AddDoc } : * ] : 4 ",
+ "CloseIndex",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ /**
+ * Test WriteLineDoc and LineDocSource.
+ */
+ public void testLineDocFile() throws Exception {
+ File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt");
+
+ // We will call WriteLineDoc this many times
+ final int NUM_TRY_DOCS = 50;
+
+ // Creates a line file with first 50 docs from SingleDocSource
+ String algLines1[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
+ "content.source.forever=true",
+ "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
+ "# ----- alg ",
+ "{WriteLineDoc()}:" + NUM_TRY_DOCS,
+ };
+
+ // Run algo
+ Benchmark benchmark = execBenchmark(algLines1);
+
+ BufferedReader r = new BufferedReader(new FileReader(lineFile));
+ int numLines = 0;
+ String line;
+ while((line = r.readLine()) != null) {
+ if (numLines==0 && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)) {
+ continue; // do not count the header line as a doc
+ }
+ numLines++;
+ }
+ r.close();
+ assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
+
+ // Index the line docs
+ String algLines2[] = {
+ "# ----- properties ",
+ "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
+ "content.source.forever=false",
+ "doc.reuse.fields=false",
+ "ram.flush.mb=4",
+ "# ----- alg ",
+ "ResetSystemErase",
+ "CreateIndex",
+ "{AddDoc}: *",
+ "CloseIndex",
+ };
+
+ // Run algo
+ benchmark = execBenchmark(algLines2);
+
+ // now we should be able to open the index for write.
+ IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setOpenMode(OpenMode.APPEND));
+ iw.close();
+
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
+ ir.close();
+
+ lineFile.delete();
+ }
+
+ /**
+ * Test ReadTokensTask
+ */
+ public void testReadTokens() throws Exception {
+
+ // We will call ReadTokens on this many docs
+ final int NUM_DOCS = 20;
+
+ // Read tokens from first NUM_DOCS docs from Reuters and
+ // then build index from the same docs
+ String algLines1[] = {
+ "# ----- properties ",
+ "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "# ----- alg ",
+ "{ReadTokens}: " + NUM_DOCS,
+ "ResetSystemErase",
+ "CreateIndex",
+ "{AddDoc}: " + NUM_DOCS,
+ "CloseIndex",
+ };
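+ // ReadTokens tokenizes with the same WhitespaceAnalyzer that is used for
+ // indexing, so its token count must match the postings counted below.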
+
+ // Run algo
+ Benchmark benchmark = execBenchmark(algLines1);
+
+ List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
+
+ // Count how many tokens all ReadTokens saw
+ int totalTokenCount1 = 0;
+ for (final TaskStats stat : stats) {
+ if (stat.getTask().getName().equals("ReadTokens")) {
+ totalTokenCount1 += stat.getCount();
+ }
+ }
+
+ // Separately count how many tokens are actually in the index:
+ IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ assertEquals(NUM_DOCS, reader.numDocs());
+
+ TermEnum terms = reader.terms();
+ TermDocs termDocs = reader.termDocs();
+ int totalTokenCount2 = 0;
+ while(terms.next()) {
+ Term term = terms.term();
+ /* skip the not-tokenized (but indexed) id/date/time fields; Term field
+ names are interned in this Lucene version, so '!=' is safe here */
+ if (term != null && term.field() != DocMaker.ID_FIELD && term.field() != DocMaker.DATE_MSEC_FIELD && term.field() != DocMaker.TIME_SEC_FIELD) {
+ termDocs.seek(terms.term());
+ while (termDocs.next())
+ totalTokenCount2 += termDocs.freq();
+ }
+ }
+ reader.close();
+
+ // Make sure they are the same
+ assertEquals(totalTokenCount1, totalTokenCount2);
+ }
+
+ /**
+ * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
+ */
+ public void testParallelExhausted() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "task.max.depth.log=1",
+ "# ----- alg ",
+ "CreateIndex",
+ "{ [ AddDoc]: 4} : * ",
+ "ResetInputs ",
+ "{ [ AddDoc]: 4} : * ",
+ "WaitForMerges",
+ "CloseIndex",
+ };
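+ // ResetInputs rewinds the exhausted content source, so the second
+ // "{ [ AddDoc]: 4} : *" pass adds the same 20 docs again.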
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 2 * 20; // two passes over the first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+
+ /**
+ * Test that exhaust in loop works as expected (LUCENE-1115).
+ */
+ public void testExhaustedLooped() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "task.max.depth.log=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " WaitForMerges",
+ " CloseIndex",
+ "} : 2",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ /**
+ * Test that we can close IndexWriter with argument "false".
+ */
+ public void testCloseIndexFalse() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "ram.flush.mb=-1",
+ "max.buffered=2",
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " CloseIndex(false)",
+ "} : 2",
+ };
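+ // CloseIndex(false) maps to IndexWriter.close(false), i.e. abort running
+ // background merges instead of waiting for them.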
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ public static class MyMergeScheduler extends SerialMergeScheduler {
+ boolean called;
+ public MyMergeScheduler() {
+ super();
+ called = true;
+ }
+ }
+
+ public void testDeleteByPercent() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "ram.flush.mb=-1",
+ "max.buffered=2",
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "CreateIndex",
+ "{ \"AddDocs\" AddDoc > : * ",
+ "CloseIndex()",
+ "OpenReader(false)",
+ "DeleteByPercent(20)",
+ "CloseReader"
+ };
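+ // OpenReader(false) opens a read-write reader, so DeleteByPercent(20) can
+ // delete 20% of the docs (4 of 20) through it.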
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 16; // first 20 reuters docs, minus 20%
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ /**
+ * Test that we can set the merge scheduler.
+ */
+ public void testMergeScheduler() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "merge.scheduler=" + MyMergeScheduler.class.getName(),
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ "} : 2",
+ };
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ assertTrue("did not use the specified MergeScheduler",
+ ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig()
+ .getMergeScheduler()).called);
+ benchmark.getRunData().getIndexWriter().close();
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ public static class MyMergePolicy extends LogDocMergePolicy {
+ boolean called;
+ public MyMergePolicy() {
+ called = true;
+ }
+ }
+
+ /**
+ * Test that we can set the merge policy.
+ */
+ public void testMergePolicy() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "ram.flush.mb=-1",
+ "max.buffered=2",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "merge.policy=" + MyMergePolicy.class.getName(),
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ "} : 2",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+ assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called);
+ benchmark.getRunData().getIndexWriter().close();
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+ }
+
+ /**
+ * Test that IndexWriter settings stick.
+ */
+ public void testIndexWriterSettings() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "ram.flush.mb=-1",
+ "max.buffered=2",
+ "compound=cmpnd:true:false",
+ "doc.term.vector=vector:false:true",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "merge.factor=3",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " NewRound",
+ "} : 2",
+ };
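+ // Properties of the form "name=label:val0:val1" rotate per round: round 0
+ // uses compound files without term vectors, round 1 the opposite; the
+ // leading token ("cmpnd"/"vector") names the report column.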
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+ final IndexWriter writer = benchmark.getRunData().getIndexWriter();
+ assertEquals(2, writer.getConfig().getMaxBufferedDocs());
+ assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
+ assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
+ assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
+ writer.close();
+ Directory dir = benchmark.getRunData().getDirectory();
+ IndexReader reader = IndexReader.open(dir, true);
+ TermFreqVector [] tfv = reader.getTermFreqVectors(0);
+ assertNotNull(tfv);
+ assertTrue(tfv.length > 0);
+ reader.close();
+ }
+
+ /**
+ * Test indexing with facets tasks.
+ */
+ public void testIndexingWithFacets() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=100",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "merge.factor=3",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "ResetSystemErase",
+ "CreateIndex",
+ "CreateTaxonomyIndex",
+ "{ \"AddDocs\" AddFacetedDoc > : * ",
+ "CloseIndex",
+ "CloseTaxonomyIndex",
+ "OpenTaxonomyReader",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+ PerfRunData runData = benchmark.getRunData();
+ assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
+ TaxonomyReader taxoReader = runData.getTaxonomyReader();
+ assertNotNull("taxo reader was not opened", taxoReader);
+ assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
+ taxoReader.close();
+ }
+
+ /**
+ * Test that we can call forceMerge(maxNumSegments).
+ */
+ public void testForceMerge() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "ram.flush.mb=-1",
+ "max.buffered=3",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "merge.policy=org.apache.lucene.index.LogDocMergePolicy",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " ForceMerge(3)",
+ " CloseIndex()",
+ "} : 2",
+ };
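+ // With RAM flushing disabled and max.buffered=3, a segment is flushed
+ // every 3 docs; ForceMerge(3) must then merge them down to 3 segments,
+ // verified via SegmentInfos below.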
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test number of docs in the index
+ IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+ int ndocsExpected = 20; // first 20 reuters docs.
+ assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+ ir.close();
+
+ // Make sure we have 3 segments:
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(benchmark.getRunData().getDirectory());
+ assertEquals(3, infos.size());
+ }
+
+ /**
+ * Test disabling task count (LUCENE-1136).
+ */
+ public void testDisableCounting() throws Exception {
+ doTestDisableCounting(true);
+ doTestDisableCounting(false);
+ }
+
+ private void doTestDisableCounting(boolean disable) throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = disableCountingLines(disable);
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+
+ // 3. test counters
+ int n = disable ? 0 : 1;
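+ // "Rounds" totals the 20 AddDocs plus CreateIndex and CloseIndex only when
+ // their counting is enabled (n=1); a '-' prefixed task contributes 0.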
+ int nChecked = 0;
+ for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) {
+ String taskName = stats.getTask().getName();
+ if (taskName.equals("Rounds")) {
+ assertEquals("Wrong total count!",20+2*n,stats.getCount());
+ nChecked++;
+ } else if (taskName.equals("CreateIndex")) {
+ assertEquals("Wrong count for CreateIndex!",n,stats.getCount());
+ nChecked++;
+ } else if (taskName.equals("CloseIndex")) {
+ assertEquals("Wrong count for CloseIndex!",n,stats.getCount());
+ nChecked++;
+ }
+ }
+ assertEquals("Missing some tasks to check!",3,nChecked);
+ }
+
+ private String[] disableCountingLines (boolean disable) {
+ String dis = disable ? "-" : "";
+ return new String[] {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=30",
+ "doc.term.vector=false",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "doc.tokenized=false",
+ "task.max.depth.log=1",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " "+dis+"CreateIndex", // optionally disable counting here
+ " { \"AddDocs\" AddDoc > : * ",
+ " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks)
+ "}",
+ "RepSumByName",
+ };
+ }
+
+ /**
+ * Test that we can change the Locale in the runData,
+ * that it is parsed as we expect.
+ */
+ public void testLocale() throws Exception {
+ // empty Locale: clear it (null)
+ Benchmark benchmark = execBenchmark(getLocaleConfig(""));
+ assertNull(benchmark.getRunData().getLocale());
+
+ // ROOT locale
+ benchmark = execBenchmark(getLocaleConfig("ROOT"));
+ assertEquals(new Locale(""), benchmark.getRunData().getLocale());
+
+ // specify just a language
+ benchmark = execBenchmark(getLocaleConfig("de"));
+ assertEquals(new Locale("de"), benchmark.getRunData().getLocale());
+
+ // specify language + country
+ benchmark = execBenchmark(getLocaleConfig("en,US"));
+ assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());
+
+ // specify language + country + variant
+ benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
+ assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
+ }
+
+ private String[] getLocaleConfig(String localeParam) {
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " NewLocale(" + localeParam + ")",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " NewRound",
+ "} : 1",
+ };
+ return algLines;
+ }
+
+ /**
+ * Test that we can create CollationAnalyzers.
+ */
+ public void testCollator() throws Exception {
+ // ROOT locale
+ Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
+ CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
+ .getInstance(new Locale("")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify just a language
+ benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify language + country
+ benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
+ "US")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify language + country + variant
+ benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
+ "NO", "NY")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+ }
+
+ private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
+ throws Exception {
+ TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
+ TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
+ ts1.reset();
+ ts2.reset();
+ CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
+ CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
+ assertTrue(ts1.incrementToken());
+ assertTrue(ts2.incrementToken());
+ assertEquals(termAtt1.toString(), termAtt2.toString());
+ assertFalse(ts1.incrementToken());
+ assertFalse(ts2.incrementToken());
+ ts1.close();
+ ts2.close();
+ }
+
+ private String[] getCollatorConfig(String localeParam,
+ String collationParam) {
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " NewLocale(" + localeParam + ")",
+ " NewCollationAnalyzer(" + collationParam + ")",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " NewRound",
+ "} : 1",
+ };
+ return algLines;
+ }
+
+ /**
+ * Test that we can create ShingleAnalyzerWrappers.
+ */
+ public void testShingleAnalyzer() throws Exception {
+ String text = "one,two,three, four five six";
+
+ // Default analyzer, maxShingleSize, and outputUnigrams
+ Benchmark benchmark = execBenchmark(getShingleConfig(""));
+ // sanity check: the analyzer can create (and close) a token stream
+ benchmark.getRunData().getAnalyzer().tokenStream("bogus", new StringReader(text)).close();
+ assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
+ new String[] {"one", "one two", "two", "two three",
+ "three", "three four", "four", "four five",
+ "five", "five six", "six"});
+ // Default analyzer, maxShingleSize = 3, and outputUnigrams = false
+ benchmark = execBenchmark
+ (getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
+ assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
+ new String[] { "one two", "one two three", "two three",
+ "two three four", "three four",
+ "three four five", "four five",
+ "four five six", "five six" });
+ // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
+ benchmark = execBenchmark
+ (getShingleConfig("analyzer:WhitespaceAnalyzer"));
+ assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
+ new String[] { "one,two,three,", "one,two,three, four",
+ "four", "four five", "five", "five six",
+ "six" });
+
+ // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
+ benchmark = execBenchmark
+ (getShingleConfig
+ ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
+ assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
+ new String[] { "one,two,three, four",
+ "one,two,three, four five",
+ "four five", "four five six",
+ "five six" });
+ }
+
+ private void assertEqualShingle
+ (Analyzer analyzer, String text, String[] expected) throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
+ }
+
+ private String[] getShingleConfig(String params) {
+ String algLines[] = {
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "NewShingleAnalyzer(" + params + ")",
+ "CreateIndex",
+ "{ \"AddDocs\" AddDoc > : * "
+ };
+ return algLines;
+ }
+
+ private String getReuters20LinesFile() {
+ return getWorkDirResourcePath("reuters.first20.lines.txt");
+ }
+}