2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.benchmark.byTask;
20 import java.io.BufferedReader;
22 import java.io.FileReader;
23 import java.io.StringReader;
24 import java.text.Collator;
25 import java.util.List;
26 import java.util.Locale;
28 import org.apache.lucene.analysis.Analyzer;
29 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
30 import org.apache.lucene.analysis.MockAnalyzer;
31 import org.apache.lucene.analysis.TokenStream;
32 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
33 import org.apache.lucene.benchmark.BenchmarkTestCase;
34 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
35 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
36 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
37 import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
38 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
39 import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
40 import org.apache.lucene.collation.CollationKeyAnalyzer;
41 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
42 import org.apache.lucene.index.IndexReader;
43 import org.apache.lucene.index.IndexWriter;
44 import org.apache.lucene.index.IndexWriterConfig;
45 import org.apache.lucene.index.LogDocMergePolicy;
46 import org.apache.lucene.index.LogMergePolicy;
47 import org.apache.lucene.index.Term;
48 import org.apache.lucene.index.TermEnum;
49 import org.apache.lucene.index.TermDocs;
50 import org.apache.lucene.index.SegmentInfos;
51 import org.apache.lucene.index.SerialMergeScheduler;
52 import org.apache.lucene.index.TermFreqVector;
53 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
54 import org.apache.lucene.store.Directory;
55 import org.apache.lucene.search.FieldCache.StringIndex;
56 import org.apache.lucene.search.FieldCache;
59 * Test very simply that perf tasks - simple algorithms - are doing what they should.
61 public class TestPerfTasksLogic extends BenchmarkTestCase {
// Per-test setup: copies the 20-line Reuters fixture into the work dir so
// alg definitions can reference it via getReuters20LinesFile().
// NOTE(review): source extract is lossy — lines (e.g. super.setUp() and the
// closing brace) appear to be missing from this view; verify against VCS.
64 public void setUp() throws Exception {
66 copyToWorkDir("reuters.first20.lines.txt");
70 * Test index creation logic
// Runs an index+search algorithm and checks the search count, index
// existence, append-ability, and resulting doc count.
// NOTE(review): source extract is lossy — parts of the alg-lines array,
// close() calls and closing braces are missing from this view.
72 public void testIndexAndSearchTasks() throws Exception {
73 // 1. alg definition (required in every "logic" test)
81 "{ CountingSearchTest } : 200",
83 "[ CountingSearchTest > : 70",
84 "[ CountingSearchTest > : 9",
87 // 2. we test this value later
88 CountingSearchTestTask.numSearches = 0;
90 // 3. execute the algorithm (required in every "logic" test)
91 Benchmark benchmark = execBenchmark(algLines);
93 // 4. test specific checks after the benchmark run completed.
94 assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches);
95 assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
96 // now we should be able to open the index for write.
97 IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
98 new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
99 .setOpenMode(OpenMode.APPEND));
101 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
102 assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
107 * Test timed sequence task.
// Runs a time-bounded search sequence (".5s") and asserts that at least one
// search ran and that the measured elapsed time stayed within 1500 msec.
// NOTE(review): source extract is lossy — parts of the alg-lines array and
// the closing brace are missing from this view.
109 public void testTimedSearchTask() throws Exception {
110 String algLines[] = {
118 "{ CountingSearchTest } : .5s",
122 CountingSearchTestTask.numSearches = 0;
123 execBenchmark(algLines);
124 assertTrue(CountingSearchTestTask.numSearches > 0);
125 long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis;
126 assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
129 // disabled until we fix BG thread prio -- this test
130 // causes build to hang
// Exercises background ("&-1") search task threads; only asserts that some
// searches occurred. NOTE(review): lossy extract — alg-lines array is
// partially missing from this view.
131 public void testBGSearchTaskThreads() throws Exception {
132 String algLines[] = {
133 "log.time.step.msec = 100",
142 " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
149 CountingSearchTestTask.numSearches = 0;
150 execBenchmark(algLines);
151 assertTrue(CountingSearchTestTask.numSearches > 0);
// Runs CountingHighlighterTest over the Reuters line docs and checks the
// retrieved/highlighted doc counts, then re-opens the index for write/read.
// NOTE(review): lossy extract — parts of the alg-lines array, close() calls
// and closing braces are missing from this view.
154 public void testHighlighting() throws Exception {
155 // 1. alg definition (required in every "logic" test)
156 String algLines[] = {
158 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
159 "docs.file=" + getReuters20LinesFile(),
160 "query.maker=" + ReutersQueryMaker.class.getName(),
167 "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
171 // 2. we test this value later
172 CountingHighlighterTestTask.numHighlightedResults = 0;
173 CountingHighlighterTestTask.numDocsRetrieved = 0;
174 // 3. execute the algorithm (required in every "logic" test)
175 Benchmark benchmark = execBenchmark(algLines);
177 // 4. test specific checks after the benchmark run completed.
178 assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
179 //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
180 //we probably should use a different doc/query maker, but...
181 assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
183 assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
184 // now we should be able to open the index for write.
185 IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
187 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
188 assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
// Same as testHighlighting but with term-vector offsets enabled
// (doc.term.vector.offsets=true) so highlighting can use term vectors.
// NOTE(review): lossy extract — parts of the alg-lines array, close() calls
// and closing braces are missing from this view.
192 public void testHighlightingTV() throws Exception {
193 // 1. alg definition (required in every "logic" test)
194 String algLines[] = {
195 "doc.stored=true",//doc storage is required in order to have text to highlight
196 "doc.term.vector.offsets=true",
197 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
198 "docs.file=" + getReuters20LinesFile(),
199 "query.maker=" + ReutersQueryMaker.class.getName(),
206 "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
210 // 2. we test this value later
211 CountingHighlighterTestTask.numHighlightedResults = 0;
212 CountingHighlighterTestTask.numDocsRetrieved = 0;
213 // 3. execute the algorithm (required in every "logic" test)
214 Benchmark benchmark = execBenchmark(algLines);
216 // 4. test specific checks after the benchmark run completed.
217 assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
218 //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
219 //we probably should use a different doc/query maker, but...
220 assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
222 assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
223 // now we should be able to open the index for write.
224 IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
226 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
227 assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
// Negative test: highlighting without stored fields or term vectors should
// make the benchmark throw; the assertTrue(false) after execBenchmark must
// be unreachable. NOTE(review): lossy extract — the try opener, the catch
// body and closing braces are missing from this view.
231 public void testHighlightingNoTvNoStore() throws Exception {
232 // 1. alg definition (required in every "logic" test)
233 String algLines[] = {
235 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
236 "docs.file=" + getReuters20LinesFile(),
237 "query.maker=" + ReutersQueryMaker.class.getName(),
244 "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
248 // 2. we test this value later
249 CountingHighlighterTestTask.numHighlightedResults = 0;
250 CountingHighlighterTestTask.numDocsRetrieved = 0;
251 // 3. execute the algorithm (required in every "logic" test)
253 Benchmark benchmark = execBenchmark(algLines);
254 assertTrue("CountingHighlighterTest should have thrown an exception", false);
255 assertNotNull(benchmark); // (avoid compile warning on unused variable)
256 } catch (Exception e) {
262 * Test Exhausting Doc Maker logic
// Exhausts a non-forever SingleDocSource: exactly one doc should end up in
// the index, and the search tasks should have run 139 times total.
// NOTE(review): lossy extract — parts of the alg-lines array, close() calls
// and closing braces are missing from this view.
264 public void testExhaustContentSource() throws Exception {
265 // 1. alg definition (required in every "logic" test)
266 String algLines[] = {
267 "# ----- properties ",
268 "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
269 "content.source.log.step=1",
270 "doc.term.vector=false",
271 "content.source.forever=false",
272 "directory=RAMDirectory",
274 "doc.tokenized=false",
281 "{ CountingSearchTest } : 100",
283 "[ CountingSearchTest > : 30",
284 "[ CountingSearchTest > : 9",
287 // 2. we test this value later
288 CountingSearchTestTask.numSearches = 0;
290 // 3. execute the algorithm (required in every "logic" test)
291 Benchmark benchmark = execBenchmark(algLines);
293 // 4. test specific checks after the benchmark run completed.
294 assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
295 assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
296 // now we should be able to open the index for write.
297 IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
299 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
300 assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
304 // LUCENE-1994: test thread safety of SortableSingleDocMaker
// Adds 4x250 docs concurrently, then verifies via FieldCache that every doc
// got a non-null "country" value (i.e. no field mixups across threads).
// NOTE(review): lossy extract — parts of the alg-lines array, the loop
// closer and reader close are missing from this view.
305 public void testDocMakerThreadSafety() throws Exception {
306 // 1. alg definition (required in every "logic" test)
307 String algLines[] = {
308 "# ----- properties ",
309 "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
310 "doc.term.vector=false",
311 "log.step.AddDoc=10000",
312 "content.source.forever=true",
313 "directory=RAMDirectory",
314 "doc.reuse.fields=false",
316 "doc.tokenized=false",
317 "doc.index.props=true",
320 "[ { AddDoc > : 250 ] : 4",
324 // 2. we test this value later
325 CountingSearchTestTask.numSearches = 0;
327 // 3. execute the algorithm (required in every "logic" test)
328 Benchmark benchmark = execBenchmark(algLines);
330 IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
331 StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
332 final int maxDoc = r.maxDoc();
333 assertEquals(1000, maxDoc);
334 for(int i=0;i<1000;i++) {
335 assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
341 * Test Parallel Doc Maker logic (for LUCENE-940)
// LUCENE-940: four parallel AddDoc threads over an exhaustible source
// should index the 20 Reuters docs exactly once.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
343 public void testParallelDocMaker() throws Exception {
344 // 1. alg definition (required in every "logic" test)
345 String algLines[] = {
346 "# ----- properties ",
347 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
348 "docs.file=" + getReuters20LinesFile(),
349 "content.source.log.step=3",
350 "doc.term.vector=false",
351 "content.source.forever=false",
352 "directory=FSDirectory",
354 "doc.tokenized=false",
357 "[ { AddDoc } : * ] : 4 ",
361 // 2. execute the algorithm (required in every "logic" test)
362 Benchmark benchmark = execBenchmark(algLines);
364 // 3. test number of docs in the index
365 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
366 int ndocsExpected = 20; // first 20 reuters docs.
367 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
372 * Test WriteLineDoc and LineDocSource.
// Round-trip test for WriteLineDocTask / LineDocSource: writes NUM_TRY_DOCS
// docs to a line file, counts the lines (skipping the fields header), then
// indexes that file and checks the index doc count matches the line count.
// NOTE(review): lossy extract — numLines declaration/increment, parts of
// algLines2, close() calls and closing braces are missing from this view.
374 public void testLineDocFile() throws Exception {
375 File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt");
377 // We will call WriteLineDocs this many times
378 final int NUM_TRY_DOCS = 50;
380 // Creates a line file with first 50 docs from SingleDocSource
381 String algLines1[] = {
382 "# ----- properties ",
383 "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
384 "content.source.forever=true",
385 "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
387 "{WriteLineDoc()}:" + NUM_TRY_DOCS,
391 Benchmark benchmark = execBenchmark(algLines1);
393 BufferedReader r = new BufferedReader(new FileReader(lineFile));
396 while((line = r.readLine()) != null) {
397 if (numLines==0 && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)) {
398 continue; // do not count the header line as a doc
403 assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
405 // Index the line docs
406 String algLines2[] = {
407 "# ----- properties ",
408 "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
409 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
410 "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
411 "content.source.forever=false",
412 "doc.reuse.fields=false",
422 benchmark = execBenchmark(algLines2);
424 // now we should be able to open the index for write.
425 IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
426 new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
427 .setOpenMode(OpenMode.APPEND));
430 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
431 assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
438 * Test ReadTokensTask
// Verifies ReadTokens: the token count reported by the task must equal the
// number of token occurrences actually indexed (summed via TermEnum /
// TermDocs, skipping the not-tokenized id/date/time fields).
// NOTE(review): lossy extract — loop/brace closers and close() calls are
// missing from this view. Also note the field() comparisons use == on
// Strings — presumably relying on interned field names; verify in VCS.
440 public void testReadTokens() throws Exception {
442 // We will call ReadTokens on this many docs
443 final int NUM_DOCS = 20;
445 // Read tokens from first NUM_DOCS docs from Reuters and
446 // then build index from the same docs
447 String algLines1[] = {
448 "# ----- properties ",
449 "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
450 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
451 "docs.file=" + getReuters20LinesFile(),
453 "{ReadTokens}: " + NUM_DOCS,
456 "{AddDoc}: " + NUM_DOCS,
461 Benchmark benchmark = execBenchmark(algLines1);
463 List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
465 // Count how many tokens all ReadTokens saw
466 int totalTokenCount1 = 0;
467 for (final TaskStats stat : stats) {
468 if (stat.getTask().getName().equals("ReadTokens")) {
469 totalTokenCount1 += stat.getCount();
473 // Separately count how many tokens are actually in the index:
474 IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
475 assertEquals(NUM_DOCS, reader.numDocs());
477 TermEnum terms = reader.terms();
478 TermDocs termDocs = reader.termDocs();
479 int totalTokenCount2 = 0;
480 while(terms.next()) {
481 Term term = terms.term();
482 /* not-tokenized, but indexed field */
483 if (term != null && term.field() != DocMaker.ID_FIELD && term.field() != DocMaker.DATE_MSEC_FIELD && term.field() != DocMaker.TIME_SEC_FIELD) {
484 termDocs.seek(terms.term());
485 while (termDocs.next())
486 totalTokenCount2 += termDocs.freq();
491 // Make sure they are the same
492 assertEquals(totalTokenCount1, totalTokenCount2);
496 * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
// LUCENE-941: two exhaust-driven "[AddDoc]:4 : *" rounds should index the
// 20 Reuters docs twice (40 docs total).
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
498 public void testParallelExhausted() throws Exception {
499 // 1. alg definition (required in every "logic" test)
500 String algLines[] = {
501 "# ----- properties ",
502 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
503 "docs.file=" + getReuters20LinesFile(),
504 "content.source.log.step=3",
505 "doc.term.vector=false",
506 "content.source.forever=false",
507 "directory=RAMDirectory",
509 "doc.tokenized=false",
510 "task.max.depth.log=1",
513 "{ [ AddDoc]: 4} : * ",
515 "{ [ AddDoc]: 4} : * ",
520 // 2. execute the algorithm (required in every "logic" test)
521 Benchmark benchmark = execBenchmark(algLines);
523 // 3. test number of docs in the index
524 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
525 int ndocsExpected = 2 * 20; // first 20 reuters docs.
526 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
532 * Test that exhaust in loop works as expected (LUCENE-1115).
// LUCENE-1115: an exhaustible source inside a loop must still stop at the
// 20 available docs rather than repeating them.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
534 public void testExhaustedLooped() throws Exception {
535 // 1. alg definition (required in every "logic" test)
536 String algLines[] = {
537 "# ----- properties ",
538 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
539 "docs.file=" + getReuters20LinesFile(),
540 "content.source.log.step=3",
541 "doc.term.vector=false",
542 "content.source.forever=false",
543 "directory=RAMDirectory",
545 "doc.tokenized=false",
546 "task.max.depth.log=1",
551 " { \"AddDocs\" AddDoc > : * ",
557 // 2. execute the algorithm (required in every "logic" test)
558 Benchmark benchmark = execBenchmark(algLines);
560 // 3. test number of docs in the index
561 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
562 int ndocsExpected = 20; // first 20 reuters docs.
563 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
568 * Test that we can close IndexWriter with argument "false".
// Verifies CloseIndex(false) — closing the IndexWriter without waiting for
// merges — still leaves all 20 docs searchable.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
570 public void testCloseIndexFalse() throws Exception {
571 // 1. alg definition (required in every "logic" test)
572 String algLines[] = {
573 "# ----- properties ",
574 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
575 "docs.file=" + getReuters20LinesFile(),
578 "content.source.log.step=3",
579 "doc.term.vector=false",
580 "content.source.forever=false",
581 "directory=RAMDirectory",
583 "doc.tokenized=false",
589 " { \"AddDocs\" AddDoc > : * ",
590 " CloseIndex(false)",
594 // 2. execute the algorithm (required in every "logic" test)
595 Benchmark benchmark = execBenchmark(algLines);
597 // 3. test number of docs in the index
598 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
599 int ndocsExpected = 20; // first 20 reuters docs.
600 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
// Test double: a SerialMergeScheduler that records it was instantiated so
// testMergeScheduler can assert the "merge.scheduler" property took effect.
// NOTE(review): class body (the 'called' flag set in the constructor, per
// the assertion in testMergeScheduler) is missing from this lossy view.
604 public static class MyMergeScheduler extends SerialMergeScheduler {
606 public MyMergeScheduler() {
// Verifies DeleteByPercent(20): after deleting 20% of the 20 Reuters docs,
// 16 docs remain visible to the reader.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
612 public void testDeleteByPercent() throws Exception {
613 // 1. alg definition (required in every "logic" test)
614 String algLines[] = {
615 "# ----- properties ",
616 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
617 "docs.file=" + getReuters20LinesFile(),
620 "content.source.log.step=3",
621 "doc.term.vector=false",
622 "content.source.forever=false",
623 "directory=RAMDirectory",
625 "doc.tokenized=false",
629 "{ \"AddDocs\" AddDoc > : * ",
632 "DeleteByPercent(20)",
636 // 2. execute the algorithm (required in every "logic" test)
637 Benchmark benchmark = execBenchmark(algLines);
639 // 3. test number of docs in the index
640 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
641 int ndocsExpected = 16; // first 20 reuters docs, minus 20%
642 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
647 * Test that we can set the merge scheduler.
// Verifies that the "merge.scheduler" property installs MyMergeScheduler on
// the IndexWriter's config, then checks the indexed doc count.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
649 public void testMergeScheduler() throws Exception {
650 // 1. alg definition (required in every "logic" test)
651 String algLines[] = {
652 "# ----- properties ",
653 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
654 "docs.file=" + getReuters20LinesFile(),
655 "content.source.log.step=3",
656 "doc.term.vector=false",
657 "content.source.forever=false",
658 "directory=RAMDirectory",
659 "merge.scheduler=" + MyMergeScheduler.class.getName(),
661 "doc.tokenized=false",
667 " { \"AddDocs\" AddDoc > : * ",
670 // 2. execute the algorithm (required in every "logic" test)
671 Benchmark benchmark = execBenchmark(algLines);
673 assertTrue("did not use the specified MergeScheduler",
674 ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig()
675 .getMergeScheduler()).called);
676 benchmark.getRunData().getIndexWriter().close();
678 // 3. test number of docs in the index
679 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
680 int ndocsExpected = 20; // first 20 reuters docs.
681 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
// Test double: a LogDocMergePolicy that records it was instantiated so
// testMergePolicy can assert the "merge.policy" property took effect.
// NOTE(review): class body (the 'called' flag set in the constructor, per
// the assertion in testMergePolicy) is missing from this lossy view.
685 public static class MyMergePolicy extends LogDocMergePolicy {
687 public MyMergePolicy() {
693 * Test that we can set the merge policy.
// Verifies that the "merge.policy" property installs MyMergePolicy on the
// IndexWriter's config, then checks the indexed doc count.
// NOTE(review): lossy extract — parts of the alg-lines array, reader close
// and closing braces are missing from this view.
695 public void testMergePolicy() throws Exception {
696 // 1. alg definition (required in every "logic" test)
697 String algLines[] = {
698 "# ----- properties ",
699 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
700 "docs.file=" + getReuters20LinesFile(),
701 "content.source.log.step=3",
704 "doc.term.vector=false",
705 "content.source.forever=false",
706 "directory=RAMDirectory",
707 "merge.policy=" + MyMergePolicy.class.getName(),
709 "doc.tokenized=false",
715 " { \"AddDocs\" AddDoc > : * ",
719 // 2. execute the algorithm (required in every "logic" test)
720 Benchmark benchmark = execBenchmark(algLines);
721 assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called);
722 benchmark.getRunData().getIndexWriter().close();
724 // 3. test number of docs in the index
725 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
726 int ndocsExpected = 20; // first 20 reuters docs.
727 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
732 * Test that IndexWriter settings stick.
// Verifies that alg-file IndexWriter settings (max buffered docs, RAM
// buffer, merge factor, compound file) stick on the writer config, and that
// term vectors were written (readable via getTermFreqVectors).
// NOTE(review): lossy extract — parts of the alg-lines array (e.g. the
// buffered-docs/merge-factor properties), close() calls and closing braces
// are missing from this view.
734 public void testIndexWriterSettings() throws Exception {
735 // 1. alg definition (required in every "logic" test)
736 String algLines[] = {
737 "# ----- properties ",
738 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
739 "docs.file=" + getReuters20LinesFile(),
740 "content.source.log.step=3",
743 "compound=cmpnd:true:false",
744 "doc.term.vector=vector:false:true",
745 "content.source.forever=false",
746 "directory=RAMDirectory",
749 "doc.tokenized=false",
755 " { \"AddDocs\" AddDoc > : * ",
760 // 2. execute the algorithm (required in every "logic" test)
761 Benchmark benchmark = execBenchmark(algLines);
762 final IndexWriter writer = benchmark.getRunData().getIndexWriter();
763 assertEquals(2, writer.getConfig().getMaxBufferedDocs());
764 assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
765 assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
766 assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
768 Directory dir = benchmark.getRunData().getDirectory();
769 IndexReader reader = IndexReader.open(dir, true);
770 TermFreqVector [] tfv = reader.getTermFreqVectors(0);
772 assertTrue(tfv.length > 0);
777 * Test indexing with facets tasks.
// Verifies the taxonomy (facets) task chain: after the run the taxonomy
// writer must be closed (null), the taxonomy reader open, and the taxonomy
// must contain more than just the root category.
// NOTE(review): lossy extract — parts of the alg-lines array and closing
// braces are missing from this view.
779 public void testIndexingWithFacets() throws Exception {
780 // 1. alg definition (required in every "logic" test)
781 String algLines[] = {
782 "# ----- properties ",
783 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
784 "docs.file=" + getReuters20LinesFile(),
785 "content.source.log.step=100",
786 "content.source.forever=false",
787 "directory=RAMDirectory",
790 "doc.tokenized=false",
795 "CreateTaxonomyIndex",
796 "{ \"AddDocs\" AddFacetedDoc > : * ",
798 "CloseTaxonomyIndex",
799 "OpenTaxonomyReader",
802 // 2. execute the algorithm (required in every "logic" test)
803 Benchmark benchmark = execBenchmark(algLines);
804 PerfRunData runData = benchmark.getRunData();
805 assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
806 TaxonomyReader taxoReader = runData.getTaxonomyReader();
807 assertNotNull("taxo reader was not opened", taxoReader);
808 assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
813 * Test that we can call forceMerge(maxNumSegments).
// Verifies ForceMerge(maxNumSegments): after the run the index must hold
// the 20 docs and exactly 3 segments (read via SegmentInfos).
// NOTE(review): lossy extract — the ForceMerge task line itself and parts
// of the alg-lines array / closing braces are missing from this view.
815 public void testForceMerge() throws Exception {
816 // 1. alg definition (required in every "logic" test)
817 String algLines[] = {
818 "# ----- properties ",
819 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
820 "docs.file=" + getReuters20LinesFile(),
821 "content.source.log.step=3",
824 "doc.term.vector=false",
825 "content.source.forever=false",
826 "directory=RAMDirectory",
827 "merge.policy=org.apache.lucene.index.LogDocMergePolicy",
829 "doc.tokenized=false",
835 " { \"AddDocs\" AddDoc > : * ",
841 // 2. execute the algorithm (required in every "logic" test)
842 Benchmark benchmark = execBenchmark(algLines);
844 // 3. test number of docs in the index
845 IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
846 int ndocsExpected = 20; // first 20 reuters docs.
847 assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
850 // Make sure we have 3 segments:
851 SegmentInfos infos = new SegmentInfos();
852 infos.read(benchmark.getRunData().getDirectory());
853 assertEquals(3, infos.size());
857 * Test disabling task count (LUCENE-1136).
// LUCENE-1136: runs the disable-counting scenario both with counting
// disabled and enabled. NOTE(review): closing brace missing in this view.
859 public void testDisableCounting() throws Exception {
860 doTestDisableCounting(true);
861 doTestDisableCounting(false);
// Shared body for testDisableCounting: runs disableCountingLines(disable)
// and checks per-task counts — n==0 when a task's counting is disabled.
// NOTE(review): lossy extract — the nChecked declaration/increments and
// loop/method closers are missing from this view.
864 private void doTestDisableCounting(boolean disable) throws Exception {
865 // 1. alg definition (required in every "logic" test)
866 String algLines[] = disableCountingLines(disable);
868 // 2. execute the algorithm (required in every "logic" test)
869 Benchmark benchmark = execBenchmark(algLines);
872 int n = disable ? 0 : 1;
874 for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) {
875 String taskName = stats.getTask().getName();
876 if (taskName.equals("Rounds")) {
877 assertEquals("Wrong total count!",20+2*n,stats.getCount());
879 } else if (taskName.equals("CreateIndex")) {
880 assertEquals("Wrong count for CreateIndex!",n,stats.getCount());
882 } else if (taskName.equals("CloseIndex")) {
883 assertEquals("Wrong count for CloseIndex!",n,stats.getCount());
887 assertEquals("Missing some tasks to check!",3,nChecked);
// Builds the alg lines for the disable-counting test; when 'disable' is
// true the "-" prefix turns off counting for CreateIndex/CloseIndex.
// NOTE(review): lossy extract — trailing array entries and the closing
// "};" / brace are missing from this view.
890 private String[] disableCountingLines (boolean disable) {
891 String dis = disable ? "-" : "";
892 return new String[] {
893 "# ----- properties ",
894 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
895 "docs.file=" + getReuters20LinesFile(),
896 "content.source.log.step=30",
897 "doc.term.vector=false",
898 "content.source.forever=false",
899 "directory=RAMDirectory",
901 "doc.tokenized=false",
902 "task.max.depth.log=1",
906 " "+dis+"CreateIndex", // optionally disable counting here
907 " { \"AddDocs\" AddDoc > : * ",
908 " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks)
915 * Test that we can change the Locale in the runData,
916 * that it is parsed as we expect.
// Verifies NewLocale(...) parsing in the alg file: empty clears the locale,
// "ROOT" maps to the root locale, and language/country/variant combinations
// are parsed into the equivalent java.util.Locale.
// NOTE(review): lossy extract — the closing brace is missing from this view.
918 public void testLocale() throws Exception {
919 // empty Locale: clear it (null)
920 Benchmark benchmark = execBenchmark(getLocaleConfig(""));
921 assertNull(benchmark.getRunData().getLocale());
924 benchmark = execBenchmark(getLocaleConfig("ROOT"));
925 assertEquals(new Locale(""), benchmark.getRunData().getLocale());
927 // specify just a language
928 benchmark = execBenchmark(getLocaleConfig("de"));
929 assertEquals(new Locale("de"), benchmark.getRunData().getLocale());
931 // specify language + country
932 benchmark = execBenchmark(getLocaleConfig("en,US"));
933 assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());
935 // specify language + country + variant
936 benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
937 assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
// Builds the alg lines for testLocale, injecting the given locale parameter
// into a NewLocale(...) task.
// NOTE(review): lossy extract — trailing array entries, the "};", the
// return statement and closing brace are missing from this view.
940 private String[] getLocaleConfig(String localeParam) {
941 String algLines[] = {
942 "# ----- properties ",
943 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
944 "docs.file=" + getReuters20LinesFile(),
945 "content.source.log.step=3",
946 "content.source.forever=false",
947 "directory=RAMDirectory",
951 " NewLocale(" + localeParam + ")",
953 " { \"AddDocs\" AddDoc > : * ",
961 * Test that we can create CollationAnalyzers.
// Verifies NewCollationAnalyzer(impl:jdk): for several locales, the run
// data's analyzer must produce the same collation key as a directly
// constructed CollationKeyAnalyzer for that locale.
// NOTE(review): lossy extract — the country strings on lines 978/984 and
// the closing brace are missing from this view.
963 public void testCollator() throws Exception {
965 Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
966 CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
967 .getInstance(new Locale("")));
968 assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
970 // specify just a language
971 benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
972 expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
973 assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
975 // specify language + country
976 benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
977 expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
979 assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
981 // specify language + country + variant
982 benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
983 expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
985 assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
// Asserts two analyzers tokenize 'text' into a single identical term
// (compares CharTermAttribute text of the one-and-only token).
// NOTE(review): lossy extract — the 'throws' clause tail, end()/close()
// calls and closing brace are missing from this view.
988 private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
990 TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
991 TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
994 CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
995 CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
996 assertTrue(ts1.incrementToken());
997 assertTrue(ts2.incrementToken());
998 assertEquals(termAtt1.toString(), termAtt2.toString());
999 assertFalse(ts1.incrementToken());
1000 assertFalse(ts2.incrementToken());
// Builds the alg lines for testCollator, injecting NewLocale(...) and
// NewCollationAnalyzer(...) tasks with the given parameters.
// NOTE(review): lossy extract — trailing array entries, the "};", the
// return statement and closing brace are missing from this view.
1005 private String[] getCollatorConfig(String localeParam,
1006 String collationParam) {
1007 String algLines[] = {
1008 "# ----- properties ",
1009 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
1010 "docs.file=" + getReuters20LinesFile(),
1011 "content.source.log.step=3",
1012 "content.source.forever=false",
1013 "directory=RAMDirectory",
1016 " ResetSystemErase",
1017 " NewLocale(" + localeParam + ")",
1018 " NewCollationAnalyzer(" + collationParam + ")",
1020 " { \"AddDocs\" AddDoc > : * ",
1028 * Test that we can create ShingleAnalyzerWrappers.
// Verifies NewShingleAnalyzer(...): checks the produced shingles for the
// default analyzer and WhitespaceAnalyzer, with default and custom
// maxShingleSize/outputUnigrams settings.
// NOTE(review): lossy extract — some expected-token array tails (lines
// 1055/1065 region) and the closing brace are missing from this view.
1030 public void testShingleAnalyzer() throws Exception {
1031 String text = "one,two,three, four five six";
1033 // Default analyzer, maxShingleSize, and outputUnigrams
1034 Benchmark benchmark = execBenchmark(getShingleConfig(""));
1035 benchmark.getRunData().getAnalyzer().tokenStream
1036 ("bogus", new StringReader(text)).close();
1037 assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
1038 new String[] {"one", "one two", "two", "two three",
1039 "three", "three four", "four", "four five",
1040 "five", "five six", "six"});
1041 // Default analyzer, maxShingleSize = 3, and outputUnigrams = false
1042 benchmark = execBenchmark
1043 (getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
1044 assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
1045 new String[] { "one two", "one two three", "two three",
1046 "two three four", "three four",
1047 "three four five", "four five",
1048 "four five six", "five six" });
1049 // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
1050 benchmark = execBenchmark
1051 (getShingleConfig("analyzer:WhitespaceAnalyzer"));
1052 assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
1053 new String[] { "one,two,three,", "one,two,three, four",
1054 "four", "four five", "five", "five six",
1057 // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
1058 benchmark = execBenchmark
1060 ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
1061 assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
1062 new String[] { "one,two,three, four",
1063 "one,two,three, four five",
1064 "four five", "four five six",
// Delegates to BaseTokenStreamTestCase to assert the analyzer emits exactly
// the expected token sequence for 'text'.
// NOTE(review): the closing brace is missing from this lossy view.
1068 private void assertEqualShingle
1069 (Analyzer analyzer, String text, String[] expected) throws Exception {
1070 BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
// Builds the alg lines for testShingleAnalyzer, injecting the given
// NewShingleAnalyzer(...) parameters.
// NOTE(review): lossy extract — the "};", return statement and closing
// brace are missing from this view.
1073 private String[] getShingleConfig(String params) {
1074 String algLines[] = {
1075 "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
1076 "docs.file=" + getReuters20LinesFile(),
1077 "content.source.forever=false",
1078 "directory=RAMDirectory",
1079 "NewShingleAnalyzer(" + params + ")",
1081 "{ \"AddDocs\" AddDoc > : * "
1086 private String getReuters20LinesFile() {
1087 return getWorkDirResourcePath("reuters.first20.lines.txt");