/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.StringReader;
import java.text.Collator;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.FieldCache.StringIndex;
import org.apache.lucene.search.FieldCache;
/**
 * Test very simply that perf tasks - simple algorithms - are doing what they should.
 */
public class TestPerfTasksLogic extends BenchmarkTestCase {

  @Override
  public void setUp() throws Exception {
    super.setUp();
    copyToWorkDir("reuters.first20.lines.txt");
  }
  /**
   * Test index creation logic
   */
  public void testIndexAndSearchTasks() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "{ CountingSearchTest } : 200",
        "[ CountingSearchTest > : 70",
        "[ CountingSearchTest > : 9",
    };

    // 2. we test this value later
    CountingSearchTestTask.numSearches = 0;

    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 279, CountingSearchTestTask.numSearches);
    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
            .setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals("1000 docs were added to the index, this is what we expect to find!", 1000, ir.numDocs());
    ir.close();
  }
  /**
   * Test timed sequence task.
   */
  public void testTimedSearchTask() throws Exception {
    String algLines[] = {
        "{ CountingSearchTest } : .5s",
    };

    CountingSearchTestTask.numSearches = 0;
    execBenchmark(algLines);
    assertTrue(CountingSearchTestTask.numSearches > 0);
    long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis;
    assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
  }
  // disabled until we fix BG thread prio -- this test
  // causes build to hang
  public void testBGSearchTaskThreads() throws Exception {
    String algLines[] = {
        "log.time.step.msec = 100",
        " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
    };

    CountingSearchTestTask.numSearches = 0;
    execBenchmark(algLines);
    assertTrue(CountingSearchTestTask.numSearches > 0);
  }
  public void testHighlighting() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 92, CountingHighlighterTestTask.numDocsRetrieved);
    // pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    // we probably should use a different doc/query maker, but...
    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals("100 docs were added to the index, this is what we expect to find!", 100, ir.numDocs());
    ir.close();
  }
  public void testHighlightingTV() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "doc.stored=true", // doc storage is required in order to have text to highlight
        "doc.term.vector.offsets=true",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 92, CountingHighlighterTestTask.numDocsRetrieved);
    // pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
    // we probably should use a different doc/query maker, but...
    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);

    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals("1000 docs were added to the index, this is what we expect to find!", 1000, ir.numDocs());
    ir.close();
  }
  public void testHighlightingNoTvNoStore() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "query.maker=" + ReutersQueryMaker.class.getName(),
        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
    };

    // 2. we test this value later
    CountingHighlighterTestTask.numHighlightedResults = 0;
    CountingHighlighterTestTask.numDocsRetrieved = 0;
    // 3. execute the algorithm (required in every "logic" test)
    try {
      Benchmark benchmark = execBenchmark(algLines);
      assertTrue("CountingHighlighterTest should have thrown an exception", false);
      assertNotNull(benchmark); // (avoid compile warning on unused variable)
    } catch (Exception e) {
      // expected: without stored text or term vectors there is nothing to highlight
    }
  }
  /**
   * Test Exhausting Doc Maker logic
   */
  public void testExhaustContentSource() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
        "content.source.log.step=1",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        "{ CountingSearchTest } : 100",
        "[ CountingSearchTest > : 30",
        "[ CountingSearchTest > : 9",
    };

    // 2. we test this value later
    CountingSearchTestTask.numSearches = 0;

    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
    assertEquals("TestSearchTask was supposed to be called!", 139, CountingSearchTestTask.numSearches);
    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals("1 doc was added to the index, this is what we expect to find!", 1, ir.numDocs());
    ir.close();
  }
  // LUCENE-1994: test thread safety of SortableSingleDocMaker
  public void testDocMakerThreadSafety() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
        "doc.term.vector=false",
        "log.step.AddDoc=10000",
        "content.source.forever=true",
        "directory=RAMDirectory",
        "doc.reuse.fields=false",
        "doc.tokenized=false",
        "doc.index.props=true",
        "[ { AddDoc > : 250 ] : 4",
    };

    // 2. we test this value later
    CountingSearchTestTask.numSearches = 0;

    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
    final int maxDoc = r.maxDoc();
    assertEquals(1000, maxDoc);
    for(int i=0;i<1000;i++) {
      assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
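      // idx.order[i] holds the ordinal of doc i's "country" value and idx.lookup maps it back to the string, so a null here means the doc lost its country field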
    }
    r.close();
  }

  /**
   * Test Parallel Doc Maker logic (for LUCENE-940)
   */
  public void testParallelDocMaker() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=FSDirectory",
        "doc.tokenized=false",
        "[ { AddDoc } : * ] : 4 ",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  /**
   * Test WriteLineDoc and LineDocSource.
   */
  public void testLineDocFile() throws Exception {
    File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt");

    // We will call WriteLineDocs this many times
    final int NUM_TRY_DOCS = 50;

    // Creates a line file with first 50 docs from SingleDocSource
    String algLines1[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
        "content.source.forever=true",
        "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
        "{WriteLineDoc()}:" + NUM_TRY_DOCS,
    };

    Benchmark benchmark = execBenchmark(algLines1);

    BufferedReader r = new BufferedReader(new FileReader(lineFile));
    int numLines = 0;
    String line;
    while((line = r.readLine()) != null) {
      if (numLines==0 && line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR)) {
        continue; // do not count the header line as a doc
      }
      numLines++;
    }
    r.close();
    assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
    // Index the line docs
    String algLines2[] = {
        "# ----- properties ",
        "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
        "content.source.forever=false",
        "doc.reuse.fields=false",
    };

    benchmark = execBenchmark(algLines2);

    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
            .setOpenMode(OpenMode.APPEND));
    iw.close();

    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
    ir.close();
  }
  /**
   * Test ReadTokensTask
   */
  public void testReadTokens() throws Exception {

    // We will call ReadTokens on this many docs
    final int NUM_DOCS = 20;

    // Read tokens from first NUM_DOCS docs from Reuters and
    // then build index from the same docs
    String algLines1[] = {
        "# ----- properties ",
        "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "{ReadTokens}: " + NUM_DOCS,
        "{AddDoc}: " + NUM_DOCS,
    };

    Benchmark benchmark = execBenchmark(algLines1);

    List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

    // Count how many tokens all ReadTokens saw
    int totalTokenCount1 = 0;
    for (final TaskStats stat : stats) {
      if (stat.getTask().getName().equals("ReadTokens")) {
        totalTokenCount1 += stat.getCount();
      }
    }

    // Separately count how many tokens are actually in the index:
    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals(NUM_DOCS, reader.numDocs());

    TermEnum terms = reader.terms();
    TermDocs termDocs = reader.termDocs();
    int totalTokenCount2 = 0;
    while(terms.next()) {
      Term term = terms.term();
      /* not-tokenized, but indexed field */
      if (term != null && term.field() != DocMaker.ID_FIELD && term.field() != DocMaker.DATE_MSEC_FIELD && term.field() != DocMaker.TIME_SEC_FIELD) {
        termDocs.seek(terms.term());
        while (termDocs.next())
          totalTokenCount2 += termDocs.freq();
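        // summing freq() over every term and doc (skipping the untokenized id/date/time fields) yields the total number of tokens in the index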
      }
    }
    reader.close();

    // Make sure they are the same
    assertEquals(totalTokenCount1, totalTokenCount2);
  }
  /**
   * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
   */
  public void testParallelExhausted() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        "task.max.depth.log=1",
        "{ [ AddDoc]: 4} : * ",
        "{ [ AddDoc]: 4} : * ",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 2 * 20; // first 20 reuters docs, added twice.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  /**
   * Test that exhaust in loop works as expected (LUCENE-1115).
   */
  public void testExhaustedLooped() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        "task.max.depth.log=1",
        " { \"AddDocs\" AddDoc > : * ",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  /**
   * Test that we can close IndexWriter with argument "false".
   */
  public void testCloseIndexFalse() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        " { \"AddDocs\" AddDoc > : * ",
        " CloseIndex(false)",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  public static class MyMergeScheduler extends SerialMergeScheduler {
    boolean called;
    public MyMergeScheduler() {
      super();
      called = true;
    }
  }
  public void testDeleteByPercent() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        "{ \"AddDocs\" AddDoc > : * ",
        "DeleteByPercent(20)",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 16; // first 20 reuters docs, minus 20%
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  /**
   * Test that we can set the merge scheduler.
   */
  public void testMergeScheduler() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "merge.scheduler=" + MyMergeScheduler.class.getName(),
        "doc.tokenized=false",
        " { \"AddDocs\" AddDoc > : * ",
    };
    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    assertTrue("did not use the specified MergeScheduler",
        ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig()
            .getMergeScheduler()).called);
    benchmark.getRunData().getIndexWriter().close();

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  public static class MyMergePolicy extends LogDocMergePolicy {
    boolean called;
    public MyMergePolicy() {
      super();
      called = true;
    }
  }
  /**
   * Test that we can set the merge policy.
   */
  public void testMergePolicy() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "merge.policy=" + MyMergePolicy.class.getName(),
        "doc.tokenized=false",
        " { \"AddDocs\" AddDoc > : * ",
    };
    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);
    assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called);
    benchmark.getRunData().getIndexWriter().close();

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
  /**
   * Test that IndexWriter settings stick.
   */
  public void testIndexWriterSettings() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "compound=cmpnd:true:false",
        "doc.term.vector=vector:false:true",
744 "content.source.forever=false",
745 "directory=RAMDirectory",
748 "doc.tokenized=false",
754 " { \"AddDocs\" AddDoc > : * ",
759 // 2. execute the algorithm (required in every "logic" test)
760 Benchmark benchmark = execBenchmark(algLines);
761 final IndexWriter writer = benchmark.getRunData().getIndexWriter();
762 assertEquals(2, writer.getConfig().getMaxBufferedDocs());
763 assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
764 assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
765 assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
767 Directory dir = benchmark.getRunData().getDirectory();
768 IndexReader reader = IndexReader.open(dir, true);
769 TermFreqVector [] tfv = reader.getTermFreqVectors(0);
771 assertTrue(tfv.length > 0);
  /**
   * Test that we can call optimize(maxNumSegments).
   */
  public void testOptimizeMaxNumSegments() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "merge.policy=org.apache.lucene.index.LogDocMergePolicy",
        "doc.tokenized=false",
        " { \"AddDocs\" AddDoc > : * ",
    };
    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    int ndocsExpected = 20; // first 20 reuters docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();

    // Make sure we have 3 segments:
    SegmentInfos infos = new SegmentInfos();
    infos.read(benchmark.getRunData().getDirectory());
    assertEquals(3, infos.size());
  }
  /**
   * Test disabling task count (LUCENE-1136).
   */
  public void testDisableCounting() throws Exception {
    doTestDisableCounting(true);
    doTestDisableCounting(false);
  }

  private void doTestDisableCounting(boolean disable) throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = disableCountingLines(disable);

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    int n = disable ? 0 : 1;
    int nChecked = 0;
    for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) {
      String taskName = stats.getTask().getName();
      if (taskName.equals("Rounds")) {
        assertEquals("Wrong total count!", 20 + 2 * n, stats.getCount());
        nChecked++;
      } else if (taskName.equals("CreateIndex")) {
        assertEquals("Wrong count for CreateIndex!", n, stats.getCount());
        nChecked++;
      } else if (taskName.equals("CloseIndex")) {
        assertEquals("Wrong count for CloseIndex!", n, stats.getCount());
        nChecked++;
      }
    }
    assertEquals("Missing some tasks to check!", 3, nChecked);
  }
  private String[] disableCountingLines(boolean disable) {
    String dis = disable ? "-" : "";
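    // a "-" prefix on a task name in the alg disables counting it in the task stats (see the CreateIndex/CloseIndex lines below)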
    return new String[] {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=30",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=RAMDirectory",
        "doc.tokenized=false",
        "task.max.depth.log=1",
        " "+dis+"CreateIndex", // optionally disable counting here
        " { \"AddDocs\" AddDoc > : * ",
        " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks)
    };
  }
  /**
   * Test that we can change the Locale in the runData,
   * that it is parsed as we expect.
   */
  public void testLocale() throws Exception {
    // empty Locale: clear it (null)
    Benchmark benchmark = execBenchmark(getLocaleConfig(""));
    assertNull(benchmark.getRunData().getLocale());

    benchmark = execBenchmark(getLocaleConfig("ROOT"));
    assertEquals(new Locale(""), benchmark.getRunData().getLocale());

    // specify just a language
    benchmark = execBenchmark(getLocaleConfig("de"));
    assertEquals(new Locale("de"), benchmark.getRunData().getLocale());

    // specify language + country
    benchmark = execBenchmark(getLocaleConfig("en,US"));
    assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());

    // specify language + country + variant
    benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
    assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
  }
  private String[] getLocaleConfig(String localeParam) {
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "content.source.forever=false",
        "directory=RAMDirectory",
        " NewLocale(" + localeParam + ")",
        " { \"AddDocs\" AddDoc > : * ",
    };
    return algLines;
  }
  /**
   * Test that we can create CollationAnalyzers.
   */
  public void testCollator() throws Exception {
    Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
    CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
        .getInstance(new Locale("")));
    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");

    // specify just a language
    benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");

    // specify language + country
    benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
        "US")));
    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");

    // specify language + country + variant
    benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
        "NO", "NY")));
    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
  }
  private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
    TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
    CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
    CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    assertEquals(termAtt1.toString(), termAtt2.toString());
    assertFalse(ts1.incrementToken());
    assertFalse(ts2.incrementToken());
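    // each analyzer is expected to emit exactly one token for the input (its collation key), and the two keys must be identical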
  }

  private String[] getCollatorConfig(String localeParam,
      String collationParam) {
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.log.step=3",
        "content.source.forever=false",
        "directory=RAMDirectory",
        " NewLocale(" + localeParam + ")",
        " NewCollationAnalyzer(" + collationParam + ")",
        " { \"AddDocs\" AddDoc > : * ",
    };
    return algLines;
  }
  /**
   * Test that we can create ShingleAnalyzerWrappers.
   */
  public void testShingleAnalyzer() throws Exception {
    String text = "one,two,three, four five six";

    // Default analyzer, maxShingleSize, and outputUnigrams
    Benchmark benchmark = execBenchmark(getShingleConfig(""));
    benchmark.getRunData().getAnalyzer().tokenStream
        ("bogus", new StringReader(text)).close();
    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
        new String[] {"one", "one two", "two", "two three",
                      "three", "three four", "four", "four five",
                      "five", "five six", "six"});
    // Default analyzer, maxShingleSize = 3, and outputUnigrams = false
    benchmark = execBenchmark
      (getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
        new String[] { "one two", "one two three", "two three",
                       "two three four", "three four",
                       "three four five", "four five",
                       "four five six", "five six" });

    // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
    benchmark = execBenchmark
      (getShingleConfig("analyzer:WhitespaceAnalyzer"));
    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
        new String[] { "one,two,three,", "one,two,three, four",
                       "four", "four five", "five", "five six",
                       "six" });

    // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
    benchmark = execBenchmark
      (getShingleConfig
        ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
    assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
        new String[] { "one,two,three, four",
                       "one,two,three, four five",
                       "four five", "four five six",
                       "five six" });
  }
  private void assertEqualShingle
      (Analyzer analyzer, String text, String[] expected) throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
  }
  private String[] getShingleConfig(String params) {
    String algLines[] = {
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "content.source.forever=false",
        "directory=RAMDirectory",
        "NewShingleAnalyzer(" + params + ")",
        "{ \"AddDocs\" AddDoc > : * "
    };
    return algLines;
  }
  private String getReuters20LinesFile() {
    return getWorkDirResourcePath("reuters.first20.lines.txt");
  }
}