1 package org.apache.lucene.index;
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.Random;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.ReusableAnalyzerBase;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.analysis.Tokenizer;
28 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
29 import org.apache.lucene.document.Document;
30 import org.apache.lucene.document.Field;
31 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
32 import org.apache.lucene.store.Directory;
33 import org.apache.lucene.util.LuceneTestCase;
// Test-support Tokenizer that emits a single fixed term value repeatedly.
// On each reset(), with probability `percentDocs` it selects a random term
// frequency in [1, maxTF] for the upcoming document; incrementToken() then
// (per the visible append) emits `value` into the term attribute.
// NOTE(review): this extracted view is missing interior lines — e.g. the
// `num`/`value` field declarations, most constructor assignments, the
// incrementToken termination logic, and the else-branch of reset() — the
// comments below describe only what is visible; confirm against the full file.
35 class RepeatingTokenStream extends Tokenizer {
// Source of randomness for both document selection and term frequency.
37 private final Random random;
// Fraction of documents (0..1) that should contain the term at all.
38 private final float percentDocs;
// Upper bound on term frequency for a document that does contain the term.
39 private final int maxTF;
// Term-text attribute this stream populates in incrementToken().
41 CharTermAttribute termAtt;
// Builds a stream repeating `val`; `random`, `percentDocs`, and `maxTF`
// control how often and how many times (see reset()).
// NOTE(review): assignments of `val`, `random`, and `maxTF` are on elided
// lines — only the two visible statements are shown here.
44 public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
47 this.percentDocs = percentDocs;
49 this.termAtt = addAttribute(CharTermAttribute.class);
// Emits the repeated term. The visible line appends `value` to the term
// attribute; the count-down / return-false logic is not visible in this view.
53 public boolean incrementToken() throws IOException {
57 termAtt.append(value);
// Per-document reset: with probability `percentDocs`, choose a term
// frequency of 1..maxTF (presumably 0 otherwise — the else branch is
// on an elided line; confirm against the full file).
64 public void reset() throws IOException {
66 if (random.nextFloat() < percentDocs) {
67 num = random.nextInt(maxTF) + 1;
75 public class TestTermdocPerf extends LuceneTestCase {
// Builds a fresh index of `ndocs` documents in `dir`, where each document's
// `field` is analyzed through a shared RepeatingTokenStream so that roughly
// `percentDocs` of the documents contain `val`, with a per-doc term
// frequency of up to `maxTF`.
// NOTE(review): this extracted view is missing lines — e.g. the anonymous
// analyzer's tokenStream() body, the add-documents loop close, and the
// writer close — confirm against the full file.
77 void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
// One shared stream reused for every document added below.
78 final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
// Analyzer whose tokenStream() (body elided here) presumably hands back
// the shared repeating stream — verify against the full file.
80 Analyzer analyzer = new Analyzer() {
82 public TokenStream tokenStream(String fieldName, Reader reader) {
// Single reused Document: the stored field value is constant; what is
// actually indexed is driven by the analyzer above.
87 Document doc = new Document();
88 doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
// OpenMode.CREATE: always start the benchmark from an empty index.
89 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
90 TEST_VERSION_CURRENT, analyzer)
91 .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100));
// Large merge factor keeps merging infrequent while bulk-adding docs.
92 ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
// Add the same Document ndocs times; per-doc randomness comes from the
// analyzer's token stream, not from the Document itself.
94 for (int i=0; i<ndocs; i++) {
95 writer.addDocument(doc);
// Benchmark driver: indexes `ndocs` documents (about `percentDocs` of them
// containing term foo:"val" with tf up to `maxTF`), then walks the TermDocs
// postings for that term `iter` times, printing wall-clock timings when
// VERBOSE is set.
// NOTE(review): lines are elided in this view — e.g. the tdocs.seek(...)
// call inside the loop, the accumulation of the returned count, the
// `return` statement, and reader/directory cleanup — confirm against the
// full file.
103 public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
104 Directory dir = newDirectory();
// Time index construction separately from the read benchmark.
106 long start = System.currentTimeMillis();
107 addDocs(random, dir, ndocs, "foo", "val", maxTF, percentDocs);
108 long end = System.currentTimeMillis();
109 if (VERBOSE) System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));
// Read-only reader; position a TermEnum at the single indexed term and
// get a reusable TermDocs for postings iteration.
111 IndexReader reader = IndexReader.open(dir, true);
112 TermEnum tenum = reader.terms(new Term("foo","val"));
113 TermDocs tdocs = reader.termDocs();
// Timed section: iterate the full postings list `iter` times.
115 start = System.currentTimeMillis();
118 for (int i=0; i<iter; i++) {
120 while (tdocs.next()) {
125 end = System.currentTimeMillis();
126 if (VERBOSE) System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end-start));
// Test entry point. The actual performance runs are commented out (see the
// doTest call below), so by default this test does no work — uncomment
// locally to benchmark. NOTE(review): the method body continues past the
// visible end of this chunk.
131 public void testTermDocPerf() throws IOException {
132 // performance test for 10% of documents containing a term
133 // doTest(100000, 10000,3,.1f);