1 package org.apache.lucene.index;
4 * Copyright 2006 The Apache Software Foundation
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.Random;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.Tokenizer;
27 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field;
30 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
31 import org.apache.lucene.store.Directory;
32 import org.apache.lucene.util.LuceneTestCase;
// Tokenizer that emits one fixed term repeatedly.  reset() draws a fresh
// per-document repeat count: with probability `percentDocs` the count is
// 1..maxTF (see line 66 below); the else-branch is elided in this chunk,
// but presumably leaves the count at 0 so the doc omits the term — TODO confirm.
// NOTE(review): several interior lines of this class are not visible here
// (e.g. the declarations of `value` and `num` used below, the repeat-count
// bookkeeping in incrementToken(), and the closing braces) — do not treat
// the visible lines as the complete class.
34 class RepeatingTokenStream extends Tokenizer {
36 private final Random random;
37 private final float percentDocs;
38 private final int maxTF;
// Reused term attribute; populated by addAttribute() in the constructor.
40 CharTermAttribute termAtt;
// val: the single term text appended on every incrementToken() call
// (stored into the elided `value` field, presumably — verify).
43 public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
46 this.percentDocs = percentDocs;
48 this.termAtt = addAttribute(CharTermAttribute.class);
// Appends the same term text again; the loop/termination logic that
// consults the per-document count is elided from this view.
52 public boolean incrementToken() throws IOException {
56 termAtt.append(value);
// Randomize how many times the term occurs in the next document.
63 public void reset() throws IOException {
65 if (random.nextFloat() < percentDocs) {
66 num = random.nextInt(maxTF) + 1;
// Performance harness: builds an index in which a single field/term pair
// occurs in a configurable fraction of documents with a randomized term
// frequency, then times TermDocs iteration over those postings.
// NOTE(review): many interior lines are elided in this chunk (loop bodies,
// writer close/optimize, the return value of doTest, closing braces).
74 public class TestTermdocPerf extends LuceneTestCase {
// Adds `ndocs` copies of one document.  The custom Analyzer below is
// backed by the shared RepeatingTokenStream above, so the indexed term
// frequency varies per document as driven by `percentDocs`/`maxTF`.
76 void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
77 final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
79 Analyzer analyzer = new Analyzer() {
// Body elided; presumably returns `ts` regardless of fieldName — confirm.
81 public TokenStream tokenStream(String fieldName, Reader reader) {
86 Document doc = new Document();
// Field.Store.NO: value is indexed only, never stored.
87 doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
// Fresh index (OpenMode.CREATE); small RAM buffer of 100 buffered docs.
88 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
89 TEST_VERSION_CURRENT, analyzer)
90 .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100));
// High merge factor defers merging while bulk-adding documents.
91 ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
93 for (int i=0; i<ndocs; i++) {
94 writer.addDocument(doc);
// Builds the index, then times `iter` passes over the "foo":"val"
// postings via TermDocs.  The value it returns is elided from this view.
102 public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
103 Directory dir = newDirectory();
104 long start = System.currentTimeMillis();
105 addDocs(random, dir, ndocs, "foo", "val", maxTF, percentDocs);
106 long end = System.currentTimeMillis();
107 if (VERBOSE) System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));
// Open a reader (second arg presumably readOnly=true in this Lucene
// version — confirm) and position a TermEnum at the target term; the
// code that seeks `tdocs` from `tenum` is elided here.
110 IndexReader reader = IndexReader.open(dir, true);
111 TermEnum tenum = reader.terms(new Term("foo","val"));
112 TermDocs tdocs = reader.termDocs();
// Timed section: iterate the full postings list `iter` times.
114 start = System.currentTimeMillis();
117 for (int i=0; i<iter; i++) {
119 while (tdocs.next()) {
124 end = System.currentTimeMillis();
125 if (VERBOSE) System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end-start));
// Test entry point.  The heavyweight invocation is intentionally
// commented out, so this test is effectively a no-op in normal runs.
130 public void testTermDocPerf() throws IOException {
131 // performance test for 10% of documents containing a term
132 // doTest(100000, 10000,3,.1f);