1 package org.apache.lucene.index;
/*
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.Random;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
30 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.LuceneTestCase;
33 class RepeatingTokenStream extends TokenStream {
35 CharTermAttribute termAtt;
38 public RepeatingTokenStream(String val) {
40 this.termAtt = addAttribute(CharTermAttribute.class);
44 public boolean incrementToken() throws IOException {
48 termAtt.append(value);
56 public class TestTermdocPerf extends LuceneTestCase {
58 void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
59 final RepeatingTokenStream ts = new RepeatingTokenStream(val);
61 Analyzer analyzer = new Analyzer() {
63 public TokenStream tokenStream(String fieldName, Reader reader) {
64 if (random.nextFloat() < percentDocs) ts.num = random.nextInt(maxTF)+1;
70 Document doc = new Document();
71 doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
72 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
73 TEST_VERSION_CURRENT, analyzer)
74 .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100));
75 ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
77 for (int i=0; i<ndocs; i++) {
78 writer.addDocument(doc);
86 public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
87 Directory dir = newDirectory();
89 long start = System.currentTimeMillis();
90 addDocs(random, dir, ndocs, "foo", "val", maxTF, percentDocs);
91 long end = System.currentTimeMillis();
92 if (VERBOSE) System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));
94 IndexReader reader = IndexReader.open(dir, true);
95 TermEnum tenum = reader.terms(new Term("foo","val"));
96 TermDocs tdocs = reader.termDocs();
98 start = System.currentTimeMillis();
101 for (int i=0; i<iter; i++) {
103 while (tdocs.next()) {
108 end = System.currentTimeMillis();
109 if (VERBOSE) System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end-start));
114 public void testTermDocPerf() throws IOException {
115 // performance test for 10% of documents containing a term
116 // doTest(100000, 10000,3,.1f);