1 package org.apache.lucene.index;
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.Random;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.ReusableAnalyzerBase;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.analysis.Tokenizer;
28 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
29 import org.apache.lucene.document.Document;
30 import org.apache.lucene.document.Field;
31 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
32 import org.apache.lucene.store.Directory;
33 import org.apache.lucene.util.LuceneTestCase;
// Test-support Tokenizer that emits a single fixed term value repeatedly.
// On each reset(), with probability `percentDocs` it selects a random term
// frequency in [1, maxTF] for the upcoming document; incrementToken() then
// (per the visible append) emits `value` into the term attribute.
// NOTE(review): this extracted view is missing interior lines — e.g. the
// `num`/`value` field declarations, most constructor assignments, the
// incrementToken termination logic, and the else-branch of reset() — the
// comments below describe only what is visible; confirm against the full file.
35 class RepeatingTokenStream extends Tokenizer {
// Source of randomness for both document selection and term frequency.
37 private final Random random;
// Fraction of documents (0..1) that should contain the term at all.
38 private final float percentDocs;
// Upper bound on term frequency for a document that does contain the term.
39 private final int maxTF;
// Term-text attribute this stream populates in incrementToken().
41 CharTermAttribute termAtt;
// Builds a stream repeating `val`; `random`, `percentDocs`, and `maxTF`
// control how often and how many times (see reset()).
// NOTE(review): assignments of `val`, `random`, and `maxTF` are on elided
// lines — only the two visible statements are shown here.
44 public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
47 this.percentDocs = percentDocs;
49 this.termAtt = addAttribute(CharTermAttribute.class);
// Emits the repeated term. The visible line appends `value` to the term
// attribute; the count-down / return-false logic is not visible in this view.
53 public boolean incrementToken() throws IOException {
57 termAtt.append(value);
// Per-document reset: with probability `percentDocs`, choose a term
// frequency of 1..maxTF (presumably 0 otherwise — the else branch is
// on an elided line; confirm against the full file).
64 public void reset() throws IOException {
66 if (random.nextFloat() < percentDocs) {
67 num = random.nextInt(maxTF) + 1;
75 public class TestTermdocPerf extends LuceneTestCase {
// Builds a fresh index of `ndocs` documents in `dir`, where each document's
// `field` is analyzed through a shared RepeatingTokenStream so that roughly
// `percentDocs` of the documents contain `val`, with a per-doc term
// frequency of up to `maxTF`.
// NOTE(review): this extracted view is missing lines — e.g. the anonymous
// analyzer's tokenStream() body, the add-documents loop close, and the
// writer close — confirm against the full file.
77 void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
// One shared stream reused for every document added below.
78 final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
// Analyzer whose tokenStream() (body elided here) presumably hands back
// the shared repeating stream — verify against the full file.
80 Analyzer analyzer = new Analyzer() {
82 public TokenStream tokenStream(String fieldName, Reader reader) {
// Single reused Document: the stored field value is constant; what is
// actually indexed is driven by the analyzer above.
87 Document doc = new Document();
88 doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
// OpenMode.CREATE: always start the benchmark from an empty index.
89 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
90 TEST_VERSION_CURRENT, analyzer)
91 .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100));
// Large merge factor keeps merging infrequent while bulk-adding docs.
92 ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
// Add the same Document ndocs times; per-doc randomness comes from the
// analyzer's token stream, not from the Document itself.
94 for (int i=0; i<ndocs; i++) {
95 writer.addDocument(doc);
// Benchmark driver: indexes `ndocs` documents (about `percentDocs` of them
// containing term foo:"val" with tf up to `maxTF`), then walks the TermDocs
// postings for that term `iter` times, printing wall-clock timings when
// VERBOSE is set.
// NOTE(review): lines are elided in this view — e.g. the tdocs.seek(...)
// call inside the loop, the accumulation of the returned count, the
// `return` statement, and reader/directory cleanup — confirm against the
// full file.
103 public int doTest(int iter, int ndocs, int maxTF, float percentDocs) throws IOException {
104 Directory dir = newDirectory();
// Time index construction separately from the read benchmark.
106 long start = System.currentTimeMillis();
107 addDocs(random, dir, ndocs, "foo", "val", maxTF, percentDocs);
108 long end = System.currentTimeMillis();
109 if (VERBOSE) System.out.println("milliseconds for creation of " + ndocs + " docs = " + (end-start));
// Read-only reader; position a TermEnum at the single indexed term and
// get a reusable TermDocs for postings iteration.
111 IndexReader reader = IndexReader.open(dir, true);
112 TermEnum tenum = reader.terms(new Term("foo","val"));
113 TermDocs tdocs = reader.termDocs();
// Timed section: iterate the full postings list `iter` times.
115 start = System.currentTimeMillis();
118 for (int i=0; i<iter; i++) {
120 while (tdocs.next()) {
125 end = System.currentTimeMillis();
126 if (VERBOSE) System.out.println("milliseconds for " + iter + " TermDocs iteration: " + (end-start));
// Test entry point. The actual performance runs are commented out (see the
// doTest call below), so by default this test does no work — uncomment
// locally to benchmark. NOTE(review): the method body continues past the
// visible end of this chunk.
131 public void testTermDocPerf() throws IOException {
132 // performance test for 10% of documents containing a term
133 // doTest(100000, 10000,3,.1f);