1 package org.apache.lucene.search;
3 import org.apache.lucene.util.DocIdBitSet;
4 import org.apache.lucene.util.LuceneTestCase;
6 import java.util.BitSet;
7 import java.io.IOException;
9 import org.apache.lucene.index.IndexReader;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.index.Term;
12 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13 import org.apache.lucene.store.Directory;
14 import org.apache.lucene.analysis.MockAnalyzer;
15 import org.apache.lucene.analysis.WhitespaceAnalyzer;
16 import org.apache.lucene.document.Document;
17 import org.apache.lucene.document.Field;
20 * Licensed to the Apache Software Foundation (ASF) under one or more
21 * contributor license agreements. See the NOTICE file distributed with
22 * this work for additional information regarding copyright ownership.
23 * The ASF licenses this file to You under the Apache License, Version 2.0
24 * (the "License"); you may not use this file except in compliance with
25 * the License. You may obtain a copy of the License at
27 * http://www.apache.org/licenses/LICENSE-2.0
29 * Unless required by applicable law or agreed to in writing, software
30 * distributed under the License is distributed on an "AS IS" BASIS,
31 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
32 * See the License for the specific language governing permissions and
33 * limitations under the License.
36 public class TestScorerPerf extends LuceneTestCase {
37 boolean validate = true; // set to false when doing performance testing
44 public void createDummySearcher() throws Exception {
45 // Create a dummy index with nothing in it.
46 // This could possibly fail if Lucene starts checking for docid ranges...
48 IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
49 iw.addDocument(new Document());
51 s = new IndexSearcher(d, true);
54 public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
55 int[] freq = new int[nTerms];
56 terms = new Term[nTerms];
57 for (int i=0; i<nTerms; i++) {
58 int f = (nTerms+1)-i; // make first terms less frequent
59 freq[i] = (int)Math.ceil(Math.pow(f,power));
60 terms[i] = new Term("f",Character.toString((char)('A'+i)));
63 IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
64 for (int i=0; i<nDocs; i++) {
65 Document d = new Document();
66 for (int j=0; j<nTerms; j++) {
67 if (random.nextInt(freq[j]) == 0) {
68 d.add(newField("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
69 //System.out.println(d);
79 public BitSet randBitSet(int sz, int numBitsToSet) {
80 BitSet set = new BitSet(sz);
81 for (int i=0; i<numBitsToSet; i++) {
82 set.set(random.nextInt(sz));
87 public BitSet[] randBitSets(int numSets, int setSize) {
88 BitSet[] sets = new BitSet[numSets];
89 for (int i=0; i<sets.length; i++) {
90 sets[i] = randBitSet(setSize, random.nextInt(setSize));
95 public static class CountingHitCollector extends Collector {
98 protected int docBase = 0;
101 public void setScorer(Scorer scorer) throws IOException {}
104 public void collect(int doc) {
106 sum += docBase + doc; // use it to avoid any possibility of being optimized away
109 public int getCount() { return count; }
110 public int getSum() { return sum; }
113 public void setNextReader(IndexReader reader, int base) {
117 public boolean acceptsDocsOutOfOrder() {
123 public static class MatchingHitCollector extends CountingHitCollector {
126 public MatchingHitCollector(BitSet answer) {
127 this.answer = answer;
130 public void collect(int doc, float score) {
132 pos = answer.nextSetBit(pos+1);
133 if (pos != doc + docBase) {
134 throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase);
141 BitSet addClause(BooleanQuery bq, BitSet result) {
142 final BitSet rnd = sets[random.nextInt(sets.length)];
143 Query q = new ConstantScoreQuery(new Filter() {
145 public DocIdSet getDocIdSet(IndexReader reader) {
146 return new DocIdBitSet(rnd);
149 bq.add(q, BooleanClause.Occur.MUST);
151 if (result==null) result = (BitSet)rnd.clone();
152 else result.and(rnd);
158 public int doConjunctions(int iter, int maxClauses) throws IOException {
161 for (int i=0; i<iter; i++) {
162 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
163 BooleanQuery bq = new BooleanQuery();
165 for (int j=0; j<nClauses; j++) {
166 result = addClause(bq,result);
169 CountingHitCollector hc = validate ? new MatchingHitCollector(result)
170 : new CountingHitCollector();
174 if (validate) assertEquals(result.cardinality(), hc.getCount());
175 // System.out.println(hc.getCount());
181 public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
185 for (int i=0; i<iter; i++) {
186 int oClauses = random.nextInt(maxOuterClauses-1)+2;
187 BooleanQuery oq = new BooleanQuery();
190 for (int o=0; o<oClauses; o++) {
192 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
193 BooleanQuery bq = new BooleanQuery();
194 for (int j=0; j<nClauses; j++) {
195 result = addClause(bq,result);
198 oq.add(bq, BooleanClause.Occur.MUST);
201 CountingHitCollector hc = validate ? new MatchingHitCollector(result)
202 : new CountingHitCollector();
204 nMatches += hc.getCount();
206 if (validate) assertEquals(result.cardinality(), hc.getCount());
207 // System.out.println(hc.getCount());
209 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
214 public int doTermConjunctions(IndexSearcher s,
218 ) throws IOException {
222 for (int i=0; i<iter; i++) {
223 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
224 BooleanQuery bq = new BooleanQuery();
225 BitSet termflag = new BitSet(termsInIndex);
226 for (int j=0; j<nClauses; j++) {
228 // don't pick same clause twice
229 tnum = random.nextInt(termsInIndex);
230 if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
231 if (tnum<0 || tnum>=termsInIndex) tnum=termflag.nextClearBit(0);
233 Query tq = new TermQuery(terms[tnum]);
234 bq.add(tq, BooleanClause.Occur.MUST);
237 CountingHitCollector hc = new CountingHitCollector();
239 nMatches += hc.getCount();
242 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
248 public int doNestedTermConjunctions(IndexSearcher s,
253 ) throws IOException {
256 for (int i=0; i<iter; i++) {
257 int oClauses = random.nextInt(maxOuterClauses-1)+2;
258 BooleanQuery oq = new BooleanQuery();
259 for (int o=0; o<oClauses; o++) {
261 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
262 BooleanQuery bq = new BooleanQuery();
263 BitSet termflag = new BitSet(termsInIndex);
264 for (int j=0; j<nClauses; j++) {
266 // don't pick same clause twice
267 tnum = random.nextInt(termsInIndex);
268 if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
269 if (tnum<0 || tnum>=25) tnum=termflag.nextClearBit(0);
271 Query tq = new TermQuery(terms[tnum]);
272 bq.add(tq, BooleanClause.Occur.MUST);
275 oq.add(bq, BooleanClause.Occur.MUST);
279 CountingHitCollector hc = new CountingHitCollector();
281 nMatches += hc.getCount();
284 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
289 public int doSloppyPhrase(IndexSearcher s,
293 ) throws IOException {
296 for (int i=0; i<iter; i++) {
297 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
298 PhraseQuery q = new PhraseQuery();
299 for (int j=0; j<nClauses; j++) {
300 int tnum = random.nextInt(termsInIndex);
301 q.add(new Term("f",Character.toString((char)(tnum+'A'))), j);
303 q.setSlop(termsInIndex); // this could be random too
305 CountingHitCollector hc = new CountingHitCollector();
314 public void testConjunctions() throws Exception {
315 // test many small sets... the bugs will be found on boundary conditions
316 createDummySearcher();
318 sets=randBitSets(atLeast(1000), atLeast(10));
319 doConjunctions(atLeast(10000), atLeast(5));
320 doNestedConjunctions(atLeast(10000), atLeast(3), atLeast(3));
328 public void testConjunctionPerf() throws Exception {
330 createDummySearcher();
332 sets=randBitSets(32,1000000);
333 for (int i=0; i<bigIter; i++) {
334 long start = System.currentTimeMillis();
335 doConjunctions(500,6);
336 long end = System.currentTimeMillis();
337 if (VERBOSE) System.out.println("milliseconds="+(end-start));
342 public void testNestedConjunctionPerf() throws Exception {
344 createDummySearcher();
346 sets=randBitSets(32,1000000);
347 for (int i=0; i<bigIter; i++) {
348 long start = System.currentTimeMillis();
349 doNestedConjunctions(500,3,3);
350 long end = System.currentTimeMillis();
351 if (VERBOSE) System.out.println("milliseconds="+(end-start));
357 public void testConjunctionTerms() throws Exception {
360 RAMDirectory dir = new RAMDirectory();
361 if (VERBOSE) System.out.println("Creating index");
362 createRandomTerms(100000,25,.5, dir);
363 s = new IndexSearcher(dir, true);
364 if (VERBOSE) System.out.println("Starting performance test");
365 for (int i=0; i<bigIter; i++) {
366 long start = System.currentTimeMillis();
367 doTermConjunctions(s,25,5,1000);
368 long end = System.currentTimeMillis();
369 if (VERBOSE) System.out.println("milliseconds="+(end-start));
374 public void testNestedConjunctionTerms() throws Exception {
377 RAMDirectory dir = new RAMDirectory();
378 if (VERBOSE) System.out.println("Creating index");
379 createRandomTerms(100000,25,.2, dir);
380 s = new IndexSearcher(dir, true);
381 if (VERBOSE) System.out.println("Starting performance test");
382 for (int i=0; i<bigIter; i++) {
383 long start = System.currentTimeMillis();
384 doNestedTermConjunctions(s,25,3,3,200);
385 long end = System.currentTimeMillis();
386 if (VERBOSE) System.out.println("milliseconds="+(end-start));
392 public void testSloppyPhrasePerf() throws Exception {
395 RAMDirectory dir = new RAMDirectory();
396 if (VERBOSE) System.out.println("Creating index");
397 createRandomTerms(100000,25,2,dir);
398 s = new IndexSearcher(dir, true);
399 if (VERBOSE) System.out.println("Starting performance test");
400 for (int i=0; i<bigIter; i++) {
401 long start = System.currentTimeMillis();
402 doSloppyPhrase(s,25,2,1000);
403 long end = System.currentTimeMillis();
404 if (VERBOSE) System.out.println("milliseconds="+(end-start));