1 package org.apache.lucene.search;
3 import org.apache.lucene.util.DocIdBitSet;
4 import org.apache.lucene.util.LuceneTestCase;
6 import java.util.BitSet;
7 import java.io.IOException;
9 import org.apache.lucene.index.IndexReader;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.index.Term;
12 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
13 import org.apache.lucene.store.Directory;
14 import org.apache.lucene.analysis.MockAnalyzer;
15 import org.apache.lucene.document.Document;
16 import org.apache.lucene.document.Field;
19 * Licensed to the Apache Software Foundation (ASF) under one or more
20 * contributor license agreements. See the NOTICE file distributed with
21 * this work for additional information regarding copyright ownership.
22 * The ASF licenses this file to You under the Apache License, Version 2.0
23 * (the "License"); you may not use this file except in compliance with
24 * the License. You may obtain a copy of the License at
26 * http://www.apache.org/licenses/LICENSE-2.0
28 * Unless required by applicable law or agreed to in writing, software
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
35 public class TestScorerPerf extends LuceneTestCase {
36 boolean validate = true; // set to false when doing performance testing
44 // TODO: this should be setUp()....
45 public void createDummySearcher() throws Exception {
46 // Create a dummy index with nothing in it.
47 // This could possibly fail if Lucene starts checking for docid ranges...
49 IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
50 iw.addDocument(new Document());
52 r = IndexReader.open(d);
53 s = new IndexSearcher(r);
56 public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
57 int[] freq = new int[nTerms];
58 terms = new Term[nTerms];
59 for (int i=0; i<nTerms; i++) {
60 int f = (nTerms+1)-i; // make first terms less frequent
61 freq[i] = (int)Math.ceil(Math.pow(f,power));
62 terms[i] = new Term("f",Character.toString((char)('A'+i)));
65 IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
66 for (int i=0; i<nDocs; i++) {
67 Document d = new Document();
68 for (int j=0; j<nTerms; j++) {
69 if (random.nextInt(freq[j]) == 0) {
70 d.add(newField("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
71 //System.out.println(d);
81 public BitSet randBitSet(int sz, int numBitsToSet) {
82 BitSet set = new BitSet(sz);
83 for (int i=0; i<numBitsToSet; i++) {
84 set.set(random.nextInt(sz));
89 public BitSet[] randBitSets(int numSets, int setSize) {
90 BitSet[] sets = new BitSet[numSets];
91 for (int i=0; i<sets.length; i++) {
92 sets[i] = randBitSet(setSize, random.nextInt(setSize));
97 public static class CountingHitCollector extends Collector {
100 protected int docBase = 0;
103 public void setScorer(Scorer scorer) throws IOException {}
106 public void collect(int doc) {
108 sum += docBase + doc; // use it to avoid any possibility of being eliminated by hotspot
111 public int getCount() { return count; }
112 public int getSum() { return sum; }
115 public void setNextReader(IndexReader reader, int base) {
119 public boolean acceptsDocsOutOfOrder() {
125 public static class MatchingHitCollector extends CountingHitCollector {
128 public MatchingHitCollector(BitSet answer) {
129 this.answer = answer;
132 public void collect(int doc, float score) {
134 pos = answer.nextSetBit(pos+1);
135 if (pos != doc + docBase) {
136 throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase);
143 BitSet addClause(BooleanQuery bq, BitSet result) {
144 final BitSet rnd = sets[random.nextInt(sets.length)];
145 Query q = new ConstantScoreQuery(new Filter() {
147 public DocIdSet getDocIdSet(IndexReader reader) {
148 return new DocIdBitSet(rnd);
151 bq.add(q, BooleanClause.Occur.MUST);
153 if (result==null) result = (BitSet)rnd.clone();
154 else result.and(rnd);
160 public int doConjunctions(int iter, int maxClauses) throws IOException {
163 for (int i=0; i<iter; i++) {
164 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
165 BooleanQuery bq = new BooleanQuery();
167 for (int j=0; j<nClauses; j++) {
168 result = addClause(bq,result);
171 CountingHitCollector hc = validate ? new MatchingHitCollector(result)
172 : new CountingHitCollector();
176 if (validate) assertEquals(result.cardinality(), hc.getCount());
177 // System.out.println(hc.getCount());
183 public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
187 for (int i=0; i<iter; i++) {
188 int oClauses = random.nextInt(maxOuterClauses-1)+2;
189 BooleanQuery oq = new BooleanQuery();
192 for (int o=0; o<oClauses; o++) {
194 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
195 BooleanQuery bq = new BooleanQuery();
196 for (int j=0; j<nClauses; j++) {
197 result = addClause(bq,result);
200 oq.add(bq, BooleanClause.Occur.MUST);
203 CountingHitCollector hc = validate ? new MatchingHitCollector(result)
204 : new CountingHitCollector();
206 nMatches += hc.getCount();
208 if (validate) assertEquals(result.cardinality(), hc.getCount());
209 // System.out.println(hc.getCount());
211 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
216 public int doTermConjunctions(IndexSearcher s,
220 ) throws IOException {
224 for (int i=0; i<iter; i++) {
225 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
226 BooleanQuery bq = new BooleanQuery();
227 BitSet termflag = new BitSet(termsInIndex);
228 for (int j=0; j<nClauses; j++) {
230 // don't pick same clause twice
231 tnum = random.nextInt(termsInIndex);
232 if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
233 if (tnum<0 || tnum>=termsInIndex) tnum=termflag.nextClearBit(0);
235 Query tq = new TermQuery(terms[tnum]);
236 bq.add(tq, BooleanClause.Occur.MUST);
239 CountingHitCollector hc = new CountingHitCollector();
241 nMatches += hc.getCount();
244 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
250 public int doNestedTermConjunctions(IndexSearcher s,
255 ) throws IOException {
258 for (int i=0; i<iter; i++) {
259 int oClauses = random.nextInt(maxOuterClauses-1)+2;
260 BooleanQuery oq = new BooleanQuery();
261 for (int o=0; o<oClauses; o++) {
263 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
264 BooleanQuery bq = new BooleanQuery();
265 BitSet termflag = new BitSet(termsInIndex);
266 for (int j=0; j<nClauses; j++) {
268 // don't pick same clause twice
269 tnum = random.nextInt(termsInIndex);
270 if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
271 if (tnum<0 || tnum>=25) tnum=termflag.nextClearBit(0);
273 Query tq = new TermQuery(terms[tnum]);
274 bq.add(tq, BooleanClause.Occur.MUST);
277 oq.add(bq, BooleanClause.Occur.MUST);
281 CountingHitCollector hc = new CountingHitCollector();
283 nMatches += hc.getCount();
286 if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
291 public int doSloppyPhrase(IndexSearcher s,
295 ) throws IOException {
298 for (int i=0; i<iter; i++) {
299 int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
300 PhraseQuery q = new PhraseQuery();
301 for (int j=0; j<nClauses; j++) {
302 int tnum = random.nextInt(termsInIndex);
303 q.add(new Term("f",Character.toString((char)(tnum+'A'))), j);
305 q.setSlop(termsInIndex); // this could be random too
307 CountingHitCollector hc = new CountingHitCollector();
316 public void testConjunctions() throws Exception {
317 // test many small sets... the bugs will be found on boundary conditions
318 createDummySearcher();
320 sets=randBitSets(atLeast(1000), atLeast(10));
321 doConjunctions(atLeast(10000), atLeast(5));
322 doNestedConjunctions(atLeast(10000), atLeast(3), atLeast(3));
331 public void testConjunctionPerf() throws Exception {
333 createDummySearcher();
335 sets=randBitSets(32,1000000);
336 for (int i=0; i<bigIter; i++) {
337 long start = System.currentTimeMillis();
338 doConjunctions(500,6);
339 long end = System.currentTimeMillis();
340 if (VERBOSE) System.out.println("milliseconds="+(end-start));
345 public void testNestedConjunctionPerf() throws Exception {
347 createDummySearcher();
349 sets=randBitSets(32,1000000);
350 for (int i=0; i<bigIter; i++) {
351 long start = System.currentTimeMillis();
352 doNestedConjunctions(500,3,3);
353 long end = System.currentTimeMillis();
354 if (VERBOSE) System.out.println("milliseconds="+(end-start));
360 public void testConjunctionTerms() throws Exception {
363 RAMDirectory dir = new RAMDirectory();
364 if (VERBOSE) System.out.println("Creating index");
365 createRandomTerms(100000,25,.5, dir);
366 s = new IndexSearcher(dir, true);
367 if (VERBOSE) System.out.println("Starting performance test");
368 for (int i=0; i<bigIter; i++) {
369 long start = System.currentTimeMillis();
370 doTermConjunctions(s,25,5,1000);
371 long end = System.currentTimeMillis();
372 if (VERBOSE) System.out.println("milliseconds="+(end-start));
377 public void testNestedConjunctionTerms() throws Exception {
380 RAMDirectory dir = new RAMDirectory();
381 if (VERBOSE) System.out.println("Creating index");
382 createRandomTerms(100000,25,.2, dir);
383 s = new IndexSearcher(dir, true);
384 if (VERBOSE) System.out.println("Starting performance test");
385 for (int i=0; i<bigIter; i++) {
386 long start = System.currentTimeMillis();
387 doNestedTermConjunctions(s,25,3,3,200);
388 long end = System.currentTimeMillis();
389 if (VERBOSE) System.out.println("milliseconds="+(end-start));
395 public void testSloppyPhrasePerf() throws Exception {
398 RAMDirectory dir = new RAMDirectory();
399 if (VERBOSE) System.out.println("Creating index");
400 createRandomTerms(100000,25,2,dir);
401 s = new IndexSearcher(dir, true);
402 if (VERBOSE) System.out.println("Starting performance test");
403 for (int i=0; i<bigIter; i++) {
404 long start = System.currentTimeMillis();
405 doSloppyPhrase(s,25,2,1000);
406 long end = System.currentTimeMillis();
407 if (VERBOSE) System.out.println("milliseconds="+(end-start));