+++ /dev/null
-package org.apache.lucene.search;
-
-import org.apache.lucene.util.DocIdBitSet;
-import org.apache.lucene.util.LuceneTestCase;
-
-import java.util.BitSet;
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class TestScorerPerf extends LuceneTestCase {
- boolean validate = true; // set to false when doing performance testing
-
- BitSet[] sets;
- Term[] terms;
- IndexSearcher s;
- Directory d;
-
- public void createDummySearcher() throws Exception {
- // Create a dummy index with nothing in it.
- // This could possibly fail if Lucene starts checking for docid ranges...
- d = newDirectory();
- IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
- iw.addDocument(new Document());
- iw.close();
- s = new IndexSearcher(d, true);
- }
-
- public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
- int[] freq = new int[nTerms];
- terms = new Term[nTerms];
- for (int i=0; i<nTerms; i++) {
- int f = (nTerms+1)-i; // make first terms less frequent
- freq[i] = (int)Math.ceil(Math.pow(f,power));
- terms[i] = new Term("f",Character.toString((char)('A'+i)));
- }
-
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
- for (int i=0; i<nDocs; i++) {
- Document d = new Document();
- for (int j=0; j<nTerms; j++) {
- if (random.nextInt(freq[j]) == 0) {
- d.add(newField("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
- //System.out.println(d);
- }
- }
- iw.addDocument(d);
- }
- iw.optimize();
- iw.close();
- }
-
-
- public BitSet randBitSet(int sz, int numBitsToSet) {
- BitSet set = new BitSet(sz);
- for (int i=0; i<numBitsToSet; i++) {
- set.set(random.nextInt(sz));
- }
- return set;
- }
-
- public BitSet[] randBitSets(int numSets, int setSize) {
- BitSet[] sets = new BitSet[numSets];
- for (int i=0; i<sets.length; i++) {
- sets[i] = randBitSet(setSize, random.nextInt(setSize));
- }
- return sets;
- }
-
- public static class CountingHitCollector extends Collector {
- int count=0;
- int sum=0;
- protected int docBase = 0;
-
- @Override
- public void setScorer(Scorer scorer) throws IOException {}
-
- @Override
- public void collect(int doc) {
- count++;
- sum += docBase + doc; // use it to avoid any possibility of being optimized away
- }
-
- public int getCount() { return count; }
- public int getSum() { return sum; }
-
- @Override
- public void setNextReader(IndexReader reader, int base) {
- docBase = base;
- }
- @Override
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
- }
-
-
- public static class MatchingHitCollector extends CountingHitCollector {
- BitSet answer;
- int pos=-1;
- public MatchingHitCollector(BitSet answer) {
- this.answer = answer;
- }
-
- public void collect(int doc, float score) {
-
- pos = answer.nextSetBit(pos+1);
- if (pos != doc + docBase) {
- throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase);
- }
- super.collect(doc);
- }
- }
-
-
- BitSet addClause(BooleanQuery bq, BitSet result) {
- final BitSet rnd = sets[random.nextInt(sets.length)];
- Query q = new ConstantScoreQuery(new Filter() {
- @Override
- public DocIdSet getDocIdSet(IndexReader reader) {
- return new DocIdBitSet(rnd);
- }
- });
- bq.add(q, BooleanClause.Occur.MUST);
- if (validate) {
- if (result==null) result = (BitSet)rnd.clone();
- else result.and(rnd);
- }
- return result;
- }
-
-
- public int doConjunctions(int iter, int maxClauses) throws IOException {
- int ret=0;
-
- for (int i=0; i<iter; i++) {
- int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
- BooleanQuery bq = new BooleanQuery();
- BitSet result=null;
- for (int j=0; j<nClauses; j++) {
- result = addClause(bq,result);
- }
-
- CountingHitCollector hc = validate ? new MatchingHitCollector(result)
- : new CountingHitCollector();
- s.search(bq, hc);
- ret += hc.getSum();
-
- if (validate) assertEquals(result.cardinality(), hc.getCount());
- // System.out.println(hc.getCount());
- }
-
- return ret;
- }
-
- public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
- int ret=0;
- long nMatches=0;
-
- for (int i=0; i<iter; i++) {
- int oClauses = random.nextInt(maxOuterClauses-1)+2;
- BooleanQuery oq = new BooleanQuery();
- BitSet result=null;
-
- for (int o=0; o<oClauses; o++) {
-
- int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
- BooleanQuery bq = new BooleanQuery();
- for (int j=0; j<nClauses; j++) {
- result = addClause(bq,result);
- }
-
- oq.add(bq, BooleanClause.Occur.MUST);
- } // outer
-
- CountingHitCollector hc = validate ? new MatchingHitCollector(result)
- : new CountingHitCollector();
- s.search(oq, hc);
- nMatches += hc.getCount();
- ret += hc.getSum();
- if (validate) assertEquals(result.cardinality(), hc.getCount());
- // System.out.println(hc.getCount());
- }
- if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
- return ret;
- }
-
-
- public int doTermConjunctions(IndexSearcher s,
- int termsInIndex,
- int maxClauses,
- int iter
- ) throws IOException {
- int ret=0;
-
- long nMatches=0;
- for (int i=0; i<iter; i++) {
- int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
- BooleanQuery bq = new BooleanQuery();
- BitSet termflag = new BitSet(termsInIndex);
- for (int j=0; j<nClauses; j++) {
- int tnum;
- // don't pick same clause twice
- tnum = random.nextInt(termsInIndex);
- if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
- if (tnum<0 || tnum>=termsInIndex) tnum=termflag.nextClearBit(0);
- termflag.set(tnum);
- Query tq = new TermQuery(terms[tnum]);
- bq.add(tq, BooleanClause.Occur.MUST);
- }
-
- CountingHitCollector hc = new CountingHitCollector();
- s.search(bq, hc);
- nMatches += hc.getCount();
- ret += hc.getSum();
- }
- if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
-
- return ret;
- }
-
-
- public int doNestedTermConjunctions(IndexSearcher s,
- int termsInIndex,
- int maxOuterClauses,
- int maxClauses,
- int iter
- ) throws IOException {
- int ret=0;
- long nMatches=0;
- for (int i=0; i<iter; i++) {
- int oClauses = random.nextInt(maxOuterClauses-1)+2;
- BooleanQuery oq = new BooleanQuery();
- for (int o=0; o<oClauses; o++) {
-
- int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
- BooleanQuery bq = new BooleanQuery();
- BitSet termflag = new BitSet(termsInIndex);
- for (int j=0; j<nClauses; j++) {
- int tnum;
- // don't pick same clause twice
- tnum = random.nextInt(termsInIndex);
- if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
- if (tnum<0 || tnum>=25) tnum=termflag.nextClearBit(0);
- termflag.set(tnum);
- Query tq = new TermQuery(terms[tnum]);
- bq.add(tq, BooleanClause.Occur.MUST);
- } // inner
-
- oq.add(bq, BooleanClause.Occur.MUST);
- } // outer
-
-
- CountingHitCollector hc = new CountingHitCollector();
- s.search(oq, hc);
- nMatches += hc.getCount();
- ret += hc.getSum();
- }
- if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
- return ret;
- }
-
-
- public int doSloppyPhrase(IndexSearcher s,
- int termsInIndex,
- int maxClauses,
- int iter
- ) throws IOException {
- int ret=0;
-
- for (int i=0; i<iter; i++) {
- int nClauses = random.nextInt(maxClauses-1)+2; // min 2 clauses
- PhraseQuery q = new PhraseQuery();
- for (int j=0; j<nClauses; j++) {
- int tnum = random.nextInt(termsInIndex);
- q.add(new Term("f",Character.toString((char)(tnum+'A'))), j);
- }
- q.setSlop(termsInIndex); // this could be random too
-
- CountingHitCollector hc = new CountingHitCollector();
- s.search(q, hc);
- ret += hc.getSum();
- }
-
- return ret;
- }
-
-
- public void testConjunctions() throws Exception {
- // test many small sets... the bugs will be found on boundary conditions
- createDummySearcher();
- validate=true;
- sets=randBitSets(atLeast(1000), atLeast(10));
- doConjunctions(atLeast(10000), atLeast(5));
- doNestedConjunctions(atLeast(10000), atLeast(3), atLeast(3));
- s.close();
- d.close();
- }
-
- /***
- int bigIter=10;
-
- public void testConjunctionPerf() throws Exception {
- r = newRandom();
- createDummySearcher();
- validate=false;
- sets=randBitSets(32,1000000);
- for (int i=0; i<bigIter; i++) {
- long start = System.currentTimeMillis();
- doConjunctions(500,6);
- long end = System.currentTimeMillis();
- if (VERBOSE) System.out.println("milliseconds="+(end-start));
- }
- s.close();
- }
-
- public void testNestedConjunctionPerf() throws Exception {
- r = newRandom();
- createDummySearcher();
- validate=false;
- sets=randBitSets(32,1000000);
- for (int i=0; i<bigIter; i++) {
- long start = System.currentTimeMillis();
- doNestedConjunctions(500,3,3);
- long end = System.currentTimeMillis();
- if (VERBOSE) System.out.println("milliseconds="+(end-start));
- }
- s.close();
- }
-
-
- public void testConjunctionTerms() throws Exception {
- r = newRandom();
- validate=false;
- RAMDirectory dir = new RAMDirectory();
- if (VERBOSE) System.out.println("Creating index");
- createRandomTerms(100000,25,.5, dir);
- s = new IndexSearcher(dir, true);
- if (VERBOSE) System.out.println("Starting performance test");
- for (int i=0; i<bigIter; i++) {
- long start = System.currentTimeMillis();
- doTermConjunctions(s,25,5,1000);
- long end = System.currentTimeMillis();
- if (VERBOSE) System.out.println("milliseconds="+(end-start));
- }
- s.close();
- }
-
- public void testNestedConjunctionTerms() throws Exception {
- r = newRandom();
- validate=false;
- RAMDirectory dir = new RAMDirectory();
- if (VERBOSE) System.out.println("Creating index");
- createRandomTerms(100000,25,.2, dir);
- s = new IndexSearcher(dir, true);
- if (VERBOSE) System.out.println("Starting performance test");
- for (int i=0; i<bigIter; i++) {
- long start = System.currentTimeMillis();
- doNestedTermConjunctions(s,25,3,3,200);
- long end = System.currentTimeMillis();
- if (VERBOSE) System.out.println("milliseconds="+(end-start));
- }
- s.close();
- }
-
-
- public void testSloppyPhrasePerf() throws Exception {
- r = newRandom();
- validate=false;
- RAMDirectory dir = new RAMDirectory();
- if (VERBOSE) System.out.println("Creating index");
- createRandomTerms(100000,25,2,dir);
- s = new IndexSearcher(dir, true);
- if (VERBOSE) System.out.println("Starting performance test");
- for (int i=0; i<bigIter; i++) {
- long start = System.currentTimeMillis();
- doSloppyPhrase(s,25,2,1000);
- long end = System.currentTimeMillis();
- if (VERBOSE) System.out.println("milliseconds="+(end-start));
- }
- s.close();
- }
- ***/
-
-
-}