lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.ArrayList;
  22
  23 import org.apache.lucene.index.IndexReader;
  24 import org.apache.lucene.index.Term;
  25
  26 class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
  27
  28   // Defaults derived from rough tests with a 20.0 million
  29   // doc Wikipedia index.  With more than 350 terms in the
  30   // query, the filter method is fastest:
  31   public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
  32
  33   // If the query will hit more than 1 in 1000 of the docs
  34   // in the index (0.1%), the filter method is fastest:
  35   public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
  36
  37   private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
  38   private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
  39
  40   /** If the number of terms in this query is equal to or
  41    *  larger than this setting then {@link
  42    *  #CONSTANT_SCORE_FILTER_REWRITE} is used. */
  43   public void setTermCountCutoff(int count) {
  44     termCountCutoff = count;
  45   }
  46
  47   /** @see #setTermCountCutoff */
  48   public int getTermCountCutoff() {
  49     return termCountCutoff;
  50   }
  51
  52   /** If the number of documents to be visited in the
  53    *  postings exceeds this specified percentage of the
  54    *  maxDoc() for the index, then {@link
  55    *  #CONSTANT_SCORE_FILTER_REWRITE} is used.
  56    *  @param percent 0.0 to 100.0 */
  57   public void setDocCountPercent(double percent) {
  58     docCountPercent = percent;
  59   }
  60
  61   /** @see #setDocCountPercent */
  62   public double getDocCountPercent() {
  63     return docCountPercent;
  64   }
  65
  66   @Override
  67   protected BooleanQuery getTopLevelQuery() {
  68     return new BooleanQuery(true);
  69   }
  70
  71   @Override
  72   protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) {
  73     topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
  74   }
  75
  76   @Override
  77   public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  78
  79     // Get the enum and start visiting terms.  If we
  80     // exhaust the enum before hitting either of the
  81     // cutoffs, we use ConstantBooleanQueryRewrite; else,
  82     // ConstantFilterRewrite:
  83     final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
  84     final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
  85
  86     final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
  87     collectTerms(reader, query, col);
  88
  89     if (col.hasCutOff) {
  90       return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
  91     } else {
  92       final Query result;
  93       if (col.pendingTerms.isEmpty()) {
  94         result = getTopLevelQuery();
  95       } else {
  96         BooleanQuery bq = getTopLevelQuery();
  97         for(Term term : col.pendingTerms) {
  98           addClause(bq, term, 1.0f);
  99         }
 100         // Strip scores
 101         result = new ConstantScoreQuery(bq);
 102         result.setBoost(query.getBoost());
 103       }
 104       query.incTotalNumberOfTerms(col.pendingTerms.size());
 105       return result;
 106     }
 107   }
 108
 109   private static final class CutOffTermCollector implements TermCollector {
 110     CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
 111       this.reader = reader;
 112       this.docCountCutoff = docCountCutoff;
 113       this.termCountLimit = termCountLimit;
 114     }
 115
 116     public boolean collect(Term t, float boost) throws IOException {
 117       pendingTerms.add(t);
 118       // Loading the TermInfo from the terms dict here
 119       // should not be costly, because 1) the
 120       // query/filter will load the TermInfo when it
 121       // runs, and 2) the terms dict has a cache:
 122       docVisitCount += reader.docFreq(t);
 123       if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
 124         hasCutOff = true;
 125         return false;
 126       }
 127       return true;
 128     }
 129
 130     int docVisitCount = 0;
 131     boolean hasCutOff = false;
 132
 133     final IndexReader reader;
 134     final int docCountCutoff, termCountLimit;
 135     final ArrayList<Term> pendingTerms = new ArrayList<Term>();
 136   }
 137
 138   @Override
 139   public int hashCode() {
 140     final int prime = 1279;
 141     return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
 142   }
 143
 144   @Override
 145   public boolean equals(Object obj) {
 146     if (this == obj)
 147       return true;
 148     if (obj == null)
 149       return false;
 150     if (getClass() != obj.getClass())
 151       return false;
 152
 153     ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
 154     if (other.termCountCutoff != termCountCutoff) {
 155       return false;
 156     }
 157
 158     if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
 159       return false;
 160     }
 161
 162     return true;
 163   }
 164 }