1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.Term;
26 class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
28 // Defaults derived from rough tests with a 20.0 million
29 // doc Wikipedia index. With more than 350 terms in the
30 // query, the filter method is fastest:
31 public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
33 // If the query will hit more than 1 in 1000 of the docs
34 // in the index (0.1%), the filter method is fastest:
35 public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
37 private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
38 private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
40 /** If the number of terms in this query is equal to or
41 * larger than this setting then {@link
42 * #CONSTANT_SCORE_FILTER_REWRITE} is used. */
43 public void setTermCountCutoff(int count) {
44 termCountCutoff = count;
47 /** @see #setTermCountCutoff */
48 public int getTermCountCutoff() {
49 return termCountCutoff;
52 /** If the number of documents to be visited in the
53 * postings exceeds this specified percentage of the
54 * maxDoc() for the index, then {@link
55 * #CONSTANT_SCORE_FILTER_REWRITE} is used.
56 * @param percent 0.0 to 100.0 */
57 public void setDocCountPercent(double percent) {
58 docCountPercent = percent;
61 /** @see #setDocCountPercent */
62 public double getDocCountPercent() {
63 return docCountPercent;
67 protected BooleanQuery getTopLevelQuery() {
68 return new BooleanQuery(true);
72 protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) {
73 topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
/**
 * Rewrites {@code query} by collecting its terms against two cutoffs
 * (a document-count cutoff and a term-count limit) and returning either
 * the result of {@code MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE} or
 * a query built from the collected terms.
 *
 * NOTE(review): the branch condition selecting between the two outcomes,
 * the declaration of {@code result} and the final return are not visible
 * in this excerpt; the description follows the inline comment below.
 *
 * @param reader index whose maxDoc() scales the document-count cutoff
 * @param query the multi-term query being rewritten
 * @throws IOException declared by this method; presumably propagated from
 *         term collection or the delegated rewrite
 */
77 public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
79 // Get the enum and start visiting terms. If we
80 // exhaust the enum before hitting either of the
81 // cutoffs, we use ConstantBooleanQueryRewrite; else,
82 // ConstantFilterRewrite:
// docCountPercent is a percentage, hence the division by 100; the int
// cast truncates the resulting document count toward zero.
83 final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
// The term limit never exceeds BooleanQuery's maximum clause count.
84 final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
86 final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
87 collectTerms(reader, query, col);
// NOTE(review): the condition guarding this return is not visible here;
// per the comment above it is the "a cutoff was hit" path.
90 return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
// No terms were collected: use the empty top-level query as the result.
93 if (col.pendingTerms.isEmpty()) {
94 result = getTopLevelQuery();
// Otherwise add every collected term as a clause (boost fixed at 1.0f),
// wrap the BooleanQuery in a ConstantScoreQuery and copy over the
// original query's boost.
96 BooleanQuery bq = getTopLevelQuery();
97 for(Term term : col.pendingTerms) {
98 addClause(bq, term, 1.0f);
101 result = new ConstantScoreQuery(bq);
102 result.setBoost(query.getBoost());
// Report the number of collected terms back to the query.
104 query.incTotalNumberOfTerms(col.pendingTerms.size());
/**
 * TermCollector used by rewrite(): it keeps a running sum of the document
 * frequencies of the terms it is offered and compares that sum, and the
 * number of pending terms, against the two cutoffs given at construction.
 *
 * NOTE(review): the statements that add to pendingTerms, set hasCutOff
 * and produce collect()'s return value are not visible in this excerpt.
 */
109 private static final class CutOffTermCollector implements TermCollector {
// Stores the reader used for docFreq lookups and the two cutoffs.
110 CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
111 this.reader = reader;
112 this.docCountCutoff = docCountCutoff;
113 this.termCountLimit = termCountLimit;
// Called once per term; the boost argument is not used in the visible lines.
116 public boolean collect(Term t, float boost) throws IOException {
118 // Loading the TermInfo from the terms dict here
119 // should not be costly, because 1) the
120 // query/filter will load the TermInfo when it
121 // runs, and 2) the terms dict has a cache:
122 docVisitCount += reader.docFreq(t);
// Either limit being reached (>=) triggers the cutoff branch.
// NOTE(review): the branch body is not visible; presumably it sets
// hasCutOff and stops collection -- confirm.
123 if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
// Running sum of reader.docFreq(t) over the terms offered so far.
130 int docVisitCount = 0;
// Starts false; read by rewrite() per its inline comment (the assignment is not visible here).
131 boolean hasCutOff = false;
133 final IndexReader reader;
134 final int docCountCutoff, termCountLimit;
// Terms gathered for the boolean rewrite; rewrite() iterates this list.
135 final ArrayList<Term> pendingTerms = new ArrayList<Term>();
139 public int hashCode() {
140 final int prime = 1279;
141 return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
145 public boolean equals(Object obj) {
150 if (getClass() != obj.getClass())
153 ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
154 if (other.termCountCutoff != termCountCutoff) {
158 if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {