1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.Serializable;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.Term;
26 import org.apache.lucene.queryParser.QueryParser; // for javadoc
29 * An abstract {@link Query} that matches documents
30 * containing a subset of terms provided by a {@link
31 * FilteredTermEnum} enumeration.
33 * <p>This query cannot be used directly; you must subclass
34 * it and define {@link #getEnum} to provide a {@link
35 * FilteredTermEnum} that iterates through the terms to be
38 * <p><b>NOTE</b>: if {@link #setRewriteMethod} is either
39 * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link
40 * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a
41 * {@link BooleanQuery.TooManyClauses} exception during
42 * searching, which happens when the number of terms to be
43 * searched exceeds {@link
44 * BooleanQuery#getMaxClauseCount()}. Setting {@link
45 * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE}
48 * <p>The recommended rewrite method is {@link
49 * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
50 * computing unhelpful scores, and it tries to pick the most
51 * performant rewrite method given the query. If you
52 * need scoring (like {@link FuzzyQuery}), use
53 * {@link TopTermsScoringBooleanQueryRewrite} which uses
54 * a priority queue to only collect competitive terms
55 * and not hit this limitation.
57 * Note that {@link QueryParser} produces
58 * MultiTermQueries using {@link
59 * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
61 public abstract class MultiTermQuery extends Query {
// Rewrite strategy that rewrite(IndexReader) delegates to. It starts out as the
// shared CONSTANT_SCORE_AUTO_REWRITE_DEFAULT constant and is replaced through
// setRewriteMethod.
62 protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
// Counter returned by getTotalNumberOfTerms() and advanced by
// incTotalNumberOfTerms(int). Declared transient, so default Java
// serialization skips it.
63 transient int numberOfTerms = 0;
65 /** Abstract class that defines how the query is rewritten. */
66 public static abstract class RewriteMethod implements Serializable {
// The only operation visible on this type: produce the rewritten Query for the
// given MultiTermQuery, using the supplied reader. Declared to throw IOException.
67 public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
70 /** A rewrite method that first creates a private Filter,
71 * by visiting each term in sequence and marking all docs
72 * for that term. Matching documents are assigned a
73 * constant score equal to the query's boost.
75 * <p> This method is faster than the BooleanQuery
76 * rewrite methods when the number of matched terms or
77 * matched documents is non-trivial. Also, it will never
78 * hit an errant {@link BooleanQuery.TooManyClauses}
81 * @see #setRewriteMethod */
82 public static final RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new RewriteMethod() {
84 public Query rewrite(IndexReader reader, MultiTermQuery query) {
// Wrap the query in a MultiTermQueryWrapperFilter and hand that filter to a
// ConstantScoreQuery; the reader argument is not used on the visible lines.
85 Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter<MultiTermQuery>(query));
// Copy the original query's boost onto the rewritten query.
86 result.setBoost(query.getBoost());
// NOTE(review): no return statement is visible in this excerpt (embedded
// numbering skips 87) -- presumably `result` is returned; confirm upstream.
90 // Make sure we are still a singleton even after deserializing
91 protected Object readResolve() {
// Answer with the shared constant instead of the freshly deserialized copy.
92 return CONSTANT_SCORE_FILTER_REWRITE;
96 /** A rewrite method that first translates each term into a
97 * {@link BooleanClause.Occur#SHOULD} clause in a
98 * BooleanQuery, and keeps the scores as computed by the
99 * query. Note that typically such scores are
100 * meaningless to the user, and require non-trivial CPU
101 * to compute, so it's almost always better to use {@link
102 * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
104 * <p><b>NOTE</b>: This rewrite method will hit {@link
105 * BooleanQuery.TooManyClauses} if the number of terms
106 * exceeds {@link BooleanQuery#getMaxClauseCount}.
108 * @see #setRewriteMethod */
// Alias only: re-exports ScoringRewrite.SCORING_BOOLEAN_QUERY_REWRITE under the
// MultiTermQuery name; no new instance is created here.
109 public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = ScoringRewrite.SCORING_BOOLEAN_QUERY_REWRITE;
111 /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except
112 * scores are not computed. Instead, each matching
113 * document receives a constant score equal to the
116 * <p><b>NOTE</b>: This rewrite method will hit {@link
117 * BooleanQuery.TooManyClauses} if the number of terms
118 * exceeds {@link BooleanQuery#getMaxClauseCount}.
120 * @see #setRewriteMethod */
// Alias only: re-exports ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE
// under the MultiTermQuery name; no new instance is created here.
121 public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
124 * A rewrite method that first translates each term into a
125 * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the
126 * scores as computed by the query.
129 * This rewrite method only uses the top scoring terms so it will not overflow
130 * the boolean max clause count. It is the default rewrite method for
131 * {@link FuzzyQuery}.
133 * @see #setRewriteMethod
135 public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite<BooleanQuery> {
138 * Create a TopTermsScoringBooleanQueryRewrite for
139 * at most <code>size</code> terms.
141 * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
142 * <code>size</code>, then it will be used instead.
// NOTE(review): the constructor body is not visible in this excerpt (embedded
// numbering skips 145) -- presumably `size` is forwarded to the superclass;
// confirm upstream.
144 public TopTermsScoringBooleanQueryRewrite(int size) {
149 protected int getMaxSize() {
// The cap is whatever BooleanQuery currently reports as its maximum clause count.
150 return BooleanQuery.getMaxClauseCount();
154 protected BooleanQuery getTopLevelQuery() {
// A new, empty BooleanQuery per call. NOTE(review): the meaning of the `true`
// constructor argument is not shown in this file -- confirm against BooleanQuery.
155 return new BooleanQuery(true);
159 protected void addClause(BooleanQuery topLevel, Term term, float boost) {
// One plain TermQuery per term, added to the container as a SHOULD clause.
160 final TermQuery tq = new TermQuery(term);
// NOTE(review): `boost` is not referenced on any visible line and the embedded
// numbering skips 161 -- a statement applying the boost may be missing; confirm.
162 topLevel.add(tq, BooleanClause.Occur.SHOULD);
167 * A rewrite method that first translates each term into a
168 * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but the scores
169 * are only computed as the boost.
171 * This rewrite method only uses the top scoring terms so it will not overflow
172 * the boolean max clause count.
174 * @see #setRewriteMethod
176 public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite<BooleanQuery> {
179 * Create a TopTermsBoostOnlyBooleanQueryRewrite for
180 * at most <code>size</code> terms.
182 * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
183 * <code>size</code>, then it will be used instead.
// NOTE(review): the constructor body is not visible in this excerpt (embedded
// numbering skips 186) -- presumably `size` is forwarded to the superclass;
// confirm upstream.
185 public TopTermsBoostOnlyBooleanQueryRewrite(int size) {
190 protected int getMaxSize() {
// The cap is whatever BooleanQuery currently reports as its maximum clause count.
191 return BooleanQuery.getMaxClauseCount();
195 protected BooleanQuery getTopLevelQuery() {
// A new, empty BooleanQuery per call. NOTE(review): the meaning of the `true`
// constructor argument is not shown in this file -- confirm against BooleanQuery.
196 return new BooleanQuery(true);
200 protected void addClause(BooleanQuery topLevel, Term term, float boost) {
// Unlike the scoring variant, the TermQuery is wrapped in a ConstantScoreQuery
// before it is added as a SHOULD clause.
201 final Query q = new ConstantScoreQuery(new TermQuery(term));
// NOTE(review): `boost` is not referenced on any visible line and the embedded
// numbering skips 202 -- a statement applying the boost may be missing; confirm.
203 topLevel.add(q, BooleanClause.Occur.SHOULD);
207 /** A rewrite method that tries to pick the best
208 * constant-score rewrite method based on term and
209 * document counts from the query. If the numbers of
210 * terms and documents are both small enough, then {@link
211 * #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used.
212 * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is
// Empty subclass: the body adds no members, so all behaviour comes from
// org.apache.lucene.search.ConstantScoreAutoRewrite. It makes the type
// addressable as MultiTermQuery.ConstantScoreAutoRewrite.
215 public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {}
217 /** Read-only default instance of {@link
218 * ConstantScoreAutoRewrite}, with {@link
219 * ConstantScoreAutoRewrite#setTermCountCutoff} set to
221 * ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
223 * ConstantScoreAutoRewrite#setDocCountPercent} set to
225 * ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.
226 * Note that you cannot alter the configuration of this
227 * instance; you'll need to create a private instance
229 public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() {
// Both configuration setters are overridden to throw, which is what keeps this
// shared instance read-only.
231 public void setTermCountCutoff(int count) {
232 throw new UnsupportedOperationException("Please create a private instance");
236 public void setDocCountPercent(double percent) {
237 throw new UnsupportedOperationException("Please create a private instance");
240 // Make sure we are still a singleton even after deserializing
241 protected Object readResolve() {
// Answer with the shared constant instead of the freshly deserialized copy.
242 return CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
247 * Constructs a query matching terms that cannot be represented with a single
// No-argument constructor. NOTE(review): no body statements are visible in
// this excerpt -- confirm upstream that it is empty.
250 public MultiTermQuery() {
253 /** Construct the enumeration to be used, expanding the pattern term. */
// Subclass hook: returns the FilteredTermEnum for the given reader.
// NOTE(review): the declaration is cut off after the parameter list in this
// excerpt (no throws clause or terminating semicolon visible).
254 protected abstract FilteredTermEnum getEnum(IndexReader reader)
258 * Expert: Return the number of unique terms visited during execution of the query.
259 * If there are many of them, you may consider using another query type
260 * or optimizing the total term count in your index.
261 * <p>This method is not thread safe, be sure to only call it when no query is running!
262 * If you re-use the same query instance for another
263 * search, be sure to first reset the term counter
264 * with {@link #clearTotalNumberOfTerms}.
265 * <p>On optimized indexes / no MultiReaders, you get the correct number of
266 * unique terms for the whole index. Use this number to compare different queries.
267 * For non-optimized indexes this number can also be achieved in
268 * non-constant-score mode. In constant-score mode you get the total number of
269 * terms seeked for all segments / sub-readers.
270 * @see #clearTotalNumberOfTerms
272 public int getTotalNumberOfTerms() {
// Returns the current counter value as-is; nothing is recomputed or reset here.
273 return numberOfTerms;
277 * Expert: Resets the counting of unique terms.
278 * Do this before executing the query/filter.
279 * @see #getTotalNumberOfTerms
// NOTE(review): the body is not visible in this excerpt (embedded numbering
// skips 282). Per the accompanying documentation it resets the counter that
// getTotalNumberOfTerms() reports -- confirm upstream.
281 public void clearTotalNumberOfTerms() {
285 protected void incTotalNumberOfTerms(int inc) {
// Plain, unsynchronized addition to the counter; `inc` is not range-checked.
286 numberOfTerms += inc;
290 * To rewrite to a simpler form, instead return a simpler
291 * enum from {@link #getEnum(IndexReader)}. For example,
292 * to rewrite to a single term, return a {@link SingleTermEnum}
295 public final Query rewrite(IndexReader reader) throws IOException {
// Pure delegation to the configured RewriteMethod, passing this query along.
// The method is final, so subclasses cannot override it.
296 return rewriteMethod.rewrite(reader, this);
300 * @see #setRewriteMethod
302 public RewriteMethod getRewriteMethod() {
// Plain accessor: returns the field itself, with no copy made.
303 return rewriteMethod;
307 * Sets the rewrite method to be used when executing the
308 * query. You can use one of the four core methods, or
309 * implement your own subclass of {@link RewriteMethod}. */
310 public void setRewriteMethod(RewriteMethod method) {
// Stored exactly as given; there is no null check here, although rewrite() and
// hashCode() dereference this field directly.
311 rewriteMethod = method;
315 public int hashCode() {
316 final int prime = 31;
// NOTE(review): `result` is used below but its declaration is not visible in
// this excerpt (embedded numbering skips 317) -- confirm its initial value.
// Mix in the boost through Float.floatToIntBits.
318 result = prime * result + Float.floatToIntBits(getBoost());
// These two statements together equal `prime * result + rewriteMethod.hashCode()`.
// rewriteMethod is dereferenced directly, so a null value would throw here.
319 result = prime * result;
320 result += rewriteMethod.hashCode();
// NOTE(review): the return statement is not visible in this excerpt --
// presumably `result` is returned; confirm upstream.
325 public boolean equals(Object obj) {
330 if (getClass() != obj.getClass())
332 MultiTermQuery other = (MultiTermQuery) obj;
333 if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost()))
335 if (!rewriteMethod.equals(other.rewriteMethod)) {