// Reconstructed from the pylucene.git blobdiff (a2e61f0..aaeed55) of
// lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;

import org.apache.lucene.queryParser.QueryParser; // for javadoc

/**
 * An abstract {@link Query} that matches documents
 * containing a subset of terms provided by a {@link
 * FilteredTermEnum} enumeration.
 *
 * <p>This query cannot be used directly; you must subclass
 * it and define {@link #getEnum} to provide a {@link
 * FilteredTermEnum} that iterates through the terms to be
 * matched.
 *
 * <p><b>NOTE</b>: if {@link #setRewriteMethod} is either
 * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link
 * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a
 * {@link BooleanQuery.TooManyClauses} exception during
 * searching, which happens when the number of terms to be
 * searched exceeds {@link
 * BooleanQuery#getMaxClauseCount()}.  Setting {@link
 * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE}
 * prevents this.
 *
 * <p>The recommended rewrite method is {@link
 * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
 * computing unhelpful scores, and it tries to pick the most
 * performant rewrite method given the query. If you
 * need scoring (like {@link FuzzyQuery}), use
 * {@link TopTermsScoringBooleanQueryRewrite} which uses
 * a priority queue to only collect competitive terms
 * and not hit this limitation.
 *
 * <p>Note that {@link QueryParser} produces
 * MultiTermQueries using {@link
 * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
 */
public abstract class MultiTermQuery extends Query {
  /**
   * Strategy used by {@link #rewrite(IndexReader)}; defaults to
   * {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
   */
  protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;

  /**
   * Counter behind the deprecated term-counting methods. It is transient, so
   * it is not part of the serialized form, and it is updated without any
   * synchronization.
   */
  transient int numberOfTerms = 0;

  /** Abstract class that defines how the query is rewritten. */
  public static abstract class RewriteMethod implements Serializable {
    /**
     * Rewrites the given multi-term query.
     *
     * @param reader the reader the query is rewritten against
     * @param query the multi-term query to rewrite
     * @return the rewritten query
     * @throws IOException if the implementation fails while accessing the index
     */
    public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
  }

  /** A rewrite method that first creates a private Filter,
   *  by visiting each term in sequence and marking all docs
   *  for that term.  Matching documents are assigned a
   *  constant score equal to the query's boost.
   *
   *  <p>This method is faster than the BooleanQuery
   *  rewrite methods when the number of matched terms or
   *  matched documents is non-trivial. Also, it will never
   *  hit an errant {@link BooleanQuery.TooManyClauses}
   *  exception.
   *
   *  @see #setRewriteMethod */
  public static final RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new RewriteMethod() {
    @Override
    public Query rewrite(IndexReader reader, MultiTermQuery query) {
      // Wrap the query in a filter and give every match the query's boost.
      Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
      result.setBoost(query.getBoost());
      return result;
    }

    // Make sure we are still a singleton even after deserializing
    protected Object readResolve() {
      return CONSTANT_SCORE_FILTER_REWRITE;
    }
  };

  /** A rewrite method that first translates each term into
   *  {@link BooleanClause.Occur#SHOULD} clause in a
   *  BooleanQuery, and keeps the scores as computed by the
   *  query.  Note that typically such scores are
   *  meaningless to the user, and require non-trivial CPU
   *  to compute, so it's almost always better to use {@link
   *  #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
   *
   *  <p><b>NOTE</b>: This rewrite method will hit {@link
   *  BooleanQuery.TooManyClauses} if the number of terms
   *  exceeds {@link BooleanQuery#getMaxClauseCount}.
   *
   *  @see #setRewriteMethod */
  public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = ScoringRewrite.SCORING_BOOLEAN_QUERY_REWRITE;

  /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except
   *  scores are not computed.  Instead, each matching
   *  document receives a constant score equal to the
   *  query's boost.
   *
   *  <p><b>NOTE</b>: This rewrite method will hit {@link
   *  BooleanQuery.TooManyClauses} if the number of terms
   *  exceeds {@link BooleanQuery#getMaxClauseCount}.
   *
   *  @see #setRewriteMethod */
  public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;

  /**
   * A rewrite method that first translates each term into
   * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the
   * scores as computed by the query.
   *
   * <p>
   * This rewrite method only uses the top scoring terms so it will not overflow
   * the boolean max clause count. It is the default rewrite method for
   * {@link FuzzyQuery}.
   *
   * @see #setRewriteMethod
   */
  public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite {

    /**
     * Create a TopTermsScoringBooleanQueryRewrite for
     * at most <code>size</code> terms.
     * <p>
     * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
     * <code>size</code>, then it will be used instead.
     */
    public TopTermsScoringBooleanQueryRewrite(int size) {
      super(size);
    }

    /** Returns {@link BooleanQuery#getMaxClauseCount()} as the upper bound on collected terms. */
    @Override
    protected int getMaxSize() {
      return BooleanQuery.getMaxClauseCount();
    }

    /** Creates the empty top-level query via {@code new BooleanQuery(true)} (coord disabled). */
    @Override
    protected BooleanQuery getTopLevelQuery() {
      return new BooleanQuery(true);
    }

    /** Adds a {@link TermQuery} for {@code term}, boosted by {@code boost}, as a SHOULD clause. */
    @Override
    protected void addClause(BooleanQuery topLevel, Term term, float boost) {
      final TermQuery tq = new TermQuery(term);
      tq.setBoost(boost);
      topLevel.add(tq, BooleanClause.Occur.SHOULD);
    }
  }

  /**
   * A rewrite method that first translates each term into
   * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but the scores
   * are only computed as the boost.
   * <p>
   * This rewrite method only uses the top scoring terms so it will not overflow
   * the boolean max clause count.
   *
   * @see #setRewriteMethod
   */
  public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite {

    /**
     * Create a TopTermsBoostOnlyBooleanQueryRewrite for
     * at most <code>size</code> terms.
     * <p>
     * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
     * <code>size</code>, then it will be used instead.
     */
    public TopTermsBoostOnlyBooleanQueryRewrite(int size) {
      super(size);
    }

    /** Returns {@link BooleanQuery#getMaxClauseCount()} as the upper bound on collected terms. */
    @Override
    protected int getMaxSize() {
      return BooleanQuery.getMaxClauseCount();
    }

    /** Creates the empty top-level query via {@code new BooleanQuery(true)} (coord disabled). */
    @Override
    protected BooleanQuery getTopLevelQuery() {
      return new BooleanQuery(true);
    }

    /**
     * Adds a {@link ConstantScoreQuery} wrapping a {@link TermQuery} for
     * {@code term}, boosted by {@code boost}, as a SHOULD clause.
     */
    @Override
    protected void addClause(BooleanQuery topLevel, Term term, float boost) {
      final Query q = new ConstantScoreQuery(new TermQuery(term));
      q.setBoost(boost);
      topLevel.add(q, BooleanClause.Occur.SHOULD);
    }
  }

  /** A rewrite method that tries to pick the best
   *  constant-score rewrite method based on term and
   *  document counts from the query.  If both the number of
   *  terms and documents is small enough, then {@link
   *  #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used.
   *  Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is
   *  used.
   */
  public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {}

  /** Read-only default instance of {@link
   *  ConstantScoreAutoRewrite}, with {@link
   *  ConstantScoreAutoRewrite#setTermCountCutoff} set to
   *  {@link
   *  ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
   *  and {@link
   *  ConstantScoreAutoRewrite#setDocCountPercent} set to
   *  {@link
   *  ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.
   *  Note that you cannot alter the configuration of this
   *  instance; you'll need to create a private instance
   *  instead. */
  public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() {
    @Override
    public void setTermCountCutoff(int count) {
      throw new UnsupportedOperationException("Please create a private instance");
    }

    @Override
    public void setDocCountPercent(double percent) {
      throw new UnsupportedOperationException("Please create a private instance");
    }

    // Make sure we are still a singleton even after deserializing
    protected Object readResolve() {
      return CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
    }
  };

  /**
   * Constructs a query matching terms that cannot be represented with a single
   * Term.
   */
  public MultiTermQuery() {
  }

  /** Construct the enumeration to be used, expanding the pattern term. */
  protected abstract FilteredTermEnum getEnum(IndexReader reader)
      throws IOException;

  /**
   * Expert: Return the number of unique terms visited during execution of the query.
   * If there are many of them, you may consider using another query type
   * or reduce your total term count in index.
   * <p>This method is not thread safe, be sure to only call it when no query is running!
   * If you re-use the same query instance for another
   * search, be sure to first reset the term counter
   * with {@link #clearTotalNumberOfTerms}.
   * <p>On single-segment indexes / no MultiReaders, you get the correct number of
   * unique terms for the whole index. Use this number to compare different queries.
   * For multi-segment indexes this number can also be achieved in
   * non-constant-score mode. In constant-score mode you get the total number of
   * terms seeked for all segments / sub-readers.
   * @see #clearTotalNumberOfTerms
   * @deprecated Don't use this method, as it's not thread safe and useless.
   */
  @Deprecated
  public int getTotalNumberOfTerms() {
    return numberOfTerms;
  }

  /**
   * Expert: Resets the counting of unique terms.
   * Do this before executing the query/filter.
   * @see #getTotalNumberOfTerms
   * @deprecated Don't use this method, as it's not thread safe and useless.
   */
  @Deprecated
  public void clearTotalNumberOfTerms() {
    numberOfTerms = 0;
  }

  /**
   * Adds {@code inc} to the term counter reported by
   * {@link #getTotalNumberOfTerms}.
   * @deprecated Don't use this method, as it's not thread safe and useless.
   */
  @Deprecated
  protected void incTotalNumberOfTerms(int inc) {
    numberOfTerms += inc;
  }

  /**
   * Delegates to the configured {@link RewriteMethod}.
   * To rewrite to a simpler form, instead return a simpler
   * enum from {@link #getEnum(IndexReader)}.  For example,
   * to rewrite to a single term, return a {@link SingleTermEnum}.
   */
  @Override
  public final Query rewrite(IndexReader reader) throws IOException {
    return rewriteMethod.rewrite(reader, this);
  }

  /**
   * Returns the rewrite method currently configured for this query.
   * @see #setRewriteMethod
   */
  public RewriteMethod getRewriteMethod() {
    return rewriteMethod;
  }

  /**
   * Sets the rewrite method to be used when executing the
   * query.  You can use one of the core rewrite methods defined
   * on this class, or implement your own subclass of
   * {@link RewriteMethod}.
   *
   * @param method the rewrite method to use; {@code null} is not rejected
   *        here, but {@link #rewrite(IndexReader)} dereferences the value and
   *        will fail with a {@link NullPointerException} if it is {@code null}
   */
  public void setRewriteMethod(RewriteMethod method) {
    rewriteMethod = method;
  }

  /**
   * Combines the boost and the rewrite method. A {@code null} rewrite method
   * contributes 0 rather than throwing, so the query stays usable as a hash key.
   */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + Float.floatToIntBits(getBoost());
    result = prime * result + (rewriteMethod == null ? 0 : rewriteMethod.hashCode());
    return result;
  }

  /**
   * Two multi-term queries are equal when they have the same runtime class,
   * the same boost (compared bitwise) and equal rewrite methods. Two
   * {@code null} rewrite methods are considered equal.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    MultiTermQuery other = (MultiTermQuery) obj;
    if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost())) {
      return false;
    }
    // Null-safe comparison: setRewriteMethod does not reject null.
    if (rewriteMethod == null) {
      return other.rewriteMethod == null;
    }
    return rewriteMethod.equals(other.rewriteMethod);
  }

}