--- /dev/null
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * A rewrite method that picks a strategy at rewrite time: terms produced
+ * by the MultiTermQuery are collected into a constant-score BooleanQuery
+ * of SHOULD TermQuery clauses, unless either the term-count cutoff or the
+ * visited-doc-count cutoff is hit first, in which case rewriting falls
+ * back to {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}.
+ */
+class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
+
+ // Defaults derived from rough tests with a 20.0 million
+ // doc Wikipedia index. With more than 350 terms in the
+ // query, the filter method is fastest:
+ // NOTE(review): deliberately public and non-final so callers can tune the
+ // process-wide default — confirm this mutability is intended.
+ public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
+
+ // If the query will hit more than 1 in 1000 of the docs
+ // in the index (0.1%), the filter method is fastest:
+ public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
+
+ // Per-instance cutoffs, seeded from the shared defaults above.
+ private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
+ private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
+
+ /** If the number of terms in this query is equal to or
+ * larger than this setting then {@link
+ * #CONSTANT_SCORE_FILTER_REWRITE} is used. */
+ public void setTermCountCutoff(int count) {
+ termCountCutoff = count;
+ }
+
+ /** @see #setTermCountCutoff */
+ public int getTermCountCutoff() {
+ return termCountCutoff;
+ }
+
+ /** If the number of documents to be visited in the
+ * postings exceeds this specified percentage of the
+ * maxDoc() for the index, then {@link
+ * #CONSTANT_SCORE_FILTER_REWRITE} is used.
+ * @param percent 0.0 to 100.0 */
+ public void setDocCountPercent(double percent) {
+ docCountPercent = percent;
+ }
+
+ /** @see #setDocCountPercent */
+ public double getDocCountPercent() {
+ return docCountPercent;
+ }
+
+ // Container the collected terms are rewritten into. NOTE(review): the
+ // 'true' ctor argument presumably disables coord scoring — confirm
+ // against BooleanQuery's (boolean) constructor.
+ @Override
+ protected BooleanQuery getTopLevelQuery() {
+ return new BooleanQuery(true);
+ }
+
+ // Each collected term becomes an optional (SHOULD) clause. The boost is
+ // ignored here because the finished query is wrapped in a
+ // ConstantScoreQuery below, which strips per-clause scores anyway.
+ @Override
+ protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) {
+ topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
+ }
+
+ @Override
+ public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
+
+ // Get the enum and start visiting terms. If we
+ // exhaust the enum before hitting either of the
+ // cutoffs, we use ConstantBooleanQueryRewrite; else,
+ // ConstantFilterRewrite:
+ final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
+ // Never collect more clauses than BooleanQuery would accept.
+ final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
+
+ final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
+ collectTerms(reader, query, col);
+
+ if (col.hasCutOff) {
+ // Too many terms or too many matching docs: delegate to the
+ // filter-based rewrite instead of building a huge BooleanQuery.
+ return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
+ } else {
+ final Query result;
+ if (col.pendingTerms.isEmpty()) {
+ // No terms matched: an empty BooleanQuery matches nothing.
+ // NOTE(review): this branch does not copy query.getBoost() —
+ // harmless since the result matches no documents.
+ result = getTopLevelQuery();
+ } else {
+ BooleanQuery bq = getTopLevelQuery();
+ for(Term term : col.pendingTerms) {
+ addClause(bq, term, 1.0f);
+ }
+ // Strip scores
+ result = new ConstantScoreQuery(bq);
+ result.setBoost(query.getBoost());
+ }
+ query.incTotalNumberOfTerms(col.pendingTerms.size());
+ return result;
+ }
+ }
+
+ // Accumulates terms until either the term-count limit or the summed
+ // docFreq cutoff is reached, then flags hasCutOff and stops.
+ private static final class CutOffTermCollector implements TermCollector {
+ CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
+ this.reader = reader;
+ this.docCountCutoff = docCountCutoff;
+ this.termCountLimit = termCountLimit;
+ }
+
+ // NOTE(review): returning false presumably aborts term collection —
+ // confirm against the TermCollector contract (declared outside this file).
+ public boolean collect(Term t, float boost) throws IOException {
+ pendingTerms.add(t);
+ // Loading the TermInfo from the terms dict here
+ // should not be costly, because 1) the
+ // query/filter will load the TermInfo when it
+ // runs, and 2) the terms dict has a cache:
+ docVisitCount += reader.docFreq(t);
+ if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
+ hasCutOff = true;
+ return false;
+ }
+ return true;
+ }
+
+ // Running sum of docFreq over all collected terms.
+ int docVisitCount = 0;
+ // Set once either cutoff is crossed; checked by rewrite() above.
+ boolean hasCutOff = false;
+
+ final IndexReader reader;
+ final int docCountCutoff, termCountLimit;
+ final ArrayList<Term> pendingTerms = new ArrayList<Term>();
+ }
+
+ // Mixes both cutoffs. The narrowing cast keeps only the low 32 bits of
+ // the combined long; equal instances still hash equally, so the
+ // equals/hashCode contract holds.
+ @Override
+ public int hashCode() {
+ final int prime = 1279;
+ return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
+ }
+
+ // Equality is by exact cutoff settings; doubleToLongBits is used so that
+ // comparison is bitwise (consistent with hashCode above).
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+
+ ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
+ if (other.termCountCutoff != termCountCutoff) {
+ return false;
+ }
+
+ if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
+ return false;
+ }
+
+ return true;
+ }
+}