--- /dev/null
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+/* Description from Doug Cutting (excerpted from
+ * LUCENE-1483):
+ *
+ * BooleanScorer uses an array to score windows of
+ * 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
+ * etc. For each window it iterates through all query terms
+ * and accumulates a score in table[doc%2K]. It also stores
+ * in the table a bitmask representing which terms
+ * contributed to the score. Non-zero scores are chained in
+ * a linked list. At the end of scoring each window it then
+ * iterates through the linked list and, if the bitmask
+ * matches the boolean constraints, collects a hit. For
+ * boolean queries with lots of frequent terms this can be
+ * much faster, since it does not need to update a priority
+ * queue for each posting, instead performing constant-time
+ * operations per posting. The only downside is that it
+ * results in hits being delivered out-of-order within the
+ * window, which means it cannot be nested within other
+ * scorers. But it works well as a top-level scorer.
+ *
+ * The new BooleanScorer2 implementation instead works by
+ * merging priority queues of postings, albeit with some
+ * clever tricks. For example, a pure conjunction (all terms
+ * required) does not require a priority queue. Instead it
+ * sorts the posting streams at the start, then repeatedly
+ * skips the first to to the last. If the first ever equals
+ * the last, then there's a hit. When some terms are
+ * required and some terms are optional, the conjunction can
+ * be evaluated first, then the optional terms can all skip
+ * to the match and be added to the score. Thus the
+ * conjunction can reduce the number of priority queue
+ * updates for the optional terms. */
+
+final class BooleanScorer extends Scorer {
+
+ private static final class BooleanScorerCollector extends Collector {
+ private BucketTable bucketTable;
+ private int mask;
+ private Scorer scorer;
+
+ public BooleanScorerCollector(int mask, BucketTable bucketTable) {
+ this.mask = mask;
+ this.bucketTable = bucketTable;
+ }
+
+ @Override
+ public void collect(final int doc) throws IOException {
+ final BucketTable table = bucketTable;
+ final int i = doc & BucketTable.MASK;
+ final Bucket bucket = table.buckets[i];
+
+ if (bucket.doc != doc) { // invalid bucket
+ bucket.doc = doc; // set doc
+ bucket.score = scorer.score(); // initialize score
+ bucket.bits = mask; // initialize mask
+ bucket.coord = 1; // initialize coord
+
+ bucket.next = table.first; // push onto valid list
+ table.first = bucket;
+ } else { // valid bucket
+ bucket.score += scorer.score(); // increment score
+ bucket.bits |= mask; // add bits in mask
+ bucket.coord++; // increment coord
+ }
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) {
+ // not needed by this implementation
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+
+ }
+
+ // An internal class which is used in score(Collector, int) for setting the
+ // current score. This is required since Collector exposes a setScorer method
+ // and implementations that need the score will call scorer.score().
+ // Therefore the only methods that are implemented are score() and doc().
+ private static final class BucketScorer extends Scorer {
+
+ float score;
+ int doc = NO_MORE_DOCS;
+ int freq;
+
+ public BucketScorer(Weight weight) { super(weight); }
+
+ @Override
+ public int advance(int target) throws IOException { return NO_MORE_DOCS; }
+
+ @Override
+ public int docID() { return doc; }
+
+ @Override
+ public float freq() { return freq; }
+
+ @Override
+ public int nextDoc() throws IOException { return NO_MORE_DOCS; }
+
+ @Override
+ public float score() throws IOException { return score; }
+
+ }
+
+ static final class Bucket {
+ int doc = -1; // tells if bucket is valid
+ float score; // incremental score
+ // TODO: break out bool anyProhibited, int
+ // numRequiredMatched; then we can remove 32 limit on
+ // required clauses
+ int bits; // used for bool constraints
+ int coord; // count of terms in score
+ Bucket next; // next valid bucket
+ }
+
+ /** A simple hash table of document scores within a range. */
+ static final class BucketTable {
+ public static final int SIZE = 1 << 11;
+ public static final int MASK = SIZE - 1;
+
+ final Bucket[] buckets = new Bucket[SIZE];
+ Bucket first = null; // head of valid list
+
+ public BucketTable() {
+ // Pre-fill to save the lazy init when collecting
+ // each sub:
+ for(int idx=0;idx<SIZE;idx++) {
+ buckets[idx] = new Bucket();
+ }
+ }
+
+ public Collector newCollector(int mask) {
+ return new BooleanScorerCollector(mask, this);
+ }
+
+ public int size() { return SIZE; }
+ }
+
+ static final class SubScorer {
+ public Scorer scorer;
+ // TODO: re-enable this if BQ ever sends us required clauses
+ //public boolean required = false;
+ public boolean prohibited;
+ public Collector collector;
+ public SubScorer next;
+
+ public SubScorer(Scorer scorer, boolean required, boolean prohibited,
+ Collector collector, SubScorer next)
+ throws IOException {
+ if (required) {
+ throw new IllegalArgumentException("this scorer cannot handle required=true");
+ }
+ this.scorer = scorer;
+ // TODO: re-enable this if BQ ever sends us required clauses
+ //this.required = required;
+ this.prohibited = prohibited;
+ this.collector = collector;
+ this.next = next;
+ }
+ }
+
+ private SubScorer scorers = null;
+ private BucketTable bucketTable = new BucketTable();
+ private final float[] coordFactors;
+ // TODO: re-enable this if BQ ever sends us required clauses
+ //private int requiredMask = 0;
+ private final int minNrShouldMatch;
+ private int end;
+ private Bucket current;
+ private int doc = -1;
+
+ // Any time a prohibited clause matches we set bit 0:
+ private static final int PROHIBITED_MASK = 1;
+
+ BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
+ List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
+ super(weight);
+ this.minNrShouldMatch = minNrShouldMatch;
+
+ if (optionalScorers != null && optionalScorers.size() > 0) {
+ for (Scorer scorer : optionalScorers) {
+ if (scorer.nextDoc() != NO_MORE_DOCS) {
+ scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
+ }
+ }
+ }
+
+ if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
+ for (Scorer scorer : prohibitedScorers) {
+ if (scorer.nextDoc() != NO_MORE_DOCS) {
+ scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
+ }
+ }
+ }
+
+ coordFactors = new float[optionalScorers.size() + 1];
+ for (int i = 0; i < coordFactors.length; i++) {
+ coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
+ }
+ }
+
+ // firstDocID is ignored since nextDoc() initializes 'current'
+ @Override
+ protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
+ // Make sure it's only BooleanScorer that calls us:
+ assert firstDocID == -1;
+ boolean more;
+ Bucket tmp;
+ BucketScorer bs = new BucketScorer(weight);
+
+ // The internal loop will set the score and doc before calling collect.
+ collector.setScorer(bs);
+ do {
+ bucketTable.first = null;
+
+ while (current != null) { // more queued
+
+ // check prohibited & required
+ if ((current.bits & PROHIBITED_MASK) == 0) {
+
+ // TODO: re-enable this if BQ ever sends us required
+ // clauses
+ //&& (current.bits & requiredMask) == requiredMask) {
+
+ // TODO: can we remove this?
+ if (current.doc >= max){
+ tmp = current;
+ current = current.next;
+ tmp.next = bucketTable.first;
+ bucketTable.first = tmp;
+ continue;
+ }
+
+ if (current.coord >= minNrShouldMatch) {
+ bs.score = current.score * coordFactors[current.coord];
+ bs.doc = current.doc;
+ bs.freq = current.coord;
+ collector.collect(current.doc);
+ }
+ }
+
+ current = current.next; // pop the queue
+ }
+
+ if (bucketTable.first != null){
+ current = bucketTable.first;
+ bucketTable.first = current.next;
+ return true;
+ }
+
+ // refill the queue
+ more = false;
+ end += BucketTable.SIZE;
+ for (SubScorer sub = scorers; sub != null; sub = sub.next) {
+ int subScorerDocID = sub.scorer.docID();
+ if (subScorerDocID != NO_MORE_DOCS) {
+ more |= sub.scorer.score(sub.collector, end, subScorerDocID);
+ }
+ }
+ current = bucketTable.first;
+
+ } while (current != null || more);
+
+ return false;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int docID() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public float score() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void score(Collector collector) throws IOException {
+ score(collector, Integer.MAX_VALUE, -1);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append("boolean(");
+ for (SubScorer sub = scorers; sub != null; sub = sub.next) {
+ buffer.append(sub.scorer.toString());
+ buffer.append(" ");
+ }
+ buffer.append(")");
+ return buffer.toString();
+ }
+
+ @Override
+ protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
+ super.visitSubScorers(parent, relationship, visitor);
+ final Query q = weight.getQuery();
+ SubScorer sub = scorers;
+ while(sub != null) {
+ // TODO: re-enable this if BQ ever sends us required
+ //clauses
+ //if (sub.required) {
+ //relationship = Occur.MUST;
+ if (!sub.prohibited) {
+ relationship = Occur.SHOULD;
+ } else {
+ // TODO: maybe it's pointless to do this, but, it is
+ // possible the doc may still be collected, eg foo
+ // OR (bar -fee)
+ relationship = Occur.MUST_NOT;
+ }
+ sub.scorer.visitSubScorers(q, relationship, visitor);
+ sub = sub.next;
+ }
+ }
+
+}