1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.List;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.search.BooleanClause.Occur;
26 /* Description from Doug Cutting (excerpted from
29 * BooleanScorer uses a ~16k array to score windows of
30 * docs. So it scores docs 0-16k first, then docs 16-32k,
31 * etc. For each window it iterates through all query terms
32 * and accumulates a score in table[doc%16k]. It also stores
33 * in the table a bitmask representing which terms
34 * contributed to the score. Non-zero scores are chained in
35 * a linked list. At the end of scoring each window it then
36 * iterates through the linked list and, if the bitmask
37 * matches the boolean constraints, collects a hit. For
38 * boolean queries with lots of frequent terms this can be
39 * much faster, since it does not need to update a priority
40 * queue for each posting, instead performing constant-time
41 * operations per posting. The only downside is that it
42 * results in hits being delivered out-of-order within the
43 * window, which means it cannot be nested within other
44 * scorers. But it works well as a top-level scorer.
46 * The new BooleanScorer2 implementation instead works by
47 * merging priority queues of postings, albeit with some
48 * clever tricks. For example, a pure conjunction (all terms
49 * required) does not require a priority queue. Instead it
50 * sorts the posting streams at the start, then repeatedly
51 * skips the first to to the last. If the first ever equals
52 * the last, then there's a hit. When some terms are
53 * required and some terms are optional, the conjunction can
54 * be evaluated first, then the optional terms can all skip
55 * to the match and be added to the score. Thus the
56 * conjunction can reduce the number of priority queue
57 * updates for the optional terms. */
59 final class BooleanScorer extends Scorer {
61 private static final class BooleanScorerCollector extends Collector {
62 private BucketTable bucketTable;
64 private Scorer scorer;
66 public BooleanScorerCollector(int mask, BucketTable bucketTable) {
68 this.bucketTable = bucketTable;
72 public void collect(final int doc) throws IOException {
73 final BucketTable table = bucketTable;
74 final int i = doc & BucketTable.MASK;
75 Bucket bucket = table.buckets[i];
77 table.buckets[i] = bucket = new Bucket();
79 if (bucket.doc != doc) { // invalid bucket
80 bucket.doc = doc; // set doc
81 bucket.score = scorer.score(); // initialize score
82 bucket.bits = mask; // initialize mask
83 bucket.coord = 1; // initialize coord
85 bucket.next = table.first; // push onto valid list
87 } else { // valid bucket
88 bucket.score += scorer.score(); // increment score
89 bucket.bits |= mask; // add bits in mask
90 bucket.coord++; // increment coord
95 public void setNextReader(IndexReader reader, int docBase) {
96 // not needed by this implementation
100 public void setScorer(Scorer scorer) throws IOException {
101 this.scorer = scorer;
105 public boolean acceptsDocsOutOfOrder() {
111 // An internal class which is used in score(Collector, int) for setting the
112 // current score. This is required since Collector exposes a setScorer method
113 // and implementations that need the score will call scorer.score().
114 // Therefore the only methods that are implemented are score() and doc().
115 private static final class BucketScorer extends Scorer {
118 int doc = NO_MORE_DOCS;
121 public BucketScorer(Weight weight) { super(weight); }
124 public int advance(int target) throws IOException { return NO_MORE_DOCS; }
127 public int docID() { return doc; }
130 public float freq() { return freq; }
133 public int nextDoc() throws IOException { return NO_MORE_DOCS; }
136 public float score() throws IOException { return score; }
140 static final class Bucket {
141 int doc = -1; // tells if bucket is valid
142 float score; // incremental score
143 int bits; // used for bool constraints
144 int coord; // count of terms in score
145 Bucket next; // next valid bucket
148 /** A simple hash table of document scores within a range. */
149 static final class BucketTable {
150 public static final int SIZE = 1 << 11;
151 public static final int MASK = SIZE - 1;
153 final Bucket[] buckets = new Bucket[SIZE];
154 Bucket first = null; // head of valid list
156 public BucketTable() {}
158 public Collector newCollector(int mask) {
159 return new BooleanScorerCollector(mask, this);
162 public int size() { return SIZE; }
165 static final class SubScorer {
166 public Scorer scorer;
167 // TODO: re-enable this if BQ ever sends us required clauses
168 //public boolean required = false;
169 public boolean prohibited = false;
170 public Collector collector;
171 public SubScorer next;
173 public SubScorer(Scorer scorer, boolean required, boolean prohibited,
174 Collector collector, SubScorer next)
177 throw new IllegalArgumentException("this scorer cannot handle required=true");
179 this.scorer = scorer;
180 // TODO: re-enable this if BQ ever sends us required clauses
181 //this.required = required;
182 this.prohibited = prohibited;
183 this.collector = collector;
188 private SubScorer scorers = null;
189 private BucketTable bucketTable = new BucketTable();
190 private final float[] coordFactors;
191 // TODO: re-enable this if BQ ever sends us required clauses
192 //private int requiredMask = 0;
193 private int prohibitedMask = 0;
194 private int nextMask = 1;
195 private final int minNrShouldMatch;
197 private Bucket current;
198 private int doc = -1;
200 BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
201 List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
203 this.minNrShouldMatch = minNrShouldMatch;
205 if (optionalScorers != null && optionalScorers.size() > 0) {
206 for (Scorer scorer : optionalScorers) {
207 if (scorer.nextDoc() != NO_MORE_DOCS) {
208 scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
213 if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
214 for (Scorer scorer : prohibitedScorers) {
216 nextMask = nextMask << 1;
217 prohibitedMask |= mask; // update prohibited mask
218 if (scorer.nextDoc() != NO_MORE_DOCS) {
219 scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
224 coordFactors = new float[optionalScorers.size() + 1];
225 for (int i = 0; i < coordFactors.length; i++) {
226 coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
230 // firstDocID is ignored since nextDoc() initializes 'current'
232 protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
235 BucketScorer bs = new BucketScorer(weight);
236 // The internal loop will set the score and doc before calling collect.
237 collector.setScorer(bs);
239 bucketTable.first = null;
241 while (current != null) { // more queued
243 // check prohibited & required
244 if ((current.bits & prohibitedMask) == 0) {
246 // TODO: re-enable this if BQ ever sends us required
248 //&& (current.bits & requiredMask) == requiredMask) {
250 if (current.doc >= max){
252 current = current.next;
253 tmp.next = bucketTable.first;
254 bucketTable.first = tmp;
258 if (current.coord >= minNrShouldMatch) {
259 bs.score = current.score * coordFactors[current.coord];
260 bs.doc = current.doc;
261 bs.freq = current.coord;
262 collector.collect(current.doc);
266 current = current.next; // pop the queue
269 if (bucketTable.first != null){
270 current = bucketTable.first;
271 bucketTable.first = current.next;
277 end += BucketTable.SIZE;
278 for (SubScorer sub = scorers; sub != null; sub = sub.next) {
279 int subScorerDocID = sub.scorer.docID();
280 if (subScorerDocID != NO_MORE_DOCS) {
281 more |= sub.scorer.score(sub.collector, end, subScorerDocID);
284 current = bucketTable.first;
286 } while (current != null || more);
292 public int advance(int target) throws IOException {
293 throw new UnsupportedOperationException();
302 public int nextDoc() throws IOException {
305 while (bucketTable.first != null) { // more queued
306 current = bucketTable.first;
307 bucketTable.first = current.next; // pop the queue
309 // check prohibited & required, and minNrShouldMatch
310 if ((current.bits & prohibitedMask) == 0 &&
311 current.coord >= minNrShouldMatch) {
312 // TODO: re-enable this if BQ ever sends us required clauses
313 // (current.bits & requiredMask) == requiredMask &&
314 return doc = current.doc;
320 end += BucketTable.SIZE;
321 for (SubScorer sub = scorers; sub != null; sub = sub.next) {
322 int subScorerDocID = sub.scorer.docID();
323 if (subScorerDocID != NO_MORE_DOCS) {
324 more |= sub.scorer.score(sub.collector, end, subScorerDocID);
327 } while (bucketTable.first != null || more);
329 return doc = NO_MORE_DOCS;
333 public float score() {
334 return current.score * coordFactors[current.coord];
338 public void score(Collector collector) throws IOException {
339 score(collector, Integer.MAX_VALUE, nextDoc());
343 public String toString() {
344 StringBuilder buffer = new StringBuilder();
345 buffer.append("boolean(");
346 for (SubScorer sub = scorers; sub != null; sub = sub.next) {
347 buffer.append(sub.scorer.toString());
351 return buffer.toString();
355 protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
356 super.visitSubScorers(parent, relationship, visitor);
357 final Query q = weight.getQuery();
358 SubScorer sub = scorers;
360 // TODO: re-enable this if BQ ever sends us required
362 //if (sub.required) {
363 //relationship = Occur.MUST;
364 if (!sub.prohibited) {
365 relationship = Occur.SHOULD;
367 // TODO: maybe it's pointless to do this, but, it is
368 // possible the doc may still be collected, eg foo
370 relationship = Occur.MUST_NOT;
372 sub.scorer.visitSubScorers(q, relationship, visitor);