1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.List;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.search.BooleanClause.Occur;
26 /* Description from Doug Cutting (excerpted from
29 * BooleanScorer uses an array to score windows of
30 * 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
31 * etc. For each window it iterates through all query terms
32 * and accumulates a score in table[doc%2K]. It also stores
33 * in the table a bitmask representing which terms
34 * contributed to the score. Non-zero scores are chained in
35 * a linked list. At the end of scoring each window it then
36 * iterates through the linked list and, if the bitmask
37 * matches the boolean constraints, collects a hit. For
38 * boolean queries with lots of frequent terms this can be
39 * much faster, since it does not need to update a priority
40 * queue for each posting, instead performing constant-time
41 * operations per posting. The only downside is that it
42 * results in hits being delivered out-of-order within the
43 * window, which means it cannot be nested within other
44 * scorers. But it works well as a top-level scorer.
46 * The new BooleanScorer2 implementation instead works by
47 * merging priority queues of postings, albeit with some
48 * clever tricks. For example, a pure conjunction (all terms
49 * required) does not require a priority queue. Instead it
50 * sorts the posting streams at the start, then repeatedly
51 * skips the first to to the last. If the first ever equals
52 * the last, then there's a hit. When some terms are
53 * required and some terms are optional, the conjunction can
54 * be evaluated first, then the optional terms can all skip
55 * to the match and be added to the score. Thus the
56 * conjunction can reduce the number of priority queue
57 * updates for the optional terms. */
59 final class BooleanScorer extends Scorer {
61 private static final class BooleanScorerCollector extends Collector {
62 private BucketTable bucketTable;
64 private Scorer scorer;
66 public BooleanScorerCollector(int mask, BucketTable bucketTable) {
68 this.bucketTable = bucketTable;
72 public void collect(final int doc) throws IOException {
73 final BucketTable table = bucketTable;
74 final int i = doc & BucketTable.MASK;
75 final Bucket bucket = table.buckets[i];
77 if (bucket.doc != doc) { // invalid bucket
78 bucket.doc = doc; // set doc
79 bucket.score = scorer.score(); // initialize score
80 bucket.bits = mask; // initialize mask
81 bucket.coord = 1; // initialize coord
83 bucket.next = table.first; // push onto valid list
85 } else { // valid bucket
86 bucket.score += scorer.score(); // increment score
87 bucket.bits |= mask; // add bits in mask
88 bucket.coord++; // increment coord
93 public void setNextReader(IndexReader reader, int docBase) {
94 // not needed by this implementation
98 public void setScorer(Scorer scorer) throws IOException {
103 public boolean acceptsDocsOutOfOrder() {
109 // An internal class which is used in score(Collector, int) for setting the
110 // current score. This is required since Collector exposes a setScorer method
111 // and implementations that need the score will call scorer.score().
112 // Therefore the only methods that are implemented are score() and doc().
113 private static final class BucketScorer extends Scorer {
116 int doc = NO_MORE_DOCS;
119 public BucketScorer(Weight weight) { super(weight); }
122 public int advance(int target) throws IOException { return NO_MORE_DOCS; }
125 public int docID() { return doc; }
128 public float freq() { return freq; }
131 public int nextDoc() throws IOException { return NO_MORE_DOCS; }
134 public float score() throws IOException { return score; }
138 static final class Bucket {
139 int doc = -1; // tells if bucket is valid
140 float score; // incremental score
141 // TODO: break out bool anyProhibited, int
142 // numRequiredMatched; then we can remove 32 limit on
144 int bits; // used for bool constraints
145 int coord; // count of terms in score
146 Bucket next; // next valid bucket
149 /** A simple hash table of document scores within a range. */
150 static final class BucketTable {
151 public static final int SIZE = 1 << 11;
152 public static final int MASK = SIZE - 1;
154 final Bucket[] buckets = new Bucket[SIZE];
155 Bucket first = null; // head of valid list
157 public BucketTable() {
158 // Pre-fill to save the lazy init when collecting
160 for(int idx=0;idx<SIZE;idx++) {
161 buckets[idx] = new Bucket();
165 public Collector newCollector(int mask) {
166 return new BooleanScorerCollector(mask, this);
169 public int size() { return SIZE; }
172 static final class SubScorer {
173 public Scorer scorer;
174 // TODO: re-enable this if BQ ever sends us required clauses
175 //public boolean required = false;
176 public boolean prohibited;
177 public Collector collector;
178 public SubScorer next;
180 public SubScorer(Scorer scorer, boolean required, boolean prohibited,
181 Collector collector, SubScorer next)
184 throw new IllegalArgumentException("this scorer cannot handle required=true");
186 this.scorer = scorer;
187 // TODO: re-enable this if BQ ever sends us required clauses
188 //this.required = required;
189 this.prohibited = prohibited;
190 this.collector = collector;
195 private SubScorer scorers = null;
196 private BucketTable bucketTable = new BucketTable();
197 private final float[] coordFactors;
198 // TODO: re-enable this if BQ ever sends us required clauses
199 //private int requiredMask = 0;
200 private final int minNrShouldMatch;
202 private Bucket current;
203 private int doc = -1;
205 // Any time a prohibited clause matches we set bit 0:
206 private static final int PROHIBITED_MASK = 1;
208 BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
209 List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
211 this.minNrShouldMatch = minNrShouldMatch;
213 if (optionalScorers != null && optionalScorers.size() > 0) {
214 for (Scorer scorer : optionalScorers) {
215 if (scorer.nextDoc() != NO_MORE_DOCS) {
216 scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
221 if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
222 for (Scorer scorer : prohibitedScorers) {
223 if (scorer.nextDoc() != NO_MORE_DOCS) {
224 scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
229 coordFactors = new float[optionalScorers.size() + 1];
230 for (int i = 0; i < coordFactors.length; i++) {
231 coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
235 // firstDocID is ignored since nextDoc() initializes 'current'
237 protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
238 // Make sure it's only BooleanScorer that calls us:
239 assert firstDocID == -1;
242 BucketScorer bs = new BucketScorer(weight);
244 // The internal loop will set the score and doc before calling collect.
245 collector.setScorer(bs);
247 bucketTable.first = null;
249 while (current != null) { // more queued
251 // check prohibited & required
252 if ((current.bits & PROHIBITED_MASK) == 0) {
254 // TODO: re-enable this if BQ ever sends us required
256 //&& (current.bits & requiredMask) == requiredMask) {
258 // TODO: can we remove this?
259 if (current.doc >= max){
261 current = current.next;
262 tmp.next = bucketTable.first;
263 bucketTable.first = tmp;
267 if (current.coord >= minNrShouldMatch) {
268 bs.score = current.score * coordFactors[current.coord];
269 bs.doc = current.doc;
270 bs.freq = current.coord;
271 collector.collect(current.doc);
275 current = current.next; // pop the queue
278 if (bucketTable.first != null){
279 current = bucketTable.first;
280 bucketTable.first = current.next;
286 end += BucketTable.SIZE;
287 for (SubScorer sub = scorers; sub != null; sub = sub.next) {
288 int subScorerDocID = sub.scorer.docID();
289 if (subScorerDocID != NO_MORE_DOCS) {
290 more |= sub.scorer.score(sub.collector, end, subScorerDocID);
293 current = bucketTable.first;
295 } while (current != null || more);
301 public int advance(int target) throws IOException {
302 throw new UnsupportedOperationException();
307 throw new UnsupportedOperationException();
311 public int nextDoc() throws IOException {
312 throw new UnsupportedOperationException();
316 public float score() {
317 throw new UnsupportedOperationException();
321 public void score(Collector collector) throws IOException {
322 score(collector, Integer.MAX_VALUE, -1);
326 public String toString() {
327 StringBuilder buffer = new StringBuilder();
328 buffer.append("boolean(");
329 for (SubScorer sub = scorers; sub != null; sub = sub.next) {
330 buffer.append(sub.scorer.toString());
334 return buffer.toString();
338 protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor<Query, Query, Scorer> visitor) {
339 super.visitSubScorers(parent, relationship, visitor);
340 final Query q = weight.getQuery();
341 SubScorer sub = scorers;
343 // TODO: re-enable this if BQ ever sends us required
345 //if (sub.required) {
346 //relationship = Occur.MUST;
347 if (!sub.prohibited) {
348 relationship = Occur.SHOULD;
350 // TODO: maybe it's pointless to do this, but, it is
351 // possible the doc may still be collected, eg foo
353 relationship = Occur.MUST_NOT;
355 sub.scorer.visitSubScorers(q, relationship, visitor);