1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 import org.apache.lucene.index.IndexReader;
25 * A {@link Collector} implementation that collects the top-scoring hits,
26 * returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to
27 * implement {@link TopDocs}-based search. Hits are sorted by score descending
28 * and then (when the scores are tied) docID ascending. When you create an
29 * instance of this collector you should know in advance whether documents are
30 * going to be collected in docID order or not.
32 * <p><b>NOTE</b>: The values {@link Float#NaN} and
33 * {@link Float#NEGATIVE_INFINITY} are not valid scores. This
34 * collector will not properly collect hits with such
37 public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
39 // Assumes docs are scored in order.
40 private static class InOrderTopScoreDocCollector extends TopScoreDocCollector {
41 private InOrderTopScoreDocCollector(int numHits) {
46 public void collect(int doc) throws IOException {
47 float score = scorer.score();
49 // This collector cannot handle these scores:
50 assert score != Float.NEGATIVE_INFINITY;
51 assert !Float.isNaN(score);
54 if (score <= pqTop.score) {
55 // Since docs are returned in-order (i.e., increasing doc Id), a document
56 // with equal score to pqTop.score cannot compete since HitQueue favors
57 // documents with lower doc Ids. Therefore reject those docs too.
60 pqTop.doc = doc + docBase;
62 pqTop = pq.updateTop();
66 public boolean acceptsDocsOutOfOrder() {
71 // Assumes docs are scored in order.
72 private static class InOrderPagingScoreDocCollector extends TopScoreDocCollector {
73 private final ScoreDoc after;
74 // this is always after.doc - docBase, to save an add when score == after.score
76 private int collectedHits;
78 private InOrderPagingScoreDocCollector(ScoreDoc after, int numHits) {
84 public void collect(int doc) throws IOException {
85 float score = scorer.score();
87 // This collector cannot handle these scores:
88 assert score != Float.NEGATIVE_INFINITY;
89 assert !Float.isNaN(score);
93 if (score > after.score || (score == after.score && doc <= afterDoc)) {
94 // hit was collected on a previous page
98 if (score <= pqTop.score) {
99 // Since docs are returned in-order (i.e., increasing doc Id), a document
100 // with equal score to pqTop.score cannot compete since HitQueue favors
101 // documents with lower doc Ids. Therefore reject those docs too.
105 pqTop.doc = doc + docBase;
107 pqTop = pq.updateTop();
111 public boolean acceptsDocsOutOfOrder() {
116 public void setNextReader(IndexReader reader, int base) {
117 super.setNextReader(reader, base);
118 afterDoc = after.doc - docBase;
122 protected int topDocsSize() {
123 return collectedHits < pq.size() ? collectedHits : pq.size();
127 protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
128 return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results);
132 // Assumes docs are scored out of order.
133 private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector {
134 private OutOfOrderTopScoreDocCollector(int numHits) {
139 public void collect(int doc) throws IOException {
140 float score = scorer.score();
142 // This collector cannot handle NaN
143 assert !Float.isNaN(score);
146 if (score < pqTop.score) {
147 // Doesn't compete w/ bottom entry in queue
151 if (score == pqTop.score && doc > pqTop.doc) {
152 // Break tie in score by doc ID:
157 pqTop = pq.updateTop();
161 public boolean acceptsDocsOutOfOrder() {
166 // Assumes docs are scored out of order.
167 private static class OutOfOrderPagingScoreDocCollector extends TopScoreDocCollector {
168 private final ScoreDoc after;
169 // this is always after.doc - docBase, to save an add when score == after.score
170 private int afterDoc;
171 private int collectedHits;
173 private OutOfOrderPagingScoreDocCollector(ScoreDoc after, int numHits) {
179 public void collect(int doc) throws IOException {
180 float score = scorer.score();
182 // This collector cannot handle NaN
183 assert !Float.isNaN(score);
186 if (score > after.score || (score == after.score && doc <= afterDoc)) {
187 // hit was collected on a previous page
190 if (score < pqTop.score) {
191 // Doesn't compete w/ bottom entry in queue
195 if (score == pqTop.score && doc > pqTop.doc) {
196 // Break tie in score by doc ID:
202 pqTop = pq.updateTop();
206 public boolean acceptsDocsOutOfOrder() {
211 public void setNextReader(IndexReader reader, int base) {
212 super.setNextReader(reader, base);
213 afterDoc = after.doc - docBase;
217 protected int topDocsSize() {
218 return collectedHits < pq.size() ? collectedHits : pq.size();
222 protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
223 return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results);
228 * Creates a new {@link TopScoreDocCollector} given the number of hits to
229 * collect and whether documents are scored in order by the input
230 * {@link Scorer} to {@link #setScorer(Scorer)}.
232 * <p><b>NOTE</b>: The instances returned by this method
233 * pre-allocate a full array of length
234 * <code>numHits</code>, and fill the array with sentinel
237 public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) {
238 return create(numHits, null, docsScoredInOrder);
242 * Creates a new {@link TopScoreDocCollector} given the number of hits to
243 * collect, the bottom of the previous page, and whether documents are scored in order by the input
244 * {@link Scorer} to {@link #setScorer(Scorer)}.
246 * <p><b>NOTE</b>: The instances returned by this method
247 * pre-allocate a full array of length
248 * <code>numHits</code>, and fill the array with sentinel
251 public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) {
254 throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
257 if (docsScoredInOrder) {
259 ? new InOrderTopScoreDocCollector(numHits)
260 : new InOrderPagingScoreDocCollector(after, numHits);
263 ? new OutOfOrderTopScoreDocCollector(numHits)
264 : new OutOfOrderPagingScoreDocCollector(after, numHits);
273 // prevents instantiation
274 private TopScoreDocCollector(int numHits) {
275 super(new HitQueue(numHits, true));
276 // HitQueue implements getSentinelObject to return a ScoreDoc, so we know
277 // that at this point top() is already initialized.
282 protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
283 if (results == null) {
284 return EMPTY_TOPDOCS;
287 // We need to compute maxScore in order to set it in TopDocs. If start == 0,
288 // it means the largest element is already in results, use its score as
289 // maxScore. Otherwise pop everything else, until the largest element is
290 // extracted and use its score as maxScore.
291 float maxScore = Float.NaN;
293 maxScore = results[0].score;
295 for (int i = pq.size(); i > 1; i--) { pq.pop(); }
296 maxScore = pq.pop().score;
299 return new TopDocs(totalHits, results, maxScore);
303 public void setNextReader(IndexReader reader, int base) {
308 public void setScorer(Scorer scorer) throws IOException {
309 this.scorer = scorer;