1 package org.apache.lucene.search;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
21 import org.apache.lucene.util.PriorityQueue;
24 * A base class for all collectors that return a {@link TopDocs} output. This
25 * collector allows easy extension by providing a single constructor which
26 * accepts a {@link PriorityQueue} as well as protected members for that
27 * priority queue and a counter of the number of total hits.<br>
28 * Extending classes can override any of the methods to provide their own
29 * implementation, as well as avoid the use of the priority queue entirely by
30 * passing null to {@link #TopDocsCollector(PriorityQueue)}. In that case
31 * however, you might want to consider overriding all methods, in order to avoid
32 * a NullPointerException.
34 public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector {
36 // This is used in case topDocs() is called with illegal parameters, or there
37 // simply aren't (enough) results.
38 protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
41 * The priority queue which holds the top documents. Note that different
42 * implementations of PriorityQueue give different meaning to 'top documents'.
43 * HitQueue for example aggregates the top scoring documents, while other PQ
44 * implementations may hold documents sorted by other criteria.
46 protected PriorityQueue<T> pq;
48 /** The total number of documents that the collector encountered. */
49 protected int totalHits;
51 protected TopDocsCollector(PriorityQueue<T> pq) {
56 * Populates the results array with the ScoreDoc instances. This can be
57 * overridden in case a different ScoreDoc type should be returned.
59 protected void populateResults(ScoreDoc[] results, int howMany) {
60 for (int i = howMany - 1; i >= 0; i--) {
61 results[i] = pq.pop();
66 * Returns a {@link TopDocs} instance containing the given results. If
67 * <code>results</code> is null it means there are no results to return,
68 * either because there were 0 calls to collect() or because the arguments to
69 * topDocs were invalid.
71 protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
72 return results == null ? EMPTY_TOPDOCS : new TopDocs(totalHits, results);
75 /** The total number of documents that matched this query. */
76 public int getTotalHits() {
80 /** Returns the top docs that were collected by this collector. */
81 public TopDocs topDocs() {
82 // In case pq was populated with sentinel values, there might be less
83 // results than pq.size(). Therefore return all results until either
84 // pq.size() or totalHits.
85 return topDocs(0, totalHits < pq.size() ? totalHits : pq.size());
89 * Returns the documents in the rage [start .. pq.size()) that were collected
90 * by this collector. Note that if start >= pq.size(), an empty TopDocs is
92 * This method is convenient to call if the application always asks for the
93 * last results, starting from the last 'page'.<br>
94 * <b>NOTE:</b> you cannot call this method more than once for each search
95 * execution. If you need to call it more than once, passing each time a
96 * different <code>start</code>, you should call {@link #topDocs()} and work
97 * with the returned {@link TopDocs} object, which will contain all the
98 * results this search execution collected.
100 public TopDocs topDocs(int start) {
101 // In case pq was populated with sentinel values, there might be less
102 // results than pq.size(). Therefore return all results until either
103 // pq.size() or totalHits.
104 return topDocs(start, totalHits < pq.size() ? totalHits : pq.size());
108 * Returns the documents in the rage [start .. start+howMany) that were
109 * collected by this collector. Note that if start >= pq.size(), an empty
110 * TopDocs is returned, and if pq.size() - start < howMany, then only the
111 * available documents in [start .. pq.size()) are returned.<br>
112 * This method is useful to call in case pagination of search results is
113 * allowed by the search application, as well as it attempts to optimize the
114 * memory used by allocating only as much as requested by howMany.<br>
115 * <b>NOTE:</b> you cannot call this method more than once for each search
116 * execution. If you need to call it more than once, passing each time a
117 * different range, you should call {@link #topDocs()} and work with the
118 * returned {@link TopDocs} object, which will contain all the results this
119 * search execution collected.
121 public TopDocs topDocs(int start, int howMany) {
123 // In case pq was populated with sentinel values, there might be less
124 // results than pq.size(). Therefore return all results until either
125 // pq.size() or totalHits.
126 int size = totalHits < pq.size() ? totalHits : pq.size();
128 // Don't bother to throw an exception, just return an empty TopDocs in case
129 // the parameters are invalid or out of range.
130 if (start < 0 || start >= size || howMany <= 0) {
131 return newTopDocs(null, start);
134 // We know that start < pqsize, so just fix howMany.
135 howMany = Math.min(size - start, howMany);
136 ScoreDoc[] results = new ScoreDoc[howMany];
138 // pq's pop() returns the 'least' element in the queue, therefore need
139 // to discard the first ones, until we reach the requested range.
140 // Note that this loop will usually not be executed, since the common usage
141 // should be that the caller asks for the last howMany results. However it's
142 // needed here for completeness.
143 for (int i = pq.size() - start - howMany; i > 0; i--) { pq.pop(); }
145 // Get the requested results from pq.
146 populateResults(results, howMany);
148 return newTopDocs(results, start);