lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21
  22 import org.apache.lucene.index.IndexReader;
  23
  24 /**
  25  * A {@link Collector} implementation that collects the top-scoring hits,
  26  * returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to
  27  * implement {@link TopDocs}-based search. Hits are sorted by score descending
  28  * and then (when the scores are tied) docID ascending. When you create an
  29  * instance of this collector you should know in advance whether documents are
  30  * going to be collected in doc Id order or not.
  31  *
  32  * <p><b>NOTE</b>: The values {@link Float#NaN} and
  33  * {Float#NEGATIVE_INFINITY} are not valid scores.  This
  34  * collector will not properly collect hits with such
  35  * scores.
  36  */
  37 public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
  38
  39   // Assumes docs are scored in order.
  40   private static class InOrderTopScoreDocCollector extends TopScoreDocCollector {
  41     private InOrderTopScoreDocCollector(int numHits) {
  42       super(numHits);
  43     }
  44
  45     @Override
  46     public void collect(int doc) throws IOException {
  47       float score = scorer.score();
  48
  49       // This collector cannot handle these scores:
  50       assert score != Float.NEGATIVE_INFINITY;
  51       assert !Float.isNaN(score);
  52
  53       totalHits++;
  54       if (score <= pqTop.score) {
  55         // Since docs are returned in-order (i.e., increasing doc Id), a document
  56         // with equal score to pqTop.score cannot compete since HitQueue favors
  57         // documents with lower doc Ids. Therefore reject those docs too.
  58         return;
  59       }
  60       pqTop.doc = doc + docBase;
  61       pqTop.score = score;
  62       pqTop = pq.updateTop();
  63     }
  64
  65     @Override
  66     public boolean acceptsDocsOutOfOrder() {
  67       return false;
  68     }
  69   }
  70
  71   // Assumes docs are scored in order.
  72   private static class InOrderPagingScoreDocCollector extends TopScoreDocCollector {
  73     private final ScoreDoc after;
  74     // this is always after.doc - docBase, to save an add when score == after.score
  75     private int afterDoc;
  76     private int collectedHits;
  77
  78     private InOrderPagingScoreDocCollector(ScoreDoc after, int numHits) {
  79       super(numHits);
  80       this.after = after;
  81     }
  82
  83     @Override
  84     public void collect(int doc) throws IOException {
  85       float score = scorer.score();
  86
  87       // This collector cannot handle these scores:
  88       assert score != Float.NEGATIVE_INFINITY;
  89       assert !Float.isNaN(score);
  90
  91       totalHits++;
  92
  93       if (score > after.score || (score == after.score && doc <= afterDoc)) {
  94         // hit was collected on a previous page
  95         return;
  96       }
  97
  98       if (score <= pqTop.score) {
  99         // Since docs are returned in-order (i.e., increasing doc Id), a document
 100         // with equal score to pqTop.score cannot compete since HitQueue favors
 101         // documents with lower doc Ids. Therefore reject those docs too.
 102         return;
 103       }
 104       collectedHits++;
 105       pqTop.doc = doc + docBase;
 106       pqTop.score = score;
 107       pqTop = pq.updateTop();
 108     }
 109
 110     @Override
 111     public boolean acceptsDocsOutOfOrder() {
 112       return false;
 113     }
 114
 115     @Override
 116     public void setNextReader(IndexReader reader, int base) {
 117       super.setNextReader(reader, base);
 118       afterDoc = after.doc - docBase;
 119     }
 120
 121     @Override
 122     protected int topDocsSize() {
 123       return collectedHits < pq.size() ? collectedHits : pq.size();
 124     }
 125
 126     @Override
 127     protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
 128       return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results);
 129     }
 130   }
 131
 132   // Assumes docs are scored out of order.
 133   private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector {
 134     private OutOfOrderTopScoreDocCollector(int numHits) {
 135       super(numHits);
 136     }
 137
 138     @Override
 139     public void collect(int doc) throws IOException {
 140       float score = scorer.score();
 141
 142       // This collector cannot handle NaN
 143       assert !Float.isNaN(score);
 144
 145       totalHits++;
 146       if (score < pqTop.score) {
 147         // Doesn't compete w/ bottom entry in queue
 148         return;
 149       }
 150       doc += docBase;
 151       if (score == pqTop.score && doc > pqTop.doc) {
 152         // Break tie in score by doc ID:
 153         return;
 154       }
 155       pqTop.doc = doc;
 156       pqTop.score = score;
 157       pqTop = pq.updateTop();
 158     }
 159
 160     @Override
 161     public boolean acceptsDocsOutOfOrder() {
 162       return true;
 163     }
 164   }
 165
 166   // Assumes docs are scored out of order.
 167   private static class OutOfOrderPagingScoreDocCollector extends TopScoreDocCollector {
 168     private final ScoreDoc after;
 169     // this is always after.doc - docBase, to save an add when score == after.score
 170     private int afterDoc;
 171     private int collectedHits;
 172
 173     private OutOfOrderPagingScoreDocCollector(ScoreDoc after, int numHits) {
 174       super(numHits);
 175       this.after = after;
 176     }
 177
 178     @Override
 179     public void collect(int doc) throws IOException {
 180       float score = scorer.score();
 181
 182       // This collector cannot handle NaN
 183       assert !Float.isNaN(score);
 184
 185       totalHits++;
 186       if (score > after.score || (score == after.score && doc <= afterDoc)) {
 187         // hit was collected on a previous page
 188         return;
 189       }
 190       if (score < pqTop.score) {
 191         // Doesn't compete w/ bottom entry in queue
 192         return;
 193       }
 194       doc += docBase;
 195       if (score == pqTop.score && doc > pqTop.doc) {
 196         // Break tie in score by doc ID:
 197         return;
 198       }
 199       collectedHits++;
 200       pqTop.doc = doc;
 201       pqTop.score = score;
 202       pqTop = pq.updateTop();
 203     }
 204
 205     @Override
 206     public boolean acceptsDocsOutOfOrder() {
 207       return true;
 208     }
 209
 210     @Override
 211     public void setNextReader(IndexReader reader, int base) {
 212       super.setNextReader(reader, base);
 213       afterDoc = after.doc - docBase;
 214     }
 215
 216     @Override
 217     protected int topDocsSize() {
 218       return collectedHits < pq.size() ? collectedHits : pq.size();
 219     }
 220
 221     @Override
 222     protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
 223       return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results);
 224     }
 225   }
 226
 227   /**
 228    * Creates a new {@link TopScoreDocCollector} given the number of hits to
 229    * collect and whether documents are scored in order by the input
 230    * {@link Scorer} to {@link #setScorer(Scorer)}.
 231    *
 232    * <p><b>NOTE</b>: The instances returned by this method
 233    * pre-allocate a full array of length
 234    * <code>numHits</code>, and fill the array with sentinel
 235    * objects.
 236    */
 237   public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) {
 238     return create(numHits, null, docsScoredInOrder);
 239   }
 240
 241   /**
 242    * Creates a new {@link TopScoreDocCollector} given the number of hits to
 243    * collect, the bottom of the previous page, and whether documents are scored in order by the input
 244    * {@link Scorer} to {@link #setScorer(Scorer)}.
 245    *
 246    * <p><b>NOTE</b>: The instances returned by this method
 247    * pre-allocate a full array of length
 248    * <code>numHits</code>, and fill the array with sentinel
 249    * objects.
 250    */
 251   public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) {
 252
 253     if (numHits <= 0) {
 254       throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
 255     }
 256
 257     if (docsScoredInOrder) {
 258       return after == null
 259         ? new InOrderTopScoreDocCollector(numHits)
 260         : new InOrderPagingScoreDocCollector(after, numHits);
 261     } else {
 262       return after == null
 263         ? new OutOfOrderTopScoreDocCollector(numHits)
 264         : new OutOfOrderPagingScoreDocCollector(after, numHits);
 265     }
 266
 267   }
 268
 269   ScoreDoc pqTop;
 270   int docBase = 0;
 271   Scorer scorer;
 272
 273   // prevents instantiation
 274   private TopScoreDocCollector(int numHits) {
 275     super(new HitQueue(numHits, true));
 276     // HitQueue implements getSentinelObject to return a ScoreDoc, so we know
 277     // that at this point top() is already initialized.
 278     pqTop = pq.top();
 279   }
 280
 281   @Override
 282   protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
 283     if (results == null) {
 284       return EMPTY_TOPDOCS;
 285     }
 286
 287     // We need to compute maxScore in order to set it in TopDocs. If start == 0,
 288     // it means the largest element is already in results, use its score as
 289     // maxScore. Otherwise pop everything else, until the largest element is
 290     // extracted and use its score as maxScore.
 291     float maxScore = Float.NaN;
 292     if (start == 0) {
 293       maxScore = results[0].score;
 294     } else {
 295       for (int i = pq.size(); i > 1; i--) { pq.pop(); }
 296       maxScore = pq.pop().score;
 297     }
 298
 299     return new TopDocs(totalHits, results, maxScore);
 300   }
 301
 302   @Override
 303   public void setNextReader(IndexReader reader, int base) {
 304     docBase = base;
 305   }
 306
 307   @Override
 308   public void setScorer(Scorer scorer) throws IOException {
 309     this.scorer = scorer;
 310   }
 311 }