1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.text.Collator;
22 import java.util.Locale;
24 import org.apache.lucene.index.IndexReader;
25 import org.apache.lucene.search.FieldCache.DoubleParser;
26 import org.apache.lucene.search.FieldCache.LongParser;
27 import org.apache.lucene.search.FieldCache.ByteParser;
28 import org.apache.lucene.search.FieldCache.FloatParser;
29 import org.apache.lucene.search.FieldCache.IntParser;
30 import org.apache.lucene.search.FieldCache.ShortParser;
31 import org.apache.lucene.search.FieldCache.StringIndex;
32 import org.apache.lucene.util.OpenBitSet;
35 * Expert: a FieldComparator compares hits so as to determine their
36 * sort order when collecting the top results with {@link
37 * TopFieldCollector}. The concrete public FieldComparator
38 * classes here correspond to the SortField types.
40 * <p>This API is designed to achieve high performance
41 * sorting, by exposing a tight interaction with {@link
42 * FieldValueHitQueue} as it visits hits. Whenever a hit is
43 * competitive, it's enrolled into a virtual slot, which is
44 * an int ranging from 0 to numHits-1. The {@link
45 * FieldComparator} is made aware of segment transitions
46 * during searching in case any internal state it's tracking
47 * needs to be recomputed during these transitions.</p>
49 * <p>A comparator must define these functions:</p>
53 * <li> {@link #compare} Compare a hit at 'slot a'
56 * <li> {@link #setBottom} This method is called by
57 * {@link FieldValueHitQueue} to notify the
58 * FieldComparator of the current weakest ("bottom")
59 * slot. Note that this slot may not hold the weakest
60 * value according to your comparator, in cases where
61 * your comparator is not the primary one (ie, is only
62 * used to break ties from the comparators before it).
64 * <li> {@link #compareBottom} Compare a new hit (docID)
65 * against the "weakest" (bottom) entry in the queue.
67 * <li> {@link #copy} Installs a new hit into the
68 * priority queue. The {@link FieldValueHitQueue}
69 * calls this method when a new hit is competitive.
71 * <li> {@link #setNextReader} Invoked
72 * when the search is switching to the next segment.
73 * You may need to update internal state of the
74 * comparator, for example retrieving new values from
75 * the {@link FieldCache}.
77 * <li> {@link #value} Return the sort value stored in
78 * the specified slot. This is only called at the end
79 * of the search, in order to populate {@link
80 * FieldDoc#fields} when returning the top results.
83 * @lucene.experimental
85 public abstract class FieldComparator<T> {
87 protected T missingValue = null;
89 /** Set a default sorting value for documents which lacks one */
90 public FieldComparator<T> setMissingValue(T missingValue) {
91 this.missingValue = missingValue;
96 * Compare hit at slot1 with hit at slot2.
98 * @param slot1 first slot to compare
99 * @param slot2 second slot to compare
100 * @return any N < 0 if slot2's value is sorted after
101 * slot1, any N > 0 if the slot2's value is sorted before
102 * slot1 and 0 if they are equal
104 public abstract int compare(int slot1, int slot2);
107 * Set the bottom slot, ie the "weakest" (sorted last)
108 * entry in the queue. When {@link #compareBottom} is
109 * called, you should compare against this slot. This
110 * will always be called before {@link #compareBottom}.
112 * @param slot the currently weakest (sorted last) slot in the queue
114 public abstract void setBottom(final int slot);
117 * Compare the bottom of the queue with doc. This will
118 * only invoked after setBottom has been called. This
119 * should return the same result as {@link
120 * #compare(int,int)}} as if bottom were slot1 and the new
121 * document were slot 2.
123 * <p>For a search that hits many results, this method
124 * will be the hotspot (invoked by far the most
127 * @param doc that was hit
128 * @return any N < 0 if the doc's value is sorted after
129 * the bottom entry (not competitive), any N > 0 if the
130 * doc's value is sorted before the bottom entry and 0 if
133 public abstract int compareBottom(int doc) throws IOException;
136 * This method is called when a new hit is competitive.
137 * You should copy any state associated with this document
138 * that will be required for future comparisons, into the
141 * @param slot which slot to copy the hit to
142 * @param doc docID relative to current reader
144 public abstract void copy(int slot, int doc) throws IOException;
147 * Set a new Reader. All doc correspond to the current Reader.
149 * @param reader current reader
150 * @param docBase docBase of this reader
151 * @throws IOException
152 * @throws IOException
154 public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
156 /** Sets the Scorer to use in case a document's score is
159 * @param scorer Scorer instance that you should use to
160 * obtain the current hit's score, if necessary. */
161 public void setScorer(Scorer scorer) {
162 // Empty implementation since most comparators don't need the score. This
163 // can be overridden by those that need it.
167 * Return the actual value in the slot.
169 * @param slot the value
170 * @return value in this slot
172 public abstract T value(int slot);
174 /** Returns -1 if first is less than second. Default
175 * impl to assume the type implements Comparable and
176 * invoke .compareTo; be sure to override this method if
177 * your FieldComparator's type isn't a Comparable or
178 * if your values may sometimes be null */
179 @SuppressWarnings("unchecked")
180 public int compareValues(T first, T second) {
181 return ((Comparable<T>) first).compareTo(second);
184 /** Parses field's values as byte (using {@link
185 * FieldCache#getBytes} and sorts by ascending value */
186 public static final class ByteComparator extends FieldComparator<Byte> {
187 private final byte[] values;
188 private byte[] currentReaderValues;
189 private final String field;
190 private ByteParser parser;
193 ByteComparator(int numHits, String field, FieldCache.Parser parser) {
194 values = new byte[numHits];
196 this.parser = (ByteParser) parser;
200 public int compare(int slot1, int slot2) {
201 return values[slot1] - values[slot2];
205 public int compareBottom(int doc) {
206 return bottom - currentReaderValues[doc];
210 public void copy(int slot, int doc) {
211 values[slot] = currentReaderValues[doc];
215 public void setNextReader(IndexReader reader, int docBase) throws IOException {
216 currentReaderValues = FieldCache.DEFAULT.getBytes(reader, field, parser);
217 if (missingValue != null) {
218 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
219 final byte byteValue = missingValue.byteValue();
220 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
221 currentReaderValues[doc] = byteValue;
227 public void setBottom(final int bottom) {
228 this.bottom = values[bottom];
232 public Byte value(int slot) {
233 return Byte.valueOf(values[slot]);
237 /** Sorts by ascending docID */
238 public static final class DocComparator extends FieldComparator<Integer> {
239 private final int[] docIDs;
243 DocComparator(int numHits) {
244 docIDs = new int[numHits];
248 public int compare(int slot1, int slot2) {
249 // No overflow risk because docIDs are non-negative
250 return docIDs[slot1] - docIDs[slot2];
254 public int compareBottom(int doc) {
255 // No overflow risk because docIDs are non-negative
256 return bottom - (docBase + doc);
260 public void copy(int slot, int doc) {
261 docIDs[slot] = docBase + doc;
265 public void setNextReader(IndexReader reader, int docBase) {
266 // TODO: can we "map" our docIDs to the current
267 // reader? saves having to then subtract on every
269 this.docBase = docBase;
273 public void setBottom(final int bottom) {
274 this.bottom = docIDs[bottom];
278 public Integer value(int slot) {
279 return Integer.valueOf(docIDs[slot]);
283 /** Parses field's values as double (using {@link
284 * FieldCache#getDoubles} and sorts by ascending value */
285 public static final class DoubleComparator extends FieldComparator<Double> {
286 private final double[] values;
287 private double[] currentReaderValues;
288 private final String field;
289 private DoubleParser parser;
290 private double bottom;
292 DoubleComparator(int numHits, String field, FieldCache.Parser parser) {
293 values = new double[numHits];
295 this.parser = (DoubleParser) parser;
299 public int compare(int slot1, int slot2) {
300 final double v1 = values[slot1];
301 final double v2 = values[slot2];
304 } else if (v1 < v2) {
312 public int compareBottom(int doc) {
313 final double v2 = currentReaderValues[doc];
316 } else if (bottom < v2) {
324 public void copy(int slot, int doc) {
325 values[slot] = currentReaderValues[doc];
329 public void setNextReader(IndexReader reader, int docBase) throws IOException {
330 currentReaderValues = FieldCache.DEFAULT.getDoubles(reader, field, parser);
331 if (missingValue != null) {
332 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
333 final double doubleValue = missingValue.doubleValue();
334 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
335 currentReaderValues[doc] = doubleValue;
341 public void setBottom(final int bottom) {
342 this.bottom = values[bottom];
346 public Double value(int slot) {
347 return Double.valueOf(values[slot]);
351 /** Parses field's values as float (using {@link
352 * FieldCache#getFloats} and sorts by ascending value */
353 public static final class FloatComparator extends FieldComparator<Float> {
354 private final float[] values;
355 private float[] currentReaderValues;
356 private final String field;
357 private FloatParser parser;
358 private float bottom;
360 FloatComparator(int numHits, String field, FieldCache.Parser parser) {
361 values = new float[numHits];
363 this.parser = (FloatParser) parser;
367 public int compare(int slot1, int slot2) {
368 // TODO: are there sneaky non-branch ways to compute
370 final float v1 = values[slot1];
371 final float v2 = values[slot2];
374 } else if (v1 < v2) {
382 public int compareBottom(int doc) {
383 // TODO: are there sneaky non-branch ways to compute
385 final float v2 = currentReaderValues[doc];
388 } else if (bottom < v2) {
396 public void copy(int slot, int doc) {
397 values[slot] = currentReaderValues[doc];
401 public void setNextReader(IndexReader reader, int docBase) throws IOException {
402 currentReaderValues = FieldCache.DEFAULT.getFloats(reader, field, parser);
403 if (missingValue != null) {
404 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
405 final float floatValue = missingValue.floatValue();
406 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
407 currentReaderValues[doc] = floatValue;
413 public void setBottom(final int bottom) {
414 this.bottom = values[bottom];
418 public Float value(int slot) {
419 return Float.valueOf(values[slot]);
423 /** Parses field's values as int (using {@link
424 * FieldCache#getInts} and sorts by ascending value */
425 public static final class IntComparator extends FieldComparator<Integer> {
426 private final int[] values;
427 private int[] currentReaderValues;
428 private final String field;
429 private IntParser parser;
430 private int bottom; // Value of bottom of queue
432 IntComparator(int numHits, String field, FieldCache.Parser parser) {
433 values = new int[numHits];
435 this.parser = (IntParser) parser;
439 public int compare(int slot1, int slot2) {
440 // TODO: there are sneaky non-branch ways to compute
442 // Cannot return values[slot1] - values[slot2] because that
444 final int v1 = values[slot1];
445 final int v2 = values[slot2];
448 } else if (v1 < v2) {
456 public int compareBottom(int doc) {
457 // TODO: there are sneaky non-branch ways to compute
459 // Cannot return bottom - values[slot2] because that
461 final int v2 = currentReaderValues[doc];
464 } else if (bottom < v2) {
472 public void copy(int slot, int doc) {
473 values[slot] = currentReaderValues[doc];
477 public void setNextReader(IndexReader reader, int docBase) throws IOException {
478 currentReaderValues = FieldCache.DEFAULT.getInts(reader, field, parser);
479 if (missingValue != null) {
480 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
481 final int intValue = missingValue.intValue();
482 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
483 currentReaderValues[doc] = intValue;
489 public void setBottom(final int bottom) {
490 this.bottom = values[bottom];
494 public Integer value(int slot) {
495 return Integer.valueOf(values[slot]);
499 /** Parses field's values as long (using {@link
500 * FieldCache#getLongs} and sorts by ascending value */
501 public static final class LongComparator extends FieldComparator<Long> {
502 private final long[] values;
503 private long[] currentReaderValues;
504 private final String field;
505 private LongParser parser;
508 LongComparator(int numHits, String field, FieldCache.Parser parser) {
509 values = new long[numHits];
511 this.parser = (LongParser) parser;
515 public int compare(int slot1, int slot2) {
516 // TODO: there are sneaky non-branch ways to compute
518 final long v1 = values[slot1];
519 final long v2 = values[slot2];
522 } else if (v1 < v2) {
530 public int compareBottom(int doc) {
531 // TODO: there are sneaky non-branch ways to compute
533 final long v2 = currentReaderValues[doc];
536 } else if (bottom < v2) {
544 public void copy(int slot, int doc) {
545 values[slot] = currentReaderValues[doc];
549 public void setNextReader(IndexReader reader, int docBase) throws IOException {
550 currentReaderValues = FieldCache.DEFAULT.getLongs(reader, field, parser);
551 if (missingValue != null) {
552 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
553 final long longValue = missingValue.longValue();
554 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
555 currentReaderValues[doc] = longValue;
561 public void setBottom(final int bottom) {
562 this.bottom = values[bottom];
566 public Long value(int slot) {
567 return Long.valueOf(values[slot]);
571 /** Sorts by descending relevance. NOTE: if you are
572 * sorting only by descending relevance and then
573 * secondarily by ascending docID, performance is faster
574 * using {@link TopScoreDocCollector} directly (which {@link
575 * IndexSearcher#search} uses when no {@link Sort} is
577 public static final class RelevanceComparator extends FieldComparator<Float> {
578 private final float[] scores;
579 private float bottom;
580 private Scorer scorer;
582 RelevanceComparator(int numHits) {
583 scores = new float[numHits];
587 public int compare(int slot1, int slot2) {
588 final float score1 = scores[slot1];
589 final float score2 = scores[slot2];
590 return score1 > score2 ? -1 : (score1 < score2 ? 1 : 0);
594 public int compareBottom(int doc) throws IOException {
595 float score = scorer.score();
596 return bottom > score ? -1 : (bottom < score ? 1 : 0);
600 public void copy(int slot, int doc) throws IOException {
601 scores[slot] = scorer.score();
605 public void setNextReader(IndexReader reader, int docBase) {
609 public void setBottom(final int bottom) {
610 this.bottom = scores[bottom];
614 public void setScorer(Scorer scorer) {
615 // wrap with a ScoreCachingWrappingScorer so that successive calls to
616 // score() will not incur score computation over and
618 if (!(scorer instanceof ScoreCachingWrappingScorer)) {
619 this.scorer = new ScoreCachingWrappingScorer(scorer);
621 this.scorer = scorer;
626 public Float value(int slot) {
627 return Float.valueOf(scores[slot]);
630 // Override because we sort reverse of natural Float order:
632 public int compareValues(Float first, Float second) {
633 // Reversed intentionally because relevance by default
635 return second.compareTo(first);
639 /** Parses field's values as short (using {@link
640 * FieldCache#getShorts} and sorts by ascending value */
641 public static final class ShortComparator extends FieldComparator<Short> {
642 private final short[] values;
643 private short[] currentReaderValues;
644 private final String field;
645 private ShortParser parser;
646 private short bottom;
648 ShortComparator(int numHits, String field, FieldCache.Parser parser) {
649 values = new short[numHits];
651 this.parser = (ShortParser) parser;
655 public int compare(int slot1, int slot2) {
656 return values[slot1] - values[slot2];
660 public int compareBottom(int doc) {
661 return bottom - currentReaderValues[doc];
665 public void copy(int slot, int doc) {
666 values[slot] = currentReaderValues[doc];
670 public void setNextReader(IndexReader reader, int docBase) throws IOException {
671 currentReaderValues = FieldCache.DEFAULT.getShorts(reader, field, parser);
672 if (missingValue != null) {
673 DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
674 final short shortValue = missingValue.shortValue();
675 for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
676 currentReaderValues[doc] = shortValue;
682 public void setBottom(final int bottom) {
683 this.bottom = values[bottom];
687 public Short value(int slot) {
688 return Short.valueOf(values[slot]);
692 /** Sorts by a field's value using the Collator for a
694 public static final class StringComparatorLocale extends FieldComparator<String> {
696 private final String[] values;
697 private String[] currentReaderValues;
698 private final String field;
699 final Collator collator;
700 private String bottom;
702 StringComparatorLocale(int numHits, String field, Locale locale) {
703 values = new String[numHits];
705 collator = Collator.getInstance(locale);
709 public int compare(int slot1, int slot2) {
710 final String val1 = values[slot1];
711 final String val2 = values[slot2];
717 } else if (val2 == null) {
720 return collator.compare(val1, val2);
724 public int compareBottom(int doc) {
725 final String val2 = currentReaderValues[doc];
726 if (bottom == null) {
731 } else if (val2 == null) {
734 return collator.compare(bottom, val2);
738 public void copy(int slot, int doc) {
739 values[slot] = currentReaderValues[doc];
743 public void setNextReader(IndexReader reader, int docBase) throws IOException {
744 currentReaderValues = FieldCache.DEFAULT.getStrings(reader, field);
748 public void setBottom(final int bottom) {
749 this.bottom = values[bottom];
753 public String value(int slot) {
758 public int compareValues(String val1, String val2) {
764 } else if (val2 == null) {
767 return collator.compare(val1, val2);
771 /** Sorts by field's natural String sort order, using
772 * ordinals. This is functionally equivalent to {@link
773 * StringValComparator}, but it first resolves the string
774 * to their relative ordinal positions (using the index
775 * returned by {@link FieldCache#getStringIndex}), and
776 * does most comparisons using the ordinals. For medium
777 * to large results, this comparator will be much faster
778 * than {@link StringValComparator}. For very small
779 * result sets it may be slower. */
780 public static final class StringOrdValComparator extends FieldComparator<String> {
782 private final int[] ords;
783 private final String[] values;
784 private final int[] readerGen;
786 private int currentReaderGen = -1;
787 private String[] lookup;
789 private final String field;
791 private int bottomSlot = -1;
792 private int bottomOrd;
793 private boolean bottomSameReader;
794 private String bottomValue;
796 public StringOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
797 ords = new int[numHits];
798 values = new String[numHits];
799 readerGen = new int[numHits];
804 public int compare(int slot1, int slot2) {
805 if (readerGen[slot1] == readerGen[slot2]) {
806 return ords[slot1] - ords[slot2];
809 final String val1 = values[slot1];
810 final String val2 = values[slot2];
816 } else if (val2 == null) {
819 return val1.compareTo(val2);
823 public int compareBottom(int doc) {
824 assert bottomSlot != -1;
825 if (bottomSameReader) {
826 // ord is precisely comparable, even in the equal case
827 return bottomOrd - this.order[doc];
829 // ord is only approx comparable: if they are not
830 // equal, we can use that; if they are equal, we
831 // must fallback to compare by value
832 final int order = this.order[doc];
833 final int cmp = bottomOrd - order;
838 final String val2 = lookup[order];
839 if (bottomValue == null) {
845 } else if (val2 == null) {
849 return bottomValue.compareTo(val2);
854 public void copy(int slot, int doc) {
855 final int ord = order[doc];
858 values[slot] = lookup[ord];
859 readerGen[slot] = currentReaderGen;
863 public void setNextReader(IndexReader reader, int docBase) throws IOException {
864 StringIndex currentReaderValues = FieldCache.DEFAULT.getStringIndex(reader, field);
866 order = currentReaderValues.order;
867 lookup = currentReaderValues.lookup;
868 assert lookup.length > 0;
869 if (bottomSlot != -1) {
870 setBottom(bottomSlot);
875 public void setBottom(final int bottom) {
878 bottomValue = values[bottomSlot];
879 if (currentReaderGen == readerGen[bottomSlot]) {
880 bottomOrd = ords[bottomSlot];
881 bottomSameReader = true;
883 if (bottomValue == null) {
884 ords[bottomSlot] = 0;
886 bottomSameReader = true;
887 readerGen[bottomSlot] = currentReaderGen;
889 final int index = binarySearch(lookup, bottomValue);
891 bottomOrd = -index - 2;
892 bottomSameReader = false;
896 bottomSameReader = true;
897 readerGen[bottomSlot] = currentReaderGen;
898 ords[bottomSlot] = bottomOrd;
905 public String value(int slot) {
910 public int compareValues(String val1, String val2) {
916 } else if (val2 == null) {
919 return val1.compareTo(val2);
922 public String[] getValues() {
926 public int getBottomSlot() {
930 public String getField() {
935 /** Sorts by field's natural String sort order. All
936 * comparisons are done using String.compareTo, which is
937 * slow for medium to large result sets but possibly
938 * very fast for very small results sets. */
939 public static final class StringValComparator extends FieldComparator<String> {
941 private String[] values;
942 private String[] currentReaderValues;
943 private final String field;
944 private String bottom;
946 StringValComparator(int numHits, String field) {
947 values = new String[numHits];
952 public int compare(int slot1, int slot2) {
953 final String val1 = values[slot1];
954 final String val2 = values[slot2];
960 } else if (val2 == null) {
964 return val1.compareTo(val2);
968 public int compareBottom(int doc) {
969 final String val2 = currentReaderValues[doc];
970 if (bottom == null) {
975 } else if (val2 == null) {
978 return bottom.compareTo(val2);
982 public void copy(int slot, int doc) {
983 values[slot] = currentReaderValues[doc];
987 public void setNextReader(IndexReader reader, int docBase) throws IOException {
988 currentReaderValues = FieldCache.DEFAULT.getStrings(reader, field);
992 public void setBottom(final int bottom) {
993 this.bottom = values[bottom];
997 public String value(int slot) {
1002 public int compareValues(String val1, String val2) {
1008 } else if (val2 == null) {
1011 return val1.compareTo(val2);
1016 final protected static int binarySearch(String[] a, String key) {
1017 return binarySearch(a, key, 0, a.length-1);
1020 final protected static int binarySearch(String[] a, String key, int low, int high) {
1022 while (low <= high) {
1023 int mid = (low + high) >>> 1;
1024 String midVal = a[mid];
1026 if (midVal != null) {
1027 cmp = midVal.compareTo(key);