1 package org.apache.lucene.search.grouping;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.search.*;
23 import java.io.IOException;
27 * A base implementation of {@link AbstractAllGroupHeadsCollector} for retrieving the most relevant groups when grouping
28 * on a string based group field. More specifically, all concrete implementations of this base implementation
29 * use {@link org.apache.lucene.search.FieldCache.StringIndex}.
31 * @lucene.experimental
33 public abstract class TermAllGroupHeadsCollector<GH extends AbstractAllGroupHeadsCollector.GroupHead> extends AbstractAllGroupHeadsCollector<GH> {
35 private static final int DEFAULT_INITIAL_SIZE = 128;
37 final String groupField;
38 FieldCache.StringIndex groupIndex;
39 IndexReader indexReader;
// Stores the name of the field that documents are grouped on.
// NOTE(review): this listing skips a line between the signature and the assignment, and
// numberOfSorts is not used on any visible line -- presumably it is forwarded to the
// superclass constructor; confirm against the full file.
42 protected TermAllGroupHeadsCollector(String groupField, int numberOfSorts) {
44 this.groupField = groupField;
48 * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
49 * This factory method decides which implementation is best suited.
51 * Delegates to {@link #create(String, org.apache.lucene.search.Sort, int)} with an initialSize of 128.
53 * @param groupField The field to group by
54 * @param sortWithinGroup The sort within each group
55 * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
56 * @throws IOException If I/O related errors occur
58 public static AbstractAllGroupHeadsCollector create(String groupField, Sort sortWithinGroup) throws IOException {
59 return create(groupField, sortWithinGroup, DEFAULT_INITIAL_SIZE);
63 * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
64 * This factory method decides which implementation is best suited.
66 * @param groupField The field to group by
67 * @param sortWithinGroup The sort within each group
68 * @param initialSize The initial allocation size of the internal int set and group list which should roughly match
69 * the total number of expected unique groups. Be aware that the heap usage is
70 * 4 bytes * initialSize.
71 * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
72 * @throws IOException If I/O related errors occur
// Picks a collector implementation by inspecting the type of every sort field in sortWithinGroup.
74 public static AbstractAllGroupHeadsCollector create(String groupField, Sort sortWithinGroup, int initialSize) throws IOException {
// Both flags start optimistic and are meant to be cleared while scanning the sort fields.
75 boolean sortAllScore = true;
76 boolean sortAllFieldValue = true;
78 for (SortField sortField : sortWithinGroup.getSort()) {
79 if (sortField.getType() == SortField.SCORE) {
// A score sort rules out the implementation that handles field values only.
80 sortAllFieldValue = false;
81 } else if (needGeneralImpl(sortField)) {
// The general implementation is returned immediately; note that it does not take initialSize.
82 return new GeneralAllGroupHeadsCollector(groupField, sortWithinGroup);
// NOTE(review): original lines 83-88 are missing from this listing. Presumably the remaining
// branch clears sortAllScore and the return below is guarded by that flag -- confirm against the full file.
89 return new ScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
90 } else if (sortAllFieldValue) {
91 return new OrdAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
// NOTE(review): the line in front of this return is missing too; it looks like the final fallback
// for a mix of score and field sorts -- confirm.
93 return new OrdScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
97 // Returns when a sort field needs the general impl.
98 private static boolean needGeneralImpl(SortField sortField) {
99 int sortType = sortField.getType();
100 // Note (MvG): We can also make an optimized impl when sorting is SortField.DOC
101 return sortType != SortField.STRING_VAL && sortType != SortField.STRING && sortType != SortField.SCORE;
104 // A general impl that works for any group sort.
105 static class GeneralAllGroupHeadsCollector extends TermAllGroupHeadsCollector<GeneralAllGroupHeadsCollector.GroupHead> {
107 private final Sort sortWithinGroup;
108 private final Map<String, GroupHead> groups;
110 private Scorer scorer;
112 GeneralAllGroupHeadsCollector(String groupField, Sort sortWithinGroup) throws IOException {
113 super(groupField, sortWithinGroup.getSort().length);
114 this.sortWithinGroup = sortWithinGroup;
115 groups = new HashMap<String, GroupHead>();
117 final SortField[] sortFields = sortWithinGroup.getSort();
118 for (int i = 0; i < sortFields.length; i++) {
119 reversed[i] = sortFields[i].getReverse() ? -1 : 1;
123 protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
124 final int ord = groupIndex.order[doc];
125 final String groupValue = ord == 0 ? null : groupIndex.lookup[ord];
126 GroupHead groupHead = groups.get(groupValue);
127 if (groupHead == null) {
128 groupHead = new GroupHead(groupValue, sortWithinGroup, doc);
129 groups.put(groupValue == null ? null : groupValue, groupHead);
130 temporalResult.stop = true;
132 temporalResult.stop = false;
134 temporalResult.groupHead = groupHead;
137 protected Collection<GroupHead> getCollectedGroupHeads() {
138 return groups.values();
141 public void setNextReader(IndexReader reader, int docBase) throws IOException {
142 this.indexReader = reader;
143 this.docBase = docBase;
144 groupIndex = FieldCache.DEFAULT.getStringIndex(reader, groupField);
146 for (GroupHead groupHead : groups.values()) {
147 for (int i = 0; i < groupHead.comparators.length; i++) {
148 groupHead.comparators[i].setNextReader(reader, docBase);
153 public void setScorer(Scorer scorer) throws IOException {
154 this.scorer = scorer;
155 for (GroupHead groupHead : groups.values()) {
156 for (FieldComparator comparator : groupHead.comparators) {
157 comparator.setScorer(scorer);
162 class GroupHead extends AbstractAllGroupHeadsCollector.GroupHead<String> {
164 final FieldComparator[] comparators;
166 private GroupHead(String groupValue, Sort sort, int doc) throws IOException {
167 super(groupValue, doc + docBase);
168 final SortField[] sortFields = sort.getSort();
169 comparators = new FieldComparator[sortFields.length];
170 for (int i = 0; i < sortFields.length; i++) {
171 comparators[i] = sortFields[i].getComparator(1, i);
172 comparators[i].setNextReader(indexReader, docBase);
173 comparators[i].setScorer(scorer);
174 comparators[i].copy(0, doc);
175 comparators[i].setBottom(0);
179 public int compare(int compIDX, int doc) throws IOException {
180 return comparators[compIDX].compareBottom(doc);
183 public void updateDocHead(int doc) throws IOException {
184 for (FieldComparator comparator : comparators) {
185 comparator.copy(0, doc);
186 comparator.setBottom(0);
188 this.doc = doc + docBase;
194 // AbstractAllGroupHeadsCollector optimized for ord fields and scores.
// NOTE(review): this listing omits several original lines of the class (field declarations,
// return statements, brace/else lines). The comments below describe the visible lines only;
// anything marked "presumably" must be confirmed against the full file.
195 static class OrdScoreAllGroupHeadsCollector extends TermAllGroupHeadsCollector<OrdScoreAllGroupHeadsCollector.GroupHead> {
// Set of group ords, created with -1 as its second constructor argument.
197 private final SentinelIntSet ordSet;
// Every group head created so far, in creation order.
198 private final List<GroupHead> collectedGroups;
// Copy of the sort fields of the within-group sort.
199 private final SortField[] fields;
// Per sort field: the string index for the current reader (filled in setNextReader).
201 private FieldCache.StringIndex[] sortsIndex;
202 private Scorer scorer;
// Group heads addressable by the group ord of the current reader.
203 private GroupHead[] segmentGroupHeads;
// Sizes the ord set and the group list with initialSize and records, per sort field,
// the sort direction (-1 when reversed, 1 otherwise) and the field itself.
205 OrdScoreAllGroupHeadsCollector(String groupField, Sort sortWithinGroup, int initialSize) {
206 super(groupField, sortWithinGroup.getSort().length);
207 ordSet = new SentinelIntSet(initialSize, -1);
208 collectedGroups = new ArrayList<GroupHead>(initialSize);
210 final SortField[] sortFields = sortWithinGroup.getSort();
211 fields = new SortField[sortFields.length];
212 sortsIndex = new FieldCache.StringIndex[sortFields.length];
213 for (int i = 0; i < sortFields.length; i++) {
214 reversed[i] = sortFields[i].getReverse() ? -1 : 1;
215 fields[i] = sortFields[i];
219 protected Collection<GroupHead> getCollectedGroupHeads() {
220 return collectedGroups;
223 public void setScorer(Scorer scorer) throws IOException {
224 this.scorer = scorer;
// Finds the head of the group that doc belongs to, creating one when the group's ord
// is not in ordSet, and publishes the outcome through temporalResult.
227 protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
// Group ord of doc in the current reader.
228 int key = groupIndex.order[doc];
// NOTE(review): the declaration of groupHead is not visible in this listing.
230 if (!ordSet.exists(key)) {
// NOTE(review): a line is missing here; presumably it records key in ordSet -- confirm.
// Ord 0 is mapped to a null group value.
232 String term = key == 0 ? null : groupIndex.lookup[key];
233 groupHead = new GroupHead(doc, term);
234 collectedGroups.add(groupHead);
235 segmentGroupHeads[key] = groupHead;
236 temporalResult.stop = true;
// NOTE(review): the line separating the two stop assignments is missing; presumably "} else {".
238 temporalResult.stop = false;
239 groupHead = segmentGroupHeads[key];
241 temporalResult.groupHead = groupHead;
// Switches to the next reader: reloads the group index and the sort indexes, then re-addresses
// the already collected group heads by the ords of the new reader.
244 public void setNextReader(IndexReader reader, int docBase) throws IOException {
245 this.indexReader = reader;
246 this.docBase = docBase;
247 groupIndex = FieldCache.DEFAULT.getStringIndex(reader, groupField);
248 for (int i = 0; i < fields.length; i++) {
249 if (fields[i].getType() == SortField.SCORE) {
// NOTE(review): the body of this check is missing; presumably score fields are skipped
// so that no string index is loaded for them -- confirm.
253 sortsIndex[i] = FieldCache.DEFAULT.getStringIndex(reader, fields[i].getField());
256 // Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
// NOTE(review): the statements that clear and refill ordSet are not visible in this listing.
258 segmentGroupHeads = new GroupHead[groupIndex.lookup.length];
259 for (GroupHead collectedGroup : collectedGroups) {
// Position of the group's value in the new reader's lookup table.
260 int ord = groupIndex.binarySearchLookup(collectedGroup.groupValue);
// NOTE(review): two lines are missing here; presumably a guard for values that are absent
// from this reader -- confirm before relying on ord being a valid index.
263 segmentGroupHeads[ord] = collectedGroup;
265 for (int i = 0; i < sortsIndex.length; i++) {
266 if (fields[i].getType() == SortField.SCORE) {
// NOTE(review): body missing; presumably score fields are skipped here as well -- confirm.
// Translate the head's remembered sort value into an ord of the new reader.
270 collectedGroup.sortOrds[i] = sortsIndex[i].binarySearchLookup(collectedGroup.sortValues[i]);
// Group head that keeps, per sort field, either a score or a sort value together with its ord.
// NOTE(review): the declarations of sortValues, sortOrds and scores are not visible in this listing.
276 class GroupHead extends AbstractAllGroupHeadsCollector.GroupHead<String> {
282 private GroupHead(int doc, String groupValue) throws IOException {
283 super(groupValue, doc + docBase);
284 sortValues = new String[sortsIndex.length];
285 sortOrds = new int[sortsIndex.length];
286 scores = new float[sortsIndex.length];
287 for (int i = 0; i < sortsIndex.length; i++) {
288 if (fields[i].getType() == SortField.SCORE) {
289 scores[i] = scorer.score();
// NOTE(review): the line between the two branches is missing; presumably "} else {".
291 sortValues[i] = sortsIndex[i].lookup[sortsIndex[i].order[doc]];
292 sortOrds[i] = sortsIndex[i].order[doc];
// Compares this head with doc on sort field compIDX.
// NOTE(review): the values returned by the score branch and the null handling for val1
// are on lines missing from this listing.
298 public int compare(int compIDX, int doc) throws IOException {
299 if (fields[compIDX].getType() == SortField.SCORE) {
300 float score = scorer.score();
301 if (scores[compIDX] < score) {
303 } else if (scores[compIDX] > score) {
308 if (sortOrds[compIDX] < 0) {
309 // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
// Without a usable ord the comparison falls back to the string values.
310 final String val1 = sortValues[compIDX];
311 final String val2 = sortsIndex[compIDX].lookup[sortsIndex[compIDX].order[doc]];
317 } else if (val2 == null) {
320 return val1.compareTo(val2);
// Both ords come from the current reader, so their difference gives the ordering.
322 return sortOrds[compIDX] - sortsIndex[compIDX].order[doc];
// Makes doc the new head by re-reading the score or the sort value/ord of every sort field.
327 public void updateDocHead(int doc) throws IOException {
328 for (int i = 0; i < sortsIndex.length; i++) {
329 if (fields[i].getType() == SortField.SCORE) {
330 scores[i] = scorer.score();
// NOTE(review): the line between the two branches is missing; presumably "} else {".
332 sortValues[i] = sortsIndex[i].lookup[sortsIndex[i].order[doc]];
333 sortOrds[i] = sortsIndex[i].order[doc];
// The head document is stored rebased by docBase.
336 this.doc = doc + docBase;
344 // AbstractAllGroupHeadsCollector optimized for ord fields.
// NOTE(review): this listing omits several original lines of the class (field declarations,
// return statements, brace/else lines). The comments below describe the visible lines only;
// anything marked "presumably" must be confirmed against the full file.
345 static class OrdAllGroupHeadsCollector extends TermAllGroupHeadsCollector<OrdAllGroupHeadsCollector.GroupHead> {
// Set of group ords, created with -1 as its second constructor argument.
347 private final SentinelIntSet ordSet;
// Every group head created so far, in creation order.
348 private final List<GroupHead> collectedGroups;
// Copy of the sort fields of the within-group sort.
349 private final SortField[] fields;
// Per sort field: the string index for the current reader (filled in setNextReader).
351 private FieldCache.StringIndex[] sortsIndex;
// Group heads addressable by the group ord of the current reader.
352 private GroupHead[] segmentGroupHeads;
// Sizes the ord set and the group list with initialSize and records, per sort field,
// the sort direction (-1 when reversed, 1 otherwise) and the field itself.
354 OrdAllGroupHeadsCollector(String groupField, Sort sortWithinGroup, int initialSize) {
355 super(groupField, sortWithinGroup.getSort().length);
356 ordSet = new SentinelIntSet(initialSize, -1);
357 collectedGroups = new ArrayList<GroupHead>(initialSize);
359 final SortField[] sortFields = sortWithinGroup.getSort();
360 fields = new SortField[sortFields.length];
361 sortsIndex = new FieldCache.StringIndex[sortFields.length];
362 for (int i = 0; i < sortFields.length; i++) {
363 reversed[i] = sortFields[i].getReverse() ? -1 : 1;
364 fields[i] = sortFields[i];
368 protected Collection<GroupHead> getCollectedGroupHeads() {
369 return collectedGroups;
// No statement of this method is visible in the listing; this class declares no scorer field.
372 public void setScorer(Scorer scorer) throws IOException {
// Finds the head of the group that doc belongs to, creating one when the group's ord
// is not in ordSet, and publishes the outcome through temporalResult.
375 protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
// Group ord of doc in the current reader.
376 int key = groupIndex.order[doc];
// NOTE(review): the declaration of groupHead is not visible in this listing.
378 if (!ordSet.exists(key)) {
// NOTE(review): a line is missing here; presumably it records key in ordSet -- confirm.
// Ord 0 is mapped to a null group value.
380 String term = key == 0 ? null : groupIndex.lookup[key];
381 groupHead = new GroupHead(doc, term);
382 collectedGroups.add(groupHead);
383 segmentGroupHeads[key] = groupHead;
384 temporalResult.stop = true;
// NOTE(review): the line separating the two stop assignments is missing; presumably "} else {".
386 temporalResult.stop = false;
387 groupHead = segmentGroupHeads[key];
389 temporalResult.groupHead = groupHead;
// Switches to the next reader: reloads the group index and all sort indexes, then re-addresses
// the already collected group heads by the ords of the new reader.
392 public void setNextReader(IndexReader reader, int docBase) throws IOException {
393 this.indexReader = reader;
394 this.docBase = docBase;
395 groupIndex = FieldCache.DEFAULT.getStringIndex(reader, groupField);
396 for (int i = 0; i < fields.length; i++) {
397 sortsIndex[i] = FieldCache.DEFAULT.getStringIndex(reader, fields[i].getField());
400 // Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
// NOTE(review): the statement that clears ordSet is not visible in this listing.
402 segmentGroupHeads = new GroupHead[groupIndex.lookup.length];
403 for (GroupHead collectedGroup : collectedGroups) {
// Position of the group's value in the new reader's lookup table.
404 int groupOrd = groupIndex.binarySearchLookup(collectedGroup.groupValue);
// NOTE(review): one line is missing here; presumably a guard for values that are absent
// from this reader -- confirm before relying on groupOrd being a valid index.
406 ordSet.put(groupOrd);
407 segmentGroupHeads[groupOrd] = collectedGroup;
409 for (int i = 0; i < sortsIndex.length; i++) {
// Translate the head's remembered sort value into an ord of the new reader.
410 collectedGroup.sortOrds[i] = sortsIndex[i].binarySearchLookup(collectedGroup.sortValues[i]);
// Group head that keeps, per sort field, the sort value of the head document and its ord.
// NOTE(review): the declarations of sortValues and sortOrds are not visible in this listing.
416 class GroupHead extends AbstractAllGroupHeadsCollector.GroupHead<String> {
421 private GroupHead(int doc, String groupValue) throws IOException {
422 super(groupValue, doc + docBase);
423 sortValues = new String[sortsIndex.length];
424 sortOrds = new int[sortsIndex.length];
425 for (int i = 0; i < sortsIndex.length; i++) {
426 sortValues[i] = sortsIndex[i].lookup[sortsIndex[i].order[doc]];
427 sortOrds[i] = sortsIndex[i].order[doc];
// Compares this head with doc on sort field compIDX.
// NOTE(review): the null handling for val1 is on lines missing from this listing.
431 public int compare(int compIDX, int doc) throws IOException {
432 if (sortOrds[compIDX] < 0) {
433 // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
// Without a usable ord the comparison falls back to the string values.
434 final String val1 = sortValues[compIDX];
435 final String val2 = sortsIndex[compIDX].lookup[sortsIndex[compIDX].order[doc]];
441 } else if (val2 == null) {
444 return val1.compareTo(val2);
// Both ords come from the current reader, so their difference gives the ordering.
446 return sortOrds[compIDX] - sortsIndex[compIDX].order[doc];
// Makes doc the new head by re-reading the sort value and ord of every sort field.
450 public void updateDocHead(int doc) throws IOException {
451 for (int i = 0; i < sortsIndex.length; i++) {
452 sortValues[i] = sortsIndex[i].lookup[sortsIndex[i].order[doc]];
453 sortOrds[i] = sortsIndex[i].order[doc];
// The head document is stored rebased by docBase.
455 this.doc = doc + docBase;
463 // AbstractAllGroupHeadsCollector optimized for scores.
// NOTE(review): this listing omits several original lines of the class (field declarations,
// loop bodies, return statements, brace/else lines). The comments below describe the visible
// lines only; anything marked "presumably" must be confirmed against the full file.
464 static class ScoreAllGroupHeadsCollector extends TermAllGroupHeadsCollector<ScoreAllGroupHeadsCollector.GroupHead> {
// Set of group ords, created with -1 as its second constructor argument.
466 private final SentinelIntSet ordSet;
// Every group head created so far, in creation order.
467 private final List<GroupHead> collectedGroups;
// Copy of the sort fields of the within-group sort.
468 private final SortField[] fields;
470 private Scorer scorer;
// Group heads addressable by the group ord of the current reader.
471 private GroupHead[] segmentGroupHeads;
// Sizes the ord set and the group list with initialSize and records, per sort field,
// the sort direction (-1 when reversed, 1 otherwise) and the field itself.
473 ScoreAllGroupHeadsCollector(String groupField, Sort sortWithinGroup, int initialSize) {
474 super(groupField, sortWithinGroup.getSort().length);
475 ordSet = new SentinelIntSet(initialSize, -1);
476 collectedGroups = new ArrayList<GroupHead>(initialSize);
478 final SortField[] sortFields = sortWithinGroup.getSort();
479 fields = new SortField[sortFields.length];
480 for (int i = 0; i < sortFields.length; i++) {
481 reversed[i] = sortFields[i].getReverse() ? -1 : 1;
482 fields[i] = sortFields[i];
486 protected Collection<GroupHead> getCollectedGroupHeads() {
487 return collectedGroups;
490 public void setScorer(Scorer scorer) throws IOException {
491 this.scorer = scorer;
// Finds the head of the group that doc belongs to, creating one when the group's ord
// is not in ordSet, and publishes the outcome through temporalResult.
494 protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
// Group ord of doc in the current reader.
495 int key = groupIndex.order[doc];
// NOTE(review): the declaration of groupHead is not visible in this listing.
497 if (!ordSet.exists(key)) {
// NOTE(review): a line is missing here; presumably it records key in ordSet -- confirm.
// Ord 0 is mapped to a null group value.
499 String term = key == 0 ? null : groupIndex.lookup[key];
500 groupHead = new GroupHead(doc, term);
501 collectedGroups.add(groupHead);
502 segmentGroupHeads[key] = groupHead;
503 temporalResult.stop = true;
// NOTE(review): the line separating the two stop assignments is missing; presumably "} else {".
505 temporalResult.stop = false;
506 groupHead = segmentGroupHeads[key];
508 temporalResult.groupHead = groupHead;
// Switches to the next reader: reloads the group index, then re-addresses the already
// collected group heads by the ords of the new reader.
511 public void setNextReader(IndexReader reader, int docBase) throws IOException {
512 this.indexReader = reader;
513 this.docBase = docBase;
514 groupIndex = FieldCache.DEFAULT.getStringIndex(reader, groupField);
516 // Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
// NOTE(review): the statements that clear and refill ordSet are not visible in this listing.
518 segmentGroupHeads = new GroupHead[groupIndex.lookup.length];
519 for (GroupHead collectedGroup : collectedGroups) {
// Position of the group's value in the new reader's lookup table.
520 int ord = groupIndex.binarySearchLookup(collectedGroup.groupValue);
// NOTE(review): two lines are missing here; presumably a guard for values that are absent
// from this reader -- confirm before relying on ord being a valid index.
523 segmentGroupHeads[ord] = collectedGroup;
// Group head that keeps one score slot per sort field.
// NOTE(review): the declaration of scores is not visible in this listing.
528 class GroupHead extends AbstractAllGroupHeadsCollector.GroupHead<String> {
532 private GroupHead(int doc, String groupValue) throws IOException {
533 super(groupValue, doc + docBase);
534 scores = new float[fields.length];
// The score is read from the scorer once, before the loop.
535 float score = scorer.score();
// NOTE(review): the loop body is missing; presumably it stores score in every slot -- confirm.
536 for (int i = 0; i < scores.length; i++) {
// Compares the stored score for compIDX with the score currently reported by the scorer.
// NOTE(review): the returned values are on lines missing from this listing.
541 public int compare(int compIDX, int doc) throws IOException {
542 float score = scorer.score();
543 if (scores[compIDX] < score) {
545 } else if (scores[compIDX] > score) {
// Makes doc the new head.
551 public void updateDocHead(int doc) throws IOException {
552 float score = scorer.score();
// NOTE(review): the loop body is missing; presumably it stores score in every slot -- confirm.
553 for (int i = 0; i < scores.length; i++) {
// The head document is stored rebased by docBase.
556 this.doc = doc + docBase;