1 package org.apache.lucene.search.grouping;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.search.ScoreDoc;
21 import org.apache.lucene.search.Sort;
22 import org.apache.lucene.search.SortField;
23 import org.apache.lucene.search.TopDocs;
25 import java.io.IOException;
27 /** Represents result returned by a grouping search.
29 * @lucene.experimental */
30 public class TopGroups<GROUP_VALUE_TYPE> {
31 /** Number of documents matching the search */
32 public final int totalHitCount;
34 /** Number of documents grouped into the topN groups */
35 public final int totalGroupedHitCount;
37 /** The total number of unique groups. If <code>null</code> this value is not computed. */
38 public final Integer totalGroupCount;
40 /** Group results in groupSort order */
41 public final GroupDocs<GROUP_VALUE_TYPE>[] groups;
43 /** How groups are sorted against each other */
44 public final SortField[] groupSort;
46 /** How docs are sorted within each group */
47 public final SortField[] withinGroupSort;
49 public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<GROUP_VALUE_TYPE>[] groups) {
50 this.groupSort = groupSort;
51 this.withinGroupSort = withinGroupSort;
52 this.totalHitCount = totalHitCount;
53 this.totalGroupedHitCount = totalGroupedHitCount;
55 this.totalGroupCount = null;
58 public TopGroups(TopGroups<GROUP_VALUE_TYPE> oldTopGroups, Integer totalGroupCount) {
59 this.groupSort = oldTopGroups.groupSort;
60 this.withinGroupSort = oldTopGroups.withinGroupSort;
61 this.totalHitCount = oldTopGroups.totalHitCount;
62 this.totalGroupedHitCount = oldTopGroups.totalGroupedHitCount;
63 this.groups = oldTopGroups.groups;
64 this.totalGroupCount = totalGroupCount;
67 /** Merges an array of TopGroups, for example obtained
68 * from the second-pass collector across multiple
69 * shards. Each TopGroups must have been sorted by the
70 * same groupSort and docSort, and the top groups passed
71 * to all second-pass collectors must be the same.
73 * <b>NOTE</b>: We can't always compute an exact totalGroupCount.
74 * Documents belonging to a group may occur on more than
75 * one shard and thus the merged totalGroupCount can be
76 * higher than the actual totalGroupCount. In this case the
77 * totalGroupCount represents a upper bound. If the documents
78 * of one group do only reside in one shard then the
79 * totalGroupCount is exact.
81 * <b>NOTE</b>: the topDocs in each GroupDocs is actually
82 * an instance of TopDocsAndShards
84 public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN)
87 //System.out.println("TopGroups.merge");
89 if (shardGroups.length == 0) {
93 int totalHitCount = 0;
94 int totalGroupedHitCount = 0;
95 // Optionally merge the totalGroupCount.
96 Integer totalGroupCount = null;
98 final int numGroups = shardGroups[0].groups.length;
99 for(TopGroups<T> shard : shardGroups) {
100 if (numGroups != shard.groups.length) {
101 throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
103 totalHitCount += shard.totalHitCount;
104 totalGroupedHitCount += shard.totalGroupedHitCount;
105 if (shard.totalGroupCount != null) {
106 if (totalGroupCount == null) {
110 totalGroupCount += shard.totalGroupCount;
114 @SuppressWarnings("unchecked")
115 final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];
117 final TopDocs[] shardTopDocs = new TopDocs[shardGroups.length];
119 for(int groupIDX=0;groupIDX<numGroups;groupIDX++) {
120 final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
121 //System.out.println(" merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
122 float maxScore = Float.MIN_VALUE;
124 for(int shardIDX=0;shardIDX<shardGroups.length;shardIDX++) {
125 //System.out.println(" shard=" + shardIDX);
126 final TopGroups<T> shard = shardGroups[shardIDX];
127 final GroupDocs shardGroupDocs = shard.groups[groupIDX];
128 if (groupValue == null) {
129 if (shardGroupDocs.groupValue != null) {
130 throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
132 } else if (!groupValue.equals(shardGroupDocs.groupValue)) {
133 throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
137 for(ScoreDoc sd : shardGroupDocs.scoreDocs) {
138 System.out.println(" doc=" + sd.doc);
142 shardTopDocs[shardIDX] = new TopDocs(shardGroupDocs.totalHits,
143 shardGroupDocs.scoreDocs,
144 shardGroupDocs.maxScore);
145 maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
146 totalHits += shardGroupDocs.totalHits;
149 final TopDocs mergedTopDocs = TopDocs.merge(docSort, docOffset + docTopN, shardTopDocs);
152 final ScoreDoc[] mergedScoreDocs;
153 if (docOffset == 0) {
154 mergedScoreDocs = mergedTopDocs.scoreDocs;
155 } else if (docOffset >= mergedTopDocs.scoreDocs.length) {
156 mergedScoreDocs = new ScoreDoc[0];
158 mergedScoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length - docOffset];
159 System.arraycopy(mergedTopDocs.scoreDocs,
163 mergedTopDocs.scoreDocs.length - docOffset);
165 //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
166 mergedGroupDocs[groupIDX] = new GroupDocs<T>(maxScore,
170 shardGroups[0].groups[groupIDX].groupSortValues);
173 if (totalGroupCount != null) {
174 TopGroups<T> result = new TopGroups<T>(groupSort.getSort(),
175 docSort == null ? null : docSort.getSort(),
177 totalGroupedHitCount,
179 return new TopGroups<T>(result, totalGroupCount);
181 return new TopGroups<T>(groupSort.getSort(),
182 docSort == null ? null : docSort.getSort(),
184 totalGroupedHitCount,