1 package org.apache.lucene.search.grouping;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.search.FieldCache;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Collection;
26 import java.util.List;
29 * A collector that collects all groups that match the
30 * query. Only the group value is collected, and the order
31 * is undefined. This collector does not determine
32 * the most relevant document of a group.
35 * Implementation detail: an int hash set (SentinelIntSet)
36 * is used to detect if a group has already been added to the
37 * total count. For each segment the int set is cleared and filled
38 * with previously counted groups that occur in the new segment.
41 * @lucene.experimental
43 public class TermAllGroupsCollector extends AbstractAllGroupsCollector<String> {
45 private static final int DEFAULT_INITIAL_SIZE = 128;
47 private final String groupField;
48 private final SentinelIntSet ordSet;
49 private final List<String> groups;
51 private FieldCache.StringIndex index;
54 * Expert: Constructs an {@link AbstractAllGroupsCollector}
56 * @param groupField The field to group by
57 * @param initialSize The initial allocation size of the
58 * internal int set and group list
59 * which should roughly match the total
60 * number of expected unique groups. Be aware that the
61 * heap usage is 4 bytes * initialSize.
63 public TermAllGroupsCollector(String groupField, int initialSize) {
64 ordSet = new SentinelIntSet(initialSize, -1);
65 groups = new ArrayList<String>(initialSize);
66 this.groupField = groupField;
70 * Constructs an {@link AbstractAllGroupsCollector}. This sets the
71 * initial allocation size for the internal int set and group list to 128.
74 * @param groupField The field to group by
76 public TermAllGroupsCollector(String groupField) {
77 this(groupField, DEFAULT_INITIAL_SIZE);
80 public void collect(int doc) throws IOException {
81 int key = index.order[doc];
82 if (!ordSet.exists(key)) {
84 String term = key == 0 ? null : index.lookup[key];
92 public Collection<String> getGroups() {
96 public void setNextReader(IndexReader reader, int docBase) throws IOException {
97 index = FieldCache.DEFAULT.getStringIndex(reader, groupField);
99 // Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
101 for (String countedGroup : groups) {
102 int ord = index.binarySearchLookup(countedGroup);