--- /dev/null
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * A collector that collects all groups that match the
+ * query. Only the group value is collected, and the order
+ * is undefined. This collector does not determine
+ * the most relevant document of a group.
+ *
+ * <p>
+ * Implementation detail: an int hash set (SentinelIntSet)
+ * is used to detect if a group is already added to the
+ * total count. For each segment the int set is cleared and filled
+ * with previous counted groups that occur in the new
+ * segment.
+ *
+ * @lucene.experimental
+ */
+public class TermAllGroupsCollector extends AbstractAllGroupsCollector<String> {
+
+  /** Initial capacity used by {@link #TermAllGroupsCollector(String)}. */
+  private static final int DEFAULT_INITIAL_SIZE = 128;
+
+  /** Name of the field whose terms are used as group values. */
+  private final String groupField;
+  /** Ordinals (valid for the current segment only) of the groups counted so far. */
+  private final SentinelIntSet ordSet;
+  /** Every distinct group value seen so far, across all segments; may contain one null entry. */
+  private final List<String> groups;
+
+  /** Term index of the current segment; assigned in {@link #setNextReader(IndexReader, int)}. */
+  private FieldCache.StringIndex index;
+
+  /**
+   * Expert: Constructs a {@link TermAllGroupsCollector}
+   *
+   * @param groupField  The field to group by
+   * @param initialSize The initial allocation size of the
+   *                    internal int set and group list
+   *                    which should roughly match the total
+   *                    number of expected unique groups. Be aware that the
+   *                    heap usage is 4 bytes * initialSize.
+   */
+  public TermAllGroupsCollector(String groupField, int initialSize) {
+    // -1 is the "empty slot" sentinel handed to the int set.
+    ordSet = new SentinelIntSet(initialSize, -1);
+    groups = new ArrayList<String>(initialSize);
+    this.groupField = groupField;
+  }
+
+  /**
+   * Constructs a {@link TermAllGroupsCollector}. This sets the
+   * initial allocation size for the internal int set and group
+   * list to 128.
+   *
+   * @param groupField The field to group by
+   */
+  public TermAllGroupsCollector(String groupField) {
+    this(groupField, DEFAULT_INITIAL_SIZE);
+  }
+
+  /**
+   * Records the group of the given document, unless that group was already recorded.
+   *
+   * @param doc document id used to index into the ordinal array of the term index
+   *            loaded by the most recent {@link #setNextReader(IndexReader, int)} call
+   * @throws IOException as declared by the collector contract
+   */
+  @Override
+  public void collect(int doc) throws IOException {
+    int key = index.order[doc];
+    if (!ordSet.exists(key)) {
+      ordSet.put(key);
+      // Ordinal 0 is mapped to the null group (presumably documents that have
+      // no value for the group field -- confirm against FieldCache.StringIndex).
+      String term = key == 0 ? null : index.lookup[key];
+      groups.add(term);
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Note: the returned collection is this collector's internal list, not a copy.
+   */
+  @Override
+  public Collection<String> getGroups() {
+    return groups;
+  }
+
+  /**
+   * Switches to the term index of the next reader and rebuilds the ordinal set for it.
+   *
+   * @param reader  the reader whose string index for the group field is loaded
+   * @param docBase not used by this collector
+   * @throws IOException if loading the string index from the field cache fails
+   */
+  @Override
+  public void setNextReader(IndexReader reader, int docBase) throws IOException {
+    index = FieldCache.DEFAULT.getStringIndex(reader, groupField);
+
+    // Ordinals are only meaningful within one index, so clear ordSet and fill it with
+    // previous encountered groups that can occur in the current segment.
+    ordSet.clear();
+    for (String countedGroup : groups) {
+      int ord = index.binarySearchLookup(countedGroup);
+      // A negative result is treated as "group not present in this segment".
+      if (ord >= 0) {
+        ordSet.put(ord);
+      }
+    }
+  }
+
+}