-package org.apache.lucene.search.grouping;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.FieldCache;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * Collector that gathers the value of every group matched by the
- * query. Only group values are recorded; no particular order is
- * guaranteed, and no attempt is made to find the most relevant
- * document within a group.
- *
- * <p>
- * Implementation detail: an int hash set ({@code SentinelIntSet})
- * of term ordinals is used to tell whether a group has been
- * recorded already. Ordinals are resolved against the current
- * segment's string index, so on every segment switch the set is
- * emptied and re-populated with the ordinals, in the new segment,
- * of the groups recorded so far.
- *
- * @lucene.experimental
- */
-public class TermAllGroupsCollector extends AbstractAllGroupsCollector<String> {
-
-  /** Initial capacity used when the caller does not supply one. */
-  private static final int DEFAULT_INITIAL_SIZE = 128;
-
-  private final String groupField;
-  /** Ordinals, in the current segment, of the groups already recorded. */
-  private final SentinelIntSet ordSet;
-  /** Every distinct group value recorded so far, across all segments. */
-  private final List<String> groups;
-
-  /** String index of the current segment; assigned in {@link #setNextReader}. */
-  private FieldCache.StringIndex index;
-
-  /**
-   * Expert: creates a {@link TermAllGroupsCollector} with an explicit
-   * initial capacity.
-   *
-   * @param groupField  The field to group by
-   * @param initialSize The initial allocation size of the internal
-   *                    int set and group list, which should roughly
-   *                    match the total number of expected unique
-   *                    groups. Be aware that the heap usage is
-   *                    4 bytes * initialSize.
-   */
-  public TermAllGroupsCollector(String groupField, int initialSize) {
-    this.groupField = groupField;
-    this.ordSet = new SentinelIntSet(initialSize, -1);
-    this.groups = new ArrayList<String>(initialSize);
-  }
-
-  /**
-   * Creates a {@link TermAllGroupsCollector} whose internal int set
-   * and group list start out with an allocation size of 128.
-   *
-   * @param groupField The field to group by
-   */
-  public TermAllGroupsCollector(String groupField) {
-    this(groupField, DEFAULT_INITIAL_SIZE);
-  }
-
-  /**
-   * Records the group of the given document unless its ordinal has
-   * been seen already. Ordinal 0 is recorded as a {@code null} group
-   * value; any other ordinal is resolved through the segment's
-   * lookup table.
-   *
-   * @param doc document number used to index into the current
-   *            segment's string index
-   */
-  public void collect(int doc) throws IOException {
-    final int ord = index.order[doc];
-    if (ordSet.exists(ord)) {
-      // This group has been recorded before; nothing to do.
-      return;
-    }
-
-    ordSet.put(ord);
-    final String groupValue;
-    if (ord == 0) {
-      groupValue = null;
-    } else {
-      groupValue = index.lookup[ord];
-    }
-    groups.add(groupValue);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public Collection<String> getGroups() {
-    return groups;
-  }
-
-  /**
-   * Switches to the string index of the given reader and rebuilds
-   * the ordinal set for it.
-   *
-   * @param reader  reader whose string index for the group field is loaded
-   * @param docBase not used by this collector
-   */
-  public void setNextReader(IndexReader reader, int docBase) throws IOException {
-    index = FieldCache.DEFAULT.getStringIndex(reader, groupField);
-
-    // Start from an empty set, then re-register each group recorded so far
-    // under the ordinal it has in this segment. A negative lookup result
-    // means the group is skipped for this segment.
-    ordSet.clear();
-    for (int i = 0, count = groups.size(); i < count; i++) {
-      final int segmentOrd = index.binarySearchLookup(groups.get(i));
-      if (segmentOrd < 0) {
-        continue;
-      }
-      ordSet.put(segmentOrd);
-    }
-  }
-
-}