X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
diff --git a/lucene-java-3.5.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java b/lucene-java-3.5.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
new file mode 100644
index 0000000..fb4b660
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
@@ -0,0 +1,358 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.*;
+
+/** FirstPassGroupingCollector is the first of two passes necessary
+ * to collect grouped hits. This pass gathers the top N sorted
+ * groups. Concrete subclasses define what a group is and how it
+ * is internally collected.
+ *
+ *
See {@link org.apache.lucene.search.grouping} for more
+ * details including a full code example.
+ *
+ * @lucene.experimental
+ */
+abstract public class AbstractFirstPassGroupingCollector extends Collector {
+
+ private final Sort groupSort;
+ private final FieldComparator[] comparators;
+ private final int[] reversed;
+ private final int topNGroups;
+ private final HashMap> groupMap;
+ private final int compIDXEnd;
+
+ // Set once we reach topNGroups unique groups:
+ private TreeSet> orderedGroups;
+ private int docBase;
+ private int spareSlot;
+
+ /**
+ * Create the first pass collector.
+ *
+ * @param groupSort The {@link Sort} used to sort the
+ * groups. The top sorted document within each group
+ * according to groupSort, determines how that group
+ * sorts against other groups. This must be non-null,
+ * ie, if you want to groupSort by relevance use
+ * Sort.RELEVANCE.
+ * @param topNGroups How many top groups to keep.
+ * @throws IOException If I/O related errors occur
+ */
+ public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
+ if (topNGroups < 1) {
+ throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
+ }
+
+ // TODO: allow null groupSort to mean "by relevance",
+ // and specialize it?
+ this.groupSort = groupSort;
+
+ this.topNGroups = topNGroups;
+
+ final SortField[] sortFields = groupSort.getSort();
+ comparators = new FieldComparator[sortFields.length];
+ compIDXEnd = comparators.length - 1;
+ reversed = new int[sortFields.length];
+ for (int i = 0; i < sortFields.length; i++) {
+ final SortField sortField = sortFields[i];
+
+ // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
+ comparators[i] = sortField.getComparator(topNGroups + 1, i);
+ reversed[i] = sortField.getReverse() ? -1 : 1;
+ }
+
+ spareSlot = topNGroups;
+ groupMap = new HashMap>(topNGroups);
+ }
+
+ /**
+ * Returns top groups, starting from offset. This may
+ * return null, if no groups were collected, or if the
+ * number of unique groups collected is <= offset.
+ *
+ * @param groupOffset The offset in the collected groups
+ * @param fillFields Whether to fill to {@link SearchGroup#sortValues}
+ * @return top groups, starting from offset
+ */
+ public Collection> getTopGroups(int groupOffset, boolean fillFields) {
+
+ //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
+
+ if (groupOffset < 0) {
+ throw new IllegalArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")");
+ }
+
+ if (groupMap.size() <= groupOffset) {
+ return null;
+ }
+
+ if (orderedGroups == null) {
+ buildSortedSet();
+ }
+
+ final Collection> result = new ArrayList>();
+ int upto = 0;
+ final int sortFieldCount = groupSort.getSort().length;
+ for(CollectedSearchGroup group : orderedGroups) {
+ if (upto++ < groupOffset) {
+ continue;
+ }
+ //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ SearchGroup searchGroup = new SearchGroup();
+ searchGroup.groupValue = group.groupValue;
+ if (fillFields) {
+ searchGroup.sortValues = new Object[sortFieldCount];
+ for(int sortFieldIDX=0;sortFieldIDX group = groupMap.get(groupValue);
+
+ if (group == null) {
+
+ // First time we are seeing this group, or, we've seen
+ // it before but it fell out of the top N and is now
+ // coming back
+
+ if (groupMap.size() < topNGroups) {
+
+ // Still in startup transient: we have not
+ // seen enough unique groups to start pruning them;
+ // just keep collecting them
+
+ // Add a new CollectedSearchGroup:
+ CollectedSearchGroup sg = new CollectedSearchGroup();
+ sg.groupValue = copyDocGroupValue(groupValue, null);
+ sg.comparatorSlot = groupMap.size();
+ sg.topDoc = docBase + doc;
+ for (FieldComparator fc : comparators) {
+ fc.copy(sg.comparatorSlot, doc);
+ }
+ groupMap.put(sg.groupValue, sg);
+
+ if (groupMap.size() == topNGroups) {
+ // End of startup transient: we now have max
+ // number of groups; from here on we will drop
+ // bottom group when we insert new one:
+ buildSortedSet();
+ }
+
+ return;
+ }
+
+ // We already tested that the document is competitive, so replace
+ // the bottom group with this new group.
+
+ // java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
+ final CollectedSearchGroup bottomGroup = orderedGroups.last();
+ orderedGroups.remove(bottomGroup);
+ assert orderedGroups.size() == topNGroups -1;
+
+ groupMap.remove(bottomGroup.groupValue);
+
+ // reuse the removed CollectedSearchGroup
+ bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
+ bottomGroup.topDoc = docBase + doc;
+
+ for (FieldComparator fc : comparators) {
+ fc.copy(bottomGroup.comparatorSlot, doc);
+ }
+
+ groupMap.put(bottomGroup.groupValue, bottomGroup);
+ orderedGroups.add(bottomGroup);
+ assert orderedGroups.size() == topNGroups;
+
+ final int lastComparatorSlot = orderedGroups.last().comparatorSlot;
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(lastComparatorSlot);
+ }
+
+ return;
+ }
+
+ // Update existing group:
+ for (int compIDX = 0;; compIDX++) {
+ final FieldComparator fc = comparators[compIDX];
+ fc.copy(spareSlot, doc);
+
+ final int c = reversed[compIDX] * fc.compare(group.comparatorSlot, spareSlot);
+ if (c < 0) {
+ // Definitely not competitive.
+ return;
+ } else if (c > 0) {
+ // Definitely competitive; set remaining comparators:
+ for (int compIDX2=compIDX+1; compIDX2 prevLast;
+ if (orderedGroups != null) {
+ prevLast = orderedGroups.last();
+ orderedGroups.remove(group);
+ assert orderedGroups.size() == topNGroups-1;
+ } else {
+ prevLast = null;
+ }
+
+ group.topDoc = docBase + doc;
+
+ // Swap slots
+ final int tmp = spareSlot;
+ spareSlot = group.comparatorSlot;
+ group.comparatorSlot = tmp;
+
+ // Re-add the changed group
+ if (orderedGroups != null) {
+ orderedGroups.add(group);
+ assert orderedGroups.size() == topNGroups;
+ final CollectedSearchGroup newLast = orderedGroups.last();
+ // If we changed the value of the last group, or changed which group was last, then update bottom:
+ if (group == newLast || prevLast != newLast) {
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(newLast.comparatorSlot);
+ }
+ }
+ }
+ }
+
+ private void buildSortedSet() {
+ final Comparator comparator = new Comparator() {
+ public int compare(CollectedSearchGroup o1, CollectedSearchGroup o2) {
+ for (int compIDX = 0;; compIDX++) {
+ FieldComparator fc = comparators[compIDX];
+ final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
+ if (c != 0) {
+ return c;
+ } else if (compIDX == compIDXEnd) {
+ return o1.topDoc - o2.topDoc;
+ }
+ }
+ }
+ };
+
+ orderedGroups = new TreeSet>(comparator);
+ orderedGroups.addAll(groupMap.values());
+ assert orderedGroups.size() > 0;
+
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(orderedGroups.last().comparatorSlot);
+ }
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ this.docBase = docBase;
+ for (int i=0; i extends SearchGroup {
+ int topDoc;
+ int comparatorSlot;
+}