pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / contrib / grouping / src / java / org / apache / lucene / search / grouping / AbstractFirstPassGroupingCollector.java
diff --git a/lucene-java-3.4.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java b/lucene-java-3.4.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java

deleted file mode 100644 (file)

index fb4b660..0000000
--- a/lucene-java-3.4.0/lucene/contrib/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
+++ /dev/null
@@ -1,358 +0,0 @@
-package org.apache.lucene.search.grouping;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.*;
-
-/** FirstPassGroupingCollector is the first of two passes necessary
- *  to collect grouped hits.  This pass gathers the top N sorted
- *  groups. Concrete subclasses define what a group is and how it
- *  is internally collected.
- *
- *  <p>See {@link org.apache.lucene.search.grouping} for more
- *  details including a full code example.</p>
- *
- * @lucene.experimental
- */
-abstract public class AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> extends Collector {
-
-  private final Sort groupSort;
-  private final FieldComparator[] comparators;
-  private final int[] reversed;
-  private final int topNGroups;
-  private final HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>> groupMap;
-  private final int compIDXEnd;
-
-  // Set once we reach topNGroups unique groups:
-  private TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>> orderedGroups;
-  private int docBase;
-  private int spareSlot;
-
-  /**
-   * Create the first pass collector.
-   *
-   *  @param groupSort The {@link Sort} used to sort the
-   *    groups.  The top sorted document within each group
-   *    according to groupSort, determines how that group
-   *    sorts against other groups.  This must be non-null,
-   *    ie, if you want to groupSort by relevance use
-   *    Sort.RELEVANCE.
-   *  @param topNGroups How many top groups to keep.
-   *  @throws IOException If I/O related errors occur
-   */
-  public AbstractFirstPassGroupingCollector(Sort groupSort, int topNGroups) throws IOException {
-    if (topNGroups < 1) {
-      throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
-    }
-
-    // TODO: allow null groupSort to mean "by relevance",
-    // and specialize it?
-    this.groupSort = groupSort;
-
-    this.topNGroups = topNGroups;
-
-    final SortField[] sortFields = groupSort.getSort();
-    comparators = new FieldComparator[sortFields.length];
-    compIDXEnd = comparators.length - 1;
-    reversed = new int[sortFields.length];
-    for (int i = 0; i < sortFields.length; i++) {
-      final SortField sortField = sortFields[i];
-
-      // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
-      comparators[i] = sortField.getComparator(topNGroups + 1, i);
-      reversed[i] = sortField.getReverse() ? -1 : 1;
-    }
-
-    spareSlot = topNGroups;
-    groupMap = new HashMap<GROUP_VALUE_TYPE, CollectedSearchGroup<GROUP_VALUE_TYPE>>(topNGroups);
-  }
-
-  /**
-   * Returns top groups, starting from offset.  This may
-   * return null, if no groups were collected, or if the
-   * number of unique groups collected is <= offset.
-   *
-   * @param groupOffset The offset in the collected groups
-   * @param fillFields Whether to fill to {@link SearchGroup#sortValues}
-   * @return top groups, starting from offset
-   */
-  public Collection<SearchGroup<GROUP_VALUE_TYPE>> getTopGroups(int groupOffset, boolean fillFields) {
-
-    //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
-
-    if (groupOffset < 0) {
-      throw new IllegalArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")");
-    }
-
-    if (groupMap.size() <= groupOffset) {
-      return null;
-    }
-
-    if (orderedGroups == null) {
-      buildSortedSet();
-    }
-
-    final Collection<SearchGroup<GROUP_VALUE_TYPE>> result = new ArrayList<SearchGroup<GROUP_VALUE_TYPE>>();
-    int upto = 0;
-    final int sortFieldCount = groupSort.getSort().length;
-    for(CollectedSearchGroup<GROUP_VALUE_TYPE> group : orderedGroups) {
-      if (upto++ < groupOffset) {
-        continue;
-      }
-      //System.out.println("  group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
-      SearchGroup<GROUP_VALUE_TYPE> searchGroup = new SearchGroup<GROUP_VALUE_TYPE>();
-      searchGroup.groupValue = group.groupValue;
-      if (fillFields) {
-        searchGroup.sortValues = new Object[sortFieldCount];
-        for(int sortFieldIDX=0;sortFieldIDX<sortFieldCount;sortFieldIDX++) {
-          searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].value(group.comparatorSlot);
-        }
-      }
-      result.add(searchGroup);
-    }
-    //System.out.println("  return " + result.size() + " groups");
-    return result;
-  }
-
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    for (FieldComparator comparator : comparators) {
-      comparator.setScorer(scorer);
-    }
-  }
-
-  @Override
-  public void collect(int doc) throws IOException {
-    //System.out.println("FP.collect doc=" + doc);
-
-    // If orderedGroups != null we already have collected N groups and
-    // can short circuit by comparing this document to the bottom group,
-    // without having to find what group this document belongs to.
-    
-    // Even if this document belongs to a group in the top N, we'll know that
-    // we don't have to update that group.
-
-    // Downside: if the number of unique groups is very low, this is
-    // wasted effort as we will most likely be updating an existing group.
-    if (orderedGroups != null) {
-      for (int compIDX = 0;; compIDX++) {
-        final int c = reversed[compIDX] * comparators[compIDX].compareBottom(doc);
-        if (c < 0) {
-          // Definitely not competitive. So don't even bother to continue
-          return;
-        } else if (c > 0) {
-          // Definitely competitive.
-          break;
-        } else if (compIDX == compIDXEnd) {
-          // Here c=0. If we're at the last comparator, this doc is not
-          // competitive, since docs are visited in doc Id order, which means
-          // this doc cannot compete with any other document in the queue.
-          return;
-        }
-      }
-    }
-
-    // TODO: should we add option to mean "ignore docs that
-    // don't have the group field" (instead of stuffing them
-    // under null group)?
-    final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
-
-    final CollectedSearchGroup<GROUP_VALUE_TYPE> group = groupMap.get(groupValue);
-
-    if (group == null) {
-
-      // First time we are seeing this group, or, we've seen
-      // it before but it fell out of the top N and is now
-      // coming back
-
-      if (groupMap.size() < topNGroups) {
-
-        // Still in startup transient: we have not
-        // seen enough unique groups to start pruning them;
-        // just keep collecting them
-
-        // Add a new CollectedSearchGroup:
-        CollectedSearchGroup<GROUP_VALUE_TYPE> sg = new CollectedSearchGroup<GROUP_VALUE_TYPE>();
-        sg.groupValue = copyDocGroupValue(groupValue, null);
-        sg.comparatorSlot = groupMap.size();
-        sg.topDoc = docBase + doc;
-        for (FieldComparator fc : comparators) {
-          fc.copy(sg.comparatorSlot, doc);
-        }
-        groupMap.put(sg.groupValue, sg);
-
-        if (groupMap.size() == topNGroups) {
-          // End of startup transient: we now have max
-          // number of groups; from here on we will drop
-          // bottom group when we insert new one:
-          buildSortedSet();
-        }
-
-        return;
-      }
-
-      // We already tested that the document is competitive, so replace
-      // the bottom group with this new group.
-
-      // java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
-      final CollectedSearchGroup<GROUP_VALUE_TYPE> bottomGroup = orderedGroups.last();
-      orderedGroups.remove(bottomGroup);
-      assert orderedGroups.size() == topNGroups -1;
-
-      groupMap.remove(bottomGroup.groupValue);
-
-      // reuse the removed CollectedSearchGroup
-      bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
-      bottomGroup.topDoc = docBase + doc;
-
-      for (FieldComparator fc : comparators) {
-        fc.copy(bottomGroup.comparatorSlot, doc);
-      }
-
-      groupMap.put(bottomGroup.groupValue, bottomGroup);
-      orderedGroups.add(bottomGroup);
-      assert orderedGroups.size() == topNGroups;
-
-      final int lastComparatorSlot = orderedGroups.last().comparatorSlot;
-      for (FieldComparator fc : comparators) {
-        fc.setBottom(lastComparatorSlot);
-      }
-
-      return;
-    }
-
-    // Update existing group:
-    for (int compIDX = 0;; compIDX++) {
-      final FieldComparator fc = comparators[compIDX];
-      fc.copy(spareSlot, doc);
-
-      final int c = reversed[compIDX] * fc.compare(group.comparatorSlot, spareSlot);
-      if (c < 0) {
-        // Definitely not competitive.
-        return;
-      } else if (c > 0) {
-        // Definitely competitive; set remaining comparators:
-        for (int compIDX2=compIDX+1; compIDX2<comparators.length; compIDX2++) {
-          comparators[compIDX2].copy(spareSlot, doc);
-        }
-        break;
-      } else if (compIDX == compIDXEnd) {
-        // Here c=0. If we're at the last comparator, this doc is not
-        // competitive, since docs are visited in doc Id order, which means
-        // this doc cannot compete with any other document in the queue.
-        return;
-      }
-    }
-
-    // Remove before updating the group since lookup is done via comparators
-    // TODO: optimize this
-
-    final CollectedSearchGroup<GROUP_VALUE_TYPE> prevLast;
-    if (orderedGroups != null) {
-      prevLast = orderedGroups.last();
-      orderedGroups.remove(group);
-      assert orderedGroups.size() == topNGroups-1;
-    } else {
-      prevLast = null;
-    }
-
-    group.topDoc = docBase + doc;
-
-    // Swap slots
-    final int tmp = spareSlot;
-    spareSlot = group.comparatorSlot;
-    group.comparatorSlot = tmp;
-
-    // Re-add the changed group
-    if (orderedGroups != null) {
-      orderedGroups.add(group);
-      assert orderedGroups.size() == topNGroups;
-      final CollectedSearchGroup newLast = orderedGroups.last();
-      // If we changed the value of the last group, or changed which group was last, then update bottom:
-      if (group == newLast || prevLast != newLast) {
-        for (FieldComparator fc : comparators) {
-          fc.setBottom(newLast.comparatorSlot);
-        }
-      }
-    }
-  }
-
-  private void buildSortedSet() {
-    final Comparator<CollectedSearchGroup> comparator = new Comparator<CollectedSearchGroup>() {
-      public int compare(CollectedSearchGroup o1, CollectedSearchGroup o2) {
-        for (int compIDX = 0;; compIDX++) {
-          FieldComparator fc = comparators[compIDX];
-          final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
-          if (c != 0) {
-            return c;
-          } else if (compIDX == compIDXEnd) {
-            return o1.topDoc - o2.topDoc;
-          }
-        }
-      }
-    };
-
-    orderedGroups = new TreeSet<CollectedSearchGroup<GROUP_VALUE_TYPE>>(comparator);
-    orderedGroups.addAll(groupMap.values());
-    assert orderedGroups.size() > 0;
-
-    for (FieldComparator fc : comparators) {
-      fc.setBottom(orderedGroups.last().comparatorSlot);
-    }
-  }
-
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    return false;
-  }
-
-  @Override
-  public void setNextReader(IndexReader reader, int docBase) throws IOException {
-    this.docBase = docBase;
-    for (int i=0; i<comparators.length; i++) {
-      comparators[i].setNextReader(reader, docBase);
-    }
-  }
-
-  /**
-   * Returns the group value for the specified doc.
-   *
-   * @param doc The specified doc
-   * @return the group value for the specified doc
-   */
-  protected abstract GROUP_VALUE_TYPE getDocGroupValue(int doc);
-
-  /**
-   * Returns a copy of the specified group value by creating a new instance and copying the value from the specified
-   * groupValue in the new instance. Or optionally the reuse argument can be used to copy the group value in.
-   *
-   * @param groupValue The group value to copy
-   * @param reuse Optionally a reuse instance to prevent a new instance creation
-   * @return a copy of the specified group value
-   */
-  protected abstract GROUP_VALUE_TYPE copyDocGroupValue(GROUP_VALUE_TYPE groupValue, GROUP_VALUE_TYPE reuse);
-}
-
-class CollectedSearchGroup<T> extends SearchGroup<T> {
-  int topDoc;
-  int comparatorSlot;
-}