pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / facet / src / java / org / apache / lucene / facet / search / sampling / SamplingAccumulator.java
diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java
new file mode 100644 (file)
index 0000000..fa48c68
--- /dev/null
@@ -0,0 +1,143 @@
+package org.apache.lucene.facet.search.sampling;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.FloatArrayAllocator;
+import org.apache.lucene.facet.search.IntArrayAllocator;
+import org.apache.lucene.facet.search.SamplingWrapper;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Facets accumulation with sampling.<br>
+ * <p>
+ * Note two major differences between this class and {@link SamplingWrapper}:
+ * <ol>
+ * <li>Latter can wrap any other {@link FacetsAccumulator} while this class
+ * directly extends {@link StandardFacetsAccumulator}.</li>
+ * <li>This class can effectively apply sampling on the complement set of
+ * matching documents, thereby working efficiently with the complement
+ * optimization - see {@link FacetsAccumulator#getComplementThreshold()}.</li>
+ * </ol>
+ * <p>
+ * Note: sampling accumulation (accumulation over a sampled set of the results)
+ * does not guarantee accurate values for
+ * {@link FacetResult#getNumValidDescendants()} and
+ * {@link FacetResultNode#getResidue()}.
+ * 
+ * @see Sampler
+ * @lucene.experimental
+ */
+public class SamplingAccumulator extends StandardFacetsAccumulator {
+  
+  private double samplingRatio = -1d;
+  private final Sampler sampler;
+  
+  /**
+   * Constructor...
+   */
+  public SamplingAccumulator(
+      Sampler sampler,
+      FacetSearchParams searchParams,
+      IndexReader indexReader, TaxonomyReader taxonomyReader,
+      IntArrayAllocator intArrayAllocator,
+      FloatArrayAllocator floatArrayAllocator) {
+    super(searchParams, indexReader, taxonomyReader, intArrayAllocator,
+        floatArrayAllocator);
+    this.sampler = sampler;
+  }
+
+  /**
+   * Constructor...
+   */
+  public SamplingAccumulator(
+      Sampler sampler,
+      FacetSearchParams searchParams,
+      IndexReader indexReader, TaxonomyReader taxonomyReader) {
+    super(searchParams, indexReader, taxonomyReader);
+    this.sampler = sampler;
+  }
+
+  @Override
+  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
+    // first let delegee accumulate without labeling at all (though
+    // currently it doesn't matter because we have to label all returned anyhow)
+    boolean origAllowLabeling = isAllowLabeling();
+    setAllowLabeling(false);
+    
+    // Replacing the original searchParams with the over-sampled
+    FacetSearchParams original = searchParams;
+    searchParams = sampler.overSampledSearchParams(original);
+    
+    List<FacetResult> sampleRes = super.accumulate(docids);
+    setAllowLabeling(origAllowLabeling);
+    
+    List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+    for (FacetResult fres : sampleRes) {
+      // for sure fres is not null because this is guaranteed by the delegee.
+      FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(
+          taxonomyReader);
+      // fix the result of current request
+      sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
+          .fixResult(docids, fres);
+      
+      fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+
+      // Using the sampler to trim the extra (over-sampled) results
+      fres = sampler.trimResult(fres);
+                                              // arranging it needs to
+      // final labeling if allowed (because labeling is a costly operation)
+      if (isAllowLabeling()) {
+        frh.labelResult(fres);
+      }
+      fixedRes.add(fres); // add to final results
+    }
+    
+    searchParams = original; // Back to original params
+    
+    return fixedRes; 
+  }
+
+  @Override
+  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
+    SampleResult sampleRes = sampler.getSampleSet(docids);
+    samplingRatio = sampleRes.actualSampleRatio;
+    return sampleRes.docids;
+  }
+  
+  @Override
+  protected double getTotalCountsFactor() {
+    if (samplingRatio<0) {
+      throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
+    }
+    return samplingRatio;
+  }
+}