--- /dev/null
+package org.apache.lucene.facet.search.sampling;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.FloatArrayAllocator;
+import org.apache.lucene.facet.search.IntArrayAllocator;
+import org.apache.lucene.facet.search.SamplingWrapper;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Facets accumulation with sampling.<br>
+ * <p>
+ * Note two major differences between this class and {@link SamplingWrapper}:
+ * <ol>
+ * <li>Latter can wrap any other {@link FacetsAccumulator} while this class
+ * directly extends {@link StandardFacetsAccumulator}.</li>
+ * <li>This class can effectively apply sampling on the complement set of
+ * matching documents, thereby working efficiently with the complement
+ * optimization - see {@link FacetsAccumulator#getComplementThreshold()}.</li>
+ * </ol>
+ * <p>
+ * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
+ * does not guarantee accurate values for
+ * {@link FacetResult#getNumValidDescendants()} and
+ * {@link FacetResultNode#getResidue()}.
+ *
+ * @see Sampler
+ * @lucene.experimental
+ */
+public class SamplingAccumulator extends StandardFacetsAccumulator {
+
+  /**
+   * Actual ratio of the most recent sample, as reported by the sampler in
+   * {@link #actualDocsToAccumulate(ScoredDocIDs)}. Stays negative until a
+   * sample has been taken.
+   */
+  private double samplingRatio = -1d;
+
+  /** Supplies the sample set, the over-sampled params, and the result fixing/trimming. */
+  private final Sampler sampler;
+
+  /**
+   * Creates a sampling accumulator that uses explicit array allocators.
+   * All arguments other than <code>sampler</code> are handed unchanged to the
+   * {@link StandardFacetsAccumulator} constructor.
+   *
+   * @param sampler used to sample the documents and to fix and trim the results
+   * @param searchParams facet search parameters
+   * @param indexReader index reader
+   * @param taxonomyReader taxonomy reader
+   * @param intArrayAllocator allocator passed to the superclass
+   * @param floatArrayAllocator allocator passed to the superclass
+   */
+  public SamplingAccumulator(
+      Sampler sampler,
+      FacetSearchParams searchParams,
+      IndexReader indexReader, TaxonomyReader taxonomyReader,
+      IntArrayAllocator intArrayAllocator,
+      FloatArrayAllocator floatArrayAllocator) {
+    super(searchParams, indexReader, taxonomyReader, intArrayAllocator,
+        floatArrayAllocator);
+    this.sampler = sampler;
+  }
+
+  /**
+   * Creates a sampling accumulator. All arguments other than
+   * <code>sampler</code> are handed unchanged to the
+   * {@link StandardFacetsAccumulator} constructor.
+   *
+   * @param sampler used to sample the documents and to fix and trim the results
+   * @param searchParams facet search parameters
+   * @param indexReader index reader
+   * @param taxonomyReader taxonomy reader
+   */
+  public SamplingAccumulator(
+      Sampler sampler,
+      FacetSearchParams searchParams,
+      IndexReader indexReader, TaxonomyReader taxonomyReader) {
+    super(searchParams, indexReader, taxonomyReader);
+    this.sampler = sampler;
+  }
+
+  /**
+   * Accumulates facets using the sampler's over-sampled search params, then
+   * fixes, rearranges, trims and (if labeling is allowed) labels each result.
+   * <p>
+   * While this method runs, the accumulator's search params and labeling flag
+   * are temporarily replaced. Both are restored before the method exits,
+   * whether it returns normally or throws.
+   *
+   * @param docids the matching documents to accumulate over
+   * @return one processed result for each result returned by the superclass
+   *         accumulation, in the same order
+   * @throws IOException propagated from the superclass accumulation or from
+   *         processing of the results
+   */
+  @Override
+  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
+    boolean origAllowLabeling = isAllowLabeling();
+    FacetSearchParams original = searchParams;
+    List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+
+    try {
+      List<FacetResult> sampleRes;
+      try {
+        // Let the superclass accumulate without labeling: every returned
+        // result is labeled below anyway, after fixing and trimming.
+        setAllowLabeling(false);
+
+        // Replace the original searchParams with the over-sampled ones.
+        searchParams = sampler.overSampledSearchParams(original);
+
+        sampleRes = super.accumulate(docids);
+      } finally {
+        // Restore labeling even on failure; the loop below relies on the
+        // restored value when deciding whether to label.
+        setAllowLabeling(origAllowLabeling);
+      }
+
+      for (FacetResult fres : sampleRes) {
+        // fres is never null - this is guaranteed by the superclass.
+        FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(
+            taxonomyReader);
+
+        // Fix the result of the current request. The over-sampled params are
+        // still in effect here.
+        sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
+            .fixResult(docids, fres);
+
+        // Let the request's handler do any rearranging it needs to.
+        fres = frh.rearrangeFacetResult(fres);
+
+        // Use the sampler to trim the extra (over-sampled) results.
+        fres = sampler.trimResult(fres);
+
+        // Final labeling, only if allowed (labeling is a costly operation).
+        if (isAllowLabeling()) {
+          frh.labelResult(fres);
+        }
+        fixedRes.add(fres); // add to final results
+      }
+    } finally {
+      // Back to the original params, even if accumulation or fixing failed.
+      searchParams = original;
+    }
+
+    return fixedRes;
+  }
+
+  /**
+   * Asks the sampler for a sample of the given documents and records the
+   * actual sampling ratio for {@link #getTotalCountsFactor()}.
+   *
+   * @param docids the full set of documents
+   * @return the sampled documents to accumulate over
+   * @throws IOException propagated from the sampler
+   */
+  @Override
+  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
+    SampleResult sampleRes = sampler.getSampleSet(docids);
+    samplingRatio = sampleRes.actualSampleRatio;
+    return sampleRes.docids;
+  }
+
+  /**
+   * Returns the actual sampling ratio recorded by the last call to
+   * {@link #actualDocsToAccumulate(ScoredDocIDs)}.
+   *
+   * @return the recorded sampling ratio
+   * @throws IllegalStateException if the recorded ratio is negative, which is
+   *         its value before any sample has been taken
+   */
+  @Override
+  protected double getTotalCountsFactor() {
+    if (samplingRatio < 0) {
+      throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
+    }
+    return samplingRatio;
+  }
+}