X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java new file mode 100644 index 0000000..fa48c68 --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java @@ -0,0 +1,143 @@ +package org.apache.lucene.facet.search.sampling; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.index.IndexReader; + +import org.apache.lucene.facet.search.FacetResultsHandler; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FloatArrayAllocator; +import org.apache.lucene.facet.search.IntArrayAllocator; +import org.apache.lucene.facet.search.SamplingWrapper; +import org.apache.lucene.facet.search.ScoredDocIDs; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.search.sampling.Sampler.SampleResult; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Facets accumulation with sampling.
 * <p>
 * Note two major differences between this class and {@link SamplingWrapper}:
 * <ol>
 * <li>The latter can wrap any other {@link FacetsAccumulator}, while this class
 * directly extends {@link StandardFacetsAccumulator}.</li>
 * <li>This class can effectively apply sampling on the complement set of
 * matching documents, thereby working efficiently with the complement
 * optimization - see {@link FacetsAccumulator#getComplementThreshold()}.</li>
 * </ol>
+ * Note: Sampling accumulation (Accumulation over a sampled-set of the results), + * does not guarantee accurate values for + * {@link FacetResult#getNumValidDescendants()} & + * {@link FacetResultNode#getResidue()}. + * + * @see Sampler + * @lucene.experimental + */ +public class SamplingAccumulator extends StandardFacetsAccumulator { + + private double samplingRatio = -1d; + private final Sampler sampler; + + /** + * Constructor... + */ + public SamplingAccumulator( + Sampler sampler, + FacetSearchParams searchParams, + IndexReader indexReader, TaxonomyReader taxonomyReader, + IntArrayAllocator intArrayAllocator, + FloatArrayAllocator floatArrayAllocator) { + super(searchParams, indexReader, taxonomyReader, intArrayAllocator, + floatArrayAllocator); + this.sampler = sampler; + } + + /** + * Constructor... + */ + public SamplingAccumulator( + Sampler sampler, + FacetSearchParams searchParams, + IndexReader indexReader, TaxonomyReader taxonomyReader) { + super(searchParams, indexReader, taxonomyReader); + this.sampler = sampler; + } + + @Override + public List accumulate(ScoredDocIDs docids) throws IOException { + // first let delegee accumulate without labeling at all (though + // currently it doesn't matter because we have to label all returned anyhow) + boolean origAllowLabeling = isAllowLabeling(); + setAllowLabeling(false); + + // Replacing the original searchParams with the over-sampled + FacetSearchParams original = searchParams; + searchParams = sampler.overSampledSearchParams(original); + + List sampleRes = super.accumulate(docids); + setAllowLabeling(origAllowLabeling); + + List fixedRes = new ArrayList(); + for (FacetResult fres : sampleRes) { + // for sure fres is not null because this is guaranteed by the delegee. 
+ FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler( + taxonomyReader); + // fix the result of current request + sampler.getSampleFixer(indexReader, taxonomyReader, searchParams) + .fixResult(docids, fres); + + fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any + + // Using the sampler to trim the extra (over-sampled) results + fres = sampler.trimResult(fres); + // arranging it needs to + // final labeling if allowed (because labeling is a costly operation) + if (isAllowLabeling()) { + frh.labelResult(fres); + } + fixedRes.add(fres); // add to final results + } + + searchParams = original; // Back to original params + + return fixedRes; + } + + @Override + protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException { + SampleResult sampleRes = sampler.getSampleSet(docids); + samplingRatio = sampleRes.actualSampleRatio; + return sampleRes.docids; + } + + @Override + protected double getTotalCountsFactor() { + if (samplingRatio<0) { + throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked"); + } + return samplingRatio; + } +}