1 package org.apache.lucene.facet.search.sampling;
3 import java.io.IOException;
4 import java.util.ArrayList;
7 import org.apache.lucene.index.IndexReader;
9 import org.apache.lucene.facet.search.FacetResultsHandler;
10 import org.apache.lucene.facet.search.FacetsAccumulator;
11 import org.apache.lucene.facet.search.FloatArrayAllocator;
12 import org.apache.lucene.facet.search.IntArrayAllocator;
13 import org.apache.lucene.facet.search.SamplingWrapper;
14 import org.apache.lucene.facet.search.ScoredDocIDs;
15 import org.apache.lucene.facet.search.StandardFacetsAccumulator;
16 import org.apache.lucene.facet.search.params.FacetSearchParams;
17 import org.apache.lucene.facet.search.results.FacetResult;
18 import org.apache.lucene.facet.search.results.FacetResultNode;
19 import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
20 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
23 * Licensed to the Apache Software Foundation (ASF) under one or more
24 * contributor license agreements. See the NOTICE file distributed with
25 * this work for additional information regarding copyright ownership.
26 * The ASF licenses this file to You under the Apache License, Version 2.0
27 * (the "License"); you may not use this file except in compliance with
28 * the License. You may obtain a copy of the License at
30 * http://www.apache.org/licenses/LICENSE-2.0
32 * Unless required by applicable law or agreed to in writing, software
33 * distributed under the License is distributed on an "AS IS" BASIS,
34 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35 * See the License for the specific language governing permissions and
36 * limitations under the License.
40 * Facets accumulation with sampling.<br>
42 * Note two major differences between this class and {@link SamplingWrapper}:
44 * <li>Latter can wrap any other {@link FacetsAccumulator} while this class
45 * directly extends {@link StandardFacetsAccumulator}.</li>
46 * <li>This class can effectively apply sampling on the complement set of
47 * matching documents, thereby working efficiently with the complement
48 * optimization - see {@link FacetsAccumulator#getComplementThreshold()}.</li>
51 * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
52 * does not guarantee accurate values for
53 * {@link FacetResult#getNumValidDescendants()} and
54 * {@link FacetResultNode#getResidue()}.
57 * @lucene.experimental
59 public class SamplingAccumulator extends StandardFacetsAccumulator {
// Ratio of the most recent sample, copied from SampleResult.actualSampleRatio
// in actualDocsToAccumulate(). Starts at -1 as a "not yet sampled" sentinel,
// which getTotalCountsFactor() checks for.
61 private double samplingRatio = -1d;
// Sampler that this accumulator uses to choose the sample set, build the
// over-sampled search params, and fix and trim the accumulated results.
62 private final Sampler sampler;
// Constructor taking explicit array allocators.
// The search params, both readers and the int-array allocator are handed to
// the StandardFacetsAccumulator constructor (floatArrayAllocator is presumably
// forwarded as well - TODO confirm), and the sampler is stored in this.sampler
// for use during accumulation.
67 public SamplingAccumulator(
69 FacetSearchParams searchParams,
70 IndexReader indexReader, TaxonomyReader taxonomyReader,
71 IntArrayAllocator intArrayAllocator,
72 FloatArrayAllocator floatArrayAllocator) {
73 super(searchParams, indexReader, taxonomyReader, intArrayAllocator,
75 this.sampler = sampler;
// Constructor without explicit array allocators.
// Passes searchParams, indexReader and taxonomyReader to the three-argument
// StandardFacetsAccumulator constructor and stores the sampler in this.sampler.
81 public SamplingAccumulator(
83 FacetSearchParams searchParams,
84 IndexReader indexReader, TaxonomyReader taxonomyReader) {
85 super(searchParams, indexReader, taxonomyReader);
86 this.sampler = sampler;
// Accumulates facet results for the given doc ids with sampling applied.
// Steps, in order:
//   1. turn labeling off and swap in the sampler's over-sampled search params,
//      then run super.accumulate(docids);
//   2. restore the caller's labeling setting;
//   3. for every result: fix it with the sampler's sample fixer, let the
//      request's results handler rearrange it, trim it with the sampler, and
//      label it only if labeling is allowed;
//   4. put the original search params back.
// May throw IOException (declared on the method).
90 public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
91 // First let the superclass accumulate without labeling at all (though
92 // currently it doesn't matter because we have to label all returned anyhow).
93 boolean origAllowLabeling = isAllowLabeling();
94 setAllowLabeling(false);
96 // Replace the original searchParams with the over-sampled ones; the original is kept so it can be restored below.
97 FacetSearchParams original = searchParams;
98 searchParams = sampler.overSampledSearchParams(original);
100 List<FacetResult> sampleRes = super.accumulate(docids);
101 setAllowLabeling(origAllowLabeling);
103 List<FacetResult> fixedRes = new ArrayList<FacetResult>();
104 for (FacetResult fres : sampleRes) {
105 // fres is expected to be non-null here; the original note says the superclass's accumulate guarantees it.
106 FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(
108 // Fix the result of the current request; note searchParams still holds the over-sampled params at this point.
109 sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
110 .fixResult(docids, fres);
112 fres = frh.rearrangeFacetResult(fres); // let the request's results handler do any rearranging it needs to
114 // Use the sampler to trim the extra (over-sampled) results.
115 fres = sampler.trimResult(fres);
116 // Labeling is deferred until after fixing, rearranging and trimming.
117 // Final labeling, only if allowed (because labeling is a costly operation).
118 if (isAllowLabeling()) {
119 frh.labelResult(fres);
121 fixedRes.add(fres); // add to final results
124 searchParams = original; // Back to original params
// Asks the sampler for a sample of the given doc ids, records the sample's
// actual ratio in samplingRatio (later returned by getTotalCountsFactor()),
// and returns the sampled doc ids.
// May throw IOException (declared on the method).
130 protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
131 SampleResult sampleRes = sampler.getSampleSet(docids);
132 samplingRatio = sampleRes.actualSampleRatio;
133 return sampleRes.docids;
// Returns the sampling ratio recorded by actualDocsToAccumulate().
// Throws IllegalStateException while samplingRatio is still negative; the
// field starts at -1, so this is the case when no sample ratio has been
// recorded yet.
137 protected double getTotalCountsFactor() {
138 if (samplingRatio<0) {
139 throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
141 return samplingRatio;