1 package org.apache.lucene.facet.search.sampling;
3 import java.io.IOException;
5 import org.apache.lucene.index.IndexReader;
6 import org.apache.lucene.index.Term;
7 import org.apache.lucene.index.TermDocs;
9 import org.apache.lucene.facet.search.DrillDown;
10 import org.apache.lucene.facet.search.ScoredDocIDs;
11 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
12 import org.apache.lucene.facet.search.params.FacetSearchParams;
13 import org.apache.lucene.facet.search.results.FacetResult;
14 import org.apache.lucene.facet.search.results.FacetResultNode;
15 import org.apache.lucene.facet.taxonomy.CategoryPath;
16 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
19 * Licensed to the Apache Software Foundation (ASF) under one or more
20 * contributor license agreements. See the NOTICE file distributed with
21 * this work for additional information regarding copyright ownership.
22 * The ASF licenses this file to You under the Apache License, Version 2.0
23 * (the "License"); you may not use this file except in compliance with
24 * the License. You may obtain a copy of the License at
26 * http://www.apache.org/licenses/LICENSE-2.0
28 * Unless required by applicable law or agreed to in writing, software
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
36 * Fix sampling results by counting the intersection between two lists: a
37 * TermDocs (list of documents in a certain category) and a DocIdSetIterator
38 * (list of documents matching the query).
41 * @lucene.experimental
43 // TODO (Facet): implement also an estimated fixing by ratio (taking into
44 // account "translation" of counts!)
45 class TakmiSampleFixer implements SampleFixer {
47 private TaxonomyReader taxonomyReader;
48 private IndexReader indexReader;
49 private FacetSearchParams searchParams;
51 public TakmiSampleFixer(IndexReader indexReader,
52 TaxonomyReader taxonomyReader, FacetSearchParams searchParams) {
53 this.indexReader = indexReader;
54 this.taxonomyReader = taxonomyReader;
55 this.searchParams = searchParams;
58 public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
60 FacetResultNode topRes = fres.getFacetResultNode();
61 fixResultNode(topRes, origDocIds);
65 * Fix result node count, and, recursively, fix all its children
68 * result node to be fixed
73 private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds)
75 recount(facetResNode, docIds);
76 for (FacetResultNode frn : facetResNode.getSubResults()) {
77 fixResultNode(frn, docIds);
82 * Internal utility: recount for a facet result node
85 * result node to be recounted
87 * full set of matching documents.
90 private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
92 // TODO (Facet): change from void to return the new, smaller docSet, and use
93 // that for the children, as this will make their intersection ops faster.
94 // can do this only when the new set is "sufficiently" smaller.
96 /* We need the category's path name in order to do its recounting.
97 * If it is missing, because the option to label only part of the
98 * facet results was exercise, we need to calculate them anyway, so
99 * in essence sampling with recounting spends some extra cycles for
100 * labeling results for which labels are not required. */
101 CategoryPath catPath = fresNode.getLabel(taxonomyReader); // force labeling
103 Term drillDownTerm = DrillDown.term(searchParams, catPath);
104 int updatedCount = countIntersection(indexReader.termDocs(drillDownTerm),
107 fresNode.setValue(updatedCount);
111 * Count the size of the intersection between two lists: a TermDocs (list of
112 * documents in a certain category) and a DocIdSetIterator (list of documents
115 private static int countIntersection(TermDocs p1, ScoredDocIDsIterator p2)
117 // The documentation of of both TermDocs and DocIdSetIterator claim
118 // that we must do next() before doc(). So we do, and if one of the
119 // lists is empty, obviously return 0;
128 int d2 = p2.getDocID();
135 break; // end of list 1, nothing more in intersection
138 if (!advance(p2, d1)) {
139 break; // end of list 2, nothing more in intersection
142 } else if (d1 < d2) {
143 if (!p1.skipTo(d2)) {
144 break; // end of list 1, nothing more in intersection
148 if (!advance(p2, d1)) {
149 break; // end of list 2, nothing more in intersection
158 * utility: advance the iterator until finding (or exceeding) specific
162 * iterator being advanced
164 * target of advancing
165 * @return false if iterator exhausted, true otherwise.
167 private static boolean advance(ScoredDocIDsIterator iterator, int targetDoc) {
168 while (iterator.next()) {
169 if (iterator.getDocID() >= targetDoc) {
170 return true; // target reached
173 return false; // exhausted