package org.apache.lucene.facet.search.sampling;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

import org.apache.lucene.facet.search.DrillDown;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Fix sampling results by counting the intersection between two lists: a
 * TermDocs (list of documents in a certain category) and a DocIdSetIterator
 * (list of documents matching the query).
 * 
 * @lucene.experimental
 */
// TODO (Facet): implement also an estimated fixing by ratio (taking into
// account "translation" of counts!)
class TakmiSampleFixer implements SampleFixer {
  
  // All three collaborators are supplied once at construction and never
  // reassigned, so they are declared final.
  private final TaxonomyReader taxonomyReader;
  private final IndexReader indexReader;
  private final FacetSearchParams searchParams;
  
  public TakmiSampleFixer(IndexReader indexReader,
      TaxonomyReader taxonomyReader, FacetSearchParams searchParams) {
    this.indexReader = indexReader;
    this.taxonomyReader = taxonomyReader;
    this.searchParams = searchParams;
  }

  /**
   * Fix the counts of the given facet result, recursively over all its result
   * nodes, by recounting against the full (original) set of matching docids.
   * 
   * @param origDocIds full set of matching documents
   * @param fres facet result whose counts are to be fixed
   * @throws IOException on low-level index read error
   */
  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
      throws IOException {
    FacetResultNode topRes = fres.getFacetResultNode();
    fixResultNode(topRes, origDocIds);
  }
  
  /**
   * Fix result node count, and, recursively, fix all its children
   * 
   * @param facetResNode
   *          result node to be fixed
   * @param docIds
   *          docids in effect
   * @throws IOException on low-level index read error
   */
  private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds)
      throws IOException {
    recount(facetResNode, docIds);
    for (FacetResultNode frn : facetResNode.getSubResults()) {
      fixResultNode(frn, docIds);
    }
  }

  /**
   * Internal utility: recount for a facet result node
   * 
   * @param fresNode
   *          result node to be recounted
   * @param docIds
   *          full set of matching documents.
   * @throws IOException on low-level index read error
   */
  private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
      throws IOException {
    // TODO (Facet): change from void to return the new, smaller docSet, and use
    // that for the children, as this will make their intersection ops faster.
    // can do this only when the new set is "sufficiently" smaller.
    
    /* We need the category's path name in order to do its recounting.
     * If it is missing, because the option to label only part of the
     * facet results was exercise, we need to calculate them anyway, so
     * in essence sampling with recounting spends some extra cycles for
     * labeling results for which labels are not required. */
    CategoryPath catPath = fresNode.getLabel(taxonomyReader); // force labeling

    Term drillDownTerm = DrillDown.term(searchParams, catPath);
    // The TermDocs obtained from the reader must be closed, otherwise the
    // underlying posting stream it holds is leaked; close it in a finally
    // block so the fix survives exceptions thrown during the intersection.
    TermDocs termDocs = indexReader.termDocs(drillDownTerm);
    int updatedCount;
    try {
      updatedCount = countIntersection(termDocs, docIds.iterator());
    } finally {
      termDocs.close();
    }
    
    fresNode.setValue(updatedCount);
  }

  /**
   * Count the size of the intersection between two lists: a TermDocs (list of
   * documents in a certain category) and a DocIdSetIterator (list of documents
   * matching a query).
   */
  private static int countIntersection(TermDocs p1, ScoredDocIDsIterator p2)
      throws IOException {
    // The documentation of both TermDocs and DocIdSetIterator claim
    // that we must do next() before doc(). So we do, and if one of the
    // lists is empty, obviously return 0;
    if (!p1.next()) {
      return 0;
    }
    if (!p2.next()) {
      return 0;
    }
    
    int d1 = p1.doc();
    int d2 = p2.getDocID();
    
    int count = 0;
    for (;;) {
      if (d1 == d2) {
        ++count;
        if (!p1.next()) {
          break; // end of list 1, nothing more in intersection
        }
        d1 = p1.doc();
        if (!advance(p2, d1)) {
          break; // end of list 2, nothing more in intersection
        }
        d2 = p2.getDocID();
      } else if (d1 < d2) {
        if (!p1.skipTo(d2)) {
          break; // end of list 1, nothing more in intersection
        }
        d1 = p1.doc();
      } else /* d1>d2 */ {
        if (!advance(p2, d1)) {
          break; // end of list 2, nothing more in intersection
        }
        d2 = p2.getDocID();
      }
    }
    return count;
  }

  /**
   * utility: advance the iterator until finding (or exceeding) specific
   * document
   * 
   * @param iterator
   *          iterator being advanced
   * @param targetDoc
   *          target of advancing
   * @return false if iterator exhausted, true otherwise.
   */
  private static boolean advance(ScoredDocIDsIterator iterator, int targetDoc) {
    while (iterator.next()) {
      if (iterator.getDocID() >= targetDoc) {
        return true; // target reached
      }
    }
    return false; // exhausted
  }
}