X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java new file mode 100644 index 0000000..839bd09 --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java @@ -0,0 +1,394 @@ +package org.apache.lucene.facet.util; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.OpenBitSetDISI; + +import org.apache.lucene.facet.search.ScoredDocIDs; +import org.apache.lucene.facet.search.ScoredDocIDsIterator; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utility methods for Scored Doc IDs. + * + * @lucene.experimental + */ +public class ScoredDocIdsUtils { + + /** + * Create a complement of the input set. The returned {@link ScoredDocIDs} + * does not contain any scores, which makes sense given that the complementing + * documents were not scored. + * + * Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader + * + * @param docids to be complemented. + * @param reader holding the number of documents & information about deletions. + */ + public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader) + throws IOException { + final int maxDoc = reader.maxDoc(); + + DocIdSet docIdSet = docids.getDocIDs(); + final OpenBitSet complement; + if (docIdSet instanceof OpenBitSet) { + // That is the most common case, if ScoredDocIdsCollector was used. + complement = (OpenBitSet) ((OpenBitSet) docIdSet).clone(); + } else { + complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc); + } + + complement.flip(0, maxDoc); + + // Remove all Deletions from the complement set + clearDeleted(reader, complement); + + return createScoredDocIds(complement, maxDoc); + } + + /** + * Clear all deleted documents from a given open-bit-set according to a given reader + */ + private static void clearDeleted(final IndexReader reader, + final OpenBitSet set) throws IOException { + + // If there are no deleted docs + if (!reader.hasDeletions()) { + return; // return immediately + } + + DocIdSetIterator it = set.iterator(); + int doc = DocIdSetIterator.NO_MORE_DOCS; + while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (reader.isDeleted(doc)) { + set.fastClear(doc); + } + } + } + + /** + * Create a subset of an existing ScoredDocIDs object. + * + * @param allDocIds orginal set + * @param sampleSet Doc Ids of the subset. + */ + public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds, + final int[] sampleSet) throws IOException { + + // sort so that we can scan docs in order + final int[] docids = sampleSet; + Arrays.sort(docids); + final float[] scores = new float[docids.length]; + // fetch scores and compute size + ScoredDocIDsIterator it = allDocIds.iterator(); + int n = 0; + while (it.next() && n < docids.length) { + int doc = it.getDocID(); + if (doc == docids[n]) { + scores[n] = it.getScore(); + ++n; + } + } + final int size = n; + + return new ScoredDocIDs() { + + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { return true; } + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + + private int next = -1; + + @Override + public int advance(int target) throws IOException { + while (next < size && docids[next++] < target) { + } + return next == size ? NO_MORE_DOCS : docids[next]; + } + + @Override + public int docID() { + return docids[next]; + } + + @Override + public int nextDoc() throws IOException { + if (++next >= size) { + return NO_MORE_DOCS; + } + return docids[next]; + } + + }; + } + }; + } + + public ScoredDocIDsIterator iterator() throws IOException { + return new ScoredDocIDsIterator() { + + int next = -1; + + public boolean next() { return ++next < size; } + + public float getScore() { return scores[next]; } + + public int getDocID() { return docids[next]; } + }; + } + + public int size() { return size; } + + }; + } + + /** + * Creates a {@link ScoredDocIDs} which returns document IDs all non-deleted doc ids + * according to the given reader. + * The returned set contains the range of [0 .. reader.maxDoc ) doc ids + */ + public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) { + if (reader.hasDeletions()) { + return new AllLiveDocsScoredDocIDs(reader); + } + return new AllDocsScoredDocIDs(reader); + } + + /** + * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index + */ + public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) { + return new ScoredDocIDs() { + private int size = -1; + public DocIdSet getDocIDs() { return docIdSet; } + + public ScoredDocIDsIterator iterator() throws IOException { + final DocIdSetIterator docIterator = docIdSet.iterator(); + return new ScoredDocIDsIterator() { + public boolean next() { + try { + return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public float getScore() { return DEFAULT_SCORE; } + + public int getDocID() { return docIterator.docID(); } + }; + } + + public int size() { + // lazy size computation + if (size < 0) { + OpenBitSetDISI openBitSetDISI; + try { + openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc); + } catch (IOException e) { + throw new RuntimeException(e); + } + size = (int) openBitSetDISI.cardinality(); + } + return size; + } + }; + } + + /** + * All docs ScoredDocsIDs - this one is simply an 'all 1' bitset. Used when + * there are no deletions in the index and we wish to go through each and + * every document + */ + private static class AllDocsScoredDocIDs implements ScoredDocIDs { + final int maxDoc; + + public AllDocsScoredDocIDs(IndexReader reader) { + this.maxDoc = reader.maxDoc(); + } + + public int size() { + return maxDoc; + } + + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { + return true; + } + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + private int next = -1; + + @Override + public int advance(int target) throws IOException { + if (target <= next) { + target = next + 1; + } + return next = target >= maxDoc ? NO_MORE_DOCS + : target; + } + + @Override + public int docID() { + return next; + } + + @Override + public int nextDoc() throws IOException { + return ++next < maxDoc ? next : NO_MORE_DOCS; + } + + }; + } + }; + } + + public ScoredDocIDsIterator iterator() { + try { + final DocIdSetIterator iter = getDocIDs().iterator(); + return new ScoredDocIDsIterator() { + public boolean next() { + try { + return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + // cannot happen + return false; + } + } + + public float getScore() { + return DEFAULT_SCORE; + } + + public int getDocID() { + return iter.docID(); + } + }; + } catch (IOException e) { + // cannot happen + throw new RuntimeException(e); + } + } + } + + /** + * An All-docs bitset which has '0' for deleted documents and '1' for the + * rest. Useful for iterating over all 'live' documents in a given index. + *

+ * NOTE: this class would work for indexes with no deletions at all, + * although it is recommended to use {@link AllDocsScoredDocIDs} to ease + * the performance cost of validating isDeleted() on each and every docId + */ + private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs { + final int maxDoc; + final IndexReader reader; + + AllLiveDocsScoredDocIDs(IndexReader reader) { + this.maxDoc = reader.maxDoc(); + this.reader = reader; + } + + public int size() { + return reader.numDocs(); + } + + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { + return true; + } + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + private int next = -1; + + @Override + public int advance(int target) throws IOException { + if (target > next) { + next = target - 1; + } + return nextDoc(); + } + + @Override + public int docID() { + return next; + } + + @Override + public int nextDoc() throws IOException { + do { + ++next; + } while (next < maxDoc && reader.isDeleted(next)); + + return next < maxDoc ? next : NO_MORE_DOCS; + } + + }; + } + }; + } + + public ScoredDocIDsIterator iterator() { + try { + final DocIdSetIterator iter = getDocIDs().iterator(); + return new ScoredDocIDsIterator() { + public boolean next() { + try { + return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + // cannot happen + return false; + } + } + + public float getScore() { + return DEFAULT_SCORE; + } + + public int getDocID() { + return iter.docID(); + } + }; + } catch (IOException e) { + // cannot happen + throw new RuntimeException(e); + } + } + } +} \ No newline at end of file