+++ /dev/null
-package org.apache.lucene.facet.util;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.util.OpenBitSetDISI;
-
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Utility methods for Scored Doc IDs.
- *
- * @lucene.experimental
- */
-public class ScoredDocIdsUtils {
-
- /**
- * Create a complement of the input set. The returned {@link ScoredDocIDs}
- * does not contain any scores, which makes sense given that the complementing
- * documents were not scored.
- *
- * Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader
- *
- * @param docids to be complemented.
- * @param reader holding the number of documents & information about deletions.
- */
- public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
- throws IOException {
- final int maxDoc = reader.maxDoc();
-
- DocIdSet docIdSet = docids.getDocIDs();
- final OpenBitSet complement;
- if (docIdSet instanceof OpenBitSet) {
- // That is the most common case, if ScoredDocIdsCollector was used.
- complement = (OpenBitSet) ((OpenBitSet) docIdSet).clone();
- } else {
- complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
- }
-
- complement.flip(0, maxDoc);
-
- // Remove all Deletions from the complement set
- clearDeleted(reader, complement);
-
- return createScoredDocIds(complement, maxDoc);
- }
-
- /**
- * Clear all deleted documents from a given open-bit-set according to a given reader
- */
- private static void clearDeleted(final IndexReader reader,
- final OpenBitSet set) throws IOException {
-
- // If there are no deleted docs
- if (!reader.hasDeletions()) {
- return; // return immediately
- }
-
- DocIdSetIterator it = set.iterator();
- int doc = DocIdSetIterator.NO_MORE_DOCS;
- while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (reader.isDeleted(doc)) {
- set.fastClear(doc);
- }
- }
- }
-
- /**
- * Create a subset of an existing ScoredDocIDs object.
- *
- * @param allDocIds orginal set
- * @param sampleSet Doc Ids of the subset.
- */
- public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds,
- final int[] sampleSet) throws IOException {
-
- // sort so that we can scan docs in order
- final int[] docids = sampleSet;
- Arrays.sort(docids);
- final float[] scores = new float[docids.length];
- // fetch scores and compute size
- ScoredDocIDsIterator it = allDocIds.iterator();
- int n = 0;
- while (it.next() && n < docids.length) {
- int doc = it.getDocID();
- if (doc == docids[n]) {
- scores[n] = it.getScore();
- ++n;
- }
- }
- final int size = n;
-
- return new ScoredDocIDs() {
-
- public DocIdSet getDocIDs() {
- return new DocIdSet() {
-
- @Override
- public boolean isCacheable() { return true; }
-
- @Override
- public DocIdSetIterator iterator() throws IOException {
- return new DocIdSetIterator() {
-
- private int next = -1;
-
- @Override
- public int advance(int target) throws IOException {
- while (next < size && docids[next++] < target) {
- }
- return next == size ? NO_MORE_DOCS : docids[next];
- }
-
- @Override
- public int docID() {
- return docids[next];
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (++next >= size) {
- return NO_MORE_DOCS;
- }
- return docids[next];
- }
-
- };
- }
- };
- }
-
- public ScoredDocIDsIterator iterator() throws IOException {
- return new ScoredDocIDsIterator() {
-
- int next = -1;
-
- public boolean next() { return ++next < size; }
-
- public float getScore() { return scores[next]; }
-
- public int getDocID() { return docids[next]; }
- };
- }
-
- public int size() { return size; }
-
- };
- }
-
- /**
- * Creates a {@link ScoredDocIDs} which returns document IDs all non-deleted doc ids
- * according to the given reader.
- * The returned set contains the range of [0 .. reader.maxDoc ) doc ids
- */
- public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) {
- if (reader.hasDeletions()) {
- return new AllLiveDocsScoredDocIDs(reader);
- }
- return new AllDocsScoredDocIDs(reader);
- }
-
- /**
- * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index
- */
- public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) {
- return new ScoredDocIDs() {
- private int size = -1;
- public DocIdSet getDocIDs() { return docIdSet; }
-
- public ScoredDocIDsIterator iterator() throws IOException {
- final DocIdSetIterator docIterator = docIdSet.iterator();
- return new ScoredDocIDsIterator() {
- public boolean next() {
- try {
- return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- public float getScore() { return DEFAULT_SCORE; }
-
- public int getDocID() { return docIterator.docID(); }
- };
- }
-
- public int size() {
- // lazy size computation
- if (size < 0) {
- OpenBitSetDISI openBitSetDISI;
- try {
- openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- size = (int) openBitSetDISI.cardinality();
- }
- return size;
- }
- };
- }
-
- /**
- * All docs ScoredDocsIDs - this one is simply an 'all 1' bitset. Used when
- * there are no deletions in the index and we wish to go through each and
- * every document
- */
- private static class AllDocsScoredDocIDs implements ScoredDocIDs {
- final int maxDoc;
-
- public AllDocsScoredDocIDs(IndexReader reader) {
- this.maxDoc = reader.maxDoc();
- }
-
- public int size() {
- return maxDoc;
- }
-
- public DocIdSet getDocIDs() {
- return new DocIdSet() {
-
- @Override
- public boolean isCacheable() {
- return true;
- }
-
- @Override
- public DocIdSetIterator iterator() throws IOException {
- return new DocIdSetIterator() {
- private int next = -1;
-
- @Override
- public int advance(int target) throws IOException {
- if (target <= next) {
- target = next + 1;
- }
- return next = target >= maxDoc ? NO_MORE_DOCS
- : target;
- }
-
- @Override
- public int docID() {
- return next;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return ++next < maxDoc ? next : NO_MORE_DOCS;
- }
-
- };
- }
- };
- }
-
- public ScoredDocIDsIterator iterator() {
- try {
- final DocIdSetIterator iter = getDocIDs().iterator();
- return new ScoredDocIDsIterator() {
- public boolean next() {
- try {
- return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
- } catch (IOException e) {
- // cannot happen
- return false;
- }
- }
-
- public float getScore() {
- return DEFAULT_SCORE;
- }
-
- public int getDocID() {
- return iter.docID();
- }
- };
- } catch (IOException e) {
- // cannot happen
- throw new RuntimeException(e);
- }
- }
- }
-
- /**
- * An All-docs bitset which has '0' for deleted documents and '1' for the
- * rest. Useful for iterating over all 'live' documents in a given index.
- * <p>
- * NOTE: this class would work for indexes with no deletions at all,
- * although it is recommended to use {@link AllDocsScoredDocIDs} to ease
- * the performance cost of validating isDeleted() on each and every docId
- */
- private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs {
- final int maxDoc;
- final IndexReader reader;
-
- AllLiveDocsScoredDocIDs(IndexReader reader) {
- this.maxDoc = reader.maxDoc();
- this.reader = reader;
- }
-
- public int size() {
- return reader.numDocs();
- }
-
- public DocIdSet getDocIDs() {
- return new DocIdSet() {
-
- @Override
- public boolean isCacheable() {
- return true;
- }
-
- @Override
- public DocIdSetIterator iterator() throws IOException {
- return new DocIdSetIterator() {
- private int next = -1;
-
- @Override
- public int advance(int target) throws IOException {
- if (target > next) {
- next = target - 1;
- }
- return nextDoc();
- }
-
- @Override
- public int docID() {
- return next;
- }
-
- @Override
- public int nextDoc() throws IOException {
- do {
- ++next;
- } while (next < maxDoc && reader.isDeleted(next));
-
- return next < maxDoc ? next : NO_MORE_DOCS;
- }
-
- };
- }
- };
- }
-
- public ScoredDocIDsIterator iterator() {
- try {
- final DocIdSetIterator iter = getDocIDs().iterator();
- return new ScoredDocIDsIterator() {
- public boolean next() {
- try {
- return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
- } catch (IOException e) {
- // cannot happen
- return false;
- }
- }
-
- public float getScore() {
- return DEFAULT_SCORE;
- }
-
- public int getDocID() {
- return iter.docID();
- }
- };
- } catch (IOException e) {
- // cannot happen
- throw new RuntimeException(e);
- }
- }
- }
-}
\ No newline at end of file