1 package org.apache.lucene.facet.util;
3 import java.io.IOException;
4 import java.util.Arrays;
6 import org.apache.lucene.index.IndexReader;
7 import org.apache.lucene.search.DocIdSet;
8 import org.apache.lucene.search.DocIdSetIterator;
9 import org.apache.lucene.util.OpenBitSet;
10 import org.apache.lucene.util.OpenBitSetDISI;
12 import org.apache.lucene.facet.search.ScoredDocIDs;
13 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
16 * Licensed to the Apache Software Foundation (ASF) under one or more
17 * contributor license agreements. See the NOTICE file distributed with
18 * this work for additional information regarding copyright ownership.
19 * The ASF licenses this file to You under the Apache License, Version 2.0
20 * (the "License"); you may not use this file except in compliance with
21 * the License. You may obtain a copy of the License at
23 * http://www.apache.org/licenses/LICENSE-2.0
25 * Unless required by applicable law or agreed to in writing, software
26 * distributed under the License is distributed on an "AS IS" BASIS,
27 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 * See the License for the specific language governing permissions and
29 * limitations under the License.
33 * Utility methods for Scored Doc IDs.
35 * @lucene.experimental
37 public class ScoredDocIdsUtils {
40 * Create a complement of the input set. The returned {@link ScoredDocIDs}
41 * does not contain any scores, which makes sense given that the complementing
42 * documents were not scored.
44 * Note: the complement set does NOT contain doc ids which are marked as deleted by the given reader.
46 * @param docids to be complemented.
47 * @param reader holding the number of documents and information about deletions.
// Builds the complement of `docids` over the doc-id range [0, maxDoc): the input
// set is copied into a bit set, every bit in that range is flipped, deleted docs
// are cleared, and the result is wrapped via createScoredDocIds.
// NOTE(review): this view omits several original lines (closing braces, the
// header of the second branch, the rest of the signature) - confirm against the
// full file before editing.
49 public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
// Exclusive upper bound of the doc-id range being complemented.
51 final int maxDoc = reader.maxDoc();
53 DocIdSet docIdSet = docids.getDocIDs();
54 final OpenBitSet complement;
// First branch: the set already is an OpenBitSet, so it is cloned and the flip
// below operates on the copy rather than on the caller's set.
55 if (docIdSet instanceof OpenBitSet) {
56 // That is the most common case, if ScoredDocIdsCollector was used.
57 complement = (OpenBitSet) ((OpenBitSet) docIdSet).clone();
// Other branch: materialize the set's iterator into a new bit set sized to maxDoc.
59 complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
// Invert membership for every doc id in [0, maxDoc).
62 complement.flip(0, maxDoc);
64 // Remove all Deletions from the complement set
65 clearDeleted(reader, complement);
// The wrapper built here carries no per-document scores of its own.
67 return createScoredDocIds(complement, maxDoc);
71 * Clear all deleted documents from a given open-bit-set according to a given reader
// Walks every doc id currently set in `set` and tests it against the reader's
// deletion information.
// NOTE(review): the statement executed for a deleted doc (presumably clearing
// its bit in `set`) is not visible in this view - confirm against the full file.
73 private static void clearDeleted(final IndexReader reader,
74 final OpenBitSet set) throws IOException {
76 // If there are no deleted docs
// Cheap early exit: skips the full scan of the bit set below.
77 if (!reader.hasDeletions()) {
78 return; // return immediately
81 DocIdSetIterator it = set.iterator();
// Initial value is overwritten by the first nextDoc() call in the loop condition.
82 int doc = DocIdSetIterator.NO_MORE_DOCS;
// Iterate until the bit set's iterator reports exhaustion.
83 while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
84 if (reader.isDeleted(doc)) {
91 * Create a subset of an existing ScoredDocIDs object.
93 * @param allDocIds original set
94 * @param sampleSet Doc Ids of the subset.
// Builds a ScoredDocIDs view restricted to the doc ids in `sampleSet`, copying the
// score of each matching document from `allDocIds`.
// NOTE(review): this view omits several original lines. The declarations of `n`
// and `size`, the statement that advances `n`, and the sort call referred to by
// the first comment are not visible here - confirm against the full file.
96 public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds,
97 final int[] sampleSet) throws IOException {
99 // sort so that we can scan docs in order
// NOTE(review): `docids` aliases the caller's array (no copy is made), so any
// in-place sort applied to it would also reorder `sampleSet` for the caller -
// confirm that this is intended.
100 final int[] docids = sampleSet;
// One score slot per requested doc id, index-aligned with `docids`.
102 final float[] scores = new float[docids.length];
103 // fetch scores and compute size
104 ScoredDocIDsIterator it = allDocIds.iterator();
// Single forward pass over allDocIds; the loop ends when the source iterator is
// exhausted or `n` reaches docids.length.
106 while (it.next() && n < docids.length) {
107 int doc = it.getDocID();
// Only an exact match with the current candidate docids[n] records a score.
108 if (doc == docids[n]) {
109 scores[n] = it.getScore();
// The anonymous view below reads `docids`, `scores` and `size` from this method.
115 return new ScoredDocIDs() {
117 public DocIdSet getDocIDs() {
118 return new DocIdSet() {
121 public boolean isCacheable() { return true; }
124 public DocIdSetIterator iterator() throws IOException {
125 return new DocIdSetIterator() {
// Cursor into `docids`; starts at -1 so that the first nextDoc() pre-increment
// lands on index 0.
127 private int next = -1;
130 public int advance(int target) throws IOException {
// NOTE(review): if advance() is called before any nextDoc(), `next` is -1 and
// `docids[next++]` reads index -1. Also, because of the post-increment, when the
// loop stops on an element >= target the cursor has already moved past it, so the
// return below appears to yield the following element (and `next` can end up at
// size + 1, bypassing the `next == size` check). Confirm against the
// DocIdSetIterator.advance contract.
131 while (next < size && docids[next++] < target) {
133 return next == size ? NO_MORE_DOCS : docids[next];
142 public int nextDoc() throws IOException {
// Pre-increment the cursor, then bounds-check it against `size`.
143 if (++next >= size) {
154 public ScoredDocIDsIterator iterator() throws IOException {
155 return new ScoredDocIDsIterator() {
// next() moves the cursor; getScore()/getDocID() read the parallel arrays at the
// current cursor position.
159 public boolean next() { return ++next < size; }
161 public float getScore() { return scores[next]; }
163 public int getDocID() { return docids[next]; }
167 public int size() { return size; }
173 * Creates a {@link ScoredDocIDs} which returns all non-deleted doc ids
174 * according to the given reader.
175 * The returned set contains the range of [0 .. reader.maxDoc ) doc ids
// Factory that picks the implementation based on whether the reader reports
// deletions: the deletion-aware variant when it does, the plain variant otherwise.
// NOTE(review): closing braces are not visible in this view.
177 public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) {
178 if (reader.hasDeletions()) {
// Deletions present: use the variant that consults reader.isDeleted() per doc.
179 return new AllLiveDocsScoredDocIDs(reader);
// No deletions: use the variant that walks doc ids up to maxDoc without checks.
181 return new AllDocsScoredDocIDs(reader);
185 * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index
// Wraps an existing DocIdSet as a ScoredDocIDs whose iterator reports
// DEFAULT_SCORE for every document.
// NOTE(review): this view omits several original lines (`try {` headers, closing
// braces, and the header and return of the method containing the lazy size
// computation) - confirm against the full file.
187 public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) {
188 return new ScoredDocIDs() {
// -1 is the initial value; it is replaced by the cardinality computed further down.
189 private int size = -1;
// Returns the wrapped set itself, not a copy.
190 public DocIdSet getDocIDs() { return docIdSet; }
192 public ScoredDocIDsIterator iterator() throws IOException {
// Each call obtains a fresh iterator from the wrapped set.
193 final DocIdSetIterator docIterator = docIdSet.iterator();
194 return new ScoredDocIDsIterator() {
195 public boolean next() {
197 return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
// next() declares no checked exception here, so the IOException is rethrown
// unchecked with the original exception kept as the cause.
198 } catch (IOException e) {
199 throw new RuntimeException(e);
203 public float getScore() { return DEFAULT_SCORE; }
205 public int getDocID() { return docIterator.docID(); }
210 // lazy size computation
// The size is obtained by copying the whole set into a bit set and counting the
// set bits; presumably guarded so that it only runs while `size` is still
// negative - the guard is not visible here, confirm.
212 OpenBitSetDISI openBitSetDISI;
214 openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
215 } catch (IOException e) {
216 throw new RuntimeException(e);
// cardinality() result is narrowed to int by the cast.
218 size = (int) openBitSetDISI.cardinality();
226 * All docs ScoredDocIDs - this one is simply an 'all 1' bitset. Used when
227 * there are no deletions in the index and we wish to go through each and
// ScoredDocIDs implementation that enumerates doc ids from 0 up to (but not
// including) maxDoc without consulting deletion information; every document is
// reported with DEFAULT_SCORE.
// NOTE(review): this view omits many original lines (the `maxDoc` field
// declaration, size(), method bodies, `try {` headers, closing braces) - confirm
// against the full file before editing.
230 private static class AllDocsScoredDocIDs implements ScoredDocIDs {
233 public AllDocsScoredDocIDs(IndexReader reader) {
// maxDoc is read once at construction time and used as the iteration bound.
234 this.maxDoc = reader.maxDoc();
241 public DocIdSet getDocIDs() {
242 return new DocIdSet() {
245 public boolean isCacheable() {
250 public DocIdSetIterator iterator() throws IOException {
251 return new DocIdSetIterator() {
// Current doc id; starts at -1 so that the first nextDoc() yields 0.
252 private int next = -1;
255 public int advance(int target) throws IOException {
// NOTE(review): the body of this branch is not visible in this view.
256 if (target <= next) {
// Targets at or beyond maxDoc map to NO_MORE_DOCS; the other arm of the ternary
// is cut off in this view. The result is also stored back into `next`.
259 return next = target >= maxDoc ? NO_MORE_DOCS
269 public int nextDoc() throws IOException {
// NOTE(review): once exhausted, `next` keeps growing on further calls instead of
// being pinned to NO_MORE_DOCS - confirm whether callers rely on docID() afterwards.
270 return ++next < maxDoc ? next : NO_MORE_DOCS;
278 public ScoredDocIDsIterator iterator() {
// The scored iterator delegates to a fresh DocIdSetIterator from getDocIDs().
280 final DocIdSetIterator iter = getDocIDs().iterator();
281 return new ScoredDocIDsIterator() {
282 public boolean next() {
284 return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
// NOTE(review): the handler body for this catch is not visible in this view.
285 } catch (IOException e) {
291 public float getScore() {
292 return DEFAULT_SCORE;
295 public int getDocID() {
// iterator() declares no checked exception, so an IOException raised while
// creating the underlying iterator is rethrown unchecked with its cause kept.
299 } catch (IOException e) {
301 throw new RuntimeException(e);
307 * An All-docs bitset which has '0' for deleted documents and '1' for the
308 * rest. Useful for iterating over all 'live' documents in a given index.
310 * NOTE: this class would work for indexes with no deletions at all,
311 * although it is recommended to use {@link AllDocsScoredDocIDs} to ease
312 * the performance cost of validating isDeleted() on each and every docId
// ScoredDocIDs implementation that enumerates doc ids below maxDoc while skipping
// those the reader reports as deleted; every document is reported with
// DEFAULT_SCORE.
// NOTE(review): this view omits many original lines (the `maxDoc` field
// declaration, the size() header, the advance() body, the `do {` header, `try {`
// headers, closing braces) - confirm against the full file before editing.
314 private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs {
// Kept so that isDeleted() and numDocs() can be queried later.
316 final IndexReader reader;
318 AllLiveDocsScoredDocIDs(IndexReader reader) {
319 this.maxDoc = reader.maxDoc();
320 this.reader = reader;
// Presumably the body of size(): delegates to the reader's numDocs() - the
// method header is not visible here, confirm.
324 return reader.numDocs();
327 public DocIdSet getDocIDs() {
328 return new DocIdSet() {
331 public boolean isCacheable() {
336 public DocIdSetIterator iterator() throws IOException {
337 return new DocIdSetIterator() {
// Current doc id; starts at -1, i.e. before the first document.
338 private int next = -1;
// NOTE(review): the body of advance() is not visible in this view.
341 public int advance(int target) throws IOException {
354 public int nextDoc() throws IOException {
// Loop condition: keep going while still inside the index and the current
// candidate is deleted. The loop body that moves `next` is not visible here.
357 } while (next < maxDoc && reader.isDeleted(next));
// Reaching or passing maxDoc means no live document remains.
359 return next < maxDoc ? next : NO_MORE_DOCS;
367 public ScoredDocIDsIterator iterator() {
// The scored iterator delegates to a fresh DocIdSetIterator from getDocIDs().
369 final DocIdSetIterator iter = getDocIDs().iterator();
370 return new ScoredDocIDsIterator() {
371 public boolean next() {
373 return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
// NOTE(review): the handler body for this catch is not visible in this view.
374 } catch (IOException e) {
380 public float getScore() {
381 return DEFAULT_SCORE;
384 public int getDocID() {
// iterator() declares no checked exception, so an IOException raised while
// creating the underlying iterator is rethrown unchecked with its cause kept.
388 } catch (IOException e) {
390 throw new RuntimeException(e);