1 package org.apache.lucene.util;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.BitSet;
23 import org.apache.lucene.search.DocIdSet;
24 import org.apache.lucene.search.DocIdSetIterator;
27 * Stores and iterate on sorted integers in compressed form in RAM. <br>
28 * The code for compressing the differences between ascending integers was
29 * borrowed from {@link org.apache.lucene.store.IndexInput} and
30 * {@link org.apache.lucene.store.IndexOutput}.
32 * <b>NOTE:</b> this class assumes the stored integers are doc Ids (hence why it
33 * extends {@link DocIdSet}). Therefore its {@link #iterator()} assumes {@link
34 * DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you intent to use
35 * this value, then make sure it's not used during search
40 public class SortedVIntList extends DocIdSet {
41 /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
42 * a SortedVIntList representing the index numbers of the set bits
43 * will be smaller than that BitSet.
45 final static int BITS2VINTLIST_SIZE = 8;
49 private int lastBytePos;
52 * Create a SortedVIntList from all elements of an array of integers.
54 * @param sortedInts A sorted array of non negative integers.
56 public SortedVIntList(int... sortedInts) {
57 this(sortedInts, sortedInts.length);
61 * Create a SortedVIntList from an array of integers.
62 * @param sortedInts An array of sorted non negative integers.
63 * @param inputSize The number of integers to be used from the array.
65 public SortedVIntList(int[] sortedInts, int inputSize) {
66 SortedVIntListBuilder builder = new SortedVIntListBuilder();
67 for (int i = 0; i < inputSize; i++) {
68 builder.addInt(sortedInts[i]);
74 * Create a SortedVIntList from a BitSet.
75 * @param bits A bit set representing a set of integers.
77 public SortedVIntList(BitSet bits) {
78 SortedVIntListBuilder builder = new SortedVIntListBuilder();
79 int nextInt = bits.nextSetBit(0);
80 while (nextInt != -1) {
81 builder.addInt(nextInt);
82 nextInt = bits.nextSetBit(nextInt + 1);
88 * Create a SortedVIntList.
89 * @param docIdSetIterator An iterator providing document numbers as a set of integers.
90 * This DocIdSetIterator is iterated completely when this constructor
91 * is called and it must provide the integers in non
94 public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
95 SortedVIntListBuilder builder = new SortedVIntListBuilder();
97 while ((doc = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
104 private class SortedVIntListBuilder {
105 private int lastInt = 0;
107 SortedVIntListBuilder() {
112 void addInt(int nextInt) {
113 int diff = nextInt - lastInt;
115 throw new IllegalArgumentException(
116 "Input not sorted or first element negative.");
119 if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
120 // Biggest possible int does not fit.
121 resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
124 // See org.apache.lucene.store.IndexOutput.writeVInt()
125 while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set.
126 bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1);
129 bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set.
135 resizeBytes(lastBytePos);
140 private void initBytes() {
142 bytes = new byte[128]; // initial byte size
146 private void resizeBytes(int newSize) {
147 if (newSize != bytes.length) {
148 byte[] newBytes = new byte[newSize];
149 System.arraycopy(bytes, 0, newBytes, 0, lastBytePos);
154 private static final int VB1 = 0x7F;
155 private static final int BIT_SHIFT = 7;
156 private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
159 * @return The total number of sorted integers.
166 * @return The size of the byte array storing the compressed sorted integers.
168 public int getByteSize() {
172 /** This DocIdSet implementation is cacheable. */
174 public boolean isCacheable() {
179 * @return An iterator over the sorted integers.
182 public DocIdSetIterator iterator() {
183 return new DocIdSetIterator() {
188 private void advance() {
189 // See org.apache.lucene.store.IndexInput.readVInt()
190 byte b = bytes[bytePos++];
192 for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) {
193 b = bytes[bytePos++];
194 lastInt += (b & VB1) << s;
204 public int nextDoc() {
205 if (bytePos >= lastBytePos) {
215 public int advance(int target) {
216 while (bytePos < lastBytePos) {
218 if (lastInt >= target) {
219 return doc = lastInt;
222 return doc = NO_MORE_DOCS;