1 package org.apache.lucene.util.packed;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.store.DataInput;
21 import org.apache.lucene.store.DataOutput;
22 import org.apache.lucene.util.CodecUtil;
23 import org.apache.lucene.util.Constants;
25 import java.io.IOException;
28 * Simplistic compression for array of unsigned long values.
29 * Each value is >= 0 and <= a specified maximum value. The
30 * values are stored as packed ints, with each value
31 * consuming a fixed number of bits.
36 public class PackedInts {
38 private final static String CODEC_NAME = "PackedInts";
39 private final static int VERSION_START = 0;
40 private final static int VERSION_CURRENT = VERSION_START;
43 * A read-only random access array of positive integers.
46 public static interface Reader {
48 * @param index the position of the wanted value.
49 * @return the value at the stated index.
54 * @return the number of bits used to store any given value.
55 * Note: This does not imply that memory usage is
56 * {@code bitsPerValue * #values} as implementations are free to
57 * use non-space-optimal packing of bits.
59 int getBitsPerValue();
62 * @return the number of values.
67 * Expert: if the bit-width of this reader matches one of
68 * java's native types, returns the underlying array
69 * (ie, byte[], short[], int[], long[]); else, returns
70 * null. Note that when accessing the array you must
71 * upgrade the type (bitwise AND with all ones), to
72 * interpret the full value as unsigned. Ie,
73 * bytes[idx]&0xFF, shorts[idx]&0xFFFF, etc.
78 * Returns true if this implementation is backed by a
87 * A packed integer array that can be modified.
90 public static interface Mutable extends Reader {
92 * Set the value at the given index in the array.
93 * @param index where the value should be positioned.
94 * @param value a value conforming to the constraints set by the array.
96 void set(int index, long value);
99 * Sets all values to 0.
106 * A simple base for Readers that keeps track of valueCount and bitsPerValue.
109 public static abstract class ReaderImpl implements Reader {
110 protected final int bitsPerValue;
111 protected final int valueCount;
113 protected ReaderImpl(int valueCount, int bitsPerValue) {
114 this.bitsPerValue = bitsPerValue;
115 assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
116 this.valueCount = valueCount;
119 public int getBitsPerValue() {
127 public long getMaxValue() { // Convenience method
128 return maxValue(bitsPerValue);
131 public Object getArray() {
135 public boolean hasArray() {
140 /** A write-once Writer.
143 public static abstract class Writer {
144 protected final DataOutput out;
145 protected final int bitsPerValue;
146 protected final int valueCount;
148 protected Writer(DataOutput out, int valueCount, int bitsPerValue)
150 assert bitsPerValue <= 64;
153 this.valueCount = valueCount;
154 this.bitsPerValue = bitsPerValue;
155 CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
156 out.writeVInt(bitsPerValue);
157 out.writeVInt(valueCount);
160 public abstract void add(long v) throws IOException;
161 public abstract void finish() throws IOException;
165 * Retrieve PackedInt data from the DataInput and return a packed int
166 * structure based on it.
167 * @param in positioned at the beginning of a stored packed int structure.
168 * @return a read only random access capable array of positive integers.
169 * @throws IOException if the structure could not be retrieved.
172 public static Reader getReader(DataInput in) throws IOException {
173 CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
174 final int bitsPerValue = in.readVInt();
175 assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
176 final int valueCount = in.readVInt();
178 switch (bitsPerValue) {
180 return new Direct8(in, valueCount);
182 return new Direct16(in, valueCount);
184 return new Direct32(in, valueCount);
186 return new Direct64(in, valueCount);
188 if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
189 return new Packed64(in, valueCount, bitsPerValue);
191 return new Packed32(in, valueCount, bitsPerValue);
197 * Create a packed integer array with the given amount of values initialized
198 * to 0. the valueCount and the bitsPerValue cannot be changed after creation.
199 * All Mutables known by this factory are kept fully in RAM.
200 * @param valueCount the number of elements.
201 * @param bitsPerValue the number of bits available for any given value.
202 * @return a mutable packed integer array.
203 * @throws java.io.IOException if the Mutable could not be created. With the
204 * current implementations, this never happens, but the method
205 * signature allows for future persistence-backed Mutables.
208 public static Mutable getMutable(
209 int valueCount, int bitsPerValue) {
210 switch (bitsPerValue) {
212 return new Direct8(valueCount);
214 return new Direct16(valueCount);
216 return new Direct32(valueCount);
218 return new Direct64(valueCount);
220 if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
221 return new Packed64(valueCount, bitsPerValue);
223 return new Packed32(valueCount, bitsPerValue);
229 * Create a packed integer array writer for the given number of values at the
230 * given bits/value. Writers append to the given IndexOutput and has very
231 * low memory overhead.
232 * @param out the destination for the produced bits.
233 * @param valueCount the number of elements.
234 * @param bitsPerValue the number of bits available for any given value.
235 * @return a Writer ready for receiving values.
236 * @throws IOException if bits could not be written to out.
239 public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
241 return new PackedWriter(out, valueCount, bitsPerValue);
244 /** Returns how many bits are required to hold values up
245 * to and including maxValue
246 * @param maxValue the maximum value that should be representable.
247 * @return the amount of bits needed to represent values from 0 to maxValue.
250 public static int bitsRequired(long maxValue) {
251 // Very high long values does not translate well to double, so we do an
252 // explicit check for the edge cases
253 if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
255 } if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
258 return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
262 * Calculates the maximum unsigned long that can be expressed with the given
264 * @param bitsPerValue the number of bits available for any given value.
265 * @return the maximum value for the given bits.
268 public static long maxValue(int bitsPerValue) {
269 return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
272 /** Rounds bitsPerValue up to 8, 16, 32 or 64. */
273 public static int getNextFixedSize(int bitsPerValue) {
274 if (bitsPerValue <= 8) {
276 } else if (bitsPerValue <= 16) {
278 } else if (bitsPerValue <= 32) {
285 /** Possibly wastes some storage in exchange for faster lookups */
286 public static int getRoundedFixedSize(int bitsPerValue) {
287 if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
288 return getNextFixedSize(bitsPerValue);