X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java new file mode 100644 index 0000000..6cc72e7 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java @@ -0,0 +1,293 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.Constants; + +import java.io.IOException; + +/** + * Simplistic compression for array of unsigned long values. + * Each value is >= 0 and <= a specified maximum value. The + * values are stored as packed ints, with each value + * consuming a fixed number of bits. + * + * @lucene.internal + */ + +public class PackedInts { + + private final static String CODEC_NAME = "PackedInts"; + private final static int VERSION_START = 0; + private final static int VERSION_CURRENT = VERSION_START; + + /** + * A read-only random access array of positive integers. + * @lucene.internal + */ + public static interface Reader { + /** + * @param index the position of the wanted value. + * @return the value at the stated index. + */ + long get(int index); + + /** + * @return the number of bits used to store any given value. + * Note: This does not imply that memory usage is + * {@code bitsPerValue * #values} as implementations are free to + * use non-space-optimal packing of bits. + */ + int getBitsPerValue(); + + /** + * @return the number of values. + */ + int size(); + + /** + * Expert: if the bit-width of this reader matches one of + * java's native types, returns the underlying array + * (ie, byte[], short[], int[], long[]); else, returns + * null. Note that when accessing the array you must + * upgrade the type (bitwise AND with all ones), to + * interpret the full value as unsigned. Ie, + * bytes[idx]&0xFF, shorts[idx]&0xFFFF, etc. + */ + Object getArray(); + + /** + * Returns true if this implementation is backed by a + * native java array. + * + * @see #getArray + */ + boolean hasArray(); + } + + /** + * A packed integer array that can be modified. + * @lucene.internal + */ + public static interface Mutable extends Reader { + /** + * Set the value at the given index in the array. + * @param index where the value should be positioned. + * @param value a value conforming to the constraints set by the array. + */ + void set(int index, long value); + + /** + * Sets all values to 0. + */ + + void clear(); + } + + /** + * A simple base for Readers that keeps track of valueCount and bitsPerValue. + * @lucene.internal + */ + public static abstract class ReaderImpl implements Reader { + protected final int bitsPerValue; + protected final int valueCount; + + protected ReaderImpl(int valueCount, int bitsPerValue) { + this.bitsPerValue = bitsPerValue; + assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue; + this.valueCount = valueCount; + } + + public int getBitsPerValue() { + return bitsPerValue; + } + + public int size() { + return valueCount; + } + + public long getMaxValue() { // Convenience method + return maxValue(bitsPerValue); + } + + public Object getArray() { + return null; + } + + public boolean hasArray() { + return false; + } + } + + /** A write-once Writer. + * @lucene.internal + */ + public static abstract class Writer { + protected final DataOutput out; + protected final int bitsPerValue; + protected final int valueCount; + + protected Writer(DataOutput out, int valueCount, int bitsPerValue) + throws IOException { + assert bitsPerValue <= 64; + + this.out = out; + this.valueCount = valueCount; + this.bitsPerValue = bitsPerValue; + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + out.writeVInt(bitsPerValue); + out.writeVInt(valueCount); + } + + public abstract void add(long v) throws IOException; + public abstract void finish() throws IOException; + } + + /** + * Retrieve PackedInt data from the DataInput and return a packed int + * structure based on it. + * @param in positioned at the beginning of a stored packed int structure. + * @return a read only random access capable array of positive integers. + * @throws IOException if the structure could not be retrieved. + * @lucene.internal + */ + public static Reader getReader(DataInput in) throws IOException { + CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START); + final int bitsPerValue = in.readVInt(); + assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; + final int valueCount = in.readVInt(); + + switch (bitsPerValue) { + case 8: + return new Direct8(in, valueCount); + case 16: + return new Direct16(in, valueCount); + case 32: + return new Direct32(in, valueCount); + case 64: + return new Direct64(in, valueCount); + default: + if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) { + return new Packed64(in, valueCount, bitsPerValue); + } else { + return new Packed32(in, valueCount, bitsPerValue); + } + } + } + + /** + * Create a packed integer array with the given amount of values initialized + * to 0. the valueCount and the bitsPerValue cannot be changed after creation. + * All Mutables known by this factory are kept fully in RAM. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @return a mutable packed integer array. + * @throws java.io.IOException if the Mutable could not be created. With the + * current implementations, this never happens, but the method + * signature allows for future persistence-backed Mutables. + * @lucene.internal + */ + public static Mutable getMutable( + int valueCount, int bitsPerValue) { + switch (bitsPerValue) { + case 8: + return new Direct8(valueCount); + case 16: + return new Direct16(valueCount); + case 32: + return new Direct32(valueCount); + case 64: + return new Direct64(valueCount); + default: + if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) { + return new Packed64(valueCount, bitsPerValue); + } else { + return new Packed32(valueCount, bitsPerValue); + } + } + } + + /** + * Create a packed integer array writer for the given number of values at the + * given bits/value. Writers append to the given IndexOutput and has very + * low memory overhead. + * @param out the destination for the produced bits. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @return a Writer ready for receiving values. + * @throws IOException if bits could not be written to out. + * @lucene.internal + */ + public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue) + throws IOException { + return new PackedWriter(out, valueCount, bitsPerValue); + } + + /** Returns how many bits are required to hold values up + * to and including maxValue + * @param maxValue the maximum value that should be representable. + * @return the amount of bits needed to represent values from 0 to maxValue. + * @lucene.internal + */ + public static int bitsRequired(long maxValue) { + // Very high long values does not translate well to double, so we do an + // explicit check for the edge cases + if (maxValue > 0x3FFFFFFFFFFFFFFFL) { + return 63; + } if (maxValue > 0x1FFFFFFFFFFFFFFFL) { + return 62; + } + return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0))); + } + + /** + * Calculates the maximum unsigned long that can be expressed with the given + * number of bits. + * @param bitsPerValue the number of bits available for any given value. + * @return the maximum value for the given bits. + * @lucene.internal + */ + public static long maxValue(int bitsPerValue) { + return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue); + } + + /** Rounds bitsPerValue up to 8, 16, 32 or 64. */ + public static int getNextFixedSize(int bitsPerValue) { + if (bitsPerValue <= 8) { + return 8; + } else if (bitsPerValue <= 16) { + return 16; + } else if (bitsPerValue <= 32) { + return 32; + } else { + return 64; + } + } + + /** Possibly wastes some storage in exchange for faster lookups */ + public static int getRoundedFixedSize(int bitsPerValue) { + if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok + return getNextFixedSize(bitsPerValue); + } else { + return bitsPerValue; + } + } +}