+++ /dev/null
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-
-/** Optimized implementation of a vector of bits. This is more-or-less like
- * java.util.BitSet, but also includes the following:
- * <ul>
- * <li>a count() method, which efficiently computes the number of one bits;</li>
- * <li>optimized read from and write to disk;</li>
- * <li>inlinable get() method;</li>
- * <li>store and load, as bit set or d-gaps, depending on sparseness;</li>
- * </ul>
- *
- * @lucene.internal
- */
-public final class BitVector implements Cloneable {
-
- private byte[] bits;
- private int size;
- private int count;
-
- /** Constructs a vector capable of holding <code>n</code> bits. */
- public BitVector(int n) {
- size = n;
- bits = new byte[getNumBytes(size)];
- count = 0;
- }
-
- BitVector(byte[] bits, int size) {
- this.bits = bits;
- this.size = size;
- count = -1;
- }
-
- private int getNumBytes(int size) {
- int bytesLength = size >>> 3;
- if ((size & 7) != 0) {
- bytesLength++;
- }
- return bytesLength;
- }
-
- @Override
- public Object clone() {
- byte[] copyBits = new byte[bits.length];
- System.arraycopy(bits, 0, copyBits, 0, bits.length);
- BitVector clone = new BitVector(copyBits, size);
- clone.count = count;
- return clone;
- }
-
- /** Sets the value of <code>bit</code> to one. */
- public final void set(int bit) {
- if (bit >= size) {
- throw new ArrayIndexOutOfBoundsException("bit=" + bit + " size=" + size);
- }
- bits[bit >> 3] |= 1 << (bit & 7);
- count = -1;
- }
-
- /** Sets the value of <code>bit</code> to true, and
- * returns true if bit was already set */
- public final boolean getAndSet(int bit) {
- if (bit >= size) {
- throw new ArrayIndexOutOfBoundsException("bit=" + bit + " size=" + size);
- }
- final int pos = bit >> 3;
- final int v = bits[pos];
- final int flag = 1 << (bit & 7);
- if ((flag & v) != 0)
- return true;
- else {
- bits[pos] = (byte) (v | flag);
- if (count != -1)
- count++;
- return false;
- }
- }
-
- /** Sets the value of <code>bit</code> to zero. */
- public final void clear(int bit) {
- if (bit >= size) {
- throw new ArrayIndexOutOfBoundsException(bit);
- }
- bits[bit >> 3] &= ~(1 << (bit & 7));
- count = -1;
- }
-
- /** Returns <code>true</code> if <code>bit</code> is one and
- <code>false</code> if it is zero. */
- public final boolean get(int bit) {
- assert bit >= 0 && bit < size: "bit " + bit + " is out of bounds 0.." + (size-1);
- return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
- }
-
- /** Returns the number of bits in this vector. This is also one greater than
- the number of the largest valid bit number. */
- public final int size() {
- return size;
- }
-
- /** Returns the total number of one bits in this vector. This is efficiently
- computed and cached, so that, if the vector is not changed, no
- recomputation is done for repeated calls. */
- public final int count() {
- // if the vector has been modified
- if (count == -1) {
- int c = 0;
- int end = bits.length;
- for (int i = 0; i < end; i++)
- c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
- count = c;
- }
- return count;
- }
-
- /** For testing */
- public final int getRecomputedCount() {
- int c = 0;
- int end = bits.length;
- for (int i = 0; i < end; i++)
- c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte
- return c;
- }
-
- private static final byte[] BYTE_COUNTS = { // table of bits/byte
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
- };
-
- private static String CODEC = "BitVector";
-
- // Version before version tracking was added:
- private final static int VERSION_PRE = -1;
-
- // First version:
- private final static int VERSION_START = 0;
-
- // Increment version to change it:
- private final static int VERSION_CURRENT = VERSION_START;
-
- /** Writes this vector to the file <code>name</code> in Directory
- <code>d</code>, in a format that can be read by the constructor {@link
- #BitVector(Directory, String)}. */
- public final void write(Directory d, String name) throws IOException {
- IndexOutput output = d.createOutput(name);
- try {
- output.writeInt(-2);
- CodecUtil.writeHeader(output, CODEC, VERSION_CURRENT);
- if (isSparse()) {
- writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
- } else {
- writeBits(output);
- }
- } finally {
- output.close();
- }
- }
-
- /** Write as a bit set */
- private void writeBits(IndexOutput output) throws IOException {
- output.writeInt(size()); // write size
- output.writeInt(count()); // write count
- output.writeBytes(bits, bits.length);
- }
-
- /** Write as a d-gaps list */
- private void writeDgaps(IndexOutput output) throws IOException {
- output.writeInt(-1); // mark using d-gaps
- output.writeInt(size()); // write size
- output.writeInt(count()); // write count
- int last=0;
- int n = count();
- int m = bits.length;
- for (int i=0; i<m && n>0; i++) {
- if (bits[i]!=0) {
- output.writeVInt(i-last);
- output.writeByte(bits[i]);
- last = i;
- n -= BYTE_COUNTS[bits[i] & 0xFF];
- }
- }
- }
-
- /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */
- private boolean isSparse() {
-
- final int setCount = count();
- if (setCount == 0) {
- return true;
- }
-
- final int avgGapLength = bits.length / setCount;
-
- // expected number of bytes for vInt encoding of each gap
- final int expectedDGapBytes;
- if (avgGapLength <= (1<< 7)) {
- expectedDGapBytes = 1;
- } else if (avgGapLength <= (1<<14)) {
- expectedDGapBytes = 2;
- } else if (avgGapLength <= (1<<21)) {
- expectedDGapBytes = 3;
- } else if (avgGapLength <= (1<<28)) {
- expectedDGapBytes = 4;
- } else {
- expectedDGapBytes = 5;
- }
-
- // +1 because we write the byte itself that contains the
- // set bit
- final int bytesPerSetBit = expectedDGapBytes + 1;
-
- // note: adding 32 because we start with ((int) -1) to indicate d-gaps format.
- final long expectedBits = 32 + 8 * bytesPerSetBit * count();
-
- // note: factor is for read/write of byte-arrays being faster than vints.
- final long factor = 10;
- return factor * expectedBits < size();
- }
-
- /** Constructs a bit vector from the file <code>name</code> in Directory
- <code>d</code>, as written by the {@link #write} method.
- */
- public BitVector(Directory d, String name) throws IOException {
- IndexInput input = d.openInput(name);
-
- try {
- final int firstInt = input.readInt();
- final int version;
- if (firstInt == -2) {
- // New format, with full header & version:
- version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_START);
- size = input.readInt();
- } else {
- version = VERSION_PRE;
- size = firstInt;
- }
- if (size == -1) {
- readDgaps(input);
- } else {
- readBits(input);
- }
- } finally {
- input.close();
- }
- }
-
- /** Read as a bit set */
- private void readBits(IndexInput input) throws IOException {
- count = input.readInt(); // read count
- bits = new byte[getNumBytes(size)]; // allocate bits
- input.readBytes(bits, 0, bits.length);
- }
-
- /** read as a d-gaps list */
- private void readDgaps(IndexInput input) throws IOException {
- size = input.readInt(); // (re)read size
- count = input.readInt(); // read count
- bits = new byte[(size >> 3) + 1]; // allocate bits
- int last=0;
- int n = count();
- while (n>0) {
- last += input.readVInt();
- bits[last] = input.readByte();
- n -= BYTE_COUNTS[bits[last] & 0xFF];
- }
- }
-}