X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BitVector.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BitVector.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BitVector.java deleted file mode 100644 index 9538722..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BitVector.java +++ /dev/null @@ -1,303 +0,0 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; - -/** Optimized implementation of a vector of bits. This is more-or-less like - * java.util.BitSet, but also includes the following: - * - * - * @lucene.internal - */ -public final class BitVector implements Cloneable { - - private byte[] bits; - private int size; - private int count; - - /** Constructs a vector capable of holding n bits. */ - public BitVector(int n) { - size = n; - bits = new byte[getNumBytes(size)]; - count = 0; - } - - BitVector(byte[] bits, int size) { - this.bits = bits; - this.size = size; - count = -1; - } - - private int getNumBytes(int size) { - int bytesLength = size >>> 3; - if ((size & 7) != 0) { - bytesLength++; - } - return bytesLength; - } - - @Override - public Object clone() { - byte[] copyBits = new byte[bits.length]; - System.arraycopy(bits, 0, copyBits, 0, bits.length); - BitVector clone = new BitVector(copyBits, size); - clone.count = count; - return clone; - } - - /** Sets the value of bit to one. */ - public final void set(int bit) { - if (bit >= size) { - throw new ArrayIndexOutOfBoundsException("bit=" + bit + " size=" + size); - } - bits[bit >> 3] |= 1 << (bit & 7); - count = -1; - } - - /** Sets the value of bit to true, and - * returns true if bit was already set */ - public final boolean getAndSet(int bit) { - if (bit >= size) { - throw new ArrayIndexOutOfBoundsException("bit=" + bit + " size=" + size); - } - final int pos = bit >> 3; - final int v = bits[pos]; - final int flag = 1 << (bit & 7); - if ((flag & v) != 0) - return true; - else { - bits[pos] = (byte) (v | flag); - if (count != -1) - count++; - return false; - } - } - - /** Sets the value of bit to zero. */ - public final void clear(int bit) { - if (bit >= size) { - throw new ArrayIndexOutOfBoundsException(bit); - } - bits[bit >> 3] &= ~(1 << (bit & 7)); - count = -1; - } - - /** Returns true if bit is one and - false if it is zero. */ - public final boolean get(int bit) { - assert bit >= 0 && bit < size: "bit " + bit + " is out of bounds 0.." + (size-1); - return (bits[bit >> 3] & (1 << (bit & 7))) != 0; - } - - /** Returns the number of bits in this vector. This is also one greater than - the number of the largest valid bit number. */ - public final int size() { - return size; - } - - /** Returns the total number of one bits in this vector. This is efficiently - computed and cached, so that, if the vector is not changed, no - recomputation is done for repeated calls. */ - public final int count() { - // if the vector has been modified - if (count == -1) { - int c = 0; - int end = bits.length; - for (int i = 0; i < end; i++) - c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte - count = c; - } - return count; - } - - /** For testing */ - public final int getRecomputedCount() { - int c = 0; - int end = bits.length; - for (int i = 0; i < end; i++) - c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte - return c; - } - - private static final byte[] BYTE_COUNTS = { // table of bits/byte - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 - }; - - private static String CODEC = "BitVector"; - - // Version before version tracking was added: - private final static int VERSION_PRE = -1; - - // First version: - private final static int VERSION_START = 0; - - // Increment version to change it: - private final static int VERSION_CURRENT = VERSION_START; - - /** Writes this vector to the file name in Directory - d, in a format that can be read by the constructor {@link - #BitVector(Directory, String)}. */ - public final void write(Directory d, String name) throws IOException { - IndexOutput output = d.createOutput(name); - try { - output.writeInt(-2); - CodecUtil.writeHeader(output, CODEC, VERSION_CURRENT); - if (isSparse()) { - writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. - } else { - writeBits(output); - } - } finally { - output.close(); - } - } - - /** Write as a bit set */ - private void writeBits(IndexOutput output) throws IOException { - output.writeInt(size()); // write size - output.writeInt(count()); // write count - output.writeBytes(bits, bits.length); - } - - /** Write as a d-gaps list */ - private void writeDgaps(IndexOutput output) throws IOException { - output.writeInt(-1); // mark using d-gaps - output.writeInt(size()); // write size - output.writeInt(count()); // write count - int last=0; - int n = count(); - int m = bits.length; - for (int i=0; i0; i++) { - if (bits[i]!=0) { - output.writeVInt(i-last); - output.writeByte(bits[i]); - last = i; - n -= BYTE_COUNTS[bits[i] & 0xFF]; - } - } - } - - /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */ - private boolean isSparse() { - - final int setCount = count(); - if (setCount == 0) { - return true; - } - - final int avgGapLength = bits.length / setCount; - - // expected number of bytes for vInt encoding of each gap - final int expectedDGapBytes; - if (avgGapLength <= (1<< 7)) { - expectedDGapBytes = 1; - } else if (avgGapLength <= (1<<14)) { - expectedDGapBytes = 2; - } else if (avgGapLength <= (1<<21)) { - expectedDGapBytes = 3; - } else if (avgGapLength <= (1<<28)) { - expectedDGapBytes = 4; - } else { - expectedDGapBytes = 5; - } - - // +1 because we write the byte itself that contains the - // set bit - final int bytesPerSetBit = expectedDGapBytes + 1; - - // note: adding 32 because we start with ((int) -1) to indicate d-gaps format. - final long expectedBits = 32 + 8 * bytesPerSetBit * count(); - - // note: factor is for read/write of byte-arrays being faster than vints. - final long factor = 10; - return factor * expectedBits < size(); - } - - /** Constructs a bit vector from the file name in Directory - d, as written by the {@link #write} method. - */ - public BitVector(Directory d, String name) throws IOException { - IndexInput input = d.openInput(name); - - try { - final int firstInt = input.readInt(); - final int version; - if (firstInt == -2) { - // New format, with full header & version: - version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_START); - size = input.readInt(); - } else { - version = VERSION_PRE; - size = firstInt; - } - if (size == -1) { - readDgaps(input); - } else { - readBits(input); - } - } finally { - input.close(); - } - } - - /** Read as a bit set */ - private void readBits(IndexInput input) throws IOException { - count = input.readInt(); // read count - bits = new byte[getNumBytes(size)]; // allocate bits - input.readBytes(bits, 0, bits.length); - } - - /** read as a d-gaps list */ - private void readDgaps(IndexInput input) throws IOException { - size = input.readInt(); // (re)read size - count = input.readInt(); // read count - bits = new byte[(size >> 3) + 1]; // allocate bits - int last=0; - int n = count(); - while (n>0) { - last += input.readVInt(); - bits[last] = input.readByte(); - n -= BYTE_COUNTS[bits[last] & 0xFF]; - } - } -}