X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/PagedBytes.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/PagedBytes.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/PagedBytes.java new file mode 100644 index 0000000..983f3ba --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/util/PagedBytes.java @@ -0,0 +1,552 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexInput; + +/** Represents a logical byte[] as a series of pages. You + * can write-once into the logical byte[] (append only), + * using copy, and then retrieve slices (BytesRef) into it + * using fill. + * + * @lucene.internal + **/ +public final class PagedBytes { + private final List blocks = new ArrayList(); + private final List blockEnd = new ArrayList(); + private final int blockSize; + private final int blockBits; + private final int blockMask; + private boolean didSkipBytes; + private boolean frozen; + private int upto; + private byte[] currentBlock; + + private static final byte[] EMPTY_BYTES = new byte[0]; + + public final static class Reader implements Closeable { + private final byte[][] blocks; + private final int[] blockEnds; + private final int blockBits; + private final int blockMask; + private final int blockSize; + private final CloseableThreadLocal threadBuffers = new CloseableThreadLocal(); + + public Reader(PagedBytes pagedBytes) { + blocks = new byte[pagedBytes.blocks.size()][]; + for(int i=0;istart with a + * given length. Iff the slice spans across a block border this method will + * allocate sufficient resources and copy the paged data. + *

+ * Slices spanning more than one block are not supported. + *

+ * @lucene.internal + **/ + public BytesRef fillSlice(BytesRef b, long start, int length) { + assert length >= 0: "length=" + length; + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + b.length = length; + if (blockSize - offset >= length) { + // Within block + b.bytes = blocks[index]; + b.offset = offset; + } else { + // Split + byte[] buffer = threadBuffers.get(); + if (buffer == null) { + buffer = new byte[length]; + threadBuffers.set(buffer); + } else if (buffer.length < length) { + buffer = ArrayUtil.grow(buffer, length); + threadBuffers.set(buffer); + } + b.bytes = buffer; + b.offset = 0; + System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset); + System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset)); + } + return b; + } + + /** + * Reads length as 1 or 2 byte vInt prefix, starting at start. + *

+ * Note: this method does not support slices spanning across block + * borders. + *

+ * + * @return the given {@link BytesRef} + * + * @lucene.internal + **/ + public BytesRef fill(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + final byte[] block = b.bytes = blocks[index]; + + if ((block[offset] & 128) == 0) { + b.length = block[offset]; + b.offset = offset+1; + } else { + b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); + b.offset = offset+2; + assert b.length > 0; + } + return b; + } + + /** + * Reads length as 1 or 2 byte vInt prefix, starting at start. * + *

+ * Note: this method does not support slices spanning across block + * borders. + *

+ * + * @return the internal block number of the slice. + * @lucene.internal + **/ + public int fillAndGetIndex(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + final byte[] block = b.bytes = blocks[index]; + + if ((block[offset] & 128) == 0) { + b.length = block[offset]; + b.offset = offset+1; + } else { + b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); + b.offset = offset+2; + assert b.length > 0; + } + return index; + } + + /** + * Reads length as 1 or 2 byte vInt prefix, starting at start and + * returns the start offset of the next part, suitable as start parameter on + * next call to sequentially read all {@link BytesRef}. + * + *

+ * Note: this method does not support slices spanning across block + * borders. + *

+ * + * @return the start offset of the next part, suitable as start parameter on + * next call to sequentially read all {@link BytesRef}. + * @lucene.internal + **/ + public long fillAndGetStart(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + final byte[] block = b.bytes = blocks[index]; + + if ((block[offset] & 128) == 0) { + b.length = block[offset]; + b.offset = offset+1; + start += 1L + b.length; + } else { + b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); + b.offset = offset+2; + start += 2L + b.length; + assert b.length > 0; + } + return start; + } + + + /** + * Gets a slice out of {@link PagedBytes} starting at start, the + * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a + * block border this method will allocate sufficient resources and copy the + * paged data. + *

+ * Slices spanning more than one block are not supported. + *

+ * + * @lucene.internal + **/ + public BytesRef fillSliceWithPrefix(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + int offset = (int) (start & blockMask); + final byte[] block = blocks[index]; + final int length; + if ((block[offset] & 128) == 0) { + length = block[offset]; + offset = offset+1; + } else { + length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); + offset = offset+2; + assert length > 0; + } + assert length >= 0: "length=" + length; + b.length = length; + if (blockSize - offset >= length) { + // Within block + b.offset = offset; + b.bytes = blocks[index]; + } else { + // Split + byte[] buffer = threadBuffers.get(); + if (buffer == null) { + buffer = new byte[length]; + threadBuffers.set(buffer); + } else if (buffer.length < length) { + buffer = ArrayUtil.grow(buffer, length); + threadBuffers.set(buffer); + } + b.bytes = buffer; + b.offset = 0; + System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset); + System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset)); + } + return b; + } + + /** @lucene.internal */ + public byte[][] getBlocks() { + return blocks; + } + + /** @lucene.internal */ + public int[] getBlockEnds() { + return blockEnds; + } + + public void close() { + threadBuffers.close(); + } + } + + /** 1< 0) { + int left = blockSize - upto; + if (left == 0) { + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + } + currentBlock = new byte[blockSize]; + upto = 0; + left = blockSize; + } + if (left < byteCount) { + in.readBytes(currentBlock, upto, left, false); + upto = blockSize; + byteCount -= left; + } else { + in.readBytes(currentBlock, upto, (int) byteCount, false); + upto += byteCount; + break; + } + } + } + + /** Copy BytesRef in */ + public void copy(BytesRef bytes) throws IOException { + int byteCount = bytes.length; + int bytesUpto = bytes.offset; + while (byteCount > 0) { + int left = blockSize - upto; + if (left == 0) { + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + } + currentBlock = new byte[blockSize]; + upto = 0; + left = blockSize; + } + if (left < byteCount) { + System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left); + upto = blockSize; + byteCount -= left; + bytesUpto += left; + } else { + System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount); + upto += byteCount; + break; + } + } + } + + /** Copy BytesRef in, setting BytesRef out to the result. + * Do not use this if you will use freeze(true). + * This only supports bytes.length <= blockSize */ + public void copy(BytesRef bytes, BytesRef out) throws IOException { + int left = blockSize - upto; + if (bytes.length > left || currentBlock==null) { + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + didSkipBytes = true; + } + currentBlock = new byte[blockSize]; + upto = 0; + left = blockSize; + assert bytes.length <= blockSize; + // TODO: we could also support variable block sizes + } + + out.bytes = currentBlock; + out.offset = upto; + out.length = bytes.length; + + System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); + upto += bytes.length; + } + + /** Commits final byte[], trimming it if necessary and if trim=true */ + public Reader freeze(boolean trim) { + if (frozen) { + throw new IllegalStateException("already frozen"); + } + if (didSkipBytes) { + throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used"); + } + if (trim && upto < blockSize) { + final byte[] newBlock = new byte[upto]; + System.arraycopy(currentBlock, 0, newBlock, 0, upto); + currentBlock = newBlock; + } + if (currentBlock == null) { + currentBlock = EMPTY_BYTES; + } + blocks.add(currentBlock); + blockEnd.add(upto); + frozen = true; + currentBlock = null; + return new Reader(this); + } + + public long getPointer() { + if (currentBlock == null) { + return 0; + } else { + return (blocks.size() * ((long) blockSize)) + upto; + } + } + + /** Copy bytes in, writing the length as a 1 or 2 byte + * vInt prefix. */ + public long copyUsingLengthPrefix(BytesRef bytes) throws IOException { + + if (upto + bytes.length + 2 > blockSize) { + if (bytes.length + 2 > blockSize) { + throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes"); + } + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + } + currentBlock = new byte[blockSize]; + upto = 0; + } + + final long pointer = getPointer(); + + if (bytes.length < 128) { + currentBlock[upto++] = (byte) bytes.length; + } else { + currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8)); + currentBlock[upto++] = (byte) (bytes.length & 0xff); + } + System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); + upto += bytes.length; + + return pointer; + } + + public final class PagedBytesDataInput extends DataInput { + private int currentBlockIndex; + private int currentBlockUpto; + private byte[] currentBlock; + + PagedBytesDataInput() { + currentBlock = blocks.get(0); + } + + @Override + public Object clone() { + PagedBytesDataInput clone = getDataInput(); + clone.setPosition(getPosition()); + return clone; + } + + /** Returns the current byte position. */ + public long getPosition() { + return currentBlockIndex * blockSize + currentBlockUpto; + } + + /** Seek to a position previously obtained from + * {@link #getPosition}. */ + public void setPosition(long pos) { + currentBlockIndex = (int) (pos >> blockBits); + currentBlock = blocks.get(currentBlockIndex); + currentBlockUpto = (int) (pos & blockMask); + } + + @Override + public byte readByte() { + if (currentBlockUpto == blockSize) { + nextBlock(); + } + return currentBlock[currentBlockUpto++]; + } + + @Override + public void readBytes(byte[] b, int offset, int len) { + assert b.length >= offset + len; + final int offsetEnd = offset + len; + while (true) { + final int blockLeft = blockSize - currentBlockUpto; + final int left = offsetEnd - offset; + if (blockLeft < left) { + System.arraycopy(currentBlock, currentBlockUpto, + b, offset, + blockLeft); + nextBlock(); + offset += blockLeft; + } else { + // Last block + System.arraycopy(currentBlock, currentBlockUpto, + b, offset, + left); + currentBlockUpto += left; + break; + } + } + } + + private void nextBlock() { + currentBlockIndex++; + currentBlockUpto = 0; + currentBlock = blocks.get(currentBlockIndex); + } + } + + public final class PagedBytesDataOutput extends DataOutput { + @Override + public void writeByte(byte b) { + if (upto == blockSize) { + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + } + currentBlock = new byte[blockSize]; + upto = 0; + } + currentBlock[upto++] = b; + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + assert b.length >= offset + length; + if (length == 0) { + return; + } + + if (upto == blockSize) { + if (currentBlock != null) { + blocks.add(currentBlock); + blockEnd.add(upto); + } + currentBlock = new byte[blockSize]; + upto = 0; + } + + final int offsetEnd = offset + length; + while(true) { + final int left = offsetEnd - offset; + final int blockLeft = blockSize - upto; + if (blockLeft < left) { + System.arraycopy(b, offset, currentBlock, upto, blockLeft); + blocks.add(currentBlock); + blockEnd.add(blockSize); + currentBlock = new byte[blockSize]; + upto = 0; + offset += blockLeft; + } else { + // Last block + System.arraycopy(b, offset, currentBlock, upto, left); + upto += left; + break; + } + } + } + + /** Return the current byte position. */ + public long getPosition() { + if (currentBlock == null) { + return 0; + } else { + return blocks.size() * blockSize + upto; + } + } + } + + /** Returns a DataInput to read values from this + * PagedBytes instance. */ + public PagedBytesDataInput getDataInput() { + if (!frozen) { + throw new IllegalStateException("must call freeze() before getDataInput"); + } + return new PagedBytesDataInput(); + } + + /** Returns a DataOutput that you may use to write into + * this PagedBytes instance. If you do this, you should + * not call the other writing methods (eg, copy); + * results are undefined. */ + public PagedBytesDataOutput getDataOutput() { + if (frozen) { + throw new IllegalStateException("cannot get DataOutput after freeze()"); + } + return new PagedBytesDataOutput(); + } +}