1 package org.apache.lucene.util;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Closeable;
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
25 import org.apache.lucene.store.DataInput;
26 import org.apache.lucene.store.DataOutput;
27 import org.apache.lucene.store.IndexInput;
29 /** Represents a logical byte[] as a series of pages. You
30 * can write-once into the logical byte[] (append only),
31 * using copy, and then retrieve slices (BytesRef) into it
36 public final class PagedBytes {
37 private final List<byte[]> blocks = new ArrayList<byte[]>();
38 private final List<Integer> blockEnd = new ArrayList<Integer>();
39 private final int blockSize;
40 private final int blockBits;
41 private final int blockMask;
42 private boolean didSkipBytes;
43 private boolean frozen;
45 private byte[] currentBlock;
47 private static final byte[] EMPTY_BYTES = new byte[0];
49 public final static class Reader implements Closeable {
50 private final byte[][] blocks;
51 private final int[] blockEnds;
52 private final int blockBits;
53 private final int blockMask;
54 private final int blockSize;
55 private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal<byte[]>();
57 public Reader(PagedBytes pagedBytes) {
58 blocks = new byte[pagedBytes.blocks.size()][];
59 for(int i=0;i<blocks.length;i++) {
60 blocks[i] = pagedBytes.blocks.get(i);
62 blockEnds = new int[blocks.length];
63 for(int i=0;i< blockEnds.length;i++) {
64 blockEnds[i] = pagedBytes.blockEnd.get(i);
66 blockBits = pagedBytes.blockBits;
67 blockMask = pagedBytes.blockMask;
68 blockSize = pagedBytes.blockSize;
72 * Gets a slice out of {@link PagedBytes} starting at <i>start</i> with a
73 * given length. Iff the slice spans across a block border this method will
74 * allocate sufficient resources and copy the paged data.
76 * Slices spanning more than one block are not supported.
80 public BytesRef fillSlice(BytesRef b, long start, int length) {
81 assert length >= 0: "length=" + length;
82 final int index = (int) (start >> blockBits);
83 final int offset = (int) (start & blockMask);
85 if (blockSize - offset >= length) {
87 b.bytes = blocks[index];
91 byte[] buffer = threadBuffers.get();
93 buffer = new byte[length];
94 threadBuffers.set(buffer);
95 } else if (buffer.length < length) {
96 buffer = ArrayUtil.grow(buffer, length);
97 threadBuffers.set(buffer);
101 System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset);
102 System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset));
108 * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>.
110 * <b>Note:</b> this method does not support slices spanning across block
114 * @return the given {@link BytesRef}
118 public BytesRef fill(BytesRef b, long start) {
119 final int index = (int) (start >> blockBits);
120 final int offset = (int) (start & blockMask);
121 final byte[] block = b.bytes = blocks[index];
123 if ((block[offset] & 128) == 0) {
124 b.length = block[offset];
127 b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
135 * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>. *
137 * <b>Note:</b> this method does not support slices spanning across block
141 * @return the internal block number of the slice.
144 public int fillAndGetIndex(BytesRef b, long start) {
145 final int index = (int) (start >> blockBits);
146 final int offset = (int) (start & blockMask);
147 final byte[] block = b.bytes = blocks[index];
149 if ((block[offset] & 128) == 0) {
150 b.length = block[offset];
153 b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
161 * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i> and
162 * returns the start offset of the next part, suitable as start parameter on
163 * next call to sequentially read all {@link BytesRef}.
166 * <b>Note:</b> this method does not support slices spanning across block
170 * @return the start offset of the next part, suitable as start parameter on
171 * next call to sequentially read all {@link BytesRef}.
174 public long fillAndGetStart(BytesRef b, long start) {
175 final int index = (int) (start >> blockBits);
176 final int offset = (int) (start & blockMask);
177 final byte[] block = b.bytes = blocks[index];
179 if ((block[offset] & 128) == 0) {
180 b.length = block[offset];
182 start += 1L + b.length;
184 b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
186 start += 2L + b.length;
194 * Gets a slice out of {@link PagedBytes} starting at <i>start</i>, the
195 * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a
196 * block border this method will allocate sufficient resources and copy the
199 * Slices spanning more than one block are not supported.
204 public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
205 final int index = (int) (start >> blockBits);
206 int offset = (int) (start & blockMask);
207 final byte[] block = blocks[index];
209 if ((block[offset] & 128) == 0) {
210 length = block[offset];
213 length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
217 assert length >= 0: "length=" + length;
219 if (blockSize - offset >= length) {
222 b.bytes = blocks[index];
225 byte[] buffer = threadBuffers.get();
226 if (buffer == null) {
227 buffer = new byte[length];
228 threadBuffers.set(buffer);
229 } else if (buffer.length < length) {
230 buffer = ArrayUtil.grow(buffer, length);
231 threadBuffers.set(buffer);
235 System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset);
236 System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset));
241 /** @lucene.internal */
242 public byte[][] getBlocks() {
246 /** @lucene.internal */
247 public int[] getBlockEnds() {
251 public void close() {
252 threadBuffers.close();
256 /** 1<<blockBits must be bigger than biggest single
257 * BytesRef slice that will be pulled */
258 public PagedBytes(int blockBits) {
259 this.blockSize = 1 << blockBits;
260 this.blockBits = blockBits;
261 blockMask = blockSize-1;
265 /** Read this many bytes from in */
266 public void copy(IndexInput in, long byteCount) throws IOException {
267 while (byteCount > 0) {
268 int left = blockSize - upto;
270 if (currentBlock != null) {
271 blocks.add(currentBlock);
274 currentBlock = new byte[blockSize];
278 if (left < byteCount) {
279 in.readBytes(currentBlock, upto, left, false);
283 in.readBytes(currentBlock, upto, (int) byteCount, false);
290 /** Copy BytesRef in */
291 public void copy(BytesRef bytes) throws IOException {
292 int byteCount = bytes.length;
293 int bytesUpto = bytes.offset;
294 while (byteCount > 0) {
295 int left = blockSize - upto;
297 if (currentBlock != null) {
298 blocks.add(currentBlock);
301 currentBlock = new byte[blockSize];
305 if (left < byteCount) {
306 System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
311 System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
318 /** Copy BytesRef in, setting BytesRef out to the result.
319 * Do not use this if you will use freeze(true).
320 * This only supports bytes.length <= blockSize */
321 public void copy(BytesRef bytes, BytesRef out) throws IOException {
322 int left = blockSize - upto;
323 if (bytes.length > left || currentBlock==null) {
324 if (currentBlock != null) {
325 blocks.add(currentBlock);
329 currentBlock = new byte[blockSize];
332 assert bytes.length <= blockSize;
333 // TODO: we could also support variable block sizes
336 out.bytes = currentBlock;
338 out.length = bytes.length;
340 System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
341 upto += bytes.length;
344 /** Commits final byte[], trimming it if necessary and if trim=true */
345 public Reader freeze(boolean trim) {
347 throw new IllegalStateException("already frozen");
350 throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
352 if (trim && upto < blockSize) {
353 final byte[] newBlock = new byte[upto];
354 System.arraycopy(currentBlock, 0, newBlock, 0, upto);
355 currentBlock = newBlock;
357 if (currentBlock == null) {
358 currentBlock = EMPTY_BYTES;
360 blocks.add(currentBlock);
364 return new Reader(this);
367 public long getPointer() {
368 if (currentBlock == null) {
371 return (blocks.size() * ((long) blockSize)) + upto;
375 /** Copy bytes in, writing the length as a 1 or 2 byte
377 public long copyUsingLengthPrefix(BytesRef bytes) throws IOException {
379 if (upto + bytes.length + 2 > blockSize) {
380 if (bytes.length + 2 > blockSize) {
381 throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
383 if (currentBlock != null) {
384 blocks.add(currentBlock);
387 currentBlock = new byte[blockSize];
391 final long pointer = getPointer();
393 if (bytes.length < 128) {
394 currentBlock[upto++] = (byte) bytes.length;
396 currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
397 currentBlock[upto++] = (byte) (bytes.length & 0xff);
399 System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
400 upto += bytes.length;
405 public final class PagedBytesDataInput extends DataInput {
406 private int currentBlockIndex;
407 private int currentBlockUpto;
408 private byte[] currentBlock;
410 PagedBytesDataInput() {
411 currentBlock = blocks.get(0);
415 public Object clone() {
416 PagedBytesDataInput clone = getDataInput();
417 clone.setPosition(getPosition());
421 /** Returns the current byte position. */
422 public long getPosition() {
423 return currentBlockIndex * blockSize + currentBlockUpto;
426 /** Seek to a position previously obtained from
427 * {@link #getPosition}. */
428 public void setPosition(long pos) {
429 currentBlockIndex = (int) (pos >> blockBits);
430 currentBlock = blocks.get(currentBlockIndex);
431 currentBlockUpto = (int) (pos & blockMask);
435 public byte readByte() {
436 if (currentBlockUpto == blockSize) {
439 return currentBlock[currentBlockUpto++];
443 public void readBytes(byte[] b, int offset, int len) {
444 assert b.length >= offset + len;
445 final int offsetEnd = offset + len;
447 final int blockLeft = blockSize - currentBlockUpto;
448 final int left = offsetEnd - offset;
449 if (blockLeft < left) {
450 System.arraycopy(currentBlock, currentBlockUpto,
457 System.arraycopy(currentBlock, currentBlockUpto,
460 currentBlockUpto += left;
466 private void nextBlock() {
468 currentBlockUpto = 0;
469 currentBlock = blocks.get(currentBlockIndex);
473 public final class PagedBytesDataOutput extends DataOutput {
475 public void writeByte(byte b) {
476 if (upto == blockSize) {
477 if (currentBlock != null) {
478 blocks.add(currentBlock);
481 currentBlock = new byte[blockSize];
484 currentBlock[upto++] = b;
488 public void writeBytes(byte[] b, int offset, int length) throws IOException {
489 assert b.length >= offset + length;
494 if (upto == blockSize) {
495 if (currentBlock != null) {
496 blocks.add(currentBlock);
499 currentBlock = new byte[blockSize];
503 final int offsetEnd = offset + length;
505 final int left = offsetEnd - offset;
506 final int blockLeft = blockSize - upto;
507 if (blockLeft < left) {
508 System.arraycopy(b, offset, currentBlock, upto, blockLeft);
509 blocks.add(currentBlock);
510 blockEnd.add(blockSize);
511 currentBlock = new byte[blockSize];
516 System.arraycopy(b, offset, currentBlock, upto, left);
523 /** Return the current byte position. */
524 public long getPosition() {
525 if (currentBlock == null) {
528 return blocks.size() * blockSize + upto;
533 /** Returns a DataInput to read values from this
534 * PagedBytes instance. */
535 public PagedBytesDataInput getDataInput() {
537 throw new IllegalStateException("must call freeze() before getDataInput");
539 return new PagedBytesDataInput();
542 /** Returns a DataOutput that you may use to write into
543 * this PagedBytes instance. If you do this, you should
544 * not call the other writing methods (eg, copy);
545 * results are undefined. */
546 public PagedBytesDataOutput getDataOutput() {
548 throw new IllegalStateException("cannot get DataOutput after freeze()");
550 return new PagedBytesDataOutput();