1 package org.apache.lucene.util.encoding;
3 import java.io.IOException;
4 import java.io.OutputStream;
7 * Licensed to the Apache Software Foundation (ASF) under one or more
8 * contributor license agreements. See the NOTICE file distributed with
9 * this work for additional information regarding copyright ownership.
10 * The ASF licenses this file to You under the Apache License, Version 2.0
11 * (the "License"); you may not use this file except in compliance with
12 * the License. You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
24 * An {@link IntEncoder} which encodes values in chunks. Implementations of this
25 * class assume the data which needs encoding consists of small, consecutive
26 * values, and therefore the encoder is able to compress them better. You can
27 * read more on the two implementations {@link FourFlagsIntEncoder} and
28 * {@link EightFlagsIntEncoder}.
30 * Extensions of this class need to implement {@link #encode(int)} in order to
31 * build the proper indicator (flags). Once enough values have been accumulated
32 * (typically the batch size), extensions can call {@link #encodeChunk()} to
33 * flush the indicator and the rest of the values.
35 * <b>NOTE:</b> flags encoders do not accept values &le; 0 (zero or negative) in
36 * their {@link #encode(int)}. For performance reasons they do not check that
37 * condition; however, if such a value is passed the resulting stream may be
38 * corrupt or an exception may be thrown. Also, these encoders perform best when
39 * there are many consecutive small values (depends on the encoder
40 * implementation). If that is not the case, the encoded output will occupy 1
41 * more byte for every <i>batch</i> of integers than the output of
42 * {@link VInt8IntEncoder} would have occupied. Therefore, make sure to check
43 * whether your data fits into the conditions of the specific encoder.
45 * For the reasons mentioned above, these encoders are usually chained with
46 * {@link UniqueValuesIntEncoder} and {@link DGapIntEncoder} in the following
48 * IntEncoder fourFlags =
49 * new SortingEncoderFilter(new UniqueValuesIntEncoder(new DGapIntEncoder(new FlagsIntEncoderImpl())));
52 * @lucene.experimental
54 public abstract class ChunksIntEncoder extends IntEncoder {
56 /** Queue of the values which must be encoded outside the indicator; allocated by the constructor with {@code chunkSize} slots. */
57 protected final int[] encodeQueue;
58 protected int encodeQueueSize = 0; // number of leading slots of encodeQueue that encodeChunk() hands to the encoder
60 /** Encoder used to encode the values outside the indicator; always a {@link VInt8IntEncoder}. */
61 protected final IntEncoder encoder = new VInt8IntEncoder();
63 /** The indicator: the flag bits built up by {@link #encode(int)}, held in an int although described as a flags byte. */
64 protected int indicator = 0;
66 /** Counts the current ordinal of the encoded value (presumably its position within the current chunk; confirm against the subclasses). */
67 protected byte ordinal = 0;
69 protected ChunksIntEncoder(int chunkSize) { // chunkSize: number of slots in the value queue
70 encodeQueue = new int[chunkSize]; // allocated once (the field is final); a negative chunkSize fails here with NegativeArraySizeException
74 * Encodes the values of the current chunk. First it writes the indicator, and
75 * then it encodes the values outside the indicator.
77 protected void encodeChunk() throws IOException { // flushes the values queued for the current chunk
79 for (int i = 0; i < encodeQueueSize; ++i) { // only the first encodeQueueSize slots hold queued values
80 encoder.encode(encodeQueue[i]); // queued values are passed to the nested encoder in queue order
88 public void close() throws IOException {
97 public void reInit(OutputStream out) {