1 package org.apache.lucene.store;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Abstract base class for performing read operations of Lucene's low-level
 * data types.
 */
28 public abstract class DataInput implements Cloneable {
30 private boolean preUTF8Strings; // true if we are reading old (modified UTF8) string format
32 /** Call this if readString should read characters stored
33 * in the old modified UTF8 format (length in java chars
34 * and java's modified UTF8 encoding). This is used for
35 * indices written pre-2.4 See LUCENE-510 for details. */
36 public void setModifiedUTF8StringsMode() {
37 preUTF8Strings = true;
40 /** Reads and returns a single byte.
41 * @see DataOutput#writeByte(byte)
43 public abstract byte readByte() throws IOException;
45 /** Reads a specified number of bytes into an array at the specified offset.
46 * @param b the array to read bytes into
47 * @param offset the offset in the array to start storing bytes
48 * @param len the number of bytes to read
49 * @see DataOutput#writeBytes(byte[],int)
51 public abstract void readBytes(byte[] b, int offset, int len)
54 /** Reads a specified number of bytes into an array at the
55 * specified offset with control over whether the read
56 * should be buffered (callers who have their own buffer
57 * should pass in "false" for useBuffer). Currently only
58 * {@link BufferedIndexInput} respects this parameter.
59 * @param b the array to read bytes into
60 * @param offset the offset in the array to start storing bytes
61 * @param len the number of bytes to read
62 * @param useBuffer set to false if the caller will handle
64 * @see DataOutput#writeBytes(byte[],int)
66 public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
69 // Default to ignoring useBuffer entirely
70 readBytes(b, offset, len);
73 /** Reads two bytes and returns a short.
74 * @see DataOutput#writeByte(byte)
76 public short readShort() throws IOException {
77 return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF));
80 /** Reads four bytes and returns an int.
81 * @see DataOutput#writeInt(int)
83 public int readInt() throws IOException {
84 return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
85 | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF);
88 /** Reads an int stored in variable-length format. Reads between one and
89 * five bytes. Smaller values take fewer bytes. Negative numbers are not
91 * @see DataOutput#writeVInt(int)
93 public int readVInt() throws IOException {
94 /* This is the original code of this method,
95 * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
96 * readByte() is inlined. So the loop was unwinded!
99 for (int shift = 7; (b & 0x80) != 0; shift += 7) {
101 i |= (b & 0x7F) << shift;
107 if ((b & 0x80) == 0) return i;
109 i |= (b & 0x7F) << 7;
110 if ((b & 0x80) == 0) return i;
112 i |= (b & 0x7F) << 14;
113 if ((b & 0x80) == 0) return i;
115 i |= (b & 0x7F) << 21;
116 if ((b & 0x80) == 0) return i;
118 assert (b & 0x80) == 0;
119 return i | ((b & 0x7F) << 28);
122 /** Reads eight bytes and returns a long.
123 * @see DataOutput#writeLong(long)
125 public long readLong() throws IOException {
126 return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
129 /** Reads a long stored in variable-length format. Reads between one and
130 * nine bytes. Smaller values take fewer bytes. Negative numbers are not
132 public long readVLong() throws IOException {
133 /* This is the original code of this method,
134 * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
135 * readByte() is inlined. So the loop was unwinded!
138 for (int shift = 7; (b & 0x80) != 0; shift += 7) {
140 i |= (b & 0x7FL) << shift;
146 if ((b & 0x80) == 0) return i;
148 i |= (b & 0x7FL) << 7;
149 if ((b & 0x80) == 0) return i;
151 i |= (b & 0x7FL) << 14;
152 if ((b & 0x80) == 0) return i;
154 i |= (b & 0x7FL) << 21;
155 if ((b & 0x80) == 0) return i;
157 i |= (b & 0x7FL) << 28;
158 if ((b & 0x80) == 0) return i;
160 i |= (b & 0x7FL) << 35;
161 if ((b & 0x80) == 0) return i;
163 i |= (b & 0x7FL) << 42;
164 if ((b & 0x80) == 0) return i;
166 i |= (b & 0x7FL) << 49;
167 if ((b & 0x80) == 0) return i;
169 assert (b & 0x80) == 0;
170 return i | ((b & 0x7FL) << 56);
174 * @see DataOutput#writeString(String)
176 public String readString() throws IOException {
178 return readModifiedUTF8String();
179 int length = readVInt();
180 final byte[] bytes = new byte[length];
181 readBytes(bytes, 0, length);
182 return new String(bytes, 0, length, "UTF-8");
185 private String readModifiedUTF8String() throws IOException {
186 int length = readVInt();
187 final char[] chars = new char[length];
188 readChars(chars, 0, length);
189 return new String(chars, 0, length);
192 /** Reads Lucene's old "modified UTF-8" encoded
193 * characters into an array.
194 * @param buffer the array to read characters into
195 * @param start the offset in the array to start storing characters
196 * @param length the number of characters to read
197 * @see DataOutput#writeChars(String,int,int)
198 * @deprecated -- please use readString or readBytes
199 * instead, and construct the string
200 * from those utf8 bytes
203 public void readChars(char[] buffer, int start, int length)
205 final int end = start + length;
206 for (int i = start; i < end; i++) {
209 buffer[i] = (char)(b & 0x7F);
210 else if ((b & 0xE0) != 0xE0) {
211 buffer[i] = (char)(((b & 0x1F) << 6)
212 | (readByte() & 0x3F));
214 buffer[i] = (char)(((b & 0x0F) << 12)
215 | ((readByte() & 0x3F) << 6)
216 | (readByte() & 0x3F));
221 /** Returns a clone of this stream.
223 * <p>Clones of a stream access the same data, and are positioned at the same
224 * point as the stream they were cloned from.
226 * <p>Expert: Subclasses must ensure that clones may be positioned at
227 * different points in the input from each other and from the stream they
231 public Object clone() {
232 DataInput clone = null;
234 clone = (DataInput)super.clone();
235 } catch (CloneNotSupportedException e) {}
240 public Map<String,String> readStringStringMap() throws IOException {
241 final Map<String,String> map = new HashMap<String,String>();
242 final int count = readInt();
243 for(int i=0;i<count;i++) {
244 final String key = readString();
245 final String val = readString();