1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.util.LuceneTestCase;
21 import org.apache.lucene.store.IndexInput;
22 import org.apache.lucene.store.IndexOutput;
23 import org.apache.lucene.store.RAMDirectory;
25 import java.io.IOException;
27 public class TestIndexInput extends LuceneTestCase {
// Encoded fixture for the read tests: variable-length integers first, then
// length-prefixed UTF-8 strings. checkReads asserts the decoded values in this order.
// The string length prefix counts encoded bytes, not characters (e.g. 0x02 precedes the
// single character U+00BF, 0x03 precedes U+2620).
// NOTE(review): the line numbers embedded in this listing are not contiguous, so some
// initializer rows and the closing "};" appear to be absent from this view -- confirm
// against the full file before editing any row.
29 static final byte[] READ_TEST_BYTES = new byte[] {
// Two three-byte variable-length ints; presumably the 16384 and 16385 expected by checkReads.
32 (byte) 0x80, (byte) 0x80, 0x01,
33 (byte) 0x81, (byte) 0x80, 0x01,
// Five-byte encoding, present twice; presumably Integer.MAX_VALUE, which checkReads reads
// once with readVInt and once with readVLong.
34 (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
35 (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
// Nine-byte encoding; presumably the Long.MAX_VALUE expected from readVLong.
36 (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
// Plain ASCII string "Lucene" preceded by its length, 6.
37 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
39 // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
40 0x02, (byte) 0xC2, (byte) 0xBF,
// The same character embedded in ASCII text; the prefix 0x0A is the total byte count.
41 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
42 'c', 'e', (byte) 0xC2, (byte) 0xBF,
45 // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
46 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
// The same character embedded in ASCII text; the prefix 0x0C is the total byte count.
47 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
48 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
// 4-byte UTF-8: each of the two characters named below takes four bytes.
52 // (U+1D11E "MUSICAL SYMBOL G CLEF")
53 // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
// G clef alone (4 bytes).
54 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
// G clef followed by eighth note (8 bytes).
55 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
56 (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
// The two characters again; presumably part of the "Lu<clef>ce<note>ne" string that
// checkReads expects -- the surrounding ASCII rows are not visible here.
58 (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
60 (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
// ASCII text with two embedded zero bytes, matching the "Lu\u0000ce\u0000ne" assertion.
65 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
/**
 * Reads values from the given input in a fixed order and asserts each one:
 * four variable-length ints, Integer.MAX_VALUE as a VInt and again as a VLong,
 * Long.MAX_VALUE as a VLong, and then a series of strings covering ASCII,
 * 2-, 3- and 4-byte UTF-8 characters and embedded U+0000.
 *
 * NOTE(review): the expected values line up with READ_TEST_BYTES, so the input is
 * presumably built from that array -- the call sites are not visible in this view.
 *
 * @param is the input to read from; it is consumed sequentially and not closed here
 * @throws IOException declared because the IndexInput read methods are called directly
 */
68 private void checkReads(IndexInput is) throws IOException {
// Variable-length integers, smallest first.
69 assertEquals(128,is.readVInt());
70 assertEquals(16383,is.readVInt());
71 assertEquals(16384,is.readVInt());
72 assertEquals(16385,is.readVInt());
// The same maximum int value is read through both the int and the long decoder.
73 assertEquals(Integer.MAX_VALUE, is.readVInt());
74 assertEquals((long) Integer.MAX_VALUE, is.readVLong());
75 assertEquals(Long.MAX_VALUE, is.readVLong());
// Plain ASCII string.
76 assertEquals("Lucene",is.readString());
// U+00BF, alone and embedded in ASCII text.
78 assertEquals("\u00BF",is.readString());
79 assertEquals("Lu\u00BFce\u00BFne",is.readString());
// U+2620, alone and embedded in ASCII text.
81 assertEquals("\u2620",is.readString());
82 assertEquals("Lu\u2620ce\u2620ne",is.readString());
// Supplementary characters U+1D11E and U+1D160, written as UTF-16 surrogate pairs.
84 assertEquals("\uD834\uDD1E",is.readString());
85 assertEquals("\uD834\uDD1E\uD834\uDD60",is.readString());
86 assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne",is.readString());
// U+0000, alone and embedded in ASCII text.
88 assertEquals("\u0000",is.readString());
89 assertEquals("Lu\u0000ce\u0000ne",is.readString());
92 // this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
93 public void testBufferedIndexInputRead() throws IOException {
// Wrap the shared fixture bytes directly in a MockIndexInput.
94 final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
// NOTE(review): the rest of this method is not visible in this view (the embedded line
// numbers skip ahead); presumably it passes the input to checkReads -- confirm.
99 // this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
100 public void testRawIndexInputRead() throws IOException {
// Write the whole fixture to a file named "foo" in a fresh RAMDirectory...
101 final RAMDirectory dir = new RAMDirectory();
102 final IndexOutput os = dir.createOutput("foo");
103 os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
// NOTE(review): the embedded line numbers skip one line here; presumably the output is
// closed before the file is reopened -- confirm against the full file.
// ...then reopen the same file for reading.
105 final IndexInput is = dir.openInput("foo");
// NOTE(review): the rest of this method is not visible in this view; presumably it
// passes the input to checkReads -- confirm.
114 * @throws IOException
// Builds a small encoded buffer (variable-length ints, the string "Lucene", then a
// two-character non-ASCII string) and checks reading characters from it with readChars.
// NOTE(review): the method name, the "since we are skipping one" comment and the expected
// values below all imply skip calls that are not visible in this view (the embedded line
// numbers skip at those points) -- confirm against the full file before changing anything.
116 public void testSkipChars() throws IOException {
// Fixed prefix: variable-length ints followed by "Lucene" with its length byte.
117 byte[] bytes = new byte[]{(byte) 0x80, 0x01,
119 (byte) 0x80, (byte) 0x80, 0x01,
120 (byte) 0x81, (byte) 0x80, 0x01,
121 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
// Two non-ASCII characters, encoded as UTF-8 and appended after the fixed prefix.
123 String utf8Str = "\u0634\u1ea1";
124 byte [] utf8Bytes = utf8Str.getBytes("UTF-8");
// Layout of theBytes: [fixed prefix][one count byte][UTF-8 bytes of utf8Str].
125 byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length];
126 System.arraycopy(bytes, 0, theBytes, 0, bytes.length);
127 theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test
128 System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length);
129 IndexInput is = new MockIndexInput(theBytes);
// Consume the leading variable-length ints.
130 assertEquals(128, is.readVInt());
131 assertEquals(16383, is.readVInt());
132 assertEquals(16384, is.readVInt());
133 assertEquals(16385, is.readVInt());
134 int charsToRead = is.readVInt();//number of chars in the Lucene string
135 assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead);
// Expecting "ene" here means the first three characters of "Lucene" must already have
// been consumed; presumably by a skip call on the line missing from this view.
137 char [] chars = new char[3];//there should be 6 chars remaining
138 is.readChars(chars, 0, 3);
139 String tmpStr = new String(chars);
140 assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene" ) == true);
141 //Now read the UTF8 stuff
142 charsToRead = is.readVInt() - 1;//since we are skipping one
// The stored count is utf8Str.length(); one less remains after the (unseen) skip.
144 assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead);
145 chars = new char[charsToRead];
146 is.readChars(chars, 0, charsToRead);
147 tmpStr = new String(chars);
// What remains must equal the original string without its first character.
148 assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true);