1 package org.apache.lucene.util;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.util.BitSet;
21 import java.util.HashMap;
22 import java.util.HashSet;
25 import java.util.SortedSet;
26 import java.util.TreeSet;
27 import java.util.Map.Entry;
29 import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
30 import org.junit.Before;
31 import org.junit.Test;
36 public class TestBytesRefHash extends LuceneTestCase {
45 public void setUp() throws Exception {
51 private ByteBlockPool newPool(){
52 return random.nextBoolean() && pool != null ? pool
53 : new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random.nextInt(25)));
56 private BytesRefHash newHash(ByteBlockPool blockPool) {
57 final int initSize = 2 << 1 + random.nextInt(5);
58 return random.nextBoolean() ? new BytesRefHash(blockPool) : new BytesRefHash(
59 blockPool, initSize, new BytesRefHash.DirectBytesStartArray(initSize));
63 * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
66 public void testSize() {
67 BytesRef ref = new BytesRef();
69 for (int j = 0; j < num; j++) {
70 final int mod = 1+random.nextInt(39);
71 for (int i = 0; i < 797; i++) {
74 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
75 } while (str.length() == 0);
77 int count = hash.size();
78 int key = hash.add(ref);
80 assertEquals(hash.size(), count);
82 assertEquals(hash.size(), count + 1);
85 assertEquals(0, hash.size());
94 * {@link org.apache.lucene.util.BytesRefHash#get(org.apache.lucene.util.BytesRefHash.Entry)}
98 public void testGet() {
99 BytesRef ref = new BytesRef();
100 BytesRef scratch = new BytesRef();
101 int num = atLeast(2);
102 for (int j = 0; j < num; j++) {
103 Map<String, Integer> strings = new HashMap<String, Integer>();
105 for (int i = 0; i < 797; i++) {
108 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
109 } while (str.length() == 0);
111 int count = hash.size();
112 int key = hash.add(ref);
114 assertNull(strings.put(str, Integer.valueOf(key)));
115 assertEquals(uniqueCount, key);
117 assertEquals(hash.size(), count + 1);
119 assertTrue((-key)-1 < count);
120 assertEquals(hash.size(), count);
123 for (Entry<String, Integer> entry : strings.entrySet()) {
124 ref.copy(entry.getKey());
125 assertEquals(ref, hash.get(entry.getValue().intValue(), scratch));
128 assertEquals(0, hash.size());
134 * Test method for {@link org.apache.lucene.util.BytesRefHash#compact()}.
137 public void testCompact() {
138 BytesRef ref = new BytesRef();
139 int num = atLeast(2);
140 for (int j = 0; j < num; j++) {
142 final int size = 797;
143 BitSet bits = new BitSet(size);
144 for (int i = 0; i < size; i++) {
147 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
148 } while (str.length() == 0);
150 final int key = hash.add(ref);
152 assertTrue(bits.get((-key)-1));
154 assertFalse(bits.get(key));
159 assertEquals(hash.size(), bits.cardinality());
160 assertEquals(numEntries, bits.cardinality());
161 assertEquals(numEntries, hash.size());
162 int[] compact = hash.compact();
163 assertTrue(numEntries < compact.length);
164 for (int i = 0; i < numEntries; i++) {
165 bits.set(compact[i], false);
167 assertEquals(0, bits.cardinality());
169 assertEquals(0, hash.size());
176 * {@link org.apache.lucene.util.BytesRefHash#sort(java.util.Comparator)}.
179 public void testSort() {
180 BytesRef ref = new BytesRef();
181 int num = atLeast(2);
182 for (int j = 0; j < num; j++) {
183 SortedSet<String> strings = new TreeSet<String>();
184 for (int i = 0; i < 797; i++) {
187 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
188 } while (str.length() == 0);
193 // We use the UTF-16 comparator here, because we need to be able to
194 // compare to native String.compareTo() [UTF-16]:
195 int[] sort = hash.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
196 assertTrue(strings.size() < sort.length);
198 BytesRef scratch = new BytesRef();
199 for (String string : strings) {
201 assertEquals(ref, hash.get(sort[i++], scratch));
204 assertEquals(0, hash.size());
212 * {@link org.apache.lucene.util.BytesRefHash#add(org.apache.lucene.util.BytesRef)}
216 public void testAdd() {
217 BytesRef ref = new BytesRef();
218 BytesRef scratch = new BytesRef();
219 int num = atLeast(2);
220 for (int j = 0; j < num; j++) {
221 Set<String> strings = new HashSet<String>();
223 for (int i = 0; i < 797; i++) {
226 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
227 } while (str.length() == 0);
229 int count = hash.size();
230 int key = hash.add(ref);
233 assertTrue(strings.add(str));
234 assertEquals(uniqueCount, key);
235 assertEquals(hash.size(), count + 1);
238 assertFalse(strings.add(str));
239 assertTrue((-key)-1 < count);
240 assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
241 assertEquals(count, hash.size());
245 assertAllIn(strings, hash);
247 assertEquals(0, hash.size());
252 @Test(expected = MaxBytesLengthExceededException.class)
253 public void testLargeValue() {
254 int[] sizes = new int[] { random.nextInt(5),
255 ByteBlockPool.BYTE_BLOCK_SIZE - 33 + random.nextInt(31),
256 ByteBlockPool.BYTE_BLOCK_SIZE - 1 + random.nextInt(37) };
257 BytesRef ref = new BytesRef();
258 for (int i = 0; i < sizes.length; i++) {
259 ref.bytes = new byte[sizes[i]];
261 ref.length = sizes[i];
263 assertEquals(i, hash.add(ref));
264 } catch (MaxBytesLengthExceededException e) {
265 if (i < sizes.length - 1)
266 fail("unexpected exception at size: " + sizes[i]);
274 * {@link org.apache.lucene.util.BytesRefHash#addByPoolOffset(int)}
278 public void testAddByPoolOffset() {
279 BytesRef ref = new BytesRef();
280 BytesRef scratch = new BytesRef();
281 BytesRefHash offsetHash = newHash(pool);
282 int num = atLeast(2);
283 for (int j = 0; j < num; j++) {
284 Set<String> strings = new HashSet<String>();
286 for (int i = 0; i < 797; i++) {
289 str = _TestUtil.randomRealisticUnicodeString(random, 1000);
290 } while (str.length() == 0);
292 int count = hash.size();
293 int key = hash.add(ref);
296 assertTrue(strings.add(str));
297 assertEquals(uniqueCount, key);
298 assertEquals(hash.size(), count + 1);
299 int offsetKey = offsetHash.addByPoolOffset(hash.byteStart(key));
300 assertEquals(uniqueCount, offsetKey);
301 assertEquals(offsetHash.size(), count + 1);
304 assertFalse(strings.add(str));
305 assertTrue((-key)-1 < count);
306 assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
307 assertEquals(count, hash.size());
308 int offsetKey = offsetHash.addByPoolOffset(hash.byteStart((-key)-1));
309 assertTrue((-offsetKey)-1 < count);
310 assertEquals(str, hash.get((-offsetKey)-1, scratch).utf8ToString());
311 assertEquals(count, hash.size());
315 assertAllIn(strings, hash);
316 for (String string : strings) {
318 int key = hash.add(ref);
319 BytesRef bytesRef = offsetHash.get((-key)-1, scratch);
320 assertEquals(ref, bytesRef);
324 assertEquals(0, hash.size());
326 assertEquals(0, offsetHash.size());
327 hash.reinit(); // init for the next round
332 private void assertAllIn(Set<String> strings, BytesRefHash hash) {
333 BytesRef ref = new BytesRef();
334 BytesRef scratch = new BytesRef();
335 int count = hash.size();
336 for (String string : strings) {
338 int key = hash.add(ref); // add again to check duplicates
339 assertEquals(string, hash.get((-key)-1, scratch).utf8ToString());
340 assertEquals(count, hash.size());
341 assertTrue("key: " + key + " count: " + count + " string: " + string,