--- /dev/null
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.Map.Entry;
+
+import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class TestBytesRefHash extends LuceneTestCase {
+
+ BytesRefHash hash;
+ ByteBlockPool pool;
+
+ /**
+ */
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ pool = newPool();
+ hash = newHash(pool);
+ }
+
+ private ByteBlockPool newPool(){
+ return random.nextBoolean() && pool != null ? pool
+ : new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random.nextInt(25)));
+ }
+
+ private BytesRefHash newHash(ByteBlockPool blockPool) {
+ final int initSize = 2 << 1 + random.nextInt(5);
+ return random.nextBoolean() ? new BytesRefHash(blockPool) : new BytesRefHash(
+ blockPool, initSize, new BytesRefHash.DirectBytesStartArray(initSize));
+ }
+
+ /**
+ * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
+ */
+ @Test
+ public void testSize() {
+ BytesRef ref = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ final int mod = 1+random.nextInt(39);
+ for (int i = 0; i < 797; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ int count = hash.size();
+ int key = hash.add(ref);
+ if (key < 0)
+ assertEquals(hash.size(), count);
+ else
+ assertEquals(hash.size(), count + 1);
+ if(i % mod == 0) {
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+ }
+ }
+ }
+ }
+
+ /**
+ * Test method for
+ * {@link org.apache.lucene.util.BytesRefHash#get(org.apache.lucene.util.BytesRefHash.Entry)}
+ * .
+ */
+ @Test
+ public void testGet() {
+ BytesRef ref = new BytesRef();
+ BytesRef scratch = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ Map<String, Integer> strings = new HashMap<String, Integer>();
+ int uniqueCount = 0;
+ for (int i = 0; i < 797; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ int count = hash.size();
+ int key = hash.add(ref);
+ if (key >= 0) {
+ assertNull(strings.put(str, Integer.valueOf(key)));
+ assertEquals(uniqueCount, key);
+ uniqueCount++;
+ assertEquals(hash.size(), count + 1);
+ } else {
+ assertTrue((-key)-1 < count);
+ assertEquals(hash.size(), count);
+ }
+ }
+ for (Entry<String, Integer> entry : strings.entrySet()) {
+ ref.copy(entry.getKey());
+ assertEquals(ref, hash.get(entry.getValue().intValue(), scratch));
+ }
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+ }
+ }
+
+ /**
+ * Test method for {@link org.apache.lucene.util.BytesRefHash#compact()}.
+ */
+ @Test
+ public void testCompact() {
+ BytesRef ref = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ int numEntries = 0;
+ final int size = 797;
+ BitSet bits = new BitSet(size);
+ for (int i = 0; i < size; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ final int key = hash.add(ref);
+ if (key < 0) {
+ assertTrue(bits.get((-key)-1));
+ } else {
+ assertFalse(bits.get(key));
+ bits.set(key);
+ numEntries++;
+ }
+ }
+ assertEquals(hash.size(), bits.cardinality());
+ assertEquals(numEntries, bits.cardinality());
+ assertEquals(numEntries, hash.size());
+ int[] compact = hash.compact();
+ assertTrue(numEntries < compact.length);
+ for (int i = 0; i < numEntries; i++) {
+ bits.set(compact[i], false);
+ }
+ assertEquals(0, bits.cardinality());
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+ }
+ }
+
+ /**
+ * Test method for
+ * {@link org.apache.lucene.util.BytesRefHash#sort(java.util.Comparator)}.
+ */
+ @Test
+ public void testSort() {
+ BytesRef ref = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ SortedSet<String> strings = new TreeSet<String>();
+ for (int i = 0; i < 797; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ hash.add(ref);
+ strings.add(str);
+ }
+ // We use the UTF-16 comparator here, because we need to be able to
+ // compare to native String.compareTo() [UTF-16]:
+ int[] sort = hash.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
+ assertTrue(strings.size() < sort.length);
+ int i = 0;
+ BytesRef scratch = new BytesRef();
+ for (String string : strings) {
+ ref.copy(string);
+ assertEquals(ref, hash.get(sort[i++], scratch));
+ }
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+
+ }
+ }
+
+ /**
+ * Test method for
+ * {@link org.apache.lucene.util.BytesRefHash#add(org.apache.lucene.util.BytesRef)}
+ * .
+ */
+ @Test
+ public void testAdd() {
+ BytesRef ref = new BytesRef();
+ BytesRef scratch = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ Set<String> strings = new HashSet<String>();
+ int uniqueCount = 0;
+ for (int i = 0; i < 797; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ int count = hash.size();
+ int key = hash.add(ref);
+
+ if (key >=0) {
+ assertTrue(strings.add(str));
+ assertEquals(uniqueCount, key);
+ assertEquals(hash.size(), count + 1);
+ uniqueCount++;
+ } else {
+ assertFalse(strings.add(str));
+ assertTrue((-key)-1 < count);
+ assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
+ assertEquals(count, hash.size());
+ }
+ }
+
+ assertAllIn(strings, hash);
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+ }
+ }
+
+ @Test(expected = MaxBytesLengthExceededException.class)
+ public void testLargeValue() {
+ int[] sizes = new int[] { random.nextInt(5),
+ ByteBlockPool.BYTE_BLOCK_SIZE - 33 + random.nextInt(31),
+ ByteBlockPool.BYTE_BLOCK_SIZE - 1 + random.nextInt(37) };
+ BytesRef ref = new BytesRef();
+ for (int i = 0; i < sizes.length; i++) {
+ ref.bytes = new byte[sizes[i]];
+ ref.offset = 0;
+ ref.length = sizes[i];
+ try {
+ assertEquals(i, hash.add(ref));
+ } catch (MaxBytesLengthExceededException e) {
+ if (i < sizes.length - 1)
+ fail("unexpected exception at size: " + sizes[i]);
+ throw e;
+ }
+ }
+ }
+
+ /**
+ * Test method for
+ * {@link org.apache.lucene.util.BytesRefHash#addByPoolOffset(int)}
+ * .
+ */
+ @Test
+ public void testAddByPoolOffset() {
+ BytesRef ref = new BytesRef();
+ BytesRef scratch = new BytesRef();
+ BytesRefHash offsetHash = newHash(pool);
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ Set<String> strings = new HashSet<String>();
+ int uniqueCount = 0;
+ for (int i = 0; i < 797; i++) {
+ String str;
+ do {
+ str = _TestUtil.randomRealisticUnicodeString(random, 1000);
+ } while (str.length() == 0);
+ ref.copy(str);
+ int count = hash.size();
+ int key = hash.add(ref);
+
+ if (key >= 0) {
+ assertTrue(strings.add(str));
+ assertEquals(uniqueCount, key);
+ assertEquals(hash.size(), count + 1);
+ int offsetKey = offsetHash.addByPoolOffset(hash.byteStart(key));
+ assertEquals(uniqueCount, offsetKey);
+ assertEquals(offsetHash.size(), count + 1);
+ uniqueCount++;
+ } else {
+ assertFalse(strings.add(str));
+ assertTrue((-key)-1 < count);
+ assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
+ assertEquals(count, hash.size());
+ int offsetKey = offsetHash.addByPoolOffset(hash.byteStart((-key)-1));
+ assertTrue((-offsetKey)-1 < count);
+ assertEquals(str, hash.get((-offsetKey)-1, scratch).utf8ToString());
+ assertEquals(count, hash.size());
+ }
+ }
+
+ assertAllIn(strings, hash);
+ for (String string : strings) {
+ ref.copy(string);
+ int key = hash.add(ref);
+ BytesRef bytesRef = offsetHash.get((-key)-1, scratch);
+ assertEquals(ref, bytesRef);
+ }
+
+ hash.clear();
+ assertEquals(0, hash.size());
+ offsetHash.clear();
+ assertEquals(0, offsetHash.size());
+ hash.reinit(); // init for the next round
+ offsetHash.reinit();
+ }
+ }
+
+ private void assertAllIn(Set<String> strings, BytesRefHash hash) {
+ BytesRef ref = new BytesRef();
+ BytesRef scratch = new BytesRef();
+ int count = hash.size();
+ for (String string : strings) {
+ ref.copy(string);
+ int key = hash.add(ref); // add again to check duplicates
+ assertEquals(string, hash.get((-key)-1, scratch).utf8ToString());
+ assertEquals(count, hash.size());
+ assertTrue("key: " + key + " count: " + count + " string: " + string,
+ key < count);
+ }
+ }
+
+
+}