+++ /dev/null
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.BitSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-import java.util.Map.Entry;
-
-import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- *
- */
-public class TestBytesRefHash extends LuceneTestCase {
-
- BytesRefHash hash;
- ByteBlockPool pool;
-
- /**
- */
- @Override
- @Before
- public void setUp() throws Exception {
- super.setUp();
- pool = newPool();
- hash = newHash(pool);
- }
-
- private ByteBlockPool newPool(){
- return random.nextBoolean() && pool != null ? pool
- : new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random.nextInt(25)));
- }
-
- private BytesRefHash newHash(ByteBlockPool blockPool) {
- final int initSize = 2 << 1 + random.nextInt(5);
- return random.nextBoolean() ? new BytesRefHash(blockPool) : new BytesRefHash(
- blockPool, initSize, new BytesRefHash.DirectBytesStartArray(initSize));
- }
-
- /**
- * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
- */
- @Test
- public void testSize() {
- BytesRef ref = new BytesRef();
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- final int mod = 1+random.nextInt(39);
- for (int i = 0; i < 797; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- int count = hash.size();
- int key = hash.add(ref);
- if (key < 0)
- assertEquals(hash.size(), count);
- else
- assertEquals(hash.size(), count + 1);
- if(i % mod == 0) {
- hash.clear();
- assertEquals(0, hash.size());
- hash.reinit();
- }
- }
- }
- }
-
- /**
- * Test method for
- * {@link org.apache.lucene.util.BytesRefHash#get(org.apache.lucene.util.BytesRefHash.Entry)}
- * .
- */
- @Test
- public void testGet() {
- BytesRef ref = new BytesRef();
- BytesRef scratch = new BytesRef();
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- Map<String, Integer> strings = new HashMap<String, Integer>();
- int uniqueCount = 0;
- for (int i = 0; i < 797; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- int count = hash.size();
- int key = hash.add(ref);
- if (key >= 0) {
- assertNull(strings.put(str, Integer.valueOf(key)));
- assertEquals(uniqueCount, key);
- uniqueCount++;
- assertEquals(hash.size(), count + 1);
- } else {
- assertTrue((-key)-1 < count);
- assertEquals(hash.size(), count);
- }
- }
- for (Entry<String, Integer> entry : strings.entrySet()) {
- ref.copy(entry.getKey());
- assertEquals(ref, hash.get(entry.getValue().intValue(), scratch));
- }
- hash.clear();
- assertEquals(0, hash.size());
- hash.reinit();
- }
- }
-
- /**
- * Test method for {@link org.apache.lucene.util.BytesRefHash#compact()}.
- */
- @Test
- public void testCompact() {
- BytesRef ref = new BytesRef();
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- int numEntries = 0;
- final int size = 797;
- BitSet bits = new BitSet(size);
- for (int i = 0; i < size; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- final int key = hash.add(ref);
- if (key < 0) {
- assertTrue(bits.get((-key)-1));
- } else {
- assertFalse(bits.get(key));
- bits.set(key);
- numEntries++;
- }
- }
- assertEquals(hash.size(), bits.cardinality());
- assertEquals(numEntries, bits.cardinality());
- assertEquals(numEntries, hash.size());
- int[] compact = hash.compact();
- assertTrue(numEntries < compact.length);
- for (int i = 0; i < numEntries; i++) {
- bits.set(compact[i], false);
- }
- assertEquals(0, bits.cardinality());
- hash.clear();
- assertEquals(0, hash.size());
- hash.reinit();
- }
- }
-
- /**
- * Test method for
- * {@link org.apache.lucene.util.BytesRefHash#sort(java.util.Comparator)}.
- */
- @Test
- public void testSort() {
- BytesRef ref = new BytesRef();
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- SortedSet<String> strings = new TreeSet<String>();
- for (int i = 0; i < 797; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- hash.add(ref);
- strings.add(str);
- }
- // We use the UTF-16 comparator here, because we need to be able to
- // compare to native String.compareTo() [UTF-16]:
- int[] sort = hash.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
- assertTrue(strings.size() < sort.length);
- int i = 0;
- BytesRef scratch = new BytesRef();
- for (String string : strings) {
- ref.copy(string);
- assertEquals(ref, hash.get(sort[i++], scratch));
- }
- hash.clear();
- assertEquals(0, hash.size());
- hash.reinit();
-
- }
- }
-
- /**
- * Test method for
- * {@link org.apache.lucene.util.BytesRefHash#add(org.apache.lucene.util.BytesRef)}
- * .
- */
- @Test
- public void testAdd() {
- BytesRef ref = new BytesRef();
- BytesRef scratch = new BytesRef();
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- Set<String> strings = new HashSet<String>();
- int uniqueCount = 0;
- for (int i = 0; i < 797; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- int count = hash.size();
- int key = hash.add(ref);
-
- if (key >=0) {
- assertTrue(strings.add(str));
- assertEquals(uniqueCount, key);
- assertEquals(hash.size(), count + 1);
- uniqueCount++;
- } else {
- assertFalse(strings.add(str));
- assertTrue((-key)-1 < count);
- assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
- assertEquals(count, hash.size());
- }
- }
-
- assertAllIn(strings, hash);
- hash.clear();
- assertEquals(0, hash.size());
- hash.reinit();
- }
- }
-
- @Test(expected = MaxBytesLengthExceededException.class)
- public void testLargeValue() {
- int[] sizes = new int[] { random.nextInt(5),
- ByteBlockPool.BYTE_BLOCK_SIZE - 33 + random.nextInt(31),
- ByteBlockPool.BYTE_BLOCK_SIZE - 1 + random.nextInt(37) };
- BytesRef ref = new BytesRef();
- for (int i = 0; i < sizes.length; i++) {
- ref.bytes = new byte[sizes[i]];
- ref.offset = 0;
- ref.length = sizes[i];
- try {
- assertEquals(i, hash.add(ref));
- } catch (MaxBytesLengthExceededException e) {
- if (i < sizes.length - 1)
- fail("unexpected exception at size: " + sizes[i]);
- throw e;
- }
- }
- }
-
- /**
- * Test method for
- * {@link org.apache.lucene.util.BytesRefHash#addByPoolOffset(int)}
- * .
- */
- @Test
- public void testAddByPoolOffset() {
- BytesRef ref = new BytesRef();
- BytesRef scratch = new BytesRef();
- BytesRefHash offsetHash = newHash(pool);
- int num = atLeast(2);
- for (int j = 0; j < num; j++) {
- Set<String> strings = new HashSet<String>();
- int uniqueCount = 0;
- for (int i = 0; i < 797; i++) {
- String str;
- do {
- str = _TestUtil.randomRealisticUnicodeString(random, 1000);
- } while (str.length() == 0);
- ref.copy(str);
- int count = hash.size();
- int key = hash.add(ref);
-
- if (key >= 0) {
- assertTrue(strings.add(str));
- assertEquals(uniqueCount, key);
- assertEquals(hash.size(), count + 1);
- int offsetKey = offsetHash.addByPoolOffset(hash.byteStart(key));
- assertEquals(uniqueCount, offsetKey);
- assertEquals(offsetHash.size(), count + 1);
- uniqueCount++;
- } else {
- assertFalse(strings.add(str));
- assertTrue((-key)-1 < count);
- assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
- assertEquals(count, hash.size());
- int offsetKey = offsetHash.addByPoolOffset(hash.byteStart((-key)-1));
- assertTrue((-offsetKey)-1 < count);
- assertEquals(str, hash.get((-offsetKey)-1, scratch).utf8ToString());
- assertEquals(count, hash.size());
- }
- }
-
- assertAllIn(strings, hash);
- for (String string : strings) {
- ref.copy(string);
- int key = hash.add(ref);
- BytesRef bytesRef = offsetHash.get((-key)-1, scratch);
- assertEquals(ref, bytesRef);
- }
-
- hash.clear();
- assertEquals(0, hash.size());
- offsetHash.clear();
- assertEquals(0, offsetHash.size());
- hash.reinit(); // init for the next round
- offsetHash.reinit();
- }
- }
-
- private void assertAllIn(Set<String> strings, BytesRefHash hash) {
- BytesRef ref = new BytesRef();
- BytesRef scratch = new BytesRef();
- int count = hash.size();
- for (String string : strings) {
- ref.copy(string);
- int key = hash.add(ref); // add again to check duplicates
- assertEquals(string, hash.get((-key)-1, scratch).utf8ToString());
- assertEquals(count, hash.size());
- assertTrue("key: " + key + " count: " + count + " string: " + string,
- key < count);
- }
- }
-
-
-}