X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java diff --git a/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java new file mode 100644 index 0000000..b677ff9 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java @@ -0,0 +1,347 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.Map.Entry; + +import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException; +import org.junit.Before; +import org.junit.Test; + +/** + * + */ +public class TestBytesRefHash extends LuceneTestCase { + + BytesRefHash hash; + ByteBlockPool pool; + + /** + */ + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + pool = newPool(); + hash = newHash(pool); + } + + private ByteBlockPool newPool(){ + return random.nextBoolean() && pool != null ? pool + : new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random.nextInt(25))); + } + + private BytesRefHash newHash(ByteBlockPool blockPool) { + final int initSize = 2 << 1 + random.nextInt(5); + return random.nextBoolean() ? new BytesRefHash(blockPool) : new BytesRefHash( + blockPool, initSize, new BytesRefHash.DirectBytesStartArray(initSize)); + } + + /** + * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}. + */ + @Test + public void testSize() { + BytesRef ref = new BytesRef(); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + final int mod = 1+random.nextInt(39); + for (int i = 0; i < 797; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + int count = hash.size(); + int key = hash.add(ref); + if (key < 0) + assertEquals(hash.size(), count); + else + assertEquals(hash.size(), count + 1); + if(i % mod == 0) { + hash.clear(); + assertEquals(0, hash.size()); + hash.reinit(); + } + } + } + } + + /** + * Test method for + * {@link org.apache.lucene.util.BytesRefHash#get(org.apache.lucene.util.BytesRefHash.Entry)} + * . + */ + @Test + public void testGet() { + BytesRef ref = new BytesRef(); + BytesRef scratch = new BytesRef(); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + Map strings = new HashMap(); + int uniqueCount = 0; + for (int i = 0; i < 797; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + int count = hash.size(); + int key = hash.add(ref); + if (key >= 0) { + assertNull(strings.put(str, Integer.valueOf(key))); + assertEquals(uniqueCount, key); + uniqueCount++; + assertEquals(hash.size(), count + 1); + } else { + assertTrue((-key)-1 < count); + assertEquals(hash.size(), count); + } + } + for (Entry entry : strings.entrySet()) { + ref.copy(entry.getKey()); + assertEquals(ref, hash.get(entry.getValue().intValue(), scratch)); + } + hash.clear(); + assertEquals(0, hash.size()); + hash.reinit(); + } + } + + /** + * Test method for {@link org.apache.lucene.util.BytesRefHash#compact()}. + */ + @Test + public void testCompact() { + BytesRef ref = new BytesRef(); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + int numEntries = 0; + final int size = 797; + BitSet bits = new BitSet(size); + for (int i = 0; i < size; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + final int key = hash.add(ref); + if (key < 0) { + assertTrue(bits.get((-key)-1)); + } else { + assertFalse(bits.get(key)); + bits.set(key); + numEntries++; + } + } + assertEquals(hash.size(), bits.cardinality()); + assertEquals(numEntries, bits.cardinality()); + assertEquals(numEntries, hash.size()); + int[] compact = hash.compact(); + assertTrue(numEntries < compact.length); + for (int i = 0; i < numEntries; i++) { + bits.set(compact[i], false); + } + assertEquals(0, bits.cardinality()); + hash.clear(); + assertEquals(0, hash.size()); + hash.reinit(); + } + } + + /** + * Test method for + * {@link org.apache.lucene.util.BytesRefHash#sort(java.util.Comparator)}. + */ + @Test + public void testSort() { + BytesRef ref = new BytesRef(); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + SortedSet strings = new TreeSet(); + for (int i = 0; i < 797; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + hash.add(ref); + strings.add(str); + } + // We use the UTF-16 comparator here, because we need to be able to + // compare to native String.compareTo() [UTF-16]: + int[] sort = hash.sort(BytesRef.getUTF8SortedAsUTF16Comparator()); + assertTrue(strings.size() < sort.length); + int i = 0; + BytesRef scratch = new BytesRef(); + for (String string : strings) { + ref.copy(string); + assertEquals(ref, hash.get(sort[i++], scratch)); + } + hash.clear(); + assertEquals(0, hash.size()); + hash.reinit(); + + } + } + + /** + * Test method for + * {@link org.apache.lucene.util.BytesRefHash#add(org.apache.lucene.util.BytesRef)} + * . + */ + @Test + public void testAdd() { + BytesRef ref = new BytesRef(); + BytesRef scratch = new BytesRef(); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + Set strings = new HashSet(); + int uniqueCount = 0; + for (int i = 0; i < 797; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + int count = hash.size(); + int key = hash.add(ref); + + if (key >=0) { + assertTrue(strings.add(str)); + assertEquals(uniqueCount, key); + assertEquals(hash.size(), count + 1); + uniqueCount++; + } else { + assertFalse(strings.add(str)); + assertTrue((-key)-1 < count); + assertEquals(str, hash.get((-key)-1, scratch).utf8ToString()); + assertEquals(count, hash.size()); + } + } + + assertAllIn(strings, hash); + hash.clear(); + assertEquals(0, hash.size()); + hash.reinit(); + } + } + + @Test(expected = MaxBytesLengthExceededException.class) + public void testLargeValue() { + int[] sizes = new int[] { random.nextInt(5), + ByteBlockPool.BYTE_BLOCK_SIZE - 33 + random.nextInt(31), + ByteBlockPool.BYTE_BLOCK_SIZE - 1 + random.nextInt(37) }; + BytesRef ref = new BytesRef(); + for (int i = 0; i < sizes.length; i++) { + ref.bytes = new byte[sizes[i]]; + ref.offset = 0; + ref.length = sizes[i]; + try { + assertEquals(i, hash.add(ref)); + } catch (MaxBytesLengthExceededException e) { + if (i < sizes.length - 1) + fail("unexpected exception at size: " + sizes[i]); + throw e; + } + } + } + + /** + * Test method for + * {@link org.apache.lucene.util.BytesRefHash#addByPoolOffset(int)} + * . + */ + @Test + public void testAddByPoolOffset() { + BytesRef ref = new BytesRef(); + BytesRef scratch = new BytesRef(); + BytesRefHash offsetHash = newHash(pool); + int num = atLeast(2); + for (int j = 0; j < num; j++) { + Set strings = new HashSet(); + int uniqueCount = 0; + for (int i = 0; i < 797; i++) { + String str; + do { + str = _TestUtil.randomRealisticUnicodeString(random, 1000); + } while (str.length() == 0); + ref.copy(str); + int count = hash.size(); + int key = hash.add(ref); + + if (key >= 0) { + assertTrue(strings.add(str)); + assertEquals(uniqueCount, key); + assertEquals(hash.size(), count + 1); + int offsetKey = offsetHash.addByPoolOffset(hash.byteStart(key)); + assertEquals(uniqueCount, offsetKey); + assertEquals(offsetHash.size(), count + 1); + uniqueCount++; + } else { + assertFalse(strings.add(str)); + assertTrue((-key)-1 < count); + assertEquals(str, hash.get((-key)-1, scratch).utf8ToString()); + assertEquals(count, hash.size()); + int offsetKey = offsetHash.addByPoolOffset(hash.byteStart((-key)-1)); + assertTrue((-offsetKey)-1 < count); + assertEquals(str, hash.get((-offsetKey)-1, scratch).utf8ToString()); + assertEquals(count, hash.size()); + } + } + + assertAllIn(strings, hash); + for (String string : strings) { + ref.copy(string); + int key = hash.add(ref); + BytesRef bytesRef = offsetHash.get((-key)-1, scratch); + assertEquals(ref, bytesRef); + } + + hash.clear(); + assertEquals(0, hash.size()); + offsetHash.clear(); + assertEquals(0, offsetHash.size()); + hash.reinit(); // init for the next round + offsetHash.reinit(); + } + } + + private void assertAllIn(Set strings, BytesRefHash hash) { + BytesRef ref = new BytesRef(); + BytesRef scratch = new BytesRef(); + int count = hash.size(); + for (String string : strings) { + ref.copy(string); + int key = hash.add(ref); // add again to check duplicates + assertEquals(string, hash.get((-key)-1, scratch).utf8ToString()); + assertEquals(count, hash.size()); + assertTrue("key: " + key + " count: " + count + " string: " + string, + key < count); + } + } + + +}