1 package org.apache.lucene.util.collections;
3 import java.util.Arrays;
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements. See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * A Set or primitive int. Implemented as a HashMap of int->int. *
25 * @lucene.experimental
27 public class IntHashSet {
29 // TODO (Facet): This is wasteful as the "values" are actually the "keys" and
30 // we could spare this amount of space (capacity * sizeof(int)). Perhaps even
31 // though it is not OOP, we should re-implement the hash for just that cause.
34 * Implements an IntIterator which iterates over all the allocated indexes.
36 private final class IndexIterator implements IntIterator {
38 * The last used baseHashIndex. Needed for "jumping" from one hash entry
41 private int baseHashIndex = 0;
44 * The next not-yet-visited index.
46 private int index = 0;
49 * Index of the last visited pair. Used in {@link #remove()}.
51 private int lastIndex = 0;
54 * Create the Iterator, make <code>index</code> point to the "first"
55 * index which is not empty. If such does not exist (eg. the map is
56 * empty) it would be zero.
58 public IndexIterator() {
59 for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
60 index = baseHash[baseHashIndex];
67 public boolean hasNext() {
72 // Save the last index visited
78 // if the next index points to the 'Ground' it means we're done with
79 // the current hash entry and we need to jump to the next one. This
80 // is done until all the hash entries had been visited.
81 while (index == 0 && ++baseHashIndex < baseHash.length) {
82 index = baseHash[baseHashIndex];
88 public void remove() {
89 IntHashSet.this.remove(keys[lastIndex]);
95 * Implements an IntIterator, used for iteration over the map's keys.
97 private final class KeyIterator implements IntIterator {
98 private IntIterator iterator = new IndexIterator();
102 public boolean hasNext() {
103 return iterator.hasNext();
107 return keys[iterator.next()];
110 public void remove() {
116 * Default capacity - in case no capacity was specified in the constructor
118 private static int defaultCapacity = 16;
121 * Holds the base hash entries. if the capacity is 2^N, than the base hash
122 * holds 2^(N+1). It can hold
127 * The current capacity of the map. Always 2^N and never less than 16. We
128 * never use the zero index. It is needed to improve performance and is also
131 private int capacity;
134 * All objects are being allocated at map creation. Those objects are "free"
135 * or empty. Whenever a new pair comes along, a pair is being "allocated" or
136 * taken from the free-linked list. as this is just a free list.
138 private int firstEmpty;
141 * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
143 private int hashFactor;
146 * This array holds the unique keys
151 * In case of collisions, we implement a double linked list of the colliding
152 * hash's with the following next[] and prev[]. Those are also used to store
160 * Number of currently objects in the map.
165 * Constructs a map with default capacity.
167 public IntHashSet() {
168 this(defaultCapacity);
172 * Constructs a map with given capacity. Capacity is adjusted to a native
173 * power of 2, with minimum of 16.
176 * minimum capacity for the map.
178 public IntHashSet(int capacity) {
180 // Minimum capacity is 16..
181 while (this.capacity < capacity) {
182 // Multiply by 2 as long as we're still under the requested capacity
186 // As mentioned, we use the first index (0) as 'Ground', so we need the
187 // length of the arrays to be one more than the capacity
188 int arrayLength = this.capacity + 1;
190 this.keys = new int[arrayLength];
191 this.next = new int[arrayLength];
193 // Hash entries are twice as big as the capacity.
194 int baseHashSize = this.capacity << 1;
196 this.baseHash = new int[baseHashSize];
198 // The has factor is 2^M - 1 which is used as an "AND" hashing operator.
199 // {@link #calcBaseHash()}
200 this.hashFactor = baseHashSize - 1;
208 * Adds a pair to the map. Takes the first empty position from the
209 * empty-linked-list's head - {@link firstEmpty}.
211 * New pairs are always inserted to baseHash, and are followed by the old
215 * integer which maps the given value
217 * value which is being mapped using the given key
219 private void prvt_add(int key) {
220 // Hash entry to which the new pair would be inserted
221 int hashIndex = calcBaseHashIndex(key);
223 // 'Allocating' a pair from the "Empty" list.
224 int objectIndex = firstEmpty;
227 firstEmpty = next[firstEmpty];
228 keys[objectIndex] = key;
230 // Inserting the new pair as the first node in the specific hash entry
231 next[objectIndex] = baseHash[hashIndex];
232 baseHash[hashIndex] = objectIndex;
234 // Announcing a new pair was added!
239 * Calculating the baseHash index using the internal <code>hashFactor</code>
244 protected int calcBaseHashIndex(int key) {
245 return key & hashFactor;
249 * Empties the map. Generates the "Empty" space list for later allocation.
251 public void clear() {
252 // Clears the hash entries
253 Arrays.fill(this.baseHash, 0);
258 // Mark all array entries as empty. This is done with
259 // <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
260 // used as 'Ground').
263 // And setting all the <code>next[i]</code> to point at
265 for (int i = 1; i < this.capacity;) {
269 // Surly, the last one should point to the 'Ground'.
270 next[this.capacity] = 0;
274 * Checks if a given key exists in the map.
277 * that is checked against the map data.
278 * @return true if the key exists in the map. false otherwise.
280 public boolean contains(int value) {
281 return find(value) != 0;
285 * Find the actual index of a given key.
288 * @return index of the key. zero if the key wasn't found.
290 protected int find(int key) {
291 // Calculate the hash entry.
292 int baseHashIndex = calcBaseHashIndex(key);
294 // Start from the hash entry.
295 int localIndex = baseHash[baseHashIndex];
297 // while the index does not point to the 'Ground'
298 while (localIndex != 0) {
299 // returns the index found in case of of a matching key.
300 if (keys[localIndex] == key) {
304 // next the local index
305 localIndex = next[localIndex];
308 // If we got this far, it could only mean we did not find the key we
309 // were asked for. return 'Ground' index.
314 * Find the actual index of a given key with it's baseHashIndex.<br>
315 * Some methods use the baseHashIndex. If those call {@link #find()} there's
316 * no need to re-calculate that hash.
319 * @param baseHashIndex
320 * @return the index of the given key, or 0 as 'Ground' if the key wasn't
323 private int findForRemove(int key, int baseHashIndex) {
324 // Start from the hash entry.
326 int index = baseHash[baseHashIndex];
328 // while the index does not point to the 'Ground'
330 // returns the index found in case of of a matching key.
331 if (keys[index] == key) {
335 // next the local index
340 // If we got this far, it could only mean we did not find the key we
341 // were asked for. return 'Ground' index.
347 * Grows the map. Allocates a new map of double the capacity, and
348 * fast-insert the old key-value pairs.
350 protected void grow() {
351 IntHashSet that = new IntHashSet(this.capacity * 2);
353 // Iterates fast over the collection. Any valid pair is put into the new
354 // map without checking for duplicates or if there's enough space for
356 for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
357 int index = iterator.next();
358 that.prvt_add(this.keys[index]);
360 // for (int i = capacity; i > 0; --i) {
362 // that._add(this.keys[i]);
366 // Copy that's data into this.
367 this.capacity = that.capacity;
368 this.size = that.size;
369 this.firstEmpty = that.firstEmpty;
370 this.keys = that.keys;
371 this.next = that.next;
372 this.baseHash = that.baseHash;
373 this.hashFactor = that.hashFactor;
378 * @return true if the map is empty. false otherwise.
380 public boolean isEmpty() {
385 * Returns a new iterator for the mapped objects.
387 public IntIterator iterator() {
388 return new KeyIterator();
392 * Prints the baseHash array, used for debug purposes.
394 public void printBaseHash() {
395 for (int i = 0; i < this.baseHash.length; i++) {
396 if (baseHash[i] != 0) {
397 System.out.println(i + ".\t" + baseHash[i]);
403 * Add a mapping int key -> int value.
405 * If the key was already inside just
406 * updating the value it refers to as the given object.
408 * Otherwise if the map is full, first {@link #grow()} the map.
411 * integer which maps the given value
412 * @return true always.
414 public boolean add(int value) {
416 int index = find(value);
423 // Is there enough room for a new pair?
424 if (size == capacity) {
429 // Now that everything is set, the pair can be just put inside with no
437 * Remove a pair from the map, specified by it's key.
440 * specify the value to be removed
442 * @return true if the map was changed (the key was found and removed).
445 public boolean remove(int value) {
446 int baseHashIndex = calcBaseHashIndex(value);
447 int index = findForRemove(value, baseHashIndex);
449 // If it is the first in the collision list, we should promote its
450 // next colliding element.
452 baseHash[baseHashIndex] = next[index];
455 next[prev] = next[index];
456 next[index] = firstEmpty;
466 * @return number of pairs currently in the map
473 * Translates the mapped pairs' values into an array of Objects
475 * @return an object array of all the values currently in the map.
477 public int[] toArray() {
479 int[] array = new int[size];
481 // Iterates over the values, adding them to the array.
482 for (IntIterator iterator = iterator(); iterator.hasNext();) {
483 array[++j] = iterator.next();
489 * Translates the mapped pairs' values into an array of ints
492 * the array into which the elements of the map are to be stored,
493 * if it is big enough; otherwise, a new array of the same
494 * runtime type is allocated for this purpose.
496 * @return an array containing the values stored in the map
499 public int[] toArray(int[] a) {
501 if (a.length < size) {
504 // Iterates over the values, adding them to the array.
505 for (IntIterator iterator = iterator(); j < a.length
506 && iterator.hasNext(); ++j) {
507 a[j] = iterator.next();
513 * I have no idea why would anyone call it - but for debug purposes.<br>
514 * Prints the entire map, including the index, key, object, next and prev.
517 public String toString() {
518 StringBuffer sb = new StringBuffer();
520 IntIterator iterator = iterator();
521 while (iterator.hasNext()) {
522 sb.append(iterator.next());
523 if (iterator.hasNext()) {
529 return sb.toString();
532 public String toHashString() {
533 String string = "\n";
534 StringBuffer sb = new StringBuffer();
536 for (int i = 0; i < this.baseHash.length; i++) {
537 StringBuffer sb2 = new StringBuffer();
538 boolean shouldAppend = false;
539 sb2.append(i + ".\t");
540 for (int index = baseHash[i]; index != 0; index = next[index]) {
541 sb2.append(" -> " + keys[index] + "@" + index);
550 return sb.toString();