lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/util/collections/IntHashSet.java

   1 package org.apache.lucene.util.collections;
   2
   3 import java.util.Arrays;
   4
   5 /**
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *     http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21
  22 /**
  23  * A Set or primitive int. Implemented as a HashMap of int->int. *
  24  *
  25  * @lucene.experimental
  26  */
  27 public class IntHashSet {
  28
  29   // TODO (Facet): This is wasteful as the "values" are actually the "keys" and
  30   // we could spare this amount of space (capacity * sizeof(int)). Perhaps even
  31   // though it is not OOP, we should re-implement the hash for just that cause.
  32
  33   /**
  34    * Implements an IntIterator which iterates over all the allocated indexes.
  35    */
  36   private final class IndexIterator implements IntIterator {
  37     /**
  38      * The last used baseHashIndex. Needed for "jumping" from one hash entry
  39      * to another.
  40      */
  41     private int baseHashIndex = 0;
  42
  43     /**
  44      * The next not-yet-visited index.
  45      */
  46     private int index = 0;
  47
  48     /**
  49      * Index of the last visited pair. Used in {@link #remove()}.
  50      */
  51     private int lastIndex = 0;
  52
  53     /**
  54      * Create the Iterator, make <code>index</code> point to the "first"
  55      * index which is not empty. If such does not exist (eg. the map is
  56      * empty) it would be zero.
  57      */
  58     public IndexIterator() {
  59       for (baseHashIndex = 0; baseHashIndex < baseHash.length; ++baseHashIndex) {
  60         index = baseHash[baseHashIndex];
  61         if (index != 0) {
  62           break;
  63         }
  64       }
  65     }
  66
  67     public boolean hasNext() {
  68       return (index != 0);
  69     }
  70
  71     public int next() {
  72       // Save the last index visited
  73       lastIndex = index;
  74
  75       // next the index
  76       index = next[index];
  77
  78       // if the next index points to the 'Ground' it means we're done with
  79       // the current hash entry and we need to jump to the next one. This
  80       // is done until all the hash entries had been visited.
  81       while (index == 0 && ++baseHashIndex < baseHash.length) {
  82         index = baseHash[baseHashIndex];
  83       }
  84
  85       return lastIndex;
  86     }
  87
  88     public void remove() {
  89       IntHashSet.this.remove(keys[lastIndex]);
  90     }
  91
  92   }
  93
  94   /**
  95    * Implements an IntIterator, used for iteration over the map's keys.
  96    */
  97   private final class KeyIterator implements IntIterator {
  98     private IntIterator iterator = new IndexIterator();
  99
 100     KeyIterator() { }
 101
 102     public boolean hasNext() {
 103       return iterator.hasNext();
 104     }
 105
 106     public int next() {
 107       return keys[iterator.next()];
 108     }
 109
 110     public void remove() {
 111       iterator.remove();
 112     }
 113   }
 114
 115   /**
 116    * Default capacity - in case no capacity was specified in the constructor
 117    */
 118   private static int defaultCapacity = 16;
 119
 120   /**
 121    * Holds the base hash entries. if the capacity is 2^N, than the base hash
 122    * holds 2^(N+1). It can hold
 123    */
 124   int[] baseHash;
 125
 126   /**
 127    * The current capacity of the map. Always 2^N and never less than 16. We
 128    * never use the zero index. It is needed to improve performance and is also
 129    * used as "ground".
 130    */
 131   private int capacity;
 132
 133   /**
 134    * All objects are being allocated at map creation. Those objects are "free"
 135    * or empty. Whenever a new pair comes along, a pair is being "allocated" or
 136    * taken from the free-linked list. as this is just a free list.
 137    */
 138   private int firstEmpty;
 139
 140   /**
 141    * hashFactor is always (2^(N+1)) - 1. Used for faster hashing.
 142    */
 143   private int hashFactor;
 144
 145   /**
 146    * This array holds the unique keys
 147    */
 148   int[] keys;
 149
 150   /**
 151    * In case of collisions, we implement a double linked list of the colliding
 152    * hash's with the following next[] and prev[]. Those are also used to store
 153    * the "empty" list.
 154    */
 155   int[] next;
 156
 157   private int prev;
 158
 159   /**
 160    * Number of currently objects in the map.
 161    */
 162   private int size;
 163
 164   /**
 165    * Constructs a map with default capacity.
 166    */
 167   public IntHashSet() {
 168     this(defaultCapacity);
 169   }
 170
 171   /**
 172    * Constructs a map with given capacity. Capacity is adjusted to a native
 173    * power of 2, with minimum of 16.
 174    *
 175    * @param capacity
 176    *            minimum capacity for the map.
 177    */
 178   public IntHashSet(int capacity) {
 179     this.capacity = 16;
 180     // Minimum capacity is 16..
 181     while (this.capacity < capacity) {
 182       // Multiply by 2 as long as we're still under the requested capacity
 183       this.capacity <<= 1;
 184     }
 185
 186     // As mentioned, we use the first index (0) as 'Ground', so we need the
 187     // length of the arrays to be one more than the capacity
 188     int arrayLength = this.capacity + 1;
 189
 190     this.keys = new int[arrayLength];
 191     this.next = new int[arrayLength];
 192
 193     // Hash entries are twice as big as the capacity.
 194     int baseHashSize = this.capacity << 1;
 195
 196     this.baseHash = new int[baseHashSize];
 197
 198     // The has factor is 2^M - 1 which is used as an "AND" hashing operator.
 199     // {@link #calcBaseHash()}
 200     this.hashFactor = baseHashSize - 1;
 201
 202     this.size = 0;
 203
 204     clear();
 205   }
 206
 207   /**
 208    * Adds a pair to the map. Takes the first empty position from the
 209    * empty-linked-list's head - {@link firstEmpty}.
 210    *
 211    * New pairs are always inserted to baseHash, and are followed by the old
 212    * colliding pair.
 213    *
 214    * @param key
 215    *            integer which maps the given value
 216    * @param e
 217    *            value which is being mapped using the given key
 218    */
 219   private void prvt_add(int key) {
 220     // Hash entry to which the new pair would be inserted
 221     int hashIndex = calcBaseHashIndex(key);
 222
 223     // 'Allocating' a pair from the "Empty" list.
 224     int objectIndex = firstEmpty;
 225
 226     // Setting data
 227     firstEmpty = next[firstEmpty];
 228     keys[objectIndex] = key;
 229
 230     // Inserting the new pair as the first node in the specific hash entry
 231     next[objectIndex] = baseHash[hashIndex];
 232     baseHash[hashIndex] = objectIndex;
 233
 234     // Announcing a new pair was added!
 235     ++size;
 236   }
 237
 238   /**
 239    * Calculating the baseHash index using the internal <code>hashFactor</code>
 240    * .
 241    *
 242    * @param key
 243    */
 244   protected int calcBaseHashIndex(int key) {
 245     return key & hashFactor;
 246   }
 247
 248   /**
 249    * Empties the map. Generates the "Empty" space list for later allocation.
 250    */
 251   public void clear() {
 252     // Clears the hash entries
 253     Arrays.fill(this.baseHash, 0);
 254
 255     // Set size to zero
 256     size = 0;
 257
 258     // Mark all array entries as empty. This is done with
 259     // <code>firstEmpty</code> pointing to the first valid index (1 as 0 is
 260     // used as 'Ground').
 261     firstEmpty = 1;
 262
 263     // And setting all the <code>next[i]</code> to point at
 264     // <code>i+1</code>.
 265     for (int i = 1; i < this.capacity;) {
 266       next[i] = ++i;
 267     }
 268
 269     // Surly, the last one should point to the 'Ground'.
 270     next[this.capacity] = 0;
 271   }
 272
 273   /**
 274    * Checks if a given key exists in the map.
 275    *
 276    * @param value
 277    *            that is checked against the map data.
 278    * @return true if the key exists in the map. false otherwise.
 279    */
 280   public boolean contains(int value) {
 281     return find(value) != 0;
 282   }
 283
 284   /**
 285    * Find the actual index of a given key.
 286    *
 287    * @param key
 288    * @return index of the key. zero if the key wasn't found.
 289    */
 290   protected int find(int key) {
 291     // Calculate the hash entry.
 292     int baseHashIndex = calcBaseHashIndex(key);
 293
 294     // Start from the hash entry.
 295     int localIndex = baseHash[baseHashIndex];
 296
 297     // while the index does not point to the 'Ground'
 298     while (localIndex != 0) {
 299       // returns the index found in case of of a matching key.
 300       if (keys[localIndex] == key) {
 301         return localIndex;
 302       }
 303
 304       // next the local index
 305       localIndex = next[localIndex];
 306     }
 307
 308     // If we got this far, it could only mean we did not find the key we
 309     // were asked for. return 'Ground' index.
 310     return 0;
 311   }
 312
 313   /**
 314    * Find the actual index of a given key with it's baseHashIndex.<br>
 315    * Some methods use the baseHashIndex. If those call {@link #find()} there's
 316    * no need to re-calculate that hash.
 317    *
 318    * @param key
 319    * @param baseHashIndex
 320    * @return the index of the given key, or 0 as 'Ground' if the key wasn't
 321    *         found.
 322    */
 323   private int findForRemove(int key, int baseHashIndex) {
 324     // Start from the hash entry.
 325     this.prev = 0;
 326     int index = baseHash[baseHashIndex];
 327
 328     // while the index does not point to the 'Ground'
 329     while (index != 0) {
 330       // returns the index found in case of of a matching key.
 331       if (keys[index] == key) {
 332         return index;
 333       }
 334
 335       // next the local index
 336       prev = index;
 337       index = next[index];
 338     }
 339
 340     // If we got this far, it could only mean we did not find the key we
 341     // were asked for. return 'Ground' index.
 342     this.prev = 0;
 343     return 0;
 344   }
 345
 346   /**
 347    * Grows the map. Allocates a new map of double the capacity, and
 348    * fast-insert the old key-value pairs.
 349    */
 350   protected void grow() {
 351     IntHashSet that = new IntHashSet(this.capacity * 2);
 352
 353     // Iterates fast over the collection. Any valid pair is put into the new
 354     // map without checking for duplicates or if there's enough space for
 355     // it.
 356     for (IndexIterator iterator = new IndexIterator(); iterator.hasNext();) {
 357       int index = iterator.next();
 358       that.prvt_add(this.keys[index]);
 359     }
 360     // for (int i = capacity; i > 0; --i) {
 361     //
 362     // that._add(this.keys[i]);
 363     //
 364     // }
 365
 366     // Copy that's data into this.
 367     this.capacity = that.capacity;
 368     this.size = that.size;
 369     this.firstEmpty = that.firstEmpty;
 370     this.keys = that.keys;
 371     this.next = that.next;
 372     this.baseHash = that.baseHash;
 373     this.hashFactor = that.hashFactor;
 374   }
 375
 376   /**
 377    *
 378    * @return true if the map is empty. false otherwise.
 379    */
 380   public boolean isEmpty() {
 381     return size == 0;
 382   }
 383
 384   /**
 385    * Returns a new iterator for the mapped objects.
 386    */
 387   public IntIterator iterator() {
 388     return new KeyIterator();
 389   }
 390
 391   /**
 392    * Prints the baseHash array, used for debug purposes.
 393    */
 394   public void printBaseHash() {
 395     for (int i = 0; i < this.baseHash.length; i++) {
 396       if (baseHash[i] != 0) {
 397         System.out.println(i + ".\t" + baseHash[i]);
 398       }
 399     }
 400   }
 401
 402   /**
 403    * Add a mapping int key -> int value.
 404    * <p>
 405    * If the key was already inside just
 406    * updating the value it refers to as the given object.
 407    * <p>
 408    * Otherwise if the map is full, first {@link #grow()} the map.
 409    *
 410    * @param value
 411    *            integer which maps the given value
 412    * @return true always.
 413    */
 414   public boolean add(int value) {
 415     // Does key exists?
 416     int index = find(value);
 417
 418     // Yes!
 419     if (index != 0) {
 420       return true;
 421     }
 422
 423     // Is there enough room for a new pair?
 424     if (size == capacity) {
 425       // No? Than grow up!
 426       grow();
 427     }
 428
 429     // Now that everything is set, the pair can be just put inside with no
 430     // worries.
 431     prvt_add(value);
 432
 433     return true;
 434   }
 435
 436   /**
 437    * Remove a pair from the map, specified by it's key.
 438    *
 439    * @param value
 440    *            specify the value to be removed
 441    *
 442    * @return true if the map was changed (the key was found and removed).
 443    *         false otherwise.
 444    */
 445   public boolean remove(int value) {
 446     int baseHashIndex = calcBaseHashIndex(value);
 447     int index = findForRemove(value, baseHashIndex);
 448     if (index != 0) {
 449       // If it is the first in the collision list, we should promote its
 450       // next colliding element.
 451       if (prev == 0) {
 452         baseHash[baseHashIndex] = next[index];
 453       }
 454
 455       next[prev] = next[index];
 456       next[index] = firstEmpty;
 457       firstEmpty = index;
 458       --size;
 459       return true;
 460     }
 461
 462     return false;
 463   }
 464
 465   /**
 466    * @return number of pairs currently in the map
 467    */
 468   public int size() {
 469     return this.size;
 470   }
 471
 472   /**
 473    * Translates the mapped pairs' values into an array of Objects
 474    *
 475    * @return an object array of all the values currently in the map.
 476    */
 477   public int[] toArray() {
 478     int j = -1;
 479     int[] array = new int[size];
 480
 481     // Iterates over the values, adding them to the array.
 482     for (IntIterator iterator = iterator(); iterator.hasNext();) {
 483       array[++j] = iterator.next();
 484     }
 485     return array;
 486   }
 487
 488   /**
 489    * Translates the mapped pairs' values into an array of ints
 490    *
 491    * @param a
 492    *            the array into which the elements of the map are to be stored,
 493    *            if it is big enough; otherwise, a new array of the same
 494    *            runtime type is allocated for this purpose.
 495    *
 496    * @return an array containing the values stored in the map
 497    *
 498    */
 499   public int[] toArray(int[] a) {
 500     int j = 0;
 501     if (a.length < size) {
 502       a = new int[size];
 503     }
 504     // Iterates over the values, adding them to the array.
 505     for (IntIterator iterator = iterator(); j < a.length
 506         && iterator.hasNext(); ++j) {
 507       a[j] = iterator.next();
 508     }
 509     return a;
 510   }
 511
 512   /**
 513    * I have no idea why would anyone call it - but for debug purposes.<br>
 514    * Prints the entire map, including the index, key, object, next and prev.
 515    */
 516   @Override
 517   public String toString() {
 518     StringBuffer sb = new StringBuffer();
 519     sb.append('{');
 520     IntIterator iterator = iterator();
 521     while (iterator.hasNext()) {
 522       sb.append(iterator.next());
 523       if (iterator.hasNext()) {
 524         sb.append(',');
 525         sb.append(' ');
 526       }
 527     }
 528     sb.append('}');
 529     return sb.toString();
 530   }
 531
 532   public String toHashString() {
 533     String string = "\n";
 534     StringBuffer sb = new StringBuffer();
 535
 536     for (int i = 0; i < this.baseHash.length; i++) {
 537       StringBuffer sb2 = new StringBuffer();
 538       boolean shouldAppend = false;
 539       sb2.append(i + ".\t");
 540       for (int index = baseHash[i]; index != 0; index = next[index]) {
 541         sb2.append(" -> " + keys[index] + "@" + index);
 542         shouldAppend = true;
 543       }
 544       if (shouldAppend) {
 545         sb.append(sb2);
 546         sb.append(string);
 547       }
 548     }
 549
 550     return sb.toString();
 551   }
 552 }