--- /dev/null
+package org.apache.lucene.util.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+// Used to dedup states (lookup already-frozen states)
+final class NodeHash<T> {
+
+ private int[] table;
+ private int count;
+ private int mask;
+ private final FST<T> fst;
+ private final FST.Arc<T> scratchArc = new FST.Arc<T>();
+
+ public NodeHash(FST<T> fst) {
+ table = new int[16];
+ mask = 15;
+ this.fst = fst;
+ }
+
+ private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
+ fst.readFirstRealArc(address, scratchArc);
+ if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
+ return false;
+ }
+ for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
+ final Builder.Arc<T> arc = node.arcs[arcUpto];
+ if (arc.label != scratchArc.label ||
+ !arc.output.equals(scratchArc.output) ||
+ ((Builder.CompiledNode) arc.target).address != scratchArc.target ||
+ !arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
+ arc.isFinal != scratchArc.isFinal()) {
+ return false;
+ }
+
+ if (scratchArc.isLast()) {
+ if (arcUpto == node.numArcs-1) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ fst.readNextRealArc(scratchArc, in);
+ }
+
+ return false;
+ }
+
+ // hash code for an unfrozen node. This must be identical
+ // to the un-frozen case (below)!!
+ private int hash(Builder.UnCompiledNode<T> node) {
+ final int PRIME = 31;
+ //System.out.println("hash unfrozen");
+ int h = 0;
+ // TODO: maybe if number of arcs is high we can safely subsample?
+ for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+ final Builder.Arc<T> arc = node.arcs[arcIdx];
+ //System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).address + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
+ h = PRIME * h + arc.label;
+ h = PRIME * h + ((Builder.CompiledNode) arc.target).address;
+ h = PRIME * h + arc.output.hashCode();
+ h = PRIME * h + arc.nextFinalOutput.hashCode();
+ if (arc.isFinal) {
+ h += 17;
+ }
+ }
+ //System.out.println(" ret " + (h&Integer.MAX_VALUE));
+ return h & Integer.MAX_VALUE;
+ }
+
+ // hash code for a frozen node
+ private int hash(int node) throws IOException {
+ final int PRIME = 31;
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
+ //System.out.println("hash frozen");
+ int h = 0;
+ fst.readFirstRealArc(node, scratchArc);
+ while(true) {
+ //System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
+ h = PRIME * h + scratchArc.label;
+ h = PRIME * h + scratchArc.target;
+ h = PRIME * h + scratchArc.output.hashCode();
+ h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
+ if (scratchArc.isFinal()) {
+ h += 17;
+ }
+ if (scratchArc.isLast()) {
+ break;
+ }
+ fst.readNextRealArc(scratchArc, in);
+ }
+ //System.out.println(" ret " + (h&Integer.MAX_VALUE));
+ return h & Integer.MAX_VALUE;
+ }
+
+ public int add(Builder.UnCompiledNode<T> node) throws IOException {
+ // System.out.println("hash: add count=" + count + " vs " + table.length);
+ final int h = hash(node);
+ int pos = h & mask;
+ int c = 0;
+ while(true) {
+ final int v = table[pos];
+ if (v == 0) {
+ // freeze & add
+ final int address = fst.addNode(node);
+ //System.out.println(" now freeze addr=" + address);
+ assert hash(address) == h : "frozenHash=" + hash(address) + " vs h=" + h;
+ count++;
+ table[pos] = address;
+ if (table.length < 2*count) {
+ rehash();
+ }
+ return address;
+ } else if (nodesEqual(node, v)) {
+ // same node is already here
+ return v;
+ }
+
+ // quadratic probe
+ pos = (pos + (++c)) & mask;
+ }
+ }
+
+ // called only by rehash
+ private void addNew(int address) throws IOException {
+ int pos = hash(address) & mask;
+ int c = 0;
+ while(true) {
+ if (table[pos] == 0) {
+ table[pos] = address;
+ break;
+ }
+
+ // quadratic probe
+ pos = (pos + (++c)) & mask;
+ }
+ }
+
+ private void rehash() throws IOException {
+ final int[] oldTable = table;
+ table = new int[2*table.length];
+ mask = table.length-1;
+ for(int idx=0;idx<oldTable.length;idx++) {
+ final int address = oldTable[idx];
+ if (address != 0) {
+ addNew(address);
+ }
+ }
+ }
+
+ public int count() {
+ return count;
+ }
+}