1 package org.apache.lucene.util.fst;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 // Used to dedup states (lookup already-frozen states)
23 final class NodeHash<T> {
28 private final FST<T> fst;
29 private final FST.Arc<T> scratchArc = new FST.Arc<T>();
31 public NodeHash(FST<T> fst) {
37 private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
38 final FST<T>.BytesReader in = fst.getBytesReader(0);
39 fst.readFirstRealArc(address, scratchArc);
40 if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
43 for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
44 final Builder.Arc<T> arc = node.arcs[arcUpto];
45 if (arc.label != scratchArc.label ||
46 !arc.output.equals(scratchArc.output) ||
47 ((Builder.CompiledNode) arc.target).address != scratchArc.target ||
48 !arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
49 arc.isFinal != scratchArc.isFinal()) {
53 if (scratchArc.isLast()) {
54 if (arcUpto == node.numArcs-1) {
60 fst.readNextRealArc(scratchArc, in);
66 // hash code for an unfrozen node. This must be identical
67 // to the un-frozen case (below)!!
68 private int hash(Builder.UnCompiledNode<T> node) {
70 //System.out.println("hash unfrozen");
72 // TODO: maybe if number of arcs is high we can safely subsample?
73 for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
74 final Builder.Arc<T> arc = node.arcs[arcIdx];
75 //System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).address + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
76 h = PRIME * h + arc.label;
77 h = PRIME * h + ((Builder.CompiledNode) arc.target).address;
78 h = PRIME * h + arc.output.hashCode();
79 h = PRIME * h + arc.nextFinalOutput.hashCode();
84 //System.out.println(" ret " + (h&Integer.MAX_VALUE));
85 return h & Integer.MAX_VALUE;
88 // hash code for a frozen node
89 private int hash(int node) throws IOException {
91 final FST<T>.BytesReader in = fst.getBytesReader(0);
92 //System.out.println("hash frozen");
94 fst.readFirstRealArc(node, scratchArc);
96 //System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
97 h = PRIME * h + scratchArc.label;
98 h = PRIME * h + scratchArc.target;
99 h = PRIME * h + scratchArc.output.hashCode();
100 h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
101 if (scratchArc.isFinal()) {
104 if (scratchArc.isLast()) {
107 fst.readNextRealArc(scratchArc, in);
109 //System.out.println(" ret " + (h&Integer.MAX_VALUE));
110 return h & Integer.MAX_VALUE;
113 public int add(Builder.UnCompiledNode<T> node) throws IOException {
114 // System.out.println("hash: add count=" + count + " vs " + table.length);
115 final int h = hash(node);
119 final int v = table[pos];
122 final int address = fst.addNode(node);
123 //System.out.println(" now freeze addr=" + address);
124 assert hash(address) == h : "frozenHash=" + hash(address) + " vs h=" + h;
126 table[pos] = address;
127 if (table.length < 2*count) {
131 } else if (nodesEqual(node, v)) {
132 // same node is already here
137 pos = (pos + (++c)) & mask;
141 // called only by rehash
142 private void addNew(int address) throws IOException {
143 int pos = hash(address) & mask;
146 if (table[pos] == 0) {
147 table[pos] = address;
152 pos = (pos + (++c)) & mask;
156 private void rehash() throws IOException {
157 final int[] oldTable = table;
158 table = new int[2*table.length];
159 mask = table.length-1;
160 for(int idx=0;idx<oldTable.length;idx++) {
161 final int address = oldTable[idx];