--- /dev/null
+package org.apache.lucene.util.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+
+/** Can next() and advance() through the terms in an FST
+ *
+ * @lucene.experimental
+*/
+
+abstract class FSTEnum<T> {
+ protected final FST<T> fst;
+
+ @SuppressWarnings("unchecked") protected FST.Arc<T>[] arcs = new FST.Arc[10];
+ // outputs are cumulative
+ @SuppressWarnings("unchecked") protected T[] output = (T[]) new Object[10];
+
+ protected final T NO_OUTPUT;
+ protected final FST.Arc<T> scratchArc = new FST.Arc<T>();
+
+ protected int upto;
+ protected int targetLength;
+
+ /** doFloor controls the behavior of advance: if it's true
+ * doFloor is true, advance positions to the biggest
+ * term before target. */
+ protected FSTEnum(FST<T> fst) {
+ this.fst = fst;
+ NO_OUTPUT = fst.outputs.getNoOutput();
+ fst.getFirstArc(getArc(0));
+ output[0] = NO_OUTPUT;
+ }
+
+ protected abstract int getTargetLabel();
+ protected abstract int getCurrentLabel();
+
+ protected abstract void setCurrentLabel(int label);
+ protected abstract void grow();
+
+ /** Rewinds enum state to match the shared prefix between
+ * current term and target term */
+ protected final void rewindPrefix() throws IOException {
+ if (upto == 0) {
+ //System.out.println(" init");
+ upto = 1;
+ fst.readFirstTargetArc(getArc(0), getArc(1));
+ return;
+ }
+ //System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength);
+
+ final int currentLimit = upto;
+ upto = 1;
+ while (upto < currentLimit && upto <= targetLength+1) {
+ final int cmp = getCurrentLabel() - getTargetLabel();
+ if (cmp < 0) {
+ // seek forward
+ break;
+ } else if (cmp > 0) {
+ // seek backwards -- reset this arc to the first arc
+ final FST.Arc<T> arc = getArc(upto);
+ fst.readFirstTargetArc(getArc(upto-1), arc);
+ //System.out.println(" seek first arc");
+ break;
+ }
+ upto++;
+ }
+ }
+
+ protected void doNext() throws IOException {
+ //System.out.println("FE: next upto=" + upto);
+ if (upto == 0) {
+ //System.out.println(" init");
+ upto = 1;
+ fst.readFirstTargetArc(getArc(0), getArc(1));
+ } else {
+ // pop
+ //System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast());
+ while (arcs[upto].isLast()) {
+ upto--;
+ if (upto == 0) {
+ //System.out.println(" eof");
+ return;
+ }
+ }
+ fst.readNextArc(arcs[upto]);
+ }
+
+ pushFirst();
+ }
+
+ // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
+ // SEEK_END)? saves the eq check above?
+
+ /** Seeks to smallest term that's >= target. */
+ protected void doSeekCeil() throws IOException {
+
+ //System.out.println(" advance len=" + target.length + " curlen=" + current.length);
+
+ // TODO: possibly caller could/should provide common
+ // prefix length? ie this work may be redundant if
+ // caller is in fact intersecting against its own
+ // automaton
+
+ //System.out.println("FE.seekCeil upto=" + upto);
+
+ // Save time by starting at the end of the shared prefix
+ // b/w our current term & the target:
+ rewindPrefix();
+ //System.out.println(" after rewind upto=" + upto);
+
+ FST.Arc<T> arc = getArc(upto);
+ int targetLabel = getTargetLabel();
+ //System.out.println(" init targetLabel=" + targetLabel);
+
+ // Now scan forward, matching the new suffix of the target
+ while(true) {
+
+ //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);
+
+ if (arc.bytesPerArc != 0 && arc.label != -1) {
+
+ // Arcs are fixed array -- use binary search to find
+ // the target.
+
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
+ int low = arc.arcIdx;
+ int high = arc.numArcs-1;
+ int mid = 0;
+ //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
+ boolean found = false;
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
+ final int midLabel = fst.readLabel(in);
+ final int cmp = midLabel - targetLabel;
+ //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else {
+ found = true;
+ break;
+ }
+ }
+
+ // NOTE: this code is dup'd w/ the code below (in
+ // the outer else clause):
+ if (found) {
+ // Match
+ arc.arcIdx = mid-1;
+ fst.readNextRealArc(arc, in);
+ assert arc.arcIdx == mid;
+ assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (targetLabel == FST.END_LABEL) {
+ return;
+ }
+ setCurrentLabel(arc.label);
+ incr();
+ arc = fst.readFirstTargetArc(arc, getArc(upto));
+ targetLabel = getTargetLabel();
+ continue;
+ } else if (low == arc.numArcs) {
+ // Dead end
+ arc.arcIdx = arc.numArcs-2;
+ fst.readNextRealArc(arc, in);
+ assert arc.isLast();
+ // Dead end (target is after the last arc);
+ // rollback to last fork then push
+ upto--;
+ while(true) {
+ if (upto == 0) {
+ return;
+ }
+ final FST.Arc<T> prevArc = getArc(upto);
+ //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
+ if (!prevArc.isLast()) {
+ fst.readNextArc(prevArc);
+ pushFirst();
+ return;
+ }
+ upto--;
+ }
+ } else {
+ arc.arcIdx = (low > high ? low : high)-1;
+ fst.readNextRealArc(arc, in);
+ assert arc.label > targetLabel;
+ pushFirst();
+ return;
+ }
+ } else {
+ // Arcs are not array'd -- must do linear scan:
+ if (arc.label == targetLabel) {
+ // recurse
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (targetLabel == FST.END_LABEL) {
+ return;
+ }
+ setCurrentLabel(arc.label);
+ incr();
+ arc = fst.readFirstTargetArc(arc, getArc(upto));
+ targetLabel = getTargetLabel();
+ } else if (arc.label > targetLabel) {
+ pushFirst();
+ return;
+ } else if (arc.isLast()) {
+ // Dead end (target is after the last arc);
+ // rollback to last fork then push
+ upto--;
+ while(true) {
+ if (upto == 0) {
+ return;
+ }
+ final FST.Arc<T> prevArc = getArc(upto);
+ //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
+ if (!prevArc.isLast()) {
+ fst.readNextArc(prevArc);
+ pushFirst();
+ return;
+ }
+ upto--;
+ }
+ } else {
+ // keep scanning
+ //System.out.println(" next scan");
+ fst.readNextArc(arc);
+ }
+ }
+ }
+ }
+
+ // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
+ // SEEK_END)? saves the eq check above?
+ /** Seeks to largest term that's <= target. */
+ protected void doSeekFloor() throws IOException {
+
+ // TODO: possibly caller could/should provide common
+ // prefix length? ie this work may be redundant if
+ // caller is in fact intersecting against its own
+ // automaton
+ //System.out.println("FE: seek floor upto=" + upto);
+
+ // Save CPU by starting at the end of the shared prefix
+ // b/w our current term & the target:
+ rewindPrefix();
+
+ //System.out.println("FE: after rewind upto=" + upto);
+
+ FST.Arc<T> arc = getArc(upto);
+ int targetLabel = getTargetLabel();
+
+ //System.out.println("FE: init targetLabel=" + targetLabel);
+
+ // Now scan forward, matching the new suffix of the target
+ while(true) {
+ //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
+
+ if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
+ // Arcs are fixed array -- use binary search to find
+ // the target.
+
+ final FST<T>.BytesReader in = fst.getBytesReader(0);
+ int low = arc.arcIdx;
+ int high = arc.numArcs-1;
+ int mid = 0;
+ //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
+ boolean found = false;
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
+ final int midLabel = fst.readLabel(in);
+ final int cmp = midLabel - targetLabel;
+ //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else {
+ found = true;
+ break;
+ }
+ }
+
+ // NOTE: this code is dup'd w/ the code below (in
+ // the outer else clause):
+ if (found) {
+ // Match -- recurse
+ //System.out.println(" match! arcIdx=" + mid);
+ arc.arcIdx = mid-1;
+ fst.readNextRealArc(arc, in);
+ assert arc.arcIdx == mid;
+ assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (targetLabel == FST.END_LABEL) {
+ return;
+ }
+ setCurrentLabel(arc.label);
+ incr();
+ arc = fst.readFirstTargetArc(arc, getArc(upto));
+ targetLabel = getTargetLabel();
+ continue;
+ } else if (high == -1) {
+ //System.out.println(" before first");
+ // Very first arc is after our target
+ // TODO: if each arc could somehow read the arc just
+ // before, we can save this re-scan. The ceil case
+ // doesn't need this because it reads the next arc
+ // instead:
+ while(true) {
+ // First, walk backwards until we find a first arc
+ // that's before our target label:
+ fst.readFirstTargetArc(getArc(upto-1), arc);
+ if (arc.label < targetLabel) {
+ // Then, scan forwards to the arc just before
+ // the targetLabel:
+ while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
+ fst.readNextArc(arc);
+ }
+ pushLast();
+ return;
+ }
+ upto--;
+ if (upto == 0) {
+ return;
+ }
+ targetLabel = getTargetLabel();
+ arc = getArc(upto);
+ }
+ } else {
+ // There is a floor arc:
+ arc.arcIdx = (low > high ? high : low)-1;
+ //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
+ fst.readNextRealArc(arc, in);
+ assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
+ assert arc.label < targetLabel;
+ pushLast();
+ return;
+ }
+ } else {
+
+ if (arc.label == targetLabel) {
+ // Match -- recurse
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (targetLabel == FST.END_LABEL) {
+ return;
+ }
+ setCurrentLabel(arc.label);
+ incr();
+ arc = fst.readFirstTargetArc(arc, getArc(upto));
+ targetLabel = getTargetLabel();
+ } else if (arc.label > targetLabel) {
+ // TODO: if each arc could somehow read the arc just
+ // before, we can save this re-scan. The ceil case
+ // doesn't need this because it reads the next arc
+ // instead:
+ while(true) {
+ // First, walk backwards until we find a first arc
+ // that's before our target label:
+ fst.readFirstTargetArc(getArc(upto-1), arc);
+ if (arc.label < targetLabel) {
+ // Then, scan forwards to the arc just before
+ // the targetLabel:
+ while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
+ fst.readNextArc(arc);
+ }
+ pushLast();
+ return;
+ }
+ upto--;
+ if (upto == 0) {
+ return;
+ }
+ targetLabel = getTargetLabel();
+ arc = getArc(upto);
+ }
+ } else if (!arc.isLast()) {
+ //System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
+ if (fst.readNextArcLabel(arc) > targetLabel) {
+ pushLast();
+ return;
+ } else {
+ // keep scanning
+ fst.readNextArc(arc);
+ }
+ } else {
+ pushLast();
+ return;
+ }
+ }
+ }
+ }
+
+ private void incr() {
+ upto++;
+ grow();
+ if (arcs.length <= upto) {
+ @SuppressWarnings("unchecked") final FST.Arc<T>[] newArcs =
+ new FST.Arc[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(arcs, 0, newArcs, 0, arcs.length);
+ arcs = newArcs;
+ }
+ if (output.length <= upto) {
+ @SuppressWarnings("unchecked") final T[] newOutput =
+ (T[]) new Object[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(output, 0, newOutput, 0, output.length);
+ output = newOutput;
+ }
+ }
+
+ // Appends current arc, and then recurses from its target,
+ // appending first arc all the way to the final node
+ private void pushFirst() throws IOException {
+
+ FST.Arc<T> arc = arcs[upto];
+ assert arc != null;
+
+ while (true) {
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (arc.label == FST.END_LABEL) {
+ // Final node
+ break;
+ }
+ //System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto]));
+ setCurrentLabel(arc.label);
+ incr();
+
+ final FST.Arc<T> nextArc = getArc(upto);
+ fst.readFirstTargetArc(arc, nextArc);
+ arc = nextArc;
+ }
+ }
+
+ // Recurses from current arc, appending last arc all the
+ // way to the first final node
+ private void pushLast() throws IOException {
+
+ FST.Arc<T> arc = arcs[upto];
+ assert arc != null;
+
+ while (true) {
+ setCurrentLabel(arc.label);
+ output[upto] = fst.outputs.add(output[upto-1], arc.output);
+ if (arc.label == FST.END_LABEL) {
+ // Final node
+ break;
+ }
+ incr();
+
+ arc = fst.readLastTargetArc(arc, getArc(upto));
+ }
+ }
+
+ private FST.Arc<T> getArc(int idx) {
+ if (arcs[idx] == null) {
+ arcs[idx] = new FST.Arc<T>();
+ }
+ return arcs[idx];
+ }
+}