X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/fst/FSTEnum.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/fst/FSTEnum.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/fst/FSTEnum.java deleted file mode 100644 index 9dfebd5..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/fst/FSTEnum.java +++ /dev/null @@ -1,479 +0,0 @@ -package org.apache.lucene.util.fst; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -import java.io.IOException; - -/** Can next() and advance() through the terms in an FST - * - * @lucene.experimental -*/ - -abstract class FSTEnum { - protected final FST fst; - - @SuppressWarnings("unchecked") protected FST.Arc[] arcs = new FST.Arc[10]; - // outputs are cumulative - @SuppressWarnings("unchecked") protected T[] output = (T[]) new Object[10]; - - protected final T NO_OUTPUT; - protected final FST.Arc scratchArc = new FST.Arc(); - - protected int upto; - protected int targetLength; - - /** doFloor controls the behavior of advance: if it's true - * doFloor is true, advance positions to the biggest - * term before target. */ - protected FSTEnum(FST fst) { - this.fst = fst; - NO_OUTPUT = fst.outputs.getNoOutput(); - fst.getFirstArc(getArc(0)); - output[0] = NO_OUTPUT; - } - - protected abstract int getTargetLabel(); - protected abstract int getCurrentLabel(); - - protected abstract void setCurrentLabel(int label); - protected abstract void grow(); - - /** Rewinds enum state to match the shared prefix between - * current term and target term */ - protected final void rewindPrefix() throws IOException { - if (upto == 0) { - //System.out.println(" init"); - upto = 1; - fst.readFirstTargetArc(getArc(0), getArc(1)); - return; - } - //System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength); - - final int currentLimit = upto; - upto = 1; - while (upto < currentLimit && upto <= targetLength+1) { - final int cmp = getCurrentLabel() - getTargetLabel(); - if (cmp < 0) { - // seek forward - break; - } else if (cmp > 0) { - // seek backwards -- reset this arc to the first arc - final FST.Arc arc = getArc(upto); - fst.readFirstTargetArc(getArc(upto-1), arc); - //System.out.println(" seek first arc"); - break; - } - upto++; - } - } - - protected void doNext() throws IOException { - //System.out.println("FE: next upto=" + upto); - if (upto == 0) { - //System.out.println(" init"); - upto = 1; - fst.readFirstTargetArc(getArc(0), getArc(1)); - } else { - // pop - //System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast()); - while (arcs[upto].isLast()) { - upto--; - if (upto == 0) { - //System.out.println(" eof"); - return; - } - } - fst.readNextArc(arcs[upto]); - } - - pushFirst(); - } - - // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND / - // SEEK_END)? saves the eq check above? - - /** Seeks to smallest term that's >= target. */ - protected void doSeekCeil() throws IOException { - - //System.out.println(" advance len=" + target.length + " curlen=" + current.length); - - // TODO: possibly caller could/should provide common - // prefix length? ie this work may be redundant if - // caller is in fact intersecting against its own - // automaton - - //System.out.println("FE.seekCeil upto=" + upto); - - // Save time by starting at the end of the shared prefix - // b/w our current term & the target: - rewindPrefix(); - //System.out.println(" after rewind upto=" + upto); - - FST.Arc arc = getArc(upto); - int targetLabel = getTargetLabel(); - //System.out.println(" init targetLabel=" + targetLabel); - - // Now scan forward, matching the new suffix of the target - while(true) { - - //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel); - - if (arc.bytesPerArc != 0 && arc.label != -1) { - - // Arcs are fixed array -- use binary search to find - // the target. - - final FST.BytesReader in = fst.getBytesReader(0); - int low = arc.arcIdx; - int high = arc.numArcs-1; - int mid = 0; - //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel); - boolean found = false; - while (low <= high) { - mid = (low + high) >>> 1; - in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1; - final int midLabel = fst.readLabel(in); - final int cmp = midLabel - targetLabel; - //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp); - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else { - found = true; - break; - } - } - - // NOTE: this code is dup'd w/ the code below (in - // the outer else clause): - if (found) { - // Match - arc.arcIdx = mid-1; - fst.readNextRealArc(arc, in); - assert arc.arcIdx == mid; - assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid; - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (targetLabel == FST.END_LABEL) { - return; - } - setCurrentLabel(arc.label); - incr(); - arc = fst.readFirstTargetArc(arc, getArc(upto)); - targetLabel = getTargetLabel(); - continue; - } else if (low == arc.numArcs) { - // Dead end - arc.arcIdx = arc.numArcs-2; - fst.readNextRealArc(arc, in); - assert arc.isLast(); - // Dead end (target is after the last arc); - // rollback to last fork then push - upto--; - while(true) { - if (upto == 0) { - return; - } - final FST.Arc prevArc = getArc(upto); - //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast()); - if (!prevArc.isLast()) { - fst.readNextArc(prevArc); - pushFirst(); - return; - } - upto--; - } - } else { - arc.arcIdx = (low > high ? low : high)-1; - fst.readNextRealArc(arc, in); - assert arc.label > targetLabel; - pushFirst(); - return; - } - } else { - // Arcs are not array'd -- must do linear scan: - if (arc.label == targetLabel) { - // recurse - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (targetLabel == FST.END_LABEL) { - return; - } - setCurrentLabel(arc.label); - incr(); - arc = fst.readFirstTargetArc(arc, getArc(upto)); - targetLabel = getTargetLabel(); - } else if (arc.label > targetLabel) { - pushFirst(); - return; - } else if (arc.isLast()) { - // Dead end (target is after the last arc); - // rollback to last fork then push - upto--; - while(true) { - if (upto == 0) { - return; - } - final FST.Arc prevArc = getArc(upto); - //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast()); - if (!prevArc.isLast()) { - fst.readNextArc(prevArc); - pushFirst(); - return; - } - upto--; - } - } else { - // keep scanning - //System.out.println(" next scan"); - fst.readNextArc(arc); - } - } - } - } - - // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND / - // SEEK_END)? saves the eq check above? - /** Seeks to largest term that's <= target. */ - protected void doSeekFloor() throws IOException { - - // TODO: possibly caller could/should provide common - // prefix length? ie this work may be redundant if - // caller is in fact intersecting against its own - // automaton - //System.out.println("FE: seek floor upto=" + upto); - - // Save CPU by starting at the end of the shared prefix - // b/w our current term & the target: - rewindPrefix(); - - //System.out.println("FE: after rewind upto=" + upto); - - FST.Arc arc = getArc(upto); - int targetLabel = getTargetLabel(); - - //System.out.println("FE: init targetLabel=" + targetLabel); - - // Now scan forward, matching the new suffix of the target - while(true) { - //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast()); - - if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) { - // Arcs are fixed array -- use binary search to find - // the target. - - final FST.BytesReader in = fst.getBytesReader(0); - int low = arc.arcIdx; - int high = arc.numArcs-1; - int mid = 0; - //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel); - boolean found = false; - while (low <= high) { - mid = (low + high) >>> 1; - in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1; - final int midLabel = fst.readLabel(in); - final int cmp = midLabel - targetLabel; - //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp); - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else { - found = true; - break; - } - } - - // NOTE: this code is dup'd w/ the code below (in - // the outer else clause): - if (found) { - // Match -- recurse - //System.out.println(" match! arcIdx=" + mid); - arc.arcIdx = mid-1; - fst.readNextRealArc(arc, in); - assert arc.arcIdx == mid; - assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid; - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (targetLabel == FST.END_LABEL) { - return; - } - setCurrentLabel(arc.label); - incr(); - arc = fst.readFirstTargetArc(arc, getArc(upto)); - targetLabel = getTargetLabel(); - continue; - } else if (high == -1) { - //System.out.println(" before first"); - // Very first arc is after our target - // TODO: if each arc could somehow read the arc just - // before, we can save this re-scan. The ceil case - // doesn't need this because it reads the next arc - // instead: - while(true) { - // First, walk backwards until we find a first arc - // that's before our target label: - fst.readFirstTargetArc(getArc(upto-1), arc); - if (arc.label < targetLabel) { - // Then, scan forwards to the arc just before - // the targetLabel: - while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) { - fst.readNextArc(arc); - } - pushLast(); - return; - } - upto--; - if (upto == 0) { - return; - } - targetLabel = getTargetLabel(); - arc = getArc(upto); - } - } else { - // There is a floor arc: - arc.arcIdx = (low > high ? high : low)-1; - //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1)); - fst.readNextRealArc(arc, in); - assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel; - assert arc.label < targetLabel; - pushLast(); - return; - } - } else { - - if (arc.label == targetLabel) { - // Match -- recurse - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (targetLabel == FST.END_LABEL) { - return; - } - setCurrentLabel(arc.label); - incr(); - arc = fst.readFirstTargetArc(arc, getArc(upto)); - targetLabel = getTargetLabel(); - } else if (arc.label > targetLabel) { - // TODO: if each arc could somehow read the arc just - // before, we can save this re-scan. The ceil case - // doesn't need this because it reads the next arc - // instead: - while(true) { - // First, walk backwards until we find a first arc - // that's before our target label: - fst.readFirstTargetArc(getArc(upto-1), arc); - if (arc.label < targetLabel) { - // Then, scan forwards to the arc just before - // the targetLabel: - while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) { - fst.readNextArc(arc); - } - pushLast(); - return; - } - upto--; - if (upto == 0) { - return; - } - targetLabel = getTargetLabel(); - arc = getArc(upto); - } - } else if (!arc.isLast()) { - //System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")"); - if (fst.readNextArcLabel(arc) > targetLabel) { - pushLast(); - return; - } else { - // keep scanning - fst.readNextArc(arc); - } - } else { - pushLast(); - return; - } - } - } - } - - private void incr() { - upto++; - grow(); - if (arcs.length <= upto) { - @SuppressWarnings("unchecked") final FST.Arc[] newArcs = - new FST.Arc[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(arcs, 0, newArcs, 0, arcs.length); - arcs = newArcs; - } - if (output.length <= upto) { - @SuppressWarnings("unchecked") final T[] newOutput = - (T[]) new Object[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(output, 0, newOutput, 0, output.length); - output = newOutput; - } - } - - // Appends current arc, and then recurses from its target, - // appending first arc all the way to the final node - private void pushFirst() throws IOException { - - FST.Arc arc = arcs[upto]; - assert arc != null; - - while (true) { - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (arc.label == FST.END_LABEL) { - // Final node - break; - } - //System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto])); - setCurrentLabel(arc.label); - incr(); - - final FST.Arc nextArc = getArc(upto); - fst.readFirstTargetArc(arc, nextArc); - arc = nextArc; - } - } - - // Recurses from current arc, appending last arc all the - // way to the first final node - private void pushLast() throws IOException { - - FST.Arc arc = arcs[upto]; - assert arc != null; - - while (true) { - setCurrentLabel(arc.label); - output[upto] = fst.outputs.add(output[upto-1], arc.output); - if (arc.label == FST.END_LABEL) { - // Final node - break; - } - incr(); - - arc = fst.readLastTargetArc(arc, getArc(upto)); - } - } - - private FST.Arc getArc(int idx) { - if (arcs[idx] == null) { - arcs[idx] = new FST.Arc(); - } - return arcs[idx]; - } -}