1 package org.apache.lucene.util.fst;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.util.ArrayUtil;
21 import org.apache.lucene.util.RamUsageEstimator;
23 import java.io.IOException;
25 /** Can next() and advance() through the terms in an FST
27 * @lucene.experimental
30 abstract class FSTEnum<T> {
31 protected final FST<T> fst;
33 @SuppressWarnings("unchecked") protected FST.Arc<T>[] arcs = new FST.Arc[10];
34 // outputs are cumulative
35 @SuppressWarnings("unchecked") protected T[] output = (T[]) new Object[10];
37 protected final T NO_OUTPUT;
38 protected final FST.Arc<T> scratchArc = new FST.Arc<T>();
41 protected int targetLength;
43 /** doFloor controls the behavior of advance: if it's true
44 * doFloor is true, advance positions to the biggest
45 * term before target. */
46 protected FSTEnum(FST<T> fst) {
48 NO_OUTPUT = fst.outputs.getNoOutput();
49 fst.getFirstArc(getArc(0));
50 output[0] = NO_OUTPUT;
53 protected abstract int getTargetLabel();
54 protected abstract int getCurrentLabel();
56 protected abstract void setCurrentLabel(int label);
57 protected abstract void grow();
59 /** Rewinds enum state to match the shared prefix between
60 * current term and target term */
61 protected final void rewindPrefix() throws IOException {
63 //System.out.println(" init");
65 fst.readFirstTargetArc(getArc(0), getArc(1));
68 //System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength);
70 final int currentLimit = upto;
72 while (upto < currentLimit && upto <= targetLength+1) {
73 final int cmp = getCurrentLabel() - getTargetLabel();
78 // seek backwards -- reset this arc to the first arc
79 final FST.Arc<T> arc = getArc(upto);
80 fst.readFirstTargetArc(getArc(upto-1), arc);
81 //System.out.println(" seek first arc");
88 protected void doNext() throws IOException {
89 //System.out.println("FE: next upto=" + upto);
91 //System.out.println(" init");
93 fst.readFirstTargetArc(getArc(0), getArc(1));
96 //System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast());
97 while (arcs[upto].isLast()) {
100 //System.out.println(" eof");
104 fst.readNextArc(arcs[upto]);
110 // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
111 // SEEK_END)? saves the eq check above?
113 /** Seeks to smallest term that's >= target. */
114 protected void doSeekCeil() throws IOException {
116 //System.out.println(" advance len=" + target.length + " curlen=" + current.length);
118 // TODO: possibly caller could/should provide common
119 // prefix length? ie this work may be redundant if
120 // caller is in fact intersecting against its own
123 //System.out.println("FE.seekCeil upto=" + upto);
125 // Save time by starting at the end of the shared prefix
126 // b/w our current term & the target:
128 //System.out.println(" after rewind upto=" + upto);
130 FST.Arc<T> arc = getArc(upto);
131 int targetLabel = getTargetLabel();
132 //System.out.println(" init targetLabel=" + targetLabel);
134 // Now scan forward, matching the new suffix of the target
137 //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);
139 if (arc.bytesPerArc != 0 && arc.label != -1) {
141 // Arcs are fixed array -- use binary search to find
144 final FST<T>.BytesReader in = fst.getBytesReader(0);
145 int low = arc.arcIdx;
146 int high = arc.numArcs-1;
148 //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
149 boolean found = false;
150 while (low <= high) {
151 mid = (low + high) >>> 1;
152 in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
153 final int midLabel = fst.readLabel(in);
154 final int cmp = midLabel - targetLabel;
155 //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
166 // NOTE: this code is dup'd w/ the code below (in
167 // the outer else clause):
171 fst.readNextRealArc(arc, in);
172 assert arc.arcIdx == mid;
173 assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
174 output[upto] = fst.outputs.add(output[upto-1], arc.output);
175 if (targetLabel == FST.END_LABEL) {
178 setCurrentLabel(arc.label);
180 arc = fst.readFirstTargetArc(arc, getArc(upto));
181 targetLabel = getTargetLabel();
183 } else if (low == arc.numArcs) {
185 arc.arcIdx = arc.numArcs-2;
186 fst.readNextRealArc(arc, in);
188 // Dead end (target is after the last arc);
189 // rollback to last fork then push
195 final FST.Arc<T> prevArc = getArc(upto);
196 //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
197 if (!prevArc.isLast()) {
198 fst.readNextArc(prevArc);
205 arc.arcIdx = (low > high ? low : high)-1;
206 fst.readNextRealArc(arc, in);
207 assert arc.label > targetLabel;
212 // Arcs are not array'd -- must do linear scan:
213 if (arc.label == targetLabel) {
215 output[upto] = fst.outputs.add(output[upto-1], arc.output);
216 if (targetLabel == FST.END_LABEL) {
219 setCurrentLabel(arc.label);
221 arc = fst.readFirstTargetArc(arc, getArc(upto));
222 targetLabel = getTargetLabel();
223 } else if (arc.label > targetLabel) {
226 } else if (arc.isLast()) {
227 // Dead end (target is after the last arc);
228 // rollback to last fork then push
234 final FST.Arc<T> prevArc = getArc(upto);
235 //System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
236 if (!prevArc.isLast()) {
237 fst.readNextArc(prevArc);
245 //System.out.println(" next scan");
246 fst.readNextArc(arc);
252 // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
253 // SEEK_END)? saves the eq check above?
254 /** Seeks to largest term that's <= target. */
255 protected void doSeekFloor() throws IOException {
257 // TODO: possibly caller could/should provide common
258 // prefix length? ie this work may be redundant if
259 // caller is in fact intersecting against its own
261 //System.out.println("FE: seek floor upto=" + upto);
263 // Save CPU by starting at the end of the shared prefix
264 // b/w our current term & the target:
267 //System.out.println("FE: after rewind upto=" + upto);
269 FST.Arc<T> arc = getArc(upto);
270 int targetLabel = getTargetLabel();
272 //System.out.println("FE: init targetLabel=" + targetLabel);
274 // Now scan forward, matching the new suffix of the target
276 //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
278 if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
279 // Arcs are fixed array -- use binary search to find
282 final FST<T>.BytesReader in = fst.getBytesReader(0);
283 int low = arc.arcIdx;
284 int high = arc.numArcs-1;
286 //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
287 boolean found = false;
288 while (low <= high) {
289 mid = (low + high) >>> 1;
290 in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
291 final int midLabel = fst.readLabel(in);
292 final int cmp = midLabel - targetLabel;
293 //System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
304 // NOTE: this code is dup'd w/ the code below (in
305 // the outer else clause):
308 //System.out.println(" match! arcIdx=" + mid);
310 fst.readNextRealArc(arc, in);
311 assert arc.arcIdx == mid;
312 assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
313 output[upto] = fst.outputs.add(output[upto-1], arc.output);
314 if (targetLabel == FST.END_LABEL) {
317 setCurrentLabel(arc.label);
319 arc = fst.readFirstTargetArc(arc, getArc(upto));
320 targetLabel = getTargetLabel();
322 } else if (high == -1) {
323 //System.out.println(" before first");
324 // Very first arc is after our target
325 // TODO: if each arc could somehow read the arc just
326 // before, we can save this re-scan. The ceil case
327 // doesn't need this because it reads the next arc
330 // First, walk backwards until we find a first arc
331 // that's before our target label:
332 fst.readFirstTargetArc(getArc(upto-1), arc);
333 if (arc.label < targetLabel) {
334 // Then, scan forwards to the arc just before
336 while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
337 fst.readNextArc(arc);
346 targetLabel = getTargetLabel();
350 // There is a floor arc:
351 arc.arcIdx = (low > high ? high : low)-1;
352 //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
353 fst.readNextRealArc(arc, in);
354 assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
355 assert arc.label < targetLabel;
361 if (arc.label == targetLabel) {
363 output[upto] = fst.outputs.add(output[upto-1], arc.output);
364 if (targetLabel == FST.END_LABEL) {
367 setCurrentLabel(arc.label);
369 arc = fst.readFirstTargetArc(arc, getArc(upto));
370 targetLabel = getTargetLabel();
371 } else if (arc.label > targetLabel) {
372 // TODO: if each arc could somehow read the arc just
373 // before, we can save this re-scan. The ceil case
374 // doesn't need this because it reads the next arc
377 // First, walk backwards until we find a first arc
378 // that's before our target label:
379 fst.readFirstTargetArc(getArc(upto-1), arc);
380 if (arc.label < targetLabel) {
381 // Then, scan forwards to the arc just before
383 while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
384 fst.readNextArc(arc);
393 targetLabel = getTargetLabel();
396 } else if (!arc.isLast()) {
397 //System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
398 if (fst.readNextArcLabel(arc) > targetLabel) {
403 fst.readNextArc(arc);
413 private void incr() {
416 if (arcs.length <= upto) {
417 @SuppressWarnings("unchecked") final FST.Arc<T>[] newArcs =
418 new FST.Arc[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
419 System.arraycopy(arcs, 0, newArcs, 0, arcs.length);
422 if (output.length <= upto) {
423 @SuppressWarnings("unchecked") final T[] newOutput =
424 (T[]) new Object[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
425 System.arraycopy(output, 0, newOutput, 0, output.length);
430 // Appends current arc, and then recurses from its target,
431 // appending first arc all the way to the final node
432 private void pushFirst() throws IOException {
434 FST.Arc<T> arc = arcs[upto];
438 output[upto] = fst.outputs.add(output[upto-1], arc.output);
439 if (arc.label == FST.END_LABEL) {
443 //System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto]));
444 setCurrentLabel(arc.label);
447 final FST.Arc<T> nextArc = getArc(upto);
448 fst.readFirstTargetArc(arc, nextArc);
453 // Recurses from current arc, appending last arc all the
454 // way to the first final node
455 private void pushLast() throws IOException {
457 FST.Arc<T> arc = arcs[upto];
461 setCurrentLabel(arc.label);
462 output[upto] = fst.outputs.add(output[upto-1], arc.output);
463 if (arc.label == FST.END_LABEL) {
469 arc = fst.readLastTargetArc(arc, getArc(upto));
473 private FST.Arc<T> getArc(int idx) {
474 if (arcs[idx] == null) {
475 arcs[idx] = new FST.Arc<T>();