1 package org.apache.lucene.util.fst;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.BufferedReader;
22 import java.io.FileInputStream;
23 import java.io.FileOutputStream;
24 import java.io.IOException;
25 import java.io.InputStreamReader;
26 import java.io.OutputStreamWriter;
27 import java.io.Writer;
30 import org.apache.lucene.analysis.MockAnalyzer;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.index.IndexReader;
33 import org.apache.lucene.index.IndexWriter;
34 import org.apache.lucene.index.IndexWriterConfig;
35 import org.apache.lucene.index.Term;
36 import org.apache.lucene.index.TermEnum;
37 import org.apache.lucene.store.Directory;
38 import org.apache.lucene.store.FSDirectory;
39 import org.apache.lucene.store.IndexInput;
40 import org.apache.lucene.store.IndexOutput;
41 import org.apache.lucene.store.MockDirectoryWrapper;
42 import org.apache.lucene.util.BytesRef;
43 import org.apache.lucene.util.IntsRef;
44 import org.apache.lucene.util.LineFileDocs;
45 import org.apache.lucene.util.LuceneTestCase;
46 import org.apache.lucene.util.UnicodeUtil;
47 import org.apache.lucene.util._TestUtil;
48 import org.apache.lucene.util.fst.FST.Arc;
50 public class TestFSTs extends LuceneTestCase {
52 private MockDirectoryWrapper dir;
55 public void setUp() throws Exception {
58 dir.setPreventDoubleWrite(false);
62 public void tearDown() throws Exception {
67 private static BytesRef toBytesRef(IntsRef ir) {
68 BytesRef br = new BytesRef(ir.length);
69 for(int i=0;i<ir.length;i++) {
70 int x = ir.ints[ir.offset+i];
71 assert x >= 0 && x <= 255;
72 br.bytes[i] = (byte) x;
74 br.length = ir.length;
78 private static IntsRef toIntsRef(String s, int inputMode) {
79 return toIntsRef(s, inputMode, new IntsRef(10));
82 private static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
85 return toIntsRef(new BytesRef(s), ir);
88 return toIntsRefUTF32(s, ir);
92 private static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
93 final int charLength = s.length();
96 while(charIdx < charLength) {
97 if (intIdx == ir.ints.length) {
100 final int utf32 = s.codePointAt(charIdx);
101 ir.ints[intIdx] = utf32;
102 charIdx += Character.charCount(utf32);
109 private static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
110 if (br.length > ir.ints.length) {
113 for(int i=0;i<br.length;i++) {
114 ir.ints[i] = br.bytes[br.offset+i]&0xFF;
116 ir.length = br.length;
120 public void testBasicFSA() throws IOException {
121 String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};
122 String[] strings2 = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
123 IntsRef[] terms = new IntsRef[strings.length];
124 IntsRef[] terms2 = new IntsRef[strings2.length];
125 for(int inputMode=0;inputMode<2;inputMode++) {
127 System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
130 for(int idx=0;idx<strings.length;idx++) {
131 terms[idx] = toIntsRef(strings[idx], inputMode);
133 for(int idx=0;idx<strings2.length;idx++) {
134 terms2[idx] = toIntsRef(strings2[idx], inputMode);
138 doTest(inputMode, terms);
140 // Test pre-determined FST sizes to make sure we haven't lost minimality (at least on this trivial set of terms):
144 final Outputs<Object> outputs = NoOutputs.getSingleton();
145 final Object NO_OUTPUT = outputs.getNoOutput();
146 final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms2.length);
147 for(IntsRef term : terms2) {
148 pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
150 FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
152 assertEquals(22, fst.getNodeCount());
153 assertEquals(27, fst.getArcCount());
158 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
159 final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
160 for(int idx=0;idx<terms2.length;idx++) {
161 pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
163 final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
165 assertEquals(22, fst.getNodeCount());
166 assertEquals(27, fst.getArcCount());
169 // FST byte sequence ord
171 final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
172 final BytesRef NO_OUTPUT = outputs.getNoOutput();
173 final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms2.length);
174 for(int idx=0;idx<terms2.length;idx++) {
175 final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
176 pairs.add(new FSTTester.InputOutput<BytesRef>(terms2[idx], output));
178 final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0, false);
180 assertEquals(24, fst.getNodeCount());
181 assertEquals(30, fst.getArcCount());
186 private static String simpleRandomString(Random r) {
187 final int end = r.nextInt(10);
192 final char[] buffer = new char[end];
193 for (int i = 0; i < end; i++) {
194 buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
196 return new String(buffer, 0, end);
199 // given set of terms, test the different outputs for them
200 private void doTest(int inputMode, IntsRef[] terms) throws IOException {
203 // NoOutputs (simple FSA)
205 final Outputs<Object> outputs = NoOutputs.getSingleton();
206 final Object NO_OUTPUT = outputs.getNoOutput();
207 final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
208 for(IntsRef term : terms) {
209 pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
211 new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
214 // PositiveIntOutput (ord)
216 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
217 final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
218 for(int idx=0;idx<terms.length;idx++) {
219 pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
221 new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
224 // PositiveIntOutput (random monotonically increasing positive number)
226 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
227 final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
229 for(int idx=0;idx<terms.length;idx++) {
230 final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
232 pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(value)));
234 new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
237 // PositiveIntOutput (random positive number)
239 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
240 final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
241 for(int idx=0;idx<terms.length;idx++) {
242 pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(random.nextLong()) & Long.MAX_VALUE));
244 new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
247 // Pair<ord, (random monotonically increasing positive number>
249 final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(random.nextBoolean());
250 final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(random.nextBoolean());
251 final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
252 final List<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>> pairs = new ArrayList<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>>(terms.length);
254 for(int idx=0;idx<terms.length;idx++) {
255 final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
257 pairs.add(new FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>(terms[idx],
258 outputs.get(o1.get(idx),
261 new FSTTester<PairOutputs.Pair<Long,Long>>(random, dir, inputMode, pairs, outputs).doTest();
266 final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
267 final BytesRef NO_OUTPUT = outputs.getNoOutput();
268 final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
269 for(int idx=0;idx<terms.length;idx++) {
270 final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
271 pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
273 new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest();
278 final IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
279 final List<FSTTester.InputOutput<IntsRef>> pairs = new ArrayList<FSTTester.InputOutput<IntsRef>>(terms.length);
280 for(int idx=0;idx<terms.length;idx++) {
281 final String s = Integer.toString(idx);
282 final IntsRef output = new IntsRef(s.length());
283 output.length = s.length();
284 for(int idx2=0;idx2<output.length;idx2++) {
285 output.ints[idx2] = s.charAt(idx2);
287 pairs.add(new FSTTester.InputOutput<IntsRef>(terms[idx], output));
289 new FSTTester<IntsRef>(random, dir, inputMode, pairs, outputs).doTest();
292 // Up to two positive ints, shared, generally but not
293 // monotonically increasing
296 System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
298 final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
299 final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
301 for(int idx=0;idx<terms.length;idx++) {
302 // Sometimes go backwards
303 long value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
305 value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
308 if (random.nextInt(5) == 3) {
309 long value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
311 value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
313 output = outputs.get(value, value2);
315 output = outputs.get(value);
317 pairs.add(new FSTTester.InputOutput<Object>(terms[idx], output));
319 new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
323 private static class FSTTester<T> {
326 final List<InputOutput<T>> pairs;
328 final Outputs<T> outputs;
331 public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
332 this.random = random;
334 this.inputMode = inputMode;
336 this.outputs = outputs;
339 private static class InputOutput<T> implements Comparable<InputOutput<T>> {
340 public final IntsRef input;
341 public final T output;
343 public InputOutput(IntsRef input, T output) {
345 this.output = output;
348 public int compareTo(InputOutput<T> other) {
349 if (other instanceof InputOutput) {
350 return input.compareTo((other).input);
352 throw new IllegalArgumentException();
357 public void doTest() throws IOException {
361 if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
363 doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true);
366 doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()), true);
370 // runs the term, returning the output, or null if term
371 // isn't accepted. if prefixLength is non-null it must be
372 // length 1 int array; prefixLength[0] is set to the length
373 // of the term prefix that matches
374 private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
375 assert prefixLength == null || prefixLength.length == 1;
376 final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
377 final T NO_OUTPUT = fst.outputs.getNoOutput();
378 T output = NO_OUTPUT;
380 for(int i=0;i<=term.length;i++) {
382 if (i == term.length) {
383 label = FST.END_LABEL;
385 label = term.ints[term.offset+i];
387 //System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
388 if (fst.findTargetArc(label, arc, arc) == null) {
389 if (prefixLength != null) {
396 output = fst.outputs.add(output, arc.output);
399 if (prefixLength != null) {
400 prefixLength[0] = term.length;
406 private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
407 FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
409 final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
412 final T NO_OUTPUT = fst.outputs.getNoOutput();
413 T output = NO_OUTPUT;
417 fst.readFirstTargetArc(arc, arc);
418 arcs.add(new FST.Arc<T>().copyFrom(arc));
419 while(!arc.isLast()) {
420 fst.readNextArc(arc);
421 arcs.add(new FST.Arc<T>().copyFrom(arc));
425 arc = arcs.get(random.nextInt(arcs.size()));
429 output = fst.outputs.add(output, arc.output);
432 if (arc.label == FST.END_LABEL) {
436 if (in.ints.length == in.length) {
437 in.grow(1+in.length);
439 in.ints[in.length++] = arc.label;
446 FST<T> doTest(int prune1, int prune2, boolean allowRandomSuffixSharing) throws IOException {
448 System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
451 final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
453 prune1==0 && prune2==0,
454 allowRandomSuffixSharing ? random.nextBoolean() : true,
455 allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
458 for(InputOutput<T> pair : pairs) {
459 if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
460 final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
461 final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
462 @SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
463 builderObject.add(pair.input, _outputs.get(twoLongs.first));
464 builderObject.add(pair.input, _outputs.get(twoLongs.second));
466 builder.add(pair.input, pair.output);
469 FST<T> fst = builder.finish();
471 if (random.nextBoolean() && fst != null) {
472 IndexOutput out = dir.createOutput("fst.bin");
475 IndexInput in = dir.openInput("fst.bin");
477 fst = new FST<T>(in, outputs);
480 dir.deleteFile("fst.bin");
484 if (VERBOSE && pairs.size() <= 20 && fst != null) {
485 Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
486 Util.toDot(fst, w, false, false);
488 System.out.println("SAVED out.dot");
493 System.out.println(" fst has 0 nodes (fully pruned)");
495 System.out.println(" fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs");
499 if (prune1 == 0 && prune2 == 0) {
500 verifyUnPruned(inputMode, fst);
502 verifyPruned(inputMode, fst, prune1, prune2);
509 private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
511 if (pairs.size() == 0) {
517 System.out.println("TEST: now verify " + pairs.size() + " terms");
518 for(InputOutput<T> pair : pairs) {
520 assertNotNull(pair.input);
521 assertNotNull(pair.output);
522 System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
528 // visit valid paris in order -- make sure all words
529 // are accepted, and FSTEnum's next() steps through
532 System.out.println("TEST: check valid terms/next()");
535 IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
536 for(InputOutput<T> pair : pairs) {
537 IntsRef term = pair.input;
539 System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
541 Object output = run(fst, term, null);
543 assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
544 assertEquals(pair.output, output);
546 // verify enum's next
547 IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
549 assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
550 assertEquals(pair.output, t.output);
552 assertNull(fstEnum.next());
555 final Map<IntsRef,T> termsMap = new HashMap<IntsRef,T>();
556 for(InputOutput<T> pair : pairs) {
557 termsMap.put(pair.input, pair.output);
560 // find random matching word and make sure it's valid
562 System.out.println("TEST: verify random accepted terms");
564 final IntsRef scratch = new IntsRef(10);
565 int num = atLeast(500);
566 for(int iter=0;iter<num;iter++) {
567 T output = randomAcceptedWord(fst, scratch);
568 assertTrue("accepted word " + inputToString(inputMode, scratch) + " is not valid", termsMap.containsKey(scratch));
569 assertEquals(termsMap.get(scratch), output);
572 // test IntsRefFSTEnum.seek:
574 System.out.println("TEST: verify seek");
576 IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
578 for(int iter=0;iter<num;iter++) {
580 System.out.println("TEST: iter=" + iter);
582 if (random.nextBoolean()) {
583 // seek to term that doesn't exist:
585 final IntsRef term = toIntsRef(getRandomString(), inputMode);
586 int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
590 //System.out.println(" seek " + inputToString(inputMode, term));
591 final IntsRefFSTEnum.InputOutput<T> seekResult;
592 if (random.nextBoolean()) {
594 System.out.println(" do non-exist seekFloor term=" + inputToString(inputMode, term));
596 seekResult = fstEnum.seekFloor(term);
600 System.out.println(" do non-exist seekCeil term=" + inputToString(inputMode, term));
602 seekResult = fstEnum.seekCeil(term);
605 if (pos != -1 && pos < pairs.size()) {
606 //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
607 assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
609 System.out.println(" got " + inputToString(inputMode, seekResult.input));
611 assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
612 assertEquals(pairs.get(pos).output, seekResult.output);
614 // seeked before start or beyond end
615 //System.out.println("seek=" + seekTerm);
616 assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
618 System.out.println(" got null");
626 // seek to term that does exist:
627 InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
628 final IntsRefFSTEnum.InputOutput<T> seekResult;
629 if (random.nextBoolean()) {
631 System.out.println(" do exists seekFloor " + inputToString(inputMode, pair.input));
633 seekResult = fstEnum.seekFloor(pair.input);
636 System.out.println(" do exists seekCeil " + inputToString(inputMode, pair.input));
638 seekResult = fstEnum.seekCeil(pair.input);
640 assertNotNull(seekResult);
641 assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
642 assertEquals(pair.output, seekResult.output);
647 System.out.println("TEST: mixed next/seek");
650 // test mixed next/seek
652 for(int iter=0;iter<num;iter++) {
654 System.out.println("TEST: iter " + iter);
657 fstEnum = new IntsRefFSTEnum<T>(fst);
660 boolean isDone = false;
661 if (upto == pairs.size()-1 || random.nextBoolean()) {
665 System.out.println(" do next");
667 isDone = fstEnum.next() == null;
668 } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) {
670 for(;attempt<10;attempt++) {
671 IntsRef term = toIntsRef(getRandomString(), inputMode);
672 if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
673 int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
677 if (random.nextBoolean()) {
679 assertTrue(upto != -1);
681 System.out.println(" do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
683 isDone = fstEnum.seekFloor(term) == null;
686 System.out.println(" do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
688 isDone = fstEnum.seekCeil(term) == null;
699 final int inc = random.nextInt(pairs.size() - upto - 1);
705 if (random.nextBoolean()) {
707 System.out.println(" do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
709 isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
712 System.out.println(" do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
714 isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
719 System.out.println(" got " + inputToString(inputMode, fstEnum.current().input));
721 System.out.println(" got null");
725 if (upto == pairs.size()) {
730 assertEquals(pairs.get(upto).input, fstEnum.current().input);
731 assertEquals(pairs.get(upto).output, fstEnum.current().output);
734 if (upto < pairs.size()-1) {
736 while(tryCount < 10) {
737 final IntsRef t = toIntsRef(getRandomString(), inputMode);
738 if (pairs.get(upto).input.compareTo(t) < 0) {
739 final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
741 System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
743 assertEquals(expected, fstEnum.beforeNext(t));
755 private static class CountMinOutput<T> {
759 boolean isLeaf = true;
764 private void verifyPruned(int inputMode, FST<T> fst, int prune1, int prune2) throws IOException {
767 System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs);
768 for(InputOutput<T> pair : pairs) {
769 System.out.println(" " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
773 // To validate the FST, we brute-force compute all prefixes
774 // in the terms, matched to their "common" outputs, prune that
775 // set according to the prune thresholds, then assert the FST
776 // matches that same set.
778 // NOTE: Crazy RAM intensive!!
780 //System.out.println("TEST: tally prefixes");
782 // build all prefixes
783 final Map<IntsRef,CountMinOutput<T>> prefixes = new HashMap<IntsRef,CountMinOutput<T>>();
784 final IntsRef scratch = new IntsRef(10);
785 for(InputOutput<T> pair: pairs) {
786 scratch.copy(pair.input);
787 for(int idx=0;idx<=pair.input.length;idx++) {
788 scratch.length = idx;
789 CountMinOutput<T> cmo = prefixes.get(scratch);
791 cmo = new CountMinOutput<T>();
793 cmo.output = pair.output;
794 prefixes.put(new IntsRef(scratch), cmo);
797 cmo.output = outputs.common(cmo.output, pair.output);
799 if (idx == pair.input.length) {
801 cmo.finalOutput = cmo.output;
807 System.out.println("TEST: now prune");
811 final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
812 while(it.hasNext()) {
813 Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
814 final IntsRef prefix = ent.getKey();
815 final CountMinOutput<T> cmo = ent.getValue();
817 System.out.println(" term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
821 keep = cmo.count >= prune1;
824 if (prune2 > 1 && cmo.count >= prune2) {
826 } else if (prefix.length > 0) {
827 // consult our parent
828 scratch.length = prefix.length-1;
829 System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length);
830 final CountMinOutput<T> cmo2 = prefixes.get(scratch);
831 //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count));
832 keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
833 } else if (cmo.count >= prune2) {
842 //System.out.println(" remove");
844 // clear isLeaf for all ancestors
845 //System.out.println(" keep");
846 scratch.copy(prefix);
848 while(scratch.length >= 0) {
849 final CountMinOutput<T> cmo2 = prefixes.get(scratch);
851 //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch));
859 //System.out.println("TEST: after prune");
861 for(Map.Entry<BytesRef,CountMinOutput> ent : prefixes.entrySet()) {
862 System.out.println(" " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
863 if (ent.getValue().isFinal) {
864 System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
869 if (prefixes.size() <= 1) {
876 // make sure FST only enums valid prefixes
878 System.out.println("TEST: check pruned enum");
880 IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
881 IntsRefFSTEnum.InputOutput<T> current;
882 while((current = fstEnum.next()) != null) {
884 System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output));
886 final CountMinOutput cmo = prefixes.get(current.input);
888 assertTrue(cmo.isLeaf || cmo.isFinal);
889 //if (cmo.isFinal && !cmo.isLeaf) {
891 assertEquals(cmo.finalOutput, current.output);
893 assertEquals(cmo.output, current.output);
897 // make sure all non-pruned prefixes are present in the FST
899 System.out.println("TEST: verify all prefixes");
901 final int[] stopNode = new int[1];
902 for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
903 if (ent.getKey().length > 0) {
904 final CountMinOutput<T> cmo = ent.getValue();
905 final T output = run(fst, ent.getKey(), stopNode);
907 System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output));
909 // if (cmo.isFinal && !cmo.isLeaf) {
911 assertEquals(cmo.finalOutput, output);
913 assertEquals(cmo.output, output);
915 assertEquals(ent.getKey().length, stopNode[0]);
921 public void testRandomWords() throws IOException {
922 testRandomWords(1000, atLeast(2));
923 //testRandomWords(20, 100);
926 private String inputModeToString(int mode) {
934 private void testRandomWords(int maxNumWords, int numIter) throws IOException {
935 for(int iter=0;iter<numIter;iter++) {
937 System.out.println("\nTEST: iter " + iter);
939 for(int inputMode=0;inputMode<2;inputMode++) {
940 final int numWords = random.nextInt(maxNumWords+1);
941 Set<IntsRef> termsSet = new HashSet<IntsRef>();
942 IntsRef[] terms = new IntsRef[numWords];
943 while(termsSet.size() < numWords) {
944 final String term = getRandomString();
945 termsSet.add(toIntsRef(term, inputMode));
947 doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
952 static String getRandomString() {
954 if (random.nextBoolean()) {
955 term = _TestUtil.randomRealisticUnicodeString(random);
957 // we want to mix in limited-alphabet symbols so
958 // we get more sharing of the nodes given how few
959 // terms we are testing...
960 term = simpleRandomString(random);
966 public void testBigSet() throws IOException {
967 testRandomWords(_TestUtil.nextInt(random, 50000, 60000), 1);
970 private static String inputToString(int inputMode, IntsRef term) {
971 return inputToString(inputMode, term, true);
974 private static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
975 if (!isValidUnicode) {
976 return term.toString();
977 } else if (inputMode == 0) {
979 return toBytesRef(term).utf8ToString() + " " + term;
982 return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term;
986 private static IntsRef toIntsRef(String s) {
987 final int charCount = s.length();
988 IntsRef ir = new IntsRef(charCount);
989 for(int charIDX=0;charIDX<charCount;charIDX++) {
990 ir.ints[charIDX] = s.charAt(charIDX);
992 ir.length = charCount;
996 private static String toString(IntsRef ints) {
997 char[] chars = new char[ints.length];
998 for(int charIDX=0;charIDX<ints.length;charIDX++) {
999 final int ch = ints.ints[ints.offset+charIDX];
1000 assertTrue(ch >= 0 && ch < 65536);
1001 chars[charIDX] = (char) ch;
1003 return new String(chars);
1006 // Build FST for all unique terms in the test line docs
1007 // file, up until a time limit
1008 public void testRealTerms() throws Exception {
1011 if (CodecProvider.getDefault().getDefaultFieldCodec().equals("SimpleText")) {
1013 CodecProvider.getDefault().setDefaultFieldCodec("Standard");
1017 final LineFileDocs docs = new LineFileDocs(random);
1018 final int RUN_TIME_MSEC = atLeast(500);
1019 final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
1020 final File tempDir = _TestUtil.getTempDir("fstlines");
1021 final MockDirectoryWrapper dir = newFSDirectory(tempDir);
1022 final IndexWriter writer = new IndexWriter(dir, conf);
1023 writer.setInfoStream(VERBOSE ? System.out : null);
1024 final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
1027 while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
1028 writer.addDocument(doc);
1031 IndexReader r = IndexReader.open(writer, true);
1033 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
1034 Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, outputs);
1036 boolean storeOrd = false;
1039 System.out.println("FST stores ord");
1041 System.out.println("FST stores docFreq");
1044 TermEnum termEnum = r.terms(new Term("body", ""));
1046 System.out.println("TEST: got termEnum=" + termEnum);
1050 final Term term = termEnum.term();
1051 if (term == null || !"body".equals(term.field())) {
1060 } catch (UnsupportedOperationException uoe) {
1062 System.out.println("TEST: codec doesn't support ord; FST stores docFreq");
1072 output = termEnum.docFreq();
1074 //System.out.println("ADD: " + term.text() + " ch[0]=" + (term.text().length() == 0 ? -1 : term.text().charAt(0)));
1075 builder.add(toIntsRef(term.text()), outputs.get(output));
1077 if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
1078 System.out.println(ord + " terms...");
1082 final FST<Long> fst = builder.finish();
1084 System.out.println("FST: " + docCount + " docs; " + ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs;" + " " + fst.sizeInBytes() + " bytes");
1088 // Now confirm BytesRefFSTEnum and TermEnum act the
1090 final IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
1091 int num = atLeast(1000);
1092 for(int iter=0;iter<num;iter++) {
1093 final String randomTerm = getRandomString();
1096 System.out.println("TEST: seek " + randomTerm + " ch[0]=" + (randomTerm.length() == 0 ? -1 : randomTerm.charAt(0)));
1099 termEnum = r.terms(new Term("body", randomTerm));
1100 final IntsRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(toIntsRef(randomTerm));
1102 if (termEnum.term() == null || !"body".equals(termEnum.term().field())) {
1103 assertNull("got " + (fstSeekResult == null ? "null" : toString(fstSeekResult.input) + " but expected null"), fstSeekResult);
1105 assertSame(termEnum, fstEnum, storeOrd);
1106 for(int nextIter=0;nextIter<10;nextIter++) {
1108 System.out.println("TEST: next");
1110 //System.out.println(" ord=" + termEnum.ord());
1114 if (termEnum.term() != null && "body".equals(termEnum.term().field())) {
1116 System.out.println(" term=" + termEnum.term());
1118 assertNotNull(fstEnum.next());
1119 assertSame(termEnum, fstEnum, storeOrd);
1122 System.out.println(" end!");
1124 IntsRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
1125 if (nextResult != null) {
1126 System.out.println("expected null but got: input=" + toString(nextResult.input) + " output=" + outputs.outputToString(nextResult.output));
1140 private void assertSame(TermEnum termEnum, IntsRefFSTEnum fstEnum, boolean storeOrd) throws Exception {
1141 if (termEnum.term() == null || !"body".equals(termEnum.term().field())) {
1142 if (fstEnum.current() != null) {
1143 fail("fstEnum.current().input=" + toString(fstEnum.current().input));
1146 assertNotNull(fstEnum.current());
1147 assertEquals(termEnum.term() + " != " + toString(fstEnum.current().input), termEnum.term().text(), toString(fstEnum.current().input));
1149 // fst stored the ord
1151 // assertEquals(termEnum.ord(), ((Long) fstEnum.current().output).longValue());
1153 // fst stored the docFreq
1154 assertEquals(termEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue()));
1159 private static abstract class VisitTerms<T> {
1160 private final String dirOut;
1161 private final String wordsFileIn;
1162 private int inputMode;
1163 private final Outputs<T> outputs;
1164 private final Builder<T> builder;
1166 public VisitTerms(String dirOut, String wordsFileIn, int inputMode, int prune, Outputs<T> outputs) {
1167 this.dirOut = dirOut;
1168 this.wordsFileIn = wordsFileIn;
1169 this.inputMode = inputMode;
1170 this.outputs = outputs;
1172 builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs);
1175 protected abstract T getOutput(IntsRef input, int ord) throws IOException;
1177 public void run(int limit, boolean verify) throws IOException {
1178 BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
1180 final IntsRef intsRef = new IntsRef(10);
1181 long tStart = System.currentTimeMillis();
1184 String w = is.readLine();
1188 toIntsRef(w, inputMode, intsRef);
1189 builder.add(intsRef,
1190 getOutput(intsRef, ord));
1193 if (ord % 500000 == 0) {
1195 String.format(Locale.ENGLISH,
1196 "%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
1203 assert builder.getTermCount() == ord;
1204 final FST<T> fst = builder.finish();
1206 System.out.println("FST was fully pruned!");
1213 System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
1214 if (fst.getNodeCount() < 100) {
1215 Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
1216 Util.toDot(fst, w, false, false);
1218 System.out.println("Wrote FST to out.dot");
1221 Directory dir = FSDirectory.open(new File(dirOut));
1222 IndexOutput out = dir.createOutput("fst.bin");
1226 System.out.println("Saved FST to fst.bin.");
1232 System.out.println("\nNow verify...");
1235 is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
1238 tStart = System.currentTimeMillis();
1240 String w = is.readLine();
1244 toIntsRef(w, inputMode, intsRef);
1245 T expected = getOutput(intsRef, ord);
1246 T actual = Util.get(fst, intsRef);
1247 if (actual == null) {
1248 throw new RuntimeException("unexpected null output on input=" + w);
1250 if (!actual.equals(expected)) {
1251 throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
1255 if (ord % 500000 == 0) {
1256 System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
1263 double totSec = ((System.currentTimeMillis() - tStart)/1000.0);
1264 System.out.println("Verify took " + totSec + " sec + (" + (int) ((totSec*1000000000/ord)) + " nsec per lookup)");
1272 // java -cp build/classes/test:build/classes/java:build/classes/test-framework:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
1273 public static void main(String[] args) throws IOException {
1275 int limit = Integer.MAX_VALUE;
1276 int inputMode = 0; // utf8
1277 boolean storeOrds = false;
1278 boolean storeDocFreqs = false;
1279 boolean verify = true;
1281 String wordsFileIn = null;
1282 String dirOut = null;
1285 while (idx < args.length) {
1286 if (args[idx].equals("-prune")) {
1287 prune = Integer.valueOf(args[1 + idx]);
1289 } else if (args[idx].equals("-limit")) {
1290 limit = Integer.valueOf(args[1 + idx]);
1292 } else if (args[idx].equals("-utf8")) {
1294 } else if (args[idx].equals("-utf32")) {
1296 } else if (args[idx].equals("-docFreq")) {
1297 storeDocFreqs = true;
1298 } else if (args[idx].equals("-ords")) {
1300 } else if (args[idx].equals("-noverify")) {
1302 } else if (args[idx].startsWith("-")) {
1303 System.err.println("Unrecognized option: " + args[idx]);
1306 if (wordsFileIn == null) {
1307 wordsFileIn = args[idx];
1308 } else if (dirOut == null) {
1311 System.err.println("Too many arguments, expected: input [output]");
1318 if (wordsFileIn == null) {
1319 System.err.println("No input file.");
1323 // ord benefits from share, docFreqs don't:
1325 if (storeOrds && storeDocFreqs) {
1326 // Store both ord & docFreq:
1327 final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
1328 final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
1329 final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
1330 new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1333 public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
1335 rand = new Random(17);
1337 return new PairOutputs.Pair<Long,Long>(o1.get(ord),
1338 o2.get(_TestUtil.nextInt(rand, 1, 5000)));
1340 }.run(limit, verify);
1341 } else if (storeOrds) {
1343 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1344 new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1346 public Long getOutput(IntsRef input, int ord) {
1347 return outputs.get(ord);
1349 }.run(limit, verify);
1350 } else if (storeDocFreqs) {
1351 // Store only docFreq
1352 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
1353 new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1356 public Long getOutput(IntsRef input, int ord) {
1358 rand = new Random(17);
1360 return outputs.get(_TestUtil.nextInt(rand, 1, 5000));
1362 }.run(limit, verify);
1365 final NoOutputs outputs = NoOutputs.getSingleton();
1366 final Object NO_OUTPUT = outputs.getNoOutput();
1367 new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs) {
1369 public Object getOutput(IntsRef input, int ord) {
1372 }.run(limit, verify);
1376 public void testSingleString() throws Exception {
1377 final Outputs<Object> outputs = NoOutputs.getSingleton();
1378 final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
1379 b.add(new BytesRef("foobar"), outputs.getNoOutput());
1380 final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
1381 assertNull(fstEnum.seekFloor(new BytesRef("foo")));
1382 assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
1385 public void testSimple() throws Exception {
1387 // Get outputs -- passing true means FST will share
1388 // (delta code) the outputs. This should result in
1389 // smaller FST if the outputs grow monotonically. But
1390 // if numbers are "random", false should give smaller
1392 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1394 // Build an FST mapping BytesRef -> Long
1395 final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1397 final BytesRef a = new BytesRef("a");
1398 final BytesRef b = new BytesRef("b");
1399 final BytesRef c = new BytesRef("c");
1401 builder.add(a, outputs.get(17));
1402 builder.add(b, outputs.get(42));
1403 builder.add(c, outputs.get(13824324872317238L));
1405 final FST<Long> fst = builder.finish();
1407 assertEquals(13824324872317238L, (long) Util.get(fst, c));
1408 assertEquals(42, (long) Util.get(fst, b));
1409 assertEquals(17, (long) Util.get(fst, a));
1411 BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
1412 BytesRefFSTEnum.InputOutput<Long> seekResult;
1413 seekResult = fstEnum.seekFloor(a);
1414 assertNotNull(seekResult);
1415 assertEquals(17, (long) seekResult.output);
1418 seekResult = fstEnum.seekFloor(new BytesRef("aa"));
1419 assertNotNull(seekResult);
1420 assertEquals(17, (long) seekResult.output);
1423 seekResult = fstEnum.seekCeil(new BytesRef("aa"));
1424 assertNotNull(seekResult);
1425 assertEquals(b, seekResult.input);
1426 assertEquals(42, (long) seekResult.output);
1430 * Test state expansion (array format) on close-to-root states. Creates
1431 * synthetic input that has one expanded state on each level.
1433 * @see "https://issues.apache.org/jira/browse/LUCENE-2933"
1435 public void testExpandedCloseToRoot() throws Exception {
1436 class SyntheticData {
1437 FST<Object> compile(String[] lines) throws IOException {
1438 final NoOutputs outputs = NoOutputs.getSingleton();
1439 final Object nothing = outputs.getNoOutput();
1440 final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
1443 final BytesRef term = new BytesRef();
1444 while (line < lines.length) {
1445 String w = lines[line++];
1450 b.add(term, nothing);
1456 void generate(ArrayList<String> out, StringBuilder b, char from, char to,
1458 if (depth == 0 || from == to) {
1459 String seq = b.toString() + "_" + out.size() + "_end";
1462 for (char c = from; c <= to; c++) {
1464 generate(out, b, from, c == to ? to : from, depth - 1);
1465 b.deleteCharAt(b.length() - 1);
1470 public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
1471 throws IOException {
1472 if (fst.targetHasArcs(arc)) {
1474 for (arc = fst.readFirstTargetArc(arc, arc);;
1475 arc = fst.readNextArc(arc), childCount++)
1477 boolean expanded = fst.isExpandedTarget(arc);
1478 int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
1482 (depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE &&
1483 children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
1484 children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
1485 if (arc.isLast()) break;
1495 assertTrue(FST.FIXED_ARRAY_NUM_ARCS_SHALLOW < FST.FIXED_ARRAY_NUM_ARCS_DEEP);
1496 assertTrue(FST.FIXED_ARRAY_SHALLOW_DISTANCE >= 0);
1498 SyntheticData s = new SyntheticData();
1500 ArrayList<String> out = new ArrayList<String>();
1501 StringBuilder b = new StringBuilder();
1502 s.generate(out, b, 'a', 'i', 10);
1503 String[] input = out.toArray(new String[out.size()]);
1505 FST<Object> fst = s.compile(input);
1506 FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
1507 s.verifyStateAndBelow(fst, arc, 1);
1510 // Make sure raw FST can differentiate between final vs
1511 // non-final end nodes
1512 public void testNonFinalStopNodes() throws Exception {
1513 final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
1514 final Long nothing = outputs.getNoOutput();
1515 final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1517 final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
1519 final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
1521 // Add final stop node
1523 final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
1524 node.isFinal = true;
1525 rootNode.addArc('a', node);
1526 final Builder.CompiledNode frozen = new Builder.CompiledNode();
1527 frozen.address = fst.addNode(node);
1528 rootNode.arcs[0].nextFinalOutput = outputs.get(17);
1529 rootNode.arcs[0].isFinal = true;
1530 rootNode.arcs[0].output = nothing;
1531 rootNode.arcs[0].target = frozen;
1534 // Add non-final stop node
1536 final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
1537 rootNode.addArc('b', node);
1538 final Builder.CompiledNode frozen = new Builder.CompiledNode();
1539 frozen.address = fst.addNode(node);
1540 rootNode.arcs[1].nextFinalOutput = nothing;
1541 rootNode.arcs[1].output = outputs.get(42);
1542 rootNode.arcs[1].target = frozen;
1545 fst.finish(fst.addNode(rootNode));
1547 checkStopNodes(fst, outputs);
1549 // Make sure it still works after save/load:
1550 Directory dir = newDirectory();
1551 IndexOutput out = dir.createOutput("fst");
1555 IndexInput in = dir.openInput("fst");
1556 final FST<Long> fst2 = new FST<Long>(in, outputs);
1557 checkStopNodes(fst2, outputs);
1562 private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
1563 final Long nothing = outputs.getNoOutput();
1564 FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
1565 assertEquals(nothing, startArc.output);
1566 assertEquals(nothing, startArc.nextFinalOutput);
1568 FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
1569 assertEquals('a', arc.label);
1570 assertEquals(17, arc.nextFinalOutput.longValue());
1571 assertTrue(arc.isFinal());
1573 arc = fst.readNextArc(arc);
1574 assertEquals('b', arc.label);
1575 assertFalse(arc.isFinal());
1576 assertEquals(42, arc.output.longValue());