--- /dev/null
+package org.apache.lucene.search.suggest.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
+import org.apache.lucene.util.LuceneTestCase;
+
+import org.apache.lucene.search.suggest.LookupBenchmarkTest;
+import org.apache.lucene.search.suggest.TermFreq;
+import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+
+/**
+ * Unit tests for {@link FSTLookup}.
+ */
+public class FSTLookupTest extends LuceneTestCase {
+ public static TermFreq tf(String t, float v) {
+ return new TermFreq(t, v);
+ }
+
+ private FSTLookup lookup;
+
+ public void setUp() throws Exception {
+ super.setUp();
+
+ lookup = new FSTLookup();
+ lookup.build(new TermFreqArrayIterator(evalKeys()));
+ }
+
+ private TermFreq[] evalKeys() {
+ final TermFreq[] keys = new TermFreq[] {
+ tf("one", 0.5f),
+ tf("oneness", 1),
+ tf("onerous", 1),
+ tf("onesimus", 1),
+ tf("two", 1),
+ tf("twofold", 1),
+ tf("twonk", 1),
+ tf("thrive", 1),
+ tf("through", 1),
+ tf("threat", 1),
+ tf("three", 1),
+ tf("foundation", 1),
+ tf("fourblah", 1),
+ tf("fourteen", 1),
+ tf("four", 0.5f),
+ tf("fourier", 0.5f),
+ tf("fourty", 0.5f),
+ tf("xo", 1),
+ };
+ return keys;
+ }
+
+ public void testExactMatchHighPriority() throws Exception {
+ assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0");
+ }
+
+ public void testExactMatchLowPriority() throws Exception {
+ assertMatchEquals(lookup.lookup("one", true, 2),
+ "one/0.0",
+ "oneness/1.0");
+ }
+
+ public void testRequestedCount() throws Exception {
+ // 'one' is promoted after collecting two higher ranking results.
+ assertMatchEquals(lookup.lookup("one", true, 2),
+ "one/0.0",
+ "oneness/1.0");
+
+ // 'one' is at the top after collecting all alphabetical results.
+ assertMatchEquals(lookup.lookup("one", false, 2),
+ "one/0.0",
+ "oneness/1.0");
+
+ // 'four' is collected in a bucket and then again as an exact match.
+ assertMatchEquals(lookup.lookup("four", true, 2),
+ "four/0.0",
+ "fourblah/1.0");
+
+ // Check reordering of exact matches.
+ assertMatchEquals(lookup.lookup("four", true, 4),
+ "four/0.0",
+ "fourblah/1.0",
+ "fourteen/1.0",
+ "fourier/0.0");
+
+ lookup = new FSTLookup(10, false);
+ lookup.build(new TermFreqArrayIterator(evalKeys()));
+
+ // 'one' is not promoted after collecting two higher ranking results.
+ assertMatchEquals(lookup.lookup("one", true, 2),
+ "oneness/1.0",
+ "onerous/1.0");
+
+ // 'one' is at the top after collecting all alphabetical results.
+ assertMatchEquals(lookup.lookup("one", false, 2),
+ "one/0.0",
+ "oneness/1.0");
+ }
+
+ public void testMiss() throws Exception {
+ assertMatchEquals(lookup.lookup("xyz", true, 1));
+ }
+
+ public void testAlphabeticWithWeights() throws Exception {
+ assertEquals(0, lookup.lookup("xyz", false, 1).size());
+ }
+
+ public void testFullMatchList() throws Exception {
+ assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE),
+ "oneness/1.0",
+ "onerous/1.0",
+ "onesimus/1.0",
+ "one/0.0");
+ }
+
+ public void testMultilingualInput() throws Exception {
+ List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
+
+ lookup = new FSTLookup();
+ lookup.build(new TermFreqArrayIterator(input));
+
+ for (TermFreq tf : input) {
+ assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
+ assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
+ }
+ }
+
+ public void testEmptyInput() throws Exception {
+ lookup = new FSTLookup();
+ lookup.build(new TermFreqArrayIterator(new TermFreq[0]));
+
+ assertMatchEquals(lookup.lookup("", true, 10));
+ }
+
+ public void testRandom() throws Exception {
+ List<TermFreq> freqs = new ArrayList<TermFreq>();
+ Random rnd = random;
+ for (int i = 0; i < 5000; i++) {
+ freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100)));
+ }
+ lookup = new FSTLookup();
+ lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
+
+ for (TermFreq tf : freqs) {
+ final String term = tf.term;
+ for (int i = 1; i < term.length(); i++) {
+ String prefix = term.substring(0, i);
+ for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
+ assertTrue(lr.key.startsWith(prefix));
+ }
+ }
+ }
+ }
+
+ private void assertMatchEquals(List<LookupResult> res, String... expected) {
+ String [] result = new String [res.size()];
+ for (int i = 0; i < res.size(); i++)
+ result[i] = res.get(i).toString();
+
+ if (!Arrays.equals(expected, result)) {
+ int colLen = Math.max(maxLen(expected), maxLen(result));
+
+ StringBuilder b = new StringBuilder();
+ String format = "%" + colLen + "s " + "%" + colLen + "s\n";
+ b.append(String.format(Locale.ENGLISH, format, "Expected", "Result"));
+ for (int i = 0; i < Math.max(result.length, expected.length); i++) {
+ b.append(String.format(Locale.ENGLISH, format,
+ i < expected.length ? expected[i] : "--",
+ i < result.length ? result[i] : "--"));
+ }
+
+ System.err.println(b.toString());
+ fail("Expected different output:\n" + b.toString());
+ }
+ }
+
+ private int maxLen(String[] result) {
+ int len = 0;
+ for (String s : result)
+ len = Math.max(len, s.length());
+ return len;
+ }
+}