// Recovered from a git blobdiff (new file, index a2cf686) of
// lucene-java-3.5.0/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java
// Origin: https://git.mdrn.pl/pylucene.git
// Range:  a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed
package org.apache.lucene.search.suggest.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;

import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.fst.FSTLookup;
import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.search.suggest.LookupBenchmarkTest;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;

/**
 * Unit tests for {@link FSTLookup}.
 */
public class FSTLookupTest extends LuceneTestCase {
  /**
   * Shorthand factory for a {@link TermFreq}.
   *
   * @param t the term
   * @param v the weight associated with the term
   * @return a new {@link TermFreq} wrapping {@code t} and {@code v}
   */
  public static TermFreq tf(String t, float v) {
    return new TermFreq(t, v);
  }

  /** Lookup under test; rebuilt from {@link #evalKeys()} before every test. */
  private FSTLookup lookup;

  /**
   * Builds a default-configured {@link FSTLookup} over the fixture keys.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();

    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(evalKeys()));
  }

  /**
   * Returns the fixed term/weight fixture the lookup is built from.
   * Weights are either 0.5 or 1; the expectations in the tests below print
   * them as "0.0" and "1.0" respectively.
   */
  private TermFreq[] evalKeys() {
    final TermFreq[] keys = new TermFreq[] {
        tf("one", 0.5f),
        tf("oneness", 1),
        tf("onerous", 1),
        tf("onesimus", 1),
        tf("two", 1),
        tf("twofold", 1),
        tf("twonk", 1),
        tf("thrive", 1),
        tf("through", 1),
        tf("threat", 1),
        tf("three", 1),
        tf("foundation", 1),
        tf("fourblah", 1),
        tf("fourteen", 1),
        tf("four", 0.5f),
        tf("fourier", 0.5f),
        tf("fourty", 0.5f),
        tf("xo", 1),
    };
    return keys;
  }

  /** An exact match with the top weight is the single best result. */
  public void testExactMatchHighPriority() throws Exception {
    assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0");
  }

  /** An exact match with a lower weight is still listed first. */
  public void testExactMatchLowPriority() throws Exception {
    assertMatchEquals(lookup.lookup("one", true, 2),
        "one/0.0",
        "oneness/1.0");
  }

  /**
   * Checks that the requested result count is honored and how exact matches
   * are ordered, first with the default lookup and then with one created via
   * {@code new FSTLookup(10, false)}.
   */
  public void testRequestedCount() throws Exception {
    // 'one' is promoted after collecting two higher ranking results.
    assertMatchEquals(lookup.lookup("one", true, 2),
        "one/0.0",
        "oneness/1.0");

    // 'one' is at the top after collecting all alphabetical results.
    assertMatchEquals(lookup.lookup("one", false, 2),
        "one/0.0",
        "oneness/1.0");

    // 'four' is collected in a bucket and then again as an exact match.
    assertMatchEquals(lookup.lookup("four", true, 2),
        "four/0.0",
        "fourblah/1.0");

    // Check reordering of exact matches.
    assertMatchEquals(lookup.lookup("four", true, 4),
        "four/0.0",
        "fourblah/1.0",
        "fourteen/1.0",
        "fourier/0.0");

    // NOTE(review): the second constructor argument presumably turns off
    // exact-match promotion (the expectations below rely on that) -- confirm
    // against the FSTLookup constructor documentation.
    lookup = new FSTLookup(10, false);
    lookup.build(new TermFreqArrayIterator(evalKeys()));

    // 'one' is not promoted after collecting two higher ranking results.
    assertMatchEquals(lookup.lookup("one", true, 2),
        "oneness/1.0",
        "onerous/1.0");

    // 'one' is at the top after collecting all alphabetical results.
    assertMatchEquals(lookup.lookup("one", false, 2),
        "one/0.0",
        "oneness/1.0");
  }

  /** A prefix that matches nothing yields an empty result list. */
  public void testMiss() throws Exception {
    assertMatchEquals(lookup.lookup("xyz", true, 1));
  }

  /** Same miss as {@link #testMiss()}, but with the second lookup argument set to false. */
  public void testAlphabeticWithWeights() throws Exception {
    assertEquals(0, lookup.lookup("xyz", false, 1).size());
  }

  /** Requesting an unbounded number of results returns every key with the prefix. */
  public void testFullMatchList() throws Exception {
    assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE),
        "oneness/1.0",
        "onerous/1.0",
        "onesimus/1.0",
        "one/0.0");
  }

  /**
   * Builds the lookup from the benchmark term list and verifies that every
   * term can be retrieved and is its own best single suggestion.
   */
  public void testMultilingualInput() throws Exception {
    List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();

    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(input));

    for (TermFreq tf : input) {
      assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
      assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
    }
  }

  /** A lookup built from no terms returns no results. */
  public void testEmptyInput() throws Exception {
    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(new TermFreq[0]));

    assertMatchEquals(lookup.lookup("", true, 10));
  }

  /**
   * Builds the lookup from 5000 random numeric strings and checks that every
   * suggestion returned for a proper prefix of a term starts with that prefix.
   */
  public void testRandom() throws Exception {
    List<TermFreq> freqs = new ArrayList<TermFreq>();
    Random rnd = random;
    for (int i = 0; i < 5000; i++) {
      freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100)));
    }
    lookup = new FSTLookup();
    lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

    for (TermFreq tf : freqs) {
      final String term = tf.term;
      // Prefix lengths run from 1 to length - 1; the full term is not queried.
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
          assertTrue(lr.key.startsWith(prefix));
        }
      }
    }
  }

  /**
   * Asserts that the string forms of {@code res} equal {@code expected}, in
   * order. On mismatch, a two-column "Expected / Result" table is printed to
   * {@code System.err} and also used as the failure message.
   *
   * @param res the lookup results to check
   * @param expected the expected {@code toString()} values; none means "no results"
   */
  private void assertMatchEquals(List<LookupResult> res, String... expected) {
    String[] result = new String[res.size()];
    for (int i = 0; i < res.size(); i++) {
      result[i] = res.get(i).toString();
    }

    if (!Arrays.equals(expected, result)) {
      int colLen = Math.max(maxLen(expected), maxLen(result));

      StringBuilder b = new StringBuilder();
      String format = "%" + colLen + "s  " + "%" + colLen + "s\n";
      b.append(String.format(Locale.ENGLISH, format, "Expected", "Result"));
      for (int i = 0; i < Math.max(result.length, expected.length); i++) {
        // Pad the shorter column with "--" so both lists stay aligned.
        b.append(String.format(Locale.ENGLISH, format,
            i < expected.length ? expected[i] : "--",
            i < result.length ? result[i] : "--"));
      }

      System.err.println(b.toString());
      fail("Expected different output:\n" + b.toString());
    }
  }

  /**
   * Returns the length of the longest string in {@code result}, or 0 when the
   * array is empty.
   */
  private int maxLen(String[] result) {
    int len = 0;
    for (String s : result) {
      len = Math.max(len, s.length());
    }
    return len;
  }
}