--- /dev/null
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class FuzzyLikeThisQueryTest extends LuceneTestCase {
+ private Directory directory;
+ private IndexSearcher searcher;
+ private IndexReader reader;
+ private Analyzer analyzer=new MockAnalyzer(random);
+
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ //Add series of docs with misspelt names
+ addDoc(writer, "jonathon smythe","1");
+ addDoc(writer, "jonathan smith","2");
+ addDoc(writer, "johnathon smyth","3");
+ addDoc(writer, "johnny smith","4" );
+ addDoc(writer, "jonny smith","5" );
+ addDoc(writer, "johnathon smythe","6");
+ reader = writer.getReader();
+ writer.close();
+ searcher=newSearcher(reader);
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ searcher.close();
+ reader.close();
+ directory.close();
+ super.tearDown();
+ }
+
+ private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException
+ {
+ Document doc=new Document();
+ doc.add(newField("name",name,Field.Store.YES,Field.Index.ANALYZED));
+ doc.add(newField("id",id,Field.Store.YES,Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ }
+
+
+ //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
+ public void testClosestEditDistanceMatchComesFirst() throws Throwable
+ {
+ FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
+ flt.addTerms("smith", "name", 0.3f, 1);
+ Query q=flt.rewrite(searcher.getIndexReader());
+ HashSet<Term> queryTerms=new HashSet<Term>();
+ q.extractTerms(queryTerms);
+ assertTrue("Should have variant smythe",queryTerms.contains(new Term("name","smythe")));
+ assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
+ assertTrue("Should have variant smyth",queryTerms.contains(new Term("name","smyth")));
+ TopDocs topDocs = searcher.search(flt, 1);
+ ScoreDoc[] sd = topDocs.scoreDocs;
+ assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
+ Document doc=searcher.doc(sd[0].doc);
+ assertEquals("Should match most similar not most rare variant", "2",doc.get("id"));
+ }
+ //Test multiple input words are having variants produced
+ public void testMultiWord() throws Throwable
+ {
+ FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
+ flt.addTerms("jonathin smoth", "name", 0.3f, 1);
+ Query q=flt.rewrite(searcher.getIndexReader());
+ HashSet<Term> queryTerms=new HashSet<Term>();
+ q.extractTerms(queryTerms);
+ assertTrue("Should have variant jonathan",queryTerms.contains(new Term("name","jonathan")));
+ assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
+ TopDocs topDocs = searcher.search(flt, 1);
+ ScoreDoc[] sd = topDocs.scoreDocs;
+ assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
+ Document doc=searcher.doc(sd[0].doc);
+ assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
+ }
+ //Test bug found when first query word does not match anything
+ public void testNoMatchFirstWordBug() throws Throwable
+ {
+ FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
+ flt.addTerms("fernando smith", "name", 0.3f, 1);
+ Query q=flt.rewrite(searcher.getIndexReader());
+ HashSet<Term> queryTerms=new HashSet<Term>();
+ q.extractTerms(queryTerms);
+ assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
+ TopDocs topDocs = searcher.search(flt, 1);
+ ScoreDoc[] sd = topDocs.scoreDocs;
+ assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
+ Document doc=searcher.doc(sd[0].doc);
+ assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
+ }
+
+ public void testFuzzyLikeThisQueryEquals() {
+ Analyzer analyzer = new MockAnalyzer(random);
+ FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq1.addTerms("javi", "subject", 0.5f, 2);
+ FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq2.addTerms("javi", "subject", 0.5f, 2);
+ assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
+ fltq2);
+ }
+}