1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.HashSet;
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.MockAnalyzer;
25 import org.apache.lucene.document.Document;
26 import org.apache.lucene.document.Field;
27 import org.apache.lucene.index.IndexReader;
28 import org.apache.lucene.index.RandomIndexWriter;
29 import org.apache.lucene.index.Term;
30 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.LuceneTestCase;
33 public class FuzzyLikeThisQueryTest extends LuceneTestCase {
34 private Directory directory;
35 private IndexSearcher searcher;
36 private IndexReader reader;
37 private Analyzer analyzer=new MockAnalyzer(random);
41 public void setUp() throws Exception {
43 directory = newDirectory();
44 RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
45 //Add series of docs with misspelt names
46 addDoc(writer, "jonathon smythe","1");
47 addDoc(writer, "jonathan smith","2");
48 addDoc(writer, "johnathon smyth","3");
49 addDoc(writer, "johnny smith","4" );
50 addDoc(writer, "jonny smith","5" );
51 addDoc(writer, "johnathon smythe","6");
52 reader = writer.getReader();
54 searcher=newSearcher(reader);
58 public void tearDown() throws Exception {
65 private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException
67 Document doc=new Document();
68 doc.add(newField("name",name,Field.Store.YES,Field.Index.ANALYZED));
69 doc.add(newField("id",id,Field.Store.YES,Field.Index.ANALYZED));
70 writer.addDocument(doc);
74 //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
75 public void testClosestEditDistanceMatchComesFirst() throws Throwable
77 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
78 flt.addTerms("smith", "name", 0.3f, 1);
79 Query q=flt.rewrite(searcher.getIndexReader());
80 HashSet<Term> queryTerms=new HashSet<Term>();
81 q.extractTerms(queryTerms);
82 assertTrue("Should have variant smythe",queryTerms.contains(new Term("name","smythe")));
83 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
84 assertTrue("Should have variant smyth",queryTerms.contains(new Term("name","smyth")));
85 TopDocs topDocs = searcher.search(flt, 1);
86 ScoreDoc[] sd = topDocs.scoreDocs;
87 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
88 Document doc=searcher.doc(sd[0].doc);
89 assertEquals("Should match most similar not most rare variant", "2",doc.get("id"));
91 //Test multiple input words are having variants produced
92 public void testMultiWord() throws Throwable
94 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
95 flt.addTerms("jonathin smoth", "name", 0.3f, 1);
96 Query q=flt.rewrite(searcher.getIndexReader());
97 HashSet<Term> queryTerms=new HashSet<Term>();
98 q.extractTerms(queryTerms);
99 assertTrue("Should have variant jonathan",queryTerms.contains(new Term("name","jonathan")));
100 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
101 TopDocs topDocs = searcher.search(flt, 1);
102 ScoreDoc[] sd = topDocs.scoreDocs;
103 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
104 Document doc=searcher.doc(sd[0].doc);
105 assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
107 //Test bug found when first query word does not match anything
108 public void testNoMatchFirstWordBug() throws Throwable
110 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
111 flt.addTerms("fernando smith", "name", 0.3f, 1);
112 Query q=flt.rewrite(searcher.getIndexReader());
113 HashSet<Term> queryTerms=new HashSet<Term>();
114 q.extractTerms(queryTerms);
115 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
116 TopDocs topDocs = searcher.search(flt, 1);
117 ScoreDoc[] sd = topDocs.scoreDocs;
118 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
119 Document doc=searcher.doc(sd[0].doc);
120 assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
123 public void testFuzzyLikeThisQueryEquals() {
124 Analyzer analyzer = new MockAnalyzer(random);
125 FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
126 fltq1.addTerms("javi", "subject", 0.5f, 2);
127 FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
128 fltq2.addTerms("javi", "subject", 0.5f, 2);
129 assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,