pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / queries / src / test / org / apache / lucene / search / FuzzyLikeThisQueryTest.java
1 package org.apache.lucene.search;
2
3 /**
4  * Licensed to the Apache Software Foundation (ASF) under one or more
5  * contributor license agreements.  See the NOTICE file distributed with
6  * this work for additional information regarding copyright ownership.
7  * The ASF licenses this file to You under the Apache License, Version 2.0
8  * (the "License"); you may not use this file except in compliance with
9  * the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 import java.io.IOException;
21 import java.util.HashSet;
22
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.MockAnalyzer;
25 import org.apache.lucene.document.Document;
26 import org.apache.lucene.document.Field;
27 import org.apache.lucene.index.IndexReader;
28 import org.apache.lucene.index.RandomIndexWriter;
29 import org.apache.lucene.index.Term;
30 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.LuceneTestCase;
32
33 public class FuzzyLikeThisQueryTest extends LuceneTestCase {
34         private Directory directory;
35         private IndexSearcher searcher;
36         private IndexReader reader;
37         private Analyzer analyzer=new MockAnalyzer(random);
38
39
40         @Override
41         public void setUp() throws Exception    {
42           super.setUp();
43                 directory = newDirectory();
44                 RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
45                 //Add series of docs with misspelt names
46                 addDoc(writer, "jonathon smythe","1");
47                 addDoc(writer, "jonathan smith","2");
48                 addDoc(writer, "johnathon smyth","3");
49                 addDoc(writer, "johnny smith","4" );
50                 addDoc(writer, "jonny smith","5" );
51                 addDoc(writer, "johnathon smythe","6");
52                 reader = writer.getReader();
53                 writer.close();
54                 searcher=newSearcher(reader);                   
55         }
56         
57         @Override
58         public void tearDown() throws Exception {
59           searcher.close();
60           reader.close();
61           directory.close();
62           super.tearDown();
63         }
64         
65         private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException
66         {
67                 Document doc=new Document();
68                 doc.add(newField("name",name,Field.Store.YES,Field.Index.ANALYZED));
69                 doc.add(newField("id",id,Field.Store.YES,Field.Index.ANALYZED));
70                 writer.addDocument(doc);
71         }
72         
73                 
74         //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match 
75         public void testClosestEditDistanceMatchComesFirst() throws Throwable
76         {
77                 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
78                 flt.addTerms("smith", "name", 0.3f, 1);
79                 Query q=flt.rewrite(searcher.getIndexReader());
80                 HashSet<Term> queryTerms=new HashSet<Term>();
81                 q.extractTerms(queryTerms);
82                 assertTrue("Should have variant smythe",queryTerms.contains(new Term("name","smythe")));
83                 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
84                 assertTrue("Should have variant smyth",queryTerms.contains(new Term("name","smyth")));
85                 TopDocs topDocs = searcher.search(flt, 1);
86                 ScoreDoc[] sd = topDocs.scoreDocs;
87                 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
88                 Document doc=searcher.doc(sd[0].doc);
89                 assertEquals("Should match most similar not most rare variant", "2",doc.get("id"));
90         }
91         //Test multiple input words are having variants produced
92         public void testMultiWord() throws Throwable
93         {
94                 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
95                 flt.addTerms("jonathin smoth", "name", 0.3f, 1);
96                 Query q=flt.rewrite(searcher.getIndexReader());
97                 HashSet<Term> queryTerms=new HashSet<Term>();
98                 q.extractTerms(queryTerms);
99                 assertTrue("Should have variant jonathan",queryTerms.contains(new Term("name","jonathan")));
100                 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
101                 TopDocs topDocs = searcher.search(flt, 1);
102                 ScoreDoc[] sd = topDocs.scoreDocs;
103                 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
104                 Document doc=searcher.doc(sd[0].doc);
105                 assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
106         }
107         //Test bug found when first query word does not match anything
108         public void testNoMatchFirstWordBug() throws Throwable
109         {
110                 FuzzyLikeThisQuery flt=new FuzzyLikeThisQuery(10,analyzer);
111                 flt.addTerms("fernando smith", "name", 0.3f, 1);
112                 Query q=flt.rewrite(searcher.getIndexReader());
113                 HashSet<Term> queryTerms=new HashSet<Term>();
114                 q.extractTerms(queryTerms);
115                 assertTrue("Should have variant smith",queryTerms.contains(new Term("name","smith")));
116                 TopDocs topDocs = searcher.search(flt, 1);
117                 ScoreDoc[] sd = topDocs.scoreDocs;
118                 assertTrue("score docs must match 1 doc", (sd!=null)&&(sd.length>0));
119                 Document doc=searcher.doc(sd[0].doc);
120                 assertEquals("Should match most similar when using 2 words", "2",doc.get("id"));
121         }
122         
123         public void testFuzzyLikeThisQueryEquals() {
124           Analyzer analyzer = new MockAnalyzer(random);
125     FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
126     fltq1.addTerms("javi", "subject", 0.5f, 2);
127     FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
128     fltq2.addTerms("javi", "subject", 0.5f, 2);
129     assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
130         fltq2);
131   } 
132 }