Configured build for Ubuntu and added Stempel Polish analyzer
[pylucene.git] / samples / LuceneInAction / lia / searching / ScoreTest.py
1 # ====================================================================
2 #   Licensed under the Apache License, Version 2.0 (the "License");
3 #   you may not use this file except in compliance with the License.
4 #   You may obtain a copy of the License at
5 #
6 #       http://www.apache.org/licenses/LICENSE-2.0
7 #
8 #   Unless required by applicable law or agreed to in writing, software
9 #   distributed under the License is distributed on an "AS IS" BASIS,
10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 #   See the License for the specific language governing permissions and
12 #   limitations under the License.
13 # ====================================================================
14
15 from lia.common.LiaTestCase import LiaTestCase
16
17 from lucene import \
18      WhitespaceAnalyzer, Document, Field, IndexWriter, Term, Explanation, \
19      FuzzyQuery, IndexSearcher, Similarity, TermQuery, WildcardQuery, \
20      RAMDirectory, PythonSimilarity
21
22
class ScoreTest(LiaTestCase):
    """Scoring checks for term, wildcard and fuzzy queries.

    Every test builds its own small single-field index in the in-memory
    directory created by ``setUp`` and then inspects the scores of the hits.
    """

    def setUp(self):
        """Run the base fixture, then use a fresh ``RAMDirectory`` for the
        index written and searched by the tests in this class."""

        super(ScoreTest, self).setUp()
        self.directory = RAMDirectory()

    def testSimple(self):
        """A single-term match scored with constant factors scores 1.0."""

        class SimpleSimilarity(PythonSimilarity):
            """Similarity whose factors are constants, except ``tf`` which
            returns the raw term frequency.

            ``_self`` is used instead of ``self`` so the enclosing test
            method's ``self`` is not shadowed.
            """

            def lengthNorm(_self, field, numTerms):
                return 1.0

            def queryNorm(_self, sumOfSquaredWeights):
                return 1.0

            def tf(_self, freq):
                return freq

            def sloppyFreq(_self, distance):
                return 2.0

            def idfTerms(_self, terms, searcher):
                return 1.0

            def idf(_self, docFreq, numDocs):
                return 1.0

            def coord(_self, overlap, maxOverlap):
                return 1.0

            def scorePayload(_self, docId, fieldName, start, end, payload,
                             offset, length):
                return 1.0

        self.indexSingleFieldDocs([Field("contents", "x", Field.Store.YES,
                                         Field.Index.ANALYZED)])
        searcher = IndexSearcher(self.directory)
        try:
            searcher.setSimilarity(SimpleSimilarity())

            query = TermQuery(Term("contents", "x"))
            explanation = searcher.explain(query, 0)
            # Parenthesised form prints the same text under the Python 2
            # print statement and is also valid Python 3 syntax.
            print(explanation)

            scoreDocs = searcher.search(query, 50).scoreDocs
            self.assertEqual(1, len(scoreDocs))

            self.assertEqual(scoreDocs[0].score, 1.0)
        finally:
            # Release the searcher even when an assertion above fails.
            searcher.close()

    def indexSingleFieldDocs(self, fields):
        """Recreate the index in ``self.directory`` with one document per
        entry of ``fields``.

        Each ``Field`` in ``fields`` is wrapped in its own ``Document``. The
        writer is opened with ``create=True``, so any index previously
        written to the directory is replaced.
        """

        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                             IndexWriter.MaxFieldLength.UNLIMITED)
        try:
            for field in fields:
                doc = Document()
                doc.add(field)
                writer.addDocument(doc)

            writer.optimize()
        finally:
            # Always close the writer so a failed add does not leave it open.
            writer.close()

    def testWildcard(self):
        """All hits of the wildcard query ``?ild*`` share the same score."""

        self.indexSingleFieldDocs([Field("contents", "wild", Field.Store.YES,
                                         Field.Index.ANALYZED),
                                   Field("contents", "child", Field.Store.YES,
                                         Field.Index.ANALYZED),
                                   Field("contents", "mild", Field.Store.YES,
                                         Field.Index.ANALYZED),
                                   Field("contents", "mildew", Field.Store.YES,
                                         Field.Index.ANALYZED)])

        searcher = IndexSearcher(self.directory)
        try:
            query = WildcardQuery(Term("contents", "?ild*"))
            scoreDocs = searcher.search(query, 50).scoreDocs
            # Three of the four indexed documents are expected to match.
            self.assertEqual(3, len(scoreDocs), "child no match")

            self.assertEqual(scoreDocs[0].score, scoreDocs[1].score,
                             "score the same")
            # Compare the second hit with the third; comparing a hit with
            # itself would pass unconditionally and verify nothing.
            self.assertEqual(scoreDocs[1].score, scoreDocs[2].score,
                             "score the same")
        finally:
            searcher.close()

    def testFuzzy(self):
        """A fuzzy query for ``wuzza`` matches both documents with differing
        scores, and the top hit is ``wuzzy``."""

        self.indexSingleFieldDocs([Field("contents", "fuzzy", Field.Store.YES,
                                         Field.Index.ANALYZED),
                                   Field("contents", "wuzzy", Field.Store.YES,
                                         Field.Index.ANALYZED)])

        searcher = IndexSearcher(self.directory)
        try:
            query = FuzzyQuery(Term("contents", "wuzza"))
            scoreDocs = searcher.search(query, 50).scoreDocs
            self.assertEqual(2, len(scoreDocs), "both close enough")

            self.assertNotEqual(scoreDocs[0].score, scoreDocs[1].score,
                                "wuzzy closer than fuzzy")
            self.assertEqual("wuzzy",
                             searcher.doc(scoreDocs[0].doc).get("contents"),
                             "wuzza bear")
        finally:
            searcher.close()