1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from lia.common.LiaTestCase import LiaTestCase
18 WhitespaceAnalyzer, Document, Field, IndexWriter, Term, Explanation, \
19 FuzzyQuery, IndexSearcher, Similarity, TermQuery, WildcardQuery, \
20 RAMDirectory, PythonSimilarity
# Test case (subclass of LiaTestCase) whose tests index into
# self.directory and assert on the scores of the search hits.
23 class ScoreTest(LiaTestCase):
# Fixture setup: run the base-class setUp() first, then give the test its
# own new RAMDirectory to index into.
# NOTE(review): the enclosing `def` line is not visible in this view; the
# super(...).setUp() call below indicates these statements form setUp() --
# confirm against the full file.
27 super(ScoreTest, self).setUp()
28 self.directory = RAMDirectory()
# NOTE(review): the `def` line of the enclosing test method is not visible
# in this view.
#
# Similarity subclass that is installed on the searcher further down. Only
# the method signatures are visible here; the method bodies (and therefore
# the values they return) are not shown, so nothing is stated about them.
# The methods name their first parameter `_self` -- presumably so that the
# enclosing test's `self` is not shadowed; confirm.
32 class SimpleSimilarity(PythonSimilarity):
34 def lengthNorm(_self, field, numTerms):
37 def queryNorm(_self, sumOfSquaredWeights):
43 def sloppyFreq(_self, distance):
46 def idfTerms(_self, terms, searcher):
49 def idf(_self, docFreq, numDocs):
52 def coord(_self, overlap, maxOverlap):
55 def scorePayload(_self, docId, fieldName, start, end, payload,
# Index one Field: "contents" holding the single term "x".
59 self.indexSingleFieldDocs([Field("contents", "x", Field.Store.YES,
60 Field.Index.ANALYZED)])
# Open a searcher on the test directory and install the custom similarity
# before running any query.
61 searcher = IndexSearcher(self.directory)
62 searcher.setSimilarity(SimpleSimilarity())
# Term query for "x" in "contents"; ask the searcher to explain the score
# of document 0 for it.
64 query = TermQuery(Term("contents", "x"))
65 explanation = searcher.explain(query, 0)
# Fetch up to 50 hits; exactly one hit is expected.
68 scoreDocs = searcher.search(query, 50).scoreDocs
69 self.assertEqual(1, len(scoreDocs))
# The single hit must score exactly 1.0 (exact float comparison).
71 self.assertEqual(scoreDocs[0].score, 1.0)
# Helper used by the tests: opens an IndexWriter on self.directory with a
# WhitespaceAnalyzer and adds documents to it.
#
# fields -- the callers in this file pass a list of Field objects.
# NOTE(review): the statements that build `doc` from `fields` are not
# visible in this view; the method name suggests one single-field document
# per Field -- confirm against the full file.
74 def indexSingleFieldDocs(self, fields):
# `True` is the third constructor argument -- presumably the "create" flag
# (start a new index); confirm against the IndexWriter API. The field
# length limit is set to MaxFieldLength.UNLIMITED.
76 writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
77 IndexWriter.MaxFieldLength.UNLIMITED)
# Add the document to the index through the writer.
82 writer.addDocument(doc)
def testWildcard(self):
    """Check that WildcardQuery "?ild*" yields three equally scored hits.

    Four Field objects for "contents" are indexed through
    indexSingleFieldDocs(): "wild", "child", "mild" and "mildew".
    The query is expected to return exactly three hits (the assertion
    message "child no match" names the excluded term), and all three
    hits are expected to carry the same score.
    """
    self.indexSingleFieldDocs([
        Field("contents", "wild", Field.Store.YES, Field.Index.ANALYZED),
        Field("contents", "child", Field.Store.YES, Field.Index.ANALYZED),
        Field("contents", "mild", Field.Store.YES, Field.Index.ANALYZED),
        Field("contents", "mildew", Field.Store.YES, Field.Index.ANALYZED),
    ])

    searcher = IndexSearcher(self.directory)
    query = WildcardQuery(Term("contents", "?ild*"))
    scoreDocs = searcher.search(query, 50).scoreDocs
    self.assertEqual(3, len(scoreDocs), "child no match")

    # Compare adjacent hits pairwise so that all three scores are covered.
    # The second comparison used to test scoreDocs[1].score against itself,
    # which always passes and never looked at the third hit.
    # NOTE(review): the failure messages originally passed to these two
    # assertions are not visible in this view; the ones below are
    # descriptive stand-ins.
    self.assertEqual(scoreDocs[0].score, scoreDocs[1].score,
                     "first and second hit score the same")
    self.assertEqual(scoreDocs[1].score, scoreDocs[2].score,
                     "second and third hit score the same")
# NOTE(review): the `def` line of the enclosing test method is not visible
# in this view, and the final assertEqual call below is cut off after its
# second argument.
#
# Index two Field objects for "contents": "fuzzy" and "wuzzy".
110 self.indexSingleFieldDocs([Field("contents", "fuzzy", Field.Store.YES,
111 Field.Index.ANALYZED),
112 Field("contents", "wuzzy", Field.Store.YES,
113 Field.Index.ANALYZED)])
# Fuzzy-search for "wuzza", a term that was not indexed above, and fetch up
# to 50 hits.
115 searcher = IndexSearcher(self.directory)
116 query = FuzzyQuery(Term("contents", "wuzza"))
117 scoreDocs = searcher.search(query, 50).scoreDocs
# Both indexed terms are expected to match the fuzzy query.
118 self.assertEqual(2, len(scoreDocs), "both close enough")
# The two hits must not have the same score.
# NOTE(review): assert_ is a deprecated unittest alias of assertTrue
# (removed in Python 3.12); assertNotEqual would state the intent directly.
120 self.assert_(scoreDocs[0].score != scoreDocs[1].score,
121 "wuzzy closer than fuzzy")
# The top-ranked hit must be the document whose stored "contents" value is
# "wuzzy".
122 self.assertEqual("wuzzy",
123 searcher.doc(scoreDocs[0].doc).get("contents"),