PyLucene 3.4.0-1 import
[pylucene.git] / test / test_Similarity.py
1 # ====================================================================
2 #   Licensed under the Apache License, Version 2.0 (the "License");
3 #   you may not use this file except in compliance with the License.
4 #   You may obtain a copy of the License at
5 #
6 #       http://www.apache.org/licenses/LICENSE-2.0
7 #
8 #   Unless required by applicable law or agreed to in writing, software
9 #   distributed under the License is distributed on an "AS IS" BASIS,
10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 #   See the License for the specific language governing permissions and
12 #   limitations under the License.
13 # ====================================================================
14
15 from unittest import TestCase, main
16 from lucene import *
17
18
class SimpleSimilarity(PythonSimilarity):
    """
    Similarity whose scoring factors are fixed values.

    Every factor is the constant 1.0, with two exceptions: tf() hands back
    the frequency it was given and sloppyFreq() always answers 2.0.
    SimilarityTestCase installs an instance on both the IndexWriter and the
    IndexSearcher and then asserts exact scores.
    """

    # -- factors that depend on their input or differ from 1.0 ----------

    def tf(self, freq):
        """Return freq unchanged."""
        return freq

    def sloppyFreq(self, distance):
        """Return 2.0 whatever the distance is."""
        return 2.0

    # -- factors pinned to 1.0 -------------------------------------------

    def computeNorm(self, field, state):
        """Return 1.0; field and state are ignored."""
        return 1.0

    def queryNorm(self, sumOfSquaredWeights):
        """Return 1.0; sumOfSquaredWeights is ignored."""
        return 1.0

    def idf(self, docFreq, numDocs):
        """Return 1.0; docFreq and numDocs are ignored."""
        return 1.0

    def idfTerms(self, terms, searcher):
        """Return 1.0; terms and searcher are ignored."""
        return 1.0

    def coord(self, overlap, maxOverlap):
        """Return 1.0; overlap and maxOverlap are ignored."""
        return 1.0

    def scorePayload(self, docId, fieldName, start, end, payload,
                     offset, length):
        """Return 1.0; none of the arguments are consulted."""
        return 1.0
45
46
class SimilarityTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def _populateIndex(self, store):
        """
        Write the two test documents into store.

        The documents hold "a c" and "a b c" in the field named "field",
        added in that order, and the writer uses SimpleSimilarity.  The
        writer is optimized and closed before returning.
        """
        writer = IndexWriter(store, SimpleAnalyzer(), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        writer.setSimilarity(SimpleSimilarity())

        for text in ("a c", "a b c"):
            document = Document()
            document.add(Field("field", text,
                               Field.Store.YES, Field.Index.ANALYZED))
            writer.addDocument(document)

        writer.optimize()
        writer.close()

    def _constantScoreCollector(self, expected):
        """
        Return a collector asserting that every hit scores expected.

        The collector ignores reader changes and accepts documents out of
        order.  Inside the nested class, _self is the collector while self
        remains this test case, so assertion failures are reported here.
        """
        class ConstantScoreCollector(PythonCollector):
            def collect(_self, doc, score):
                self.assertEqual(expected, score)
            def setNextReader(_self, reader, docBase):
                pass
            def acceptsDocsOutOfOrder(_self):
                return True

        return ConstantScoreCollector()

    def _checkScores(self, searcher):
        """
        Run the term, boolean and phrase queries against searcher and
        assert the score of every collected hit.
        """
        searcher.setSimilarity(SimpleSimilarity())

        a = Term("field", "a")
        b = Term("field", "b")
        c = Term("field", "c")

        # Single term query: every hit must score exactly 1.0.
        searcher.search(TermQuery(b), self._constantScoreCollector(1.0))

        # Two optional clauses: the first document holds only "a" and the
        # second holds both "a" and "b", so the expected score is the
        # index-wide document number plus one.
        bq = BooleanQuery()
        bq.add(TermQuery(a), BooleanClause.Occur.SHOULD)
        bq.add(TermQuery(b), BooleanClause.Occur.SHOULD)

        class DocNumberCollector(PythonCollector):
            def collect(_self, doc, score):
                # doc is relative to the current reader; adding the base
                # recorded in setNextReader gives the index-wide number.
                self.assertEqual(doc + _self.base + 1, score)
            def setNextReader(_self, reader, docBase):
                _self.base = docBase
            def acceptsDocsOutOfOrder(_self):
                return True

        searcher.search(bq, DocNumberCollector())

        # Phrase "a c" with the default slop: hits must score 1.0.
        pq = PhraseQuery()
        pq.add(a)
        pq.add(c)
        searcher.search(pq, self._constantScoreCollector(1.0))

        # Same phrase with slop 2: hits must score 2.0, the value
        # SimpleSimilarity.sloppyFreq() returns.
        pq.setSlop(2)
        searcher.search(pq, self._constantScoreCollector(2.0))

    def testSimilarity(self):
        """
        Index two small documents with SimpleSimilarity and verify the
        scores produced by term, boolean and phrase queries.
        """
        store = RAMDirectory()
        try:
            self._populateIndex(store)

            searcher = IndexSearcher(store, True)
            try:
                self._checkScores(searcher)
            finally:
                # Release the searcher even when an assertion fails.
                searcher.close()
        finally:
            store.close()
129
130
if __name__ == "__main__":
    import sys
    import lucene

    # The Java VM must be started before any Lucene class is used.
    lucene.initVM()

    if '-loop' in sys.argv:
        # '-loop' is a private flag: strip it so unittest's own argument
        # parsing never sees it, then run the suite over and over.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except (Exception, SystemExit):
                # unittest's main() exits via SystemExit after each run;
                # swallow that (and any test error) to keep looping.
                # KeyboardInterrupt is deliberately not caught so Ctrl-C
                # still stops the loop.
                pass
    else:
        main()