1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from unittest import main
16 from BaseTestRangeFilter import BaseTestRangeFilter
21 # A basic 'positive' Unit test class for the TermRangeFilter class.
# NOTE: at the moment, this class only tests for 'positive' results;
# it does not verify the results to ensure there are no 'false positives',
# nor does it adequately test 'negative' results. It also does not test
# that invalid ('garbage') input results in an Exception.
29 class TestTermRangeFilter(BaseTestRangeFilter):
31 def testRangeFilterId(self):
# Checks TermRangeFilter hit counts on the "id" field. The range bounds are
# the padded forms of self.minId, self.maxId and a midpoint value, and the
# base query is a TermQuery on body:body.
#
# NOTE(review): in this copy of the file every `search.search(q,
# TermRangeFilter(...` statement below stops after its first line; the
# inclusiveness flags, the closing parentheses and the hit limit are not
# visible. The assertion messages ("min, min, False, False", ...) presumably
# name the missing flags - confirm against the upstream file before editing.
33 index = self.signedIndex
# Second argument presumably requests a read-only reader - confirm.
34 reader = IndexReader.open(index.index, True);
35 search = IndexSearcher(reader)
# Half of the id span; used both as an id value and in expected counts.
# NOTE(review): "/" only yields an int under Python 2 integer division.
37 medId = ((self.maxId - self.minId) / 2)
# Padded string forms of the three ids, used as range bounds.
39 minIP = self.pad(self.minId)
40 maxIP = self.pad(self.maxId)
41 medIP = self.pad(medId)
43 numDocs = reader.numDocs()
# Sanity check: the index holds exactly one document per id in [minId, maxId].
45 self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")
# Base query; the "find all" assertion expects it to match every document.
47 q = TermQuery(Term("body","body"))
49 # test id, bounded on both ends
51 result = search.search(q, TermRangeFilter("id", minIP, maxIP,
53 self.assertEqual(numDocs, result.totalHits, "find all")
55 result = search.search(q, TermRangeFilter("id", minIP, maxIP,
57 self.assertEqual(numDocs - 1, result.totalHits, "all but last")
59 result = search.search(q, TermRangeFilter("id", minIP, maxIP,
61 self.assertEqual(numDocs - 1, result.totalHits, "all but first")
63 result = search.search(q, TermRangeFilter("id", minIP, maxIP,
65 self.assertEqual(numDocs - 2, result.totalHits, "all but ends")
67 result = search.search(q, TermRangeFilter("id", medIP, maxIP,
69 self.assertEqual(1 + self.maxId - medId, result.totalHits, "med and up")
71 result = search.search(q, TermRangeFilter("id", minIP, medIP,
73 self.assertEqual(1 + medId - self.minId, result.totalHits, "up to med")
# Ranges with one bound passed as None (one end left open).
77 result = search.search(q, TermRangeFilter("id", minIP, None,
79 self.assertEqual(numDocs, result.totalHits, "min and up")
81 result = search.search(q, TermRangeFilter("id", None, maxIP,
83 self.assertEqual(numDocs, result.totalHits, "max and down")
85 result = search.search(q, TermRangeFilter("id", minIP, None,
87 self.assertEqual(numDocs - 1, result.totalHits, "not min, but up")
89 result = search.search(q, TermRangeFilter("id", None, maxIP,
91 self.assertEqual(numDocs - 1, result.totalHits, "not max, but down")
93 result = search.search(q, TermRangeFilter("id",medIP, maxIP,
95 self.assertEqual(self.maxId - medId, result.totalHits, "med and up, not max")
97 result = search.search(q, TermRangeFilter("id", minIP, medIP,
99 self.assertEqual(medId - self.minId, result.totalHits, "not min, up to med")
# Degenerate ranges whose two bounds are the same term: expected 0 or 1 hit.
103 result = search.search(q, TermRangeFilter("id", minIP, minIP,
105 self.assertEqual(0, result.totalHits, "min, min, False, False")
107 result = search.search(q, TermRangeFilter("id", medIP, medIP,
109 self.assertEqual(0, result.totalHits, "med, med, False, False")
110 result = search.search(q, TermRangeFilter("id", maxIP, maxIP,
112 self.assertEqual(0, result.totalHits, "max, max, False, False")
114 result = search.search(q, TermRangeFilter("id", minIP, minIP,
116 self.assertEqual(1, result.totalHits, "min, min, True, True")
117 result = search.search(q, TermRangeFilter("id", None, minIP,
119 self.assertEqual(1, result.totalHits, "nul, min, False, True")
121 result = search.search(q, TermRangeFilter("id", maxIP, maxIP,
123 self.assertEqual(1, result.totalHits, "max, max, True, True")
124 result = search.search(q, TermRangeFilter("id", maxIP, None,
126 self.assertEqual(1, result.totalHits, "max, nul, True, True")
128 result = search.search(q, TermRangeFilter("id", medIP, medIP,
130 self.assertEqual(1, result.totalHits, "med, med, True, True")
def testRangeFilterIdCollating(self):
    # Runs a body:body TermQuery through a series of TermRangeFilters on the
    # "id" field, each built with an English-locale Collator, and compares
    # the reported totalHits with the expected count for every combination
    # of bounds and inclusiveness flags listed in the table below.
    signed = self.signedIndex
    idxReader = IndexReader.open(signed.index, True)
    searcher = IndexSearcher(idxReader)

    collator = Collator.getInstance(Locale.ENGLISH)

    # Half of the id span; used as an id value and in the expected counts.
    # NOTE(review): "/" is kept as written; it is integer division only
    # under Python 2 with int ids - confirm before running on Python 3.
    midId = ((self.maxId - self.minId) / 2)

    # Padded string forms of the ids, used as the range bounds.
    lowKey = self.pad(self.minId)
    highKey = self.pad(self.maxId)
    midKey = self.pad(midId)

    docCount = idxReader.numDocs()
    self.assertEqual(docCount, 1 + self.maxId - self.minId, "num of docs")

    bodyQuery = TermQuery(Term("body", "body"))

    # Each row: (lower, upper, includeLower, includeUpper, expected, message).
    # A bound of None leaves that end of the range open.
    cases = (
        # id range bounded on both ends
        (lowKey, highKey, True, True, docCount, "find all"),
        (lowKey, highKey, True, False, docCount - 1, "all but last"),
        (lowKey, highKey, False, True, docCount - 1, "all but first"),
        (lowKey, highKey, False, False, docCount - 2, "all but ends"),
        (midKey, highKey, True, True, 1 + self.maxId - midId, "med and up"),
        (lowKey, midKey, True, True, 1 + midId - self.minId, "up to med"),
        # one end open
        (lowKey, None, True, False, docCount, "min and up"),
        (None, highKey, False, True, docCount, "max and down"),
        (lowKey, None, False, False, docCount - 1, "not min, but up"),
        (None, highKey, False, False, docCount - 1, "not max, but down"),
        (midKey, highKey, True, False, self.maxId - midId,
         "med and up, not max"),
        (lowKey, midKey, False, True, midId - self.minId,
         "not min, up to med"),
        # both bounds are the same term: nothing matches when exclusive
        (lowKey, lowKey, False, False, 0, "min, min, F, F"),
        (midKey, midKey, False, False, 0, "med, med, F, F"),
        (highKey, highKey, False, False, 0, "max, max, F, F"),
        # single-term ranges that should match exactly one document
        (lowKey, lowKey, True, True, 1, "min, min, T, T"),
        (None, lowKey, False, True, 1, "nul, min, F, T"),
        (highKey, highKey, True, True, 1, "max, max, T, T"),
        (highKey, None, True, False, 1, "max, nul, T, T"),
        (midKey, midKey, True, True, 1, "med, med, T, T"),
    )

    for lower, upper, inclLower, inclUpper, expected, message in cases:
        idFilter = TermRangeFilter("id", lower, upper,
                                   inclLower, inclUpper, collator)
        found = searcher.search(bodyQuery, idFilter, 1000).totalHits
        self.assertEqual(expected, found, message)
233 def testRangeFilterRand(self):
# Checks TermRangeFilter hit counts on the "rand" field. The bounds are the
# padded forms of the signed index's minR and maxR attributes, and the base
# query is a TermQuery on body:body.
#
# NOTE(review): in this copy of the file every `search.search(q,
# TermRangeFilter(...` statement below stops after its first line; the
# inclusiveness flags, the closing parentheses and the hit limit are not
# visible. The assertion messages presumably name the missing flags -
# confirm against the upstream file before editing.
235 index = self.signedIndex
# Second argument presumably requests a read-only reader - confirm.
236 reader = IndexReader.open(index.index, True)
237 search = IndexSearcher(reader)
# Padded string forms of the index's minR / maxR, used as range bounds.
239 minRP = self.pad(index.minR)
240 maxRP = self.pad(index.maxR)
242 numDocs = reader.numDocs()
# Sanity check: the index holds exactly one document per id in [minId, maxId].
244 self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")
# Base query; the "find all" assertion expects it to match every document.
246 q = TermQuery(Term("body", "body"))
248 # test extremes, bounded on both ends
250 result = search.search(q, TermRangeFilter("rand", minRP, maxRP,
252 self.assertEqual(numDocs, result.totalHits, "find all")
254 result = search.search(q, TermRangeFilter("rand", minRP, maxRP,
256 self.assertEqual(numDocs - 1, result.totalHits, "all but biggest")
258 result = search.search(q, TermRangeFilter("rand", minRP, maxRP,
260 self.assertEqual(numDocs - 1, result.totalHits, "all but smallest")
262 result = search.search(q, TermRangeFilter("rand", minRP, maxRP,
264 self.assertEqual(numDocs - 2, result.totalHits, "all but extremes")
# Ranges with one bound passed as None (one end left open).
268 result = search.search(q, TermRangeFilter("rand", minRP, None,
270 self.assertEqual(numDocs, result.totalHits, "smallest and up")
272 result = search.search(q, TermRangeFilter("rand", None, maxRP,
274 self.assertEqual(numDocs, result.totalHits, "biggest and down")
276 result = search.search(q, TermRangeFilter("rand", minRP, None,
278 self.assertEqual(numDocs - 1, result.totalHits, "not smallest, but up")
280 result = search.search(q, TermRangeFilter("rand", None, maxRP,
282 self.assertEqual(numDocs - 1, result.totalHits, "not biggest, but down")
# Degenerate ranges whose two bounds are the same term: expected 0 or 1 hit.
286 result = search.search(q, TermRangeFilter("rand", minRP, minRP,
288 self.assertEqual(0, result.totalHits, "min, min, False, False")
290 result = search.search(q, TermRangeFilter("rand", maxRP, maxRP,
292 self.assertEqual(0, result.totalHits, "max, max, False, False")
294 result = search.search(q, TermRangeFilter("rand", minRP, minRP,
296 self.assertEqual(1, result.totalHits, "min, min, True, True")
298 result = search.search(q, TermRangeFilter("rand", None, minRP,
300 self.assertEqual(1, result.totalHits, "nul, min, False, True")
302 result = search.search(q, TermRangeFilter("rand", maxRP, maxRP,
304 self.assertEqual(1, result.totalHits, "max, max, True, True")
306 result = search.search(q, TermRangeFilter("rand", maxRP, None,
308 self.assertEqual(1, result.totalHits, "max, nul, True, True")
def testRangeFilterRandCollating(self):
    # Runs a body:body TermQuery through TermRangeFilters on the "rand"
    # field, each built with an English-locale Collator, and checks the
    # reported totalHits for every bound / inclusiveness combination.

    # using the unsigned index because collation seems to ignore hyphens
    unsigned = self.unsignedIndex
    idxReader = IndexReader.open(unsigned.index, True)
    searcher = IndexSearcher(idxReader)

    englishCollator = Collator.getInstance(Locale.ENGLISH)

    # Padded string forms of the index's minR / maxR, used as range bounds.
    smallest = self.pad(unsigned.minR)
    biggest = self.pad(unsigned.maxR)

    docCount = idxReader.numDocs()
    self.assertEqual(docCount, 1 + self.maxId - self.minId, "num of docs")

    bodyQuery = TermQuery(Term("body", "body"))

    def countHits(lower, upper, includeLower, includeUpper):
        # Number of bodyQuery hits that pass a collated "rand" range filter;
        # a bound of None leaves that end of the range open.
        randFilter = TermRangeFilter("rand", lower, upper,
                                     includeLower, includeUpper,
                                     englishCollator)
        return searcher.search(bodyQuery, randFilter, 1000).totalHits

    # test extremes, bounded on both ends
    self.assertEqual(docCount, countHits(smallest, biggest, True, True),
                     "find all")
    self.assertEqual(docCount - 1,
                     countHits(smallest, biggest, True, False),
                     "all but biggest")
    self.assertEqual(docCount - 1,
                     countHits(smallest, biggest, False, True),
                     "all but smallest")
    self.assertEqual(docCount - 2,
                     countHits(smallest, biggest, False, False),
                     "all but extremes")

    # one end open
    self.assertEqual(docCount, countHits(smallest, None, True, False),
                     "smallest and up")
    self.assertEqual(docCount, countHits(None, biggest, False, True),
                     "biggest and down")
    self.assertEqual(docCount - 1,
                     countHits(smallest, None, False, False),
                     "not smallest, but up")
    self.assertEqual(docCount - 1,
                     countHits(None, biggest, False, False),
                     "not biggest, but down")

    # both bounds are the same term: nothing matches when exclusive
    self.assertEqual(0, countHits(smallest, smallest, False, False),
                     "min, min, F, F")
    self.assertEqual(0, countHits(biggest, biggest, False, False),
                     "max, max, F, F")

    # single-term ranges that should match exactly one document
    self.assertEqual(1, countHits(smallest, smallest, True, True),
                     "min, min, T, T")
    self.assertEqual(1, countHits(None, smallest, False, True),
                     "nul, min, F, T")
    self.assertEqual(1, countHits(biggest, biggest, True, True),
                     "max, max, T, T")
    self.assertEqual(1, countHits(biggest, None, True, False),
                     "max, nul, T, T")
# Farsi collation check: builds a one-document in-memory index and verifies
# that a collated TermRangeFilter on "content" excludes / includes its term.
# NOTE(review): no `def` line for these statements is visible in this copy of
# the file, and `doc` is used below without a visible assignment; both were
# presumably on lines that are missing here - confirm against upstream.
392 farsiIndex = RAMDirectory()
393 writer = IndexWriter(farsiIndex, SimpleAnalyzer(), True,
394 IndexWriter.MaxFieldLength.LIMITED)
# The single document: "content" holds the term U+0633 U+0627 U+0628 and
# "body" holds the constant value matched by the base query below.
396 doc.add(Field("content", u"\u0633\u0627\u0628",
397 Field.Store.YES, Field.Index.NOT_ANALYZED))
398 doc.add(Field("body", "body",
399 Field.Store.YES, Field.Index.NOT_ANALYZED))
400 writer.addDocument(doc)
# NOTE(review): no writer commit/close is visible before the reader is opened;
# presumably on the missing lines - confirm.
405 reader = IndexReader.open(farsiIndex, True)
406 search = IndexSearcher(reader)
407 q = TermQuery(Term("body", "body"))
409 # Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
410 # RuleBasedCollator. However, the Arabic Locale seems to order the
411 # Farsi characters properly.
412 collator = Collator.getInstance(Locale("ar"))
414 # Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
415 # orders the U+0698 character before the U+0633 character, so the
416 # single index Term below should NOT be returned by a
417 # TermRangeFilter with a Farsi Collator (or an Arabic one for the
418 # case when Farsi is not supported).
419 numHits = search.search(q, TermRangeFilter("content", u"\u062F", u"\u0698", True, True, collator), 1000).totalHits
420 self.assertEqual(0, numHits, "The index Term should not be included.")
# Inclusive range [U+0633, U+0638]: the indexed term is expected to match.
422 numHits = search.search(q, TermRangeFilter("content", u"\u0633", u"\u0638", True, True, collator), 1000).totalHits
423 self.assertEqual(1, numHits, "The index Term should be included.")
426 def testDanish(self):
# Danish collation check: indexes the words listed below into an in-memory
# index and verifies collated TermRangeFilter results on "content".
# NOTE(review): in this copy of the file `doc` and `word` are used without a
# visible assignment or loop header, and no writer commit/close is visible;
# those statements were presumably on lines missing here - confirm.
429 danishIndex = RAMDirectory()
430 writer = IndexWriter(danishIndex, SimpleAnalyzer(), True,
431 IndexWriter.MaxFieldLength.LIMITED)
433 # Danish collation orders the words below in the given order
434 # (example taken from TestSort.testInternationalSort() ).
435 words = [u"H\u00D8T", u"H\u00C5T", "MAND"]
# Each document stores one word in "content" plus the constant "body" value
# matched by the base query below.
438 doc.add(Field("content", word, Field.Store.YES,
439 Field.Index.NOT_ANALYZED))
440 doc.add(Field("body", "body", Field.Store.YES,
441 Field.Index.NOT_ANALYZED))
442 writer.addDocument(doc)
447 reader = IndexReader.open(danishIndex, True)
448 search = IndexSearcher(reader)
449 q = TermQuery(Term("body", "body"))
451 collator = Collator.getInstance(Locale("da", "dk"))
# NOTE(review): this statement is cut off after its first line here, and
# `query` is not used by any visible line. Its lower bound also lacks the
# u prefix that the filter calls below use - confirm whether that is intended.
452 query = TermRangeQuery("content", "H\u00D8T", "MAND", False, False,
455 # Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
456 # but Danish collation does.
457 numHits = search.search(q, TermRangeFilter("content", u"H\u00D8T", "MAND", False, False, collator), 1000).totalHits
458 self.assertEqual(1, numHits, "The index Term should be included.")
# Exclusive range starting at the second word: no indexed term is expected.
460 numHits = search.search(q, TermRangeFilter("content", u"H\u00C5T", "MAND", False, False, collator), 1000).totalHits
461 self.assertEqual(0, numHits, "The index Term should not be included.")
# Script entry point: runs the TestTermRangeFilter tests through
# unittest's main(), with optional handling of a '-loop' command-line flag.
# NOTE(review): `sys` is used below but no import of it is visible in this
# copy of the file, and the two identical main(...) calls presumably sat in
# separate branches (a '-loop' branch and a run-once branch) whose
# surrounding lines are missing here - confirm against upstream.
465 if __name__ == "__main__":
468 if '-loop' in sys.argv:
# Strip the flag so that unittest's own argument parsing does not see it.
469 sys.argv.remove('-loop')
472 main(defaultTest='TestTermRangeFilter')
476 main(defaultTest='TestTermRangeFilter')