1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
17 from itertools import izip
18 from random import randint
19 from unittest import TestCase, main
26 class SortTestCase(TestCase):
28 Unit tests for sorting code, ported from Java Lucene
31 def __init__(self, *args, **kwds):
33 super(SortTestCase, self).__init__(*args, **kwds)
36 # tracer contents int float string custom i18n long double, short, byte, custom parser encoding'
37 [ "A", "x a", "5", "4f", "c", "A-3", u"p\u00EAche", "10", "-4.0", "3", "126", "J" ],
38 [ "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I" ],
39 [ "C", "x a b c", "2147483647", "1.0", "j", "A-2", u"p\u00E9ch\u00E9", "99999999", "40.00002343", "125", "15", "H" ],
40 [ "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", str(Long.MAX_VALUE), str(Double.MIN_VALUE), str(Short.MIN_VALUE), str(Byte.MIN_VALUE), "G" ],
41 [ "E", "x a b c d", "5", "2f", "h", "B-8", "peach", str(Long.MIN_VALUE), str(Double.MAX_VALUE), str(Short.MAX_VALUE), str(Byte.MAX_VALUE), "F" ],
42 [ "F", "y a b c d", "2", "3.14159f", "g", "B-1", u"H\u00C5T", "-44", "343.034435444", "-3", "0", "E" ],
43 [ "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D" ],
44 [ "H", "y a b c d", "0", "1.4E-45", "e", "C-88", u"H\u00D8T", "1023423423005", "4.043545", "10", "-50", "C" ],
45 [ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", u"s\u00EDn", "332422459999", "4.043546", "-340", "51", "B" ],
46 [ "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A" ],
47 [ "W", "g", "1", None, None, None, None, None, None, None, None, None ],
48 [ "X", "g", "1", "0.1", None, None, None, None, None, None, None, None ],
49 [ "Y", "g", "1", "0.2", None, None, None, None, None, None, None, None ],
50 [ "Z", "f g", None, None, None, None, None, None, None, None, None, None ],
53 def _getIndex(self, even, odd):
55 indexStore = RAMDirectory()
56 writer = IndexWriter(indexStore, SimpleAnalyzer(), True,
57 IndexWriter.MaxFieldLength.LIMITED)
58 writer.setMaxBufferedDocs(2)
59 writer.setMergeFactor(1000)
61 for i in xrange(len(self.data)):
62 if (i % 2 == 0 and even) or (i % 2 == 1 and odd):
64 doc.add(Field("tracer", self.data[i][0], Field.Store.YES,
66 doc.add(Field("contents", self.data[i][1], Field.Store.NO,
67 Field.Index.ANALYZED))
68 if self.data[i][2] is not None:
69 doc.add(Field("int", self.data[i][2], Field.Store.NO,
70 Field.Index.NOT_ANALYZED))
71 if self.data[i][3] is not None:
72 doc.add(Field("float", self.data[i][3], Field.Store.NO,
73 Field.Index.NOT_ANALYZED))
74 if self.data[i][4] is not None:
75 doc.add(Field("string", self.data[i][4], Field.Store.NO,
76 Field.Index.NOT_ANALYZED))
77 if self.data[i][5] is not None:
78 doc.add(Field("custom", self.data[i][5], Field.Store.NO,
79 Field.Index.NOT_ANALYZED))
80 if self.data[i][6] is not None:
81 doc.add(Field("i18n", self.data[i][6], Field.Store.NO,
82 Field.Index.NOT_ANALYZED))
83 if self.data[i][7] is not None:
84 doc.add(Field("long", self.data[i][7], Field.Store.NO,
85 Field.Index.NOT_ANALYZED))
86 if self.data[i][8] is not None:
87 doc.add(Field("double", self.data[i][8], Field.Store.NO,
88 Field.Index.NOT_ANALYZED))
89 if self.data[i][9] is not None:
90 doc.add(Field("short", self.data[i][9], Field.Store.NO,
91 Field.Index.NOT_ANALYZED))
92 if self.data[i][10] is not None:
93 doc.add(Field("byte", self.data[i][10], Field.Store.NO,
94 Field.Index.NOT_ANALYZED))
95 if self.data[i][11] is not None:
96 doc.add(Field("parser", self.data[i][11], Field.Store.NO,
97 Field.Index.NOT_ANALYZED))
98 doc.setBoost(2.0) # produce some scores above 1.0
99 writer.addDocument(doc)
102 s = IndexSearcher(indexStore, True)
103 s.setDefaultFieldSortScoring(True, True)
107 def _getFullIndex(self):
108 return self._getIndex(True, True)
110 def getFullStrings(self):
112 indexStore = RAMDirectory()
113 writer = IndexWriter(indexStore, SimpleAnalyzer(), True,
114 IndexWriter.MaxFieldLength.LIMITED)
115 writer.setMaxBufferedDocs(4)
116 writer.setMergeFactor(97)
118 for i in xrange(NUM_STRINGS):
120 num = self.getRandomCharString(self.getRandomNumber(2, 8), 48, 52)
121 doc.add(Field("tracer", num, Field.Store.YES, Field.Index.NO))
122 # doc.add(Field("contents", str(i), Field.Store.NO,
123 # Field.Index.ANALYZED))
124 doc.add(Field("string", num, Field.Store.NO,
125 Field.Index.NOT_ANALYZED))
126 num2 = self.getRandomCharString(self.getRandomNumber(1, 4), 48, 50)
127 doc.add(Field("string2", num2, Field.Store.NO,
128 Field.Index.NOT_ANALYZED))
129 doc.add(Field("tracer2", num2, Field.Store.YES, Field.Index.NO))
130 doc.setBoost(2.0) # produce some scores above 1.0
131 writer.setMaxBufferedDocs(self.getRandomNumber(2, 12))
132 writer.addDocument(doc)
135 # print writer.getSegmentCount()
138 return IndexSearcher(indexStore, True)
def getRandomNumberString(self, num, low, high):
    """
    Return a string made by concatenating the decimal form of `num`
    random integers, each obtained from self.getRandomNumber(low, high).

    num  -- how many random integers to draw (0 yields the empty string)
    low  -- lower bound handed to getRandomNumber
    high -- upper bound handed to getRandomNumber
    """
    # str.join only accepts strings, so every drawn integer has to be
    # converted explicitly; joining the raw ints raises TypeError.
    # range (rather than xrange) keeps this runnable on Python 2 and 3.
    return ''.join(str(self.getRandomNumber(low, high)) for _ in range(num))
def getRandomCharString(self, num, start=48, end=122):
    """
    Return a string of `num` random characters whose code points are
    obtained from self.getRandomNumber(start, end).

    num   -- number of characters to generate (0 yields the empty string)
    start -- lower code point bound, defaults to 48 (the character '0')
    end   -- upper code point bound, defaults to 122 (the character 'z')

    Python has no method overloading: two defs with the same name simply
    replace one another, which left the one-argument form unusable. The
    defaults reproduce that one-argument variant, while three-argument
    calls behave exactly as before.
    """
    # range (rather than xrange) keeps this runnable on Python 2 and 3.
    return ''.join(chr(self.getRandomNumber(start, end)) for _ in range(num))
def getRandomNumber(self, low, high):
    """
    Draw one random integer via random.randint(low, high) and return it.
    """
    picked = randint(low, high)
    return picked
157 def _getXIndex(self):
158 return self._getIndex(True, False)
160 def _getYIndex(self):
161 return self._getIndex(False, True)
163 def _getEmptyIndex(self):
164 return self._getIndex(False, False)
168 self.full = self._getFullIndex()
169 self.searchX = self._getXIndex()
170 self.searchY = self._getYIndex()
171 self.queryX = TermQuery(Term("contents", "x"))
172 self.queryY = TermQuery(Term("contents", "y"))
173 self.queryA = TermQuery(Term("contents", "a"))
174 self.queryE = TermQuery(Term("contents", "e"))
175 self.queryF = TermQuery(Term("contents", "f"))
176 self.queryG = TermQuery(Term("contents", "g"))
178 def testBuiltInSorts(self):
180 test the sorts by score and document number
184 self._assertMatches(self.full, self.queryX, sort, "ACEGI")
185 self._assertMatches(self.full, self.queryY, sort, "BDFHJ")
187 sort.setSort(SortField.FIELD_DOC)
188 self._assertMatches(self.full, self.queryX, sort, "ACEGI")
189 self._assertMatches(self.full, self.queryY, sort, "BDFHJ")
191 def testTypedSort(self):
193 test sorts where the type of field is specified
198 sort.setSort([SortField("int", SortField.INT),
199 SortField.FIELD_DOC])
200 self._assertMatches(self.full, self.queryX, sort, "IGAEC")
201 self._assertMatches(self.full, self.queryY, sort, "DHFJB")
203 sort.setSort([SortField("float", SortField.FLOAT),
204 SortField.FIELD_DOC])
205 self._assertMatches(self.full, self.queryX, sort, "GCIEA")
206 self._assertMatches(self.full, self.queryY, sort, "DHJFB")
208 sort.setSort([SortField("long", SortField.LONG),
209 SortField.FIELD_DOC])
210 self._assertMatches(self.full, self.queryX, sort, "EACGI")
211 self._assertMatches(self.full, self.queryY, sort, "FBJHD")
213 sort.setSort([SortField("double", SortField.DOUBLE),
214 SortField.FIELD_DOC])
215 self._assertMatches(self.full, self.queryX, sort, "AGICE")
216 self._assertMatches(self.full, self.queryY, sort, "DJHBF")
218 sort.setSort([SortField("byte", SortField.BYTE),
219 SortField.FIELD_DOC])
220 self._assertMatches(self.full, self.queryX, sort, "CIGAE")
221 self._assertMatches(self.full, self.queryY, sort, "DHFBJ")
223 sort.setSort([SortField("short", SortField.SHORT),
224 SortField.FIELD_DOC])
225 self._assertMatches(self.full, self.queryX, sort, "IAGCE")
226 self._assertMatches(self.full, self.queryY, sort, "DFHBJ")
228 sort.setSort([SortField("string", SortField.STRING),
229 SortField.FIELD_DOC])
230 self._assertMatches(self.full, self.queryX, sort, "AIGEC")
231 self._assertMatches(self.full, self.queryY, sort, "DJHFB")
233 def testStringSort(self):
235 Test String sorting: small queue to many matches, multi field sort,
240 searcher = self.getFullStrings()
242 sort.setSort([SortField("string", SortField.STRING),
243 SortField("string2", SortField.STRING, True),
244 SortField.FIELD_DOC])
246 result = searcher.search(MatchAllDocsQuery(), None, 500, sort).scoreDocs
254 for scoreDoc in result:
255 doc2 = searcher.doc(scoreDoc.doc)
256 v = doc2.getValues("tracer")
257 v2 = doc2.getValues("tracer2")
258 for _v, _v2 in izip(v, v2):
261 if _cmp < 0: # ensure first field is in order
263 print "fail:", _v, "<", last
265 if _cmp == 0: # ensure second field is in reverse order
266 _cmp = cmp(_v2, lastSub)
269 print "rev field fail:", _v2, ">", lastSub
270 elif _cmp == 0: # ensure docid is in order
271 if scoreDoc.doc < lastDocId:
273 print "doc fail:", scoreDoc.doc, ">", lastDocId
277 lastDocId = scoreDoc.doc
278 buff.append(_v + "(" + _v2 + ")(" + str(scoreDoc.doc) + ") ")
281 print "topn field1(field2)(docID):", ''.join(buff)
283 self.assert_(not fail, "Found sort results out of order")
285 def testCustomFieldParserSort(self):
287 test sorts where the type of field is specified and a custom field
288 parser is used, that uses a simple char encoding. The sorted string
289 contains a character beginning from 'A' that is mapped to a numeric
290 value using some "funny" algorithm to be different for each data
294 # since tests explicitly use different parsers on the same field name
295 # we explicitly check/purge the FieldCache between each assertMatch
296 fc = FieldCache.DEFAULT
298 class intParser(PythonIntParser):
299 def parseInt(_self, val):
300 return (ord(val[0]) - ord('A')) * 123456
302 class floatParser(PythonFloatParser):
303 def parseFloat(_self, val):
304 return math.sqrt(ord(val[0]))
306 class longParser(PythonLongParser):
307 def parseLong(_self, val):
308 return (ord(val[0]) - ord('A')) * 1234567890L
310 class doubleParser(PythonDoubleParser):
311 def parseDouble(_self, val):
312 return math.pow(ord(val[0]), ord(val[0]) - ord('A'))
314 class byteParser(PythonByteParser):
315 def parseByte(_self, val):
316 return chr(ord(val[0]) - ord('A'))
318 class shortParser(PythonShortParser):
319 def parseShort(_self, val):
320 return ord(val[0]) - ord('A')
323 sort.setSort([SortField("parser", intParser()),
324 SortField.FIELD_DOC])
325 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
326 self._assertSaneFieldCaches(self.getName() + " IntParser")
329 sort.setSort([SortField("parser", floatParser()),
330 SortField.FIELD_DOC])
331 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
332 self._assertSaneFieldCaches(self.getName() + " FloatParser")
335 sort.setSort([SortField("parser", longParser()),
336 SortField.FIELD_DOC])
337 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
338 self._assertSaneFieldCaches(self.getName() + " LongParser")
341 sort.setSort([SortField("parser", doubleParser()),
342 SortField.FIELD_DOC])
343 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
344 self._assertSaneFieldCaches(self.getName() + " DoubleParser")
347 sort.setSort([SortField("parser", byteParser()),
348 SortField.FIELD_DOC])
349 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
350 self._assertSaneFieldCaches(self.getName() + " ByteParser")
353 sort.setSort([SortField("parser", shortParser()),
354 SortField.FIELD_DOC])
355 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
356 self._assertSaneFieldCaches(self.getName() + " ShortParser")
359 def testEmptyIndex(self):
361 test sorts when there's nothing in the index
365 empty = self._getEmptyIndex()
367 self._assertMatches(empty, self.queryX, sort, "")
369 sort.setSort(SortField.FIELD_DOC)
370 self._assertMatches(empty, self.queryX, sort, "")
372 sort.setSort([SortField("int", SortField.INT), SortField.FIELD_DOC])
373 self._assertMatches(empty, self.queryX, sort, "")
375 sort.setSort([SortField("string", SortField.STRING, True),
376 SortField.FIELD_DOC])
377 self._assertMatches(empty, self.queryX, sort, "")
379 sort.setSort([SortField("float", SortField.FLOAT),
380 SortField("string", SortField.STRING)])
381 self._assertMatches(empty, self.queryX, sort, "")
384 def testNewCustomFieldParserSort(self):
386 Test sorting w/ custom FieldComparator
390 sort.setSort([SortField("parser", MyFieldComparatorSource())])
391 self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
393 def testReverseSort(self):
395 test sorts in reverse
399 sort.setSort([SortField(None, SortField.SCORE, True),
400 SortField.FIELD_DOC])
401 self._assertMatches(self.full, self.queryX, sort, "IEGCA")
402 self._assertMatches(self.full, self.queryY, sort, "JFHDB")
404 sort.setSort(SortField(None, SortField.DOC, True))
405 self._assertMatches(self.full, self.queryX, sort, "IGECA")
406 self._assertMatches(self.full, self.queryY, sort, "JHFDB")
408 sort.setSort(SortField("int", SortField.INT, True))
409 self._assertMatches(self.full, self.queryX, sort, "CAEGI")
410 self._assertMatches(self.full, self.queryY, sort, "BJFHD")
412 sort.setSort(SortField("float", SortField.FLOAT, True))
413 self._assertMatches(self.full, self.queryX, sort, "AECIG")
414 self._assertMatches(self.full, self.queryY, sort, "BFJHD")
416 sort.setSort(SortField("string", SortField.STRING, True))
417 self._assertMatches(self.full, self.queryX, sort, "CEGIA")
418 self._assertMatches(self.full, self.queryY, sort, "BFHJD")
420 def testEmptyFieldSort(self):
422 test sorting when the sort field is empty(undefined) for some of the
427 sort.setSort(SortField("string", SortField.STRING))
428 self._assertMatches(self.full, self.queryF, sort, "ZJI")
430 sort.setSort(SortField("string", SortField.STRING, True))
431 self._assertMatches(self.full, self.queryF, sort, "IJZ")
433 sort.setSort(SortField("i18n", Locale.ENGLISH))
434 self._assertMatches(self.full, self.queryF, sort, "ZJI")
436 sort.setSort(SortField("i18n", Locale.ENGLISH, True))
437 self._assertMatches(self.full, self.queryF, sort, "IJZ")
439 sort.setSort(SortField("int", SortField.INT))
440 self._assertMatches(self.full, self.queryF, sort, "IZJ")
442 sort.setSort(SortField("int", SortField.INT, True))
443 self._assertMatches(self.full, self.queryF, sort, "JZI")
445 sort.setSort(SortField("float", SortField.FLOAT))
446 self._assertMatches(self.full, self.queryF, sort, "ZJI")
448 # using a nonexisting field as first sort key shouldn't make a
450 sort.setSort([SortField("nosuchfield", SortField.STRING),
451 SortField("float", SortField.FLOAT)])
452 self._assertMatches(self.full, self.queryF, sort, "ZJI")
454 sort.setSort(SortField("float", SortField.FLOAT, True))
455 self._assertMatches(self.full, self.queryF, sort, "IJZ")
457 # When a field is None for both documents, the next SortField should
460 sort.setSort([SortField("int", SortField.INT),
461 SortField("string", SortField.STRING),
462 SortField("float", SortField.FLOAT)])
463 self._assertMatches(self.full, self.queryG, sort, "ZWXY")
# Reverse the last criterion to make sure the test didn't pass by
467 sort.setSort([SortField("int", SortField.INT),
468 SortField("string", SortField.STRING),
469 SortField("float", SortField.FLOAT, True)])
470 self._assertMatches(self.full, self.queryG, sort, "ZYXW")
472 # Do the same for a MultiSearcher
473 multiSearcher = MultiSearcher([self.full])
475 sort.setSort([SortField("int", SortField.INT),
476 SortField("string", SortField.STRING),
477 SortField("float", SortField.FLOAT)])
478 self._assertMatches(multiSearcher, self.queryG, sort, "ZWXY")
480 sort.setSort([SortField("int", SortField.INT),
481 SortField("string", SortField.STRING),
482 SortField("float", SortField.FLOAT, True)])
483 self._assertMatches(multiSearcher, self.queryG, sort, "ZYXW")
485 # Don't close the multiSearcher. it would close the full searcher too!
486 # Do the same for a ParallelMultiSearcher
487 parallelSearcher = ParallelMultiSearcher([self.full])
489 sort.setSort([SortField("int", SortField.INT),
490 SortField("string", SortField.STRING),
491 SortField("float", SortField.FLOAT)])
492 self._assertMatches(parallelSearcher, self.queryG, sort, "ZWXY")
494 sort.setSort([SortField("int", SortField.INT),
495 SortField("string", SortField.STRING),
496 SortField("float", SortField.FLOAT, True)])
497 self._assertMatches(parallelSearcher, self.queryG, sort, "ZYXW")
499 # Don't close the parallelSearcher. it would close the full searcher
502 def testSortCombos(self):
504 test sorts using a series of fields
508 sort.setSort([SortField("int", SortField.INT),
509 SortField("float", SortField.FLOAT)])
510 self._assertMatches(self.full, self.queryX, sort, "IGEAC")
512 sort.setSort([SortField("int", SortField.INT, True),
513 SortField(None, SortField.DOC, True)])
514 self._assertMatches(self.full, self.queryX, sort, "CEAGI")
516 sort.setSort([SortField("float", SortField.FLOAT),
517 SortField("string", SortField.STRING)])
518 self._assertMatches(self.full, self.queryX, sort, "GICEA")
520 def testLocaleSort(self):
522 test using a Locale for sorting strings
526 sort.setSort([SortField("string", Locale.US)])
527 self._assertMatches(self.full, self.queryX, sort, "AIGEC")
528 self._assertMatches(self.full, self.queryY, sort, "DJHFB")
530 sort.setSort([SortField("string", Locale.US, True)])
531 self._assertMatches(self.full, self.queryX, sort, "CEGIA")
532 self._assertMatches(self.full, self.queryY, sort, "BFHJD")
534 def testInternationalSort(self):
536 test using various international locales with accented characters
537 (which sort differently depending on locale)
541 sort.setSort(SortField("i18n", Locale.US))
542 self._assertMatches(self.full, self.queryY, sort, "BFJDH")
544 sort.setSort(SortField("i18n", Locale("sv", "se")))
545 self._assertMatches(self.full, self.queryY, sort, "BJDFH")
547 sort.setSort(SortField("i18n", Locale("da", "dk")))
548 self._assertMatches(self.full, self.queryY, sort, "BJDHF")
550 sort.setSort(SortField("i18n", Locale.US))
551 self._assertMatches(self.full, self.queryX, sort, "ECAGI")
553 sort.setSort(SortField("i18n", Locale.FRANCE))
554 self._assertMatches(self.full, self.queryX, sort, "EACGI")
556 def testInternationalMultiSearcherSort(self):
558 Test the MultiSearcher's ability to preserve locale-sensitive ordering
559 by wrapping it around a single searcher
563 multiSearcher = MultiSearcher([self.full])
564 sort.setSort(SortField("i18n", Locale("sv", "se")))
565 self._assertMatches(multiSearcher, self.queryY, sort, "BJDFH")
567 sort.setSort(SortField("i18n", Locale.US))
568 self._assertMatches(multiSearcher, self.queryY, sort, "BFJDH")
570 sort.setSort(SortField("i18n", Locale("da", "dk")))
571 self._assertMatches(multiSearcher, self.queryY, sort, "BJDHF")
def testMultiSort(self):
    """
    test a variety of sorts using more than one searcher
    """
    # Combine the two partial searchers and run the shared checks with
    # isFull set to False.
    subSearchers = [self.searchX, self.searchY]
    self.runMultiSorts(MultiSearcher(subSearchers), False)
def testParallelMultiSort(self):
    """
    test a variety of sorts using a parallel multisearcher
    """
    # Same checks as the plain multi-searcher test, but through a
    # ParallelMultiSearcher over the two partial searchers.
    subSearchers = [self.searchX, self.searchY]
    self.runMultiSorts(ParallelMultiSearcher(subSearchers), False)
589 def testNormalizedScores(self):
591 test that the relevancy scores are the same even if
595 # capture relevancy scores
596 scoresX = self.getScores(self.full.search(self.queryX, None,
597 1000).scoreDocs, self.full)
598 scoresY = self.getScores(self.full.search(self.queryY, None,
599 1000).scoreDocs, self.full)
600 scoresA = self.getScores(self.full.search(self.queryA, None,
601 1000).scoreDocs, self.full)
603 # we'll test searching locally, remote and multi
604 multi = MultiSearcher([self.searchX, self.searchY])
606 # change sorting and make sure relevancy stays the same
609 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
610 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
611 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
612 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
613 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
614 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
616 sort.setSort(SortField.FIELD_DOC)
617 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
618 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
619 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
620 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
621 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
622 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
624 sort.setSort(SortField("int", SortField.INT))
625 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
626 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
627 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
628 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
629 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
630 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
632 sort.setSort(SortField("float", SortField.FLOAT))
633 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
634 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
635 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
636 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
637 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
638 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
640 sort.setSort(SortField("string", SortField.STRING))
641 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
642 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
643 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
644 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
645 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
646 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
648 sort.setSort([SortField("int", SortField.INT),
649 SortField("float", SortField.FLOAT)])
650 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
651 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
652 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
653 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
654 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
655 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
657 sort.setSort([SortField("int", SortField.INT, True),
658 SortField(None, SortField.DOC, True)])
659 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
660 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
661 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
662 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
663 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
664 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
666 sort.setSort([SortField("float", SortField.FLOAT),
667 SortField("string", SortField.STRING)])
668 self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
669 self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
670 self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
671 self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
672 self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
673 self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
675 def testTopDocsScores(self):
677 There was previously a bug in FieldSortedHitQueue.maxscore when only
678 a single doc was added. That is what the following tests for.
684 # try to pick a query that will result in an unnormalized
685 # score greater than 1 to test for correct normalization
686 docs1 = self.full.search(self.queryE, None, nDocs, sort)
688 # a filter that only allows through the first hit
689 class filter(PythonFilter):
690 def getDocIdSet(_self, reader):
691 bs = BitSet(reader.maxDoc())
692 bs.set(0, reader.maxDoc())
693 bs.set(docs1.scoreDocs[0].doc)
694 return DocIdBitSet(bs)
698 docs2 = self.full.search(self.queryE, filt, nDocs, sort)
699 self.assertEqual(docs1.scoreDocs[0].score,
700 docs2.scoreDocs[0].score,
703 def testSortWithoutFillFields(self):
705 There was previously a bug in TopFieldCollector when fillFields was
706 set to False - the same doc and score was set in ScoreDoc[]
707 array. This test asserts that if fillFields is False, the documents
708 are set properly. It does not use Searcher's default search
709 methods(with Sort) since all set fillFields to True.
712 sorts = [Sort(SortField.FIELD_DOC), Sort()]
714 q = MatchAllDocsQuery()
715 tdc = TopFieldCollector.create(sort, 10, False,
717 self.full.search(q, tdc)
719 sds = tdc.topDocs().scoreDocs
720 for i in xrange(1, len(sds)):
721 self.assert_(sds[i].doc != sds[i - 1].doc)
723 def testSortWithoutScoreTracking(self):
725 Two Sort criteria to instantiate the multi/single comparators.
728 sorts = [Sort(SortField.FIELD_DOC), Sort()]
730 q = MatchAllDocsQuery()
731 tdc = TopFieldCollector.create(sort, 10, True, False,
734 self.full.search(q, tdc)
739 self.assert_(Float.isNaN_(sd.score))
741 self.assert_(Float.isNaN_(tds.getMaxScore()))
743 def testSortWithScoreNoMaxScoreTracking(self):
745 Two Sort criteria to instantiate the multi/single comparators.
748 sorts = [Sort(SortField.FIELD_DOC), Sort()]
750 q = MatchAllDocsQuery()
751 tdc = TopFieldCollector.create(sort, 10, True, True,
754 self.full.search(q, tdc)
759 self.assert_(not Float.isNaN_(sd.score))
761 self.assert_(Float.isNaN_(tds.getMaxScore()))
763 def testSortWithScoreAndMaxScoreTracking(self):
765 Two Sort criteria to instantiate the multi/single comparators.
768 sorts = [Sort(SortField.FIELD_DOC), Sort()]
770 q = MatchAllDocsQuery()
771 tdc = TopFieldCollector.create(sort, 10, True, True,
774 self.full.search(q, tdc)
779 self.assert_(not Float.isNaN_(sd.score))
781 self.assert_(not Float.isNaN_(tds.getMaxScore()))
783 def testOutOfOrderDocsScoringSort(self):
785 Two Sort criteria to instantiate the multi/single comparators.
788 sorts = [Sort(SortField.FIELD_DOC), Sort()]
790 tfcOptions = [[False, False, False],
791 [False, False, True],
792 [False, True, False],
794 [True, False, False],
800 "OutOfOrderOneComparatorNonScoringCollector",
801 "OutOfOrderOneComparatorScoringMaxScoreCollector",
802 "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
803 "OutOfOrderOneComparatorScoringMaxScoreCollector",
804 "OutOfOrderOneComparatorNonScoringCollector",
805 "OutOfOrderOneComparatorScoringMaxScoreCollector",
806 "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
807 "OutOfOrderOneComparatorScoringMaxScoreCollector"
812 # Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
813 # which delegates to BS if there are no mandatory clauses.
814 bq.add(MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
816 # Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to
817 # return the clause instead of BQ.
818 bq.setMinimumNumberShouldMatch(1)
821 for tfcOption, actualTFCClass in izip(tfcOptions,
823 tdc = TopFieldCollector.create(sort, 10, tfcOption[0],
824 tfcOption[1], tfcOption[2],
827 self.assert_(tdc.getClass().getName().endswith("$" + actualTFCClass))
829 self.full.search(bq, tdc)
833 self.assertEqual(10, len(sds))
835 def testSortWithScoreAndMaxScoreTrackingNoResults(self):
837 Two Sort criteria to instantiate the multi/single comparators.
840 sorts = [Sort(SortField.FIELD_DOC), Sort()]
842 tdc = TopFieldCollector.create(sort, 10, True, True, True, True)
844 self.assertEqual(0, tds.totalHits)
845 self.assert_(Float.isNaN_(tds.getMaxScore()))
847 def runMultiSorts(self, multi, isFull):
849 runs a variety of sorts useful for multisearchers
853 sort.setSort(SortField.FIELD_DOC)
854 expected = isFull and "ABCDEFGHIJ" or "ACEGIBDFHJ"
855 self._assertMatches(multi, self.queryA, sort, expected)
857 sort.setSort(SortField("int", SortField.INT))
858 expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
859 self._assertMatches(multi, self.queryA, sort, expected)
861 sort.setSort([SortField("int", SortField.INT), SortField.FIELD_DOC])
862 expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
863 self._assertMatches(multi, self.queryA, sort, expected)
865 sort.setSort(SortField("int", SortField.INT))
866 expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
867 self._assertMatches(multi, self.queryA, sort, expected)
869 sort.setSort([SortField("float", SortField.FLOAT), SortField.FIELD_DOC])
870 self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
872 sort.setSort(SortField("float", SortField.FLOAT))
873 self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
875 sort.setSort(SortField("string", SortField.STRING))
876 self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
878 sort.setSort(SortField("int", SortField.INT, True))
879 expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
880 self._assertMatches(multi, self.queryA, sort, expected)
882 sort.setSort(SortField("float", SortField.FLOAT, True))
883 self._assertMatches(multi, self.queryA, sort, "BAFECIJHDG")
885 sort.setSort(SortField("string", SortField.STRING, True))
886 self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
888 sort.setSort([SortField("int", SortField.INT),
889 SortField("float", SortField.FLOAT)])
890 self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")
892 sort.setSort([SortField("float", SortField.FLOAT),
893 SortField("string", SortField.STRING)])
894 self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
896 sort.setSort(SortField("int", SortField.INT))
897 self._assertMatches(multi, self.queryF, sort, "IZJ")
899 sort.setSort(SortField("int", SortField.INT, True))
900 self._assertMatches(multi, self.queryF, sort, "JZI")
902 sort.setSort(SortField("float", SortField.FLOAT))
903 self._assertMatches(multi, self.queryF, sort, "ZJI")
905 sort.setSort(SortField("string", SortField.STRING))
906 self._assertMatches(multi, self.queryF, sort, "ZJI")
908 sort.setSort(SortField("string", SortField.STRING, True))
909 self._assertMatches(multi, self.queryF, sort, "IJZ")
911 # up to this point, all of the searches should have "sane"
# FieldCache behavior, and should have reused the cache in several
914 self._assertSaneFieldCaches(self.getName() + " various")
916 # next we'll check Locale based(String[]) for 'string', so purge first
917 FieldCache.DEFAULT.purgeAllCaches()
919 sort.setSort([SortField("string", Locale.US)])
920 self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
922 sort.setSort([SortField("string", Locale.US, True)])
923 self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
925 sort.setSort([SortField("string", Locale.UK)])
926 self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
928 self._assertSaneFieldCaches(self.getName() + " Locale.US + Locale.UK")
929 FieldCache.DEFAULT.purgeAllCaches()
931 def _assertMatches(self, searcher, query, sort, expectedResult):
933 make sure the documents returned by the search match the expected
937 # ScoreDoc[] result = searcher.search(query, None, 1000, sort).scoreDocs
938 hits = searcher.search(query, None, len(expectedResult) or 1, sort)
941 self.assertEqual(hits.totalHits, len(expectedResult))
944 doc = searcher.doc(sd.doc)
945 v = doc.getValues("tracer")
949 self.assertEqual(expectedResult, ''.join(buff))
951 def getScores(self, hits, searcher):
955 doc = searcher.doc(hit.doc)
956 v = doc.getValues("tracer")
957 self.assertEqual(len(v), 1)
958 scoreMap[v[0]] = hit.score
962 def _assertSameValues(self, m1, m2):
964 make sure all the values in the maps match
967 self.assertEquals(len(m1), len(m2))
968 for key in m1.iterkeys():
969 self.assertEquals(m1[key], m2[key], 1e-6)
973 return type(self).__name__
def _assertSaneFieldCaches(self, msg):
    """
    Run FieldCacheSanityChecker over the current entries of the default
    FieldCache and fail the test if it reports anything.

    msg -- text prepended to the failure message
    """
    cacheEntries = FieldCache.DEFAULT.getCacheEntries()
    problems = FieldCacheSanityChecker.checkSanity(cacheEntries)
    problemCount = len(problems)
    failureText = msg + ": Insane FieldCache usage(s) found"
    self.assertEqual(0, problemCount, failureText)
984 class MyFieldComparator(PythonFieldComparator):
986 def __init__(self, numHits):
987 super(MyFieldComparator, self).__init__()
988 self.slotValues = [0] * numHits
990 def copy(self, slot, doc):
991 self.slotValues[slot] = self.docValues[doc]
993 def compare(self, slot1, slot2):
994 return self.slotValues[slot1] - self.slotValues[slot2]
996 def compareBottom(self, doc):
997 return self.bottomValue - self.docValues[doc]
999 def setBottom(self, bottom):
1000 self.bottomValue = self.slotValues[bottom]
1002 def setNextReader(self, reader, docBase):
1004 class intParser(PythonIntParser):
1005 def parseInt(_self, val):
1006 return (ord(val[0]) - ord('A')) * 123456
1008 self.docValues = FieldCache.DEFAULT.getInts(reader, "parser",
1011 def value(self, slot):
1012 return Integer(self.slotValues[slot])
class MyFieldComparatorSource(PythonFieldComparatorSource):
    """
    Comparator source that hands out MyFieldComparator instances.
    """

    def newComparator(self, fieldname, numHits, sortPos, reversed):
        # Only numHits is forwarded; the other arguments are not used
        # by this implementation.
        comparator = MyFieldComparator(numHits)
        return comparator
1022 if __name__ == "__main__":
1024 env = lucene.initVM()
1025 if '-loop' in sys.argv:
1026 sys.argv.remove('-loop')
1032 # refs = sorted(env._dumpRefs(classes=True).items(),
1033 # key=lambda x: x[1], reverse=True)