1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.text.Collator;
22 import java.util.Locale;
24 import org.apache.lucene.index.IndexReader;
25 import org.apache.lucene.index.RandomIndexWriter;
26 import org.apache.lucene.index.Term;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.store.Directory;
30 import org.junit.Test;
33 * A basic 'positive' Unit test class for the TermRangeFilter class.
36 * NOTE: at the moment, this class only tests for 'positive' results; it does
37 * not verify the results to ensure there are no 'false positives', nor does it
38 * adequately test 'negative' results. It also does not test that garbage in
39 * results in an Exception.
41 public class TestTermRangeFilter extends BaseTestRangeFilter {
44 public void testRangeFilterId() throws IOException {
// Exercises TermRangeFilter on the "id" field of the signed index. Every
// search uses the same TermQuery (body:body, which presumably matches every
// document -- TODO confirm); only the filter's bounds and inclusiveness flags
// vary, so each asserted count is the number of ids admitted by that range.
// T and F are presumably the true/false shorthands inherited from
// BaseTestRangeFilter (not visible here) -- TODO confirm.
46 IndexReader reader = signedIndexReader;
47 IndexSearcher search = newSearcher(reader);
// NOTE(review): this is half the id span, not minId + span / 2, so it is the
// true midpoint only when minId is 0 -- confirm against BaseTestRangeFilter.
49 int medId = ((maxId - minId) / 2);
// pad(...) is defined outside this view; presumably it yields fixed-width
// strings whose term order matches numeric order -- TODO confirm.
51 String minIP = pad(minId);
52 String maxIP = pad(maxId);
53 String medIP = pad(medId);
55 int numDocs = reader.numDocs();
// Sanity check that the index holds one document per id in [minId, maxId].
// NOTE(review): JUnit's order is (message, expected, actual); here the value
// read from the reader is passed in the "expected" slot. Only the failure
// message is affected.
57 assertEquals("num of docs", numDocs, 1 + maxId - minId);
60 Query q = new TermQuery(new Term("body", "body"));
62 // test id, bounded on both ends
64 result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
66 assertEquals("find all", numDocs, result.length);
68 result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
70 assertEquals("all but last", numDocs - 1, result.length);
72 result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
74 assertEquals("all but first", numDocs - 1, result.length);
76 result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
78 assertEquals("all but ends", numDocs - 2, result.length);
80 result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
82 assertEquals("med and up", 1 + maxId - medId, result.length);
84 result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
86 assertEquals("up to med", 1 + medId - minId, result.length);
// Open-ended ranges: null is passed for the side that is left unbounded.
90 result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
92 assertEquals("min and up", numDocs, result.length);
94 result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
96 assertEquals("max and down", numDocs, result.length);
98 result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
100 assertEquals("not min, but up", numDocs - 1, result.length);
102 result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
104 assertEquals("not max, but down", numDocs - 1, result.length);
106 result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
108 assertEquals("med and up, not max", maxId - medId, result.length);
110 result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
112 assertEquals("not min, up to med", medId - minId, result.length);
// Single-point ranges: with both bounds on the same id, exclusive flags are
// expected to match nothing and inclusive flags exactly one document.
116 result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
118 assertEquals("min,min,F,F", 0, result.length);
119 result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
121 assertEquals("med,med,F,F", 0, result.length);
122 result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
124 assertEquals("max,max,F,F", 0, result.length);
126 result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
128 assertEquals("min,min,T,T", 1, result.length);
129 result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
131 assertEquals("nul,min,F,T", 1, result.length);
133 result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
135 assertEquals("max,max,T,T", 1, result.length);
// NOTE(review): the label below says "T,T" but the filter is built with
// (T, F); the upper bound is null, so the flag presumably has no effect --
// confirm and align the label.
136 result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
138 assertEquals("max,nul,T,T", 1, result.length);
140 result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
142 assertEquals("med,med,T,T", 1, result.length);
148 public void testRangeFilterIdCollating() throws IOException {
// Runs the "id" range checks against the signed index with an English
// Collator passed as the last TermRangeFilter argument. Counts are read from
// totalHits of a top-1000 search rather than from a result array.
// T and F are presumably the true/false shorthands inherited from
// BaseTestRangeFilter (not visible here) -- TODO confirm.
150 IndexReader reader = signedIndexReader;
151 IndexSearcher search = newSearcher(reader);
153 Collator c = Collator.getInstance(Locale.ENGLISH);
// NOTE(review): this is half the id span, not minId + span / 2, so it is the
// true midpoint only when minId is 0 -- confirm against BaseTestRangeFilter.
155 int medId = ((maxId - minId) / 2);
// pad(...) is defined outside this view; presumably it yields fixed-width
// strings for the ids -- TODO confirm.
157 String minIP = pad(minId);
158 String maxIP = pad(maxId);
159 String medIP = pad(medId);
161 int numDocs = reader.numDocs();
// Sanity check that the index holds one document per id in [minId, maxId].
// NOTE(review): JUnit's order is (message, expected, actual); the arguments
// here are in the opposite roles, which only affects the failure message.
163 assertEquals("num of docs", numDocs, 1 + maxId - minId);
165 Query q = new TermQuery(new Term("body", "body"));
167 // test id, bounded on both ends
168 int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T,
169 T, c), 1000).totalHits;
170 assertEquals("find all", numDocs, numHits);
172 numHits = search.search(q,
173 new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
174 assertEquals("all but last", numDocs - 1, numHits);
176 numHits = search.search(q,
177 new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
178 assertEquals("all but first", numDocs - 1, numHits);
180 numHits = search.search(q,
181 new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
182 assertEquals("all but ends", numDocs - 2, numHits);
184 numHits = search.search(q,
185 new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
186 assertEquals("med and up", 1 + maxId - medId, numHits);
188 numHits = search.search(q,
189 new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
190 assertEquals("up to med", 1 + medId - minId, numHits);
// Open-ended ranges: null is passed for the side that is left unbounded.
194 numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c),
196 assertEquals("min and up", numDocs, numHits);
198 numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c),
200 assertEquals("max and down", numDocs, numHits);
202 numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c),
204 assertEquals("not min, but up", numDocs - 1, numHits);
206 numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c),
208 assertEquals("not max, but down", numDocs - 1, numHits);
210 numHits = search.search(q,
211 new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
212 assertEquals("med and up, not max", maxId - medId, numHits);
214 numHits = search.search(q,
215 new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
216 assertEquals("not min, up to med", medId - minId, numHits);
// Single-point ranges: with both bounds on the same id, exclusive flags are
// expected to match nothing and inclusive flags exactly one document.
220 numHits = search.search(q,
221 new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
222 assertEquals("min,min,F,F", 0, numHits);
223 numHits = search.search(q,
224 new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
225 assertEquals("med,med,F,F", 0, numHits);
226 numHits = search.search(q,
227 new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
228 assertEquals("max,max,F,F", 0, numHits);
230 numHits = search.search(q,
231 new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
232 assertEquals("min,min,T,T", 1, numHits);
233 numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c),
235 assertEquals("nul,min,F,T", 1, numHits);
237 numHits = search.search(q,
238 new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
239 assertEquals("max,max,T,T", 1, numHits);
// NOTE(review): the label below says "T,T" but the filter is built with
// (T, F); the upper bound is null, so the flag presumably has no effect --
// confirm and align the label.
240 numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c),
242 assertEquals("max,nul,T,T", 1, numHits);
244 numHits = search.search(q,
245 new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
246 assertEquals("med,med,T,T", 1, numHits);
252 public void testRangeFilterRand() throws IOException {
// Exercises TermRangeFilter on the "rand" field of the signed index, using
// the padded minR/maxR values recorded on signedIndexDir as the range ends.
// The asserted counts treat each extreme as belonging to exactly one
// document: excluding one end is expected to drop exactly one hit.
// T and F are presumably the true/false shorthands inherited from
// BaseTestRangeFilter (not visible here) -- TODO confirm.
254 IndexReader reader = signedIndexReader;
255 IndexSearcher search = newSearcher(reader);
// minR/maxR are presumably the smallest and largest values written to the
// "rand" field when the index was built -- TODO confirm in the base class.
257 String minRP = pad(signedIndexDir.minR);
258 String maxRP = pad(signedIndexDir.maxR);
260 int numDocs = reader.numDocs();
// Sanity check that the index holds one document per id in [minId, maxId].
// NOTE(review): JUnit's order is (message, expected, actual); the arguments
// here are in the opposite roles, which only affects the failure message.
262 assertEquals("num of docs", numDocs, 1 + maxId - minId);
265 Query q = new TermQuery(new Term("body", "body"));
267 // test extremes, bounded on both ends
269 result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T),
271 assertEquals("find all", numDocs, result.length);
273 result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F),
275 assertEquals("all but biggest", numDocs - 1, result.length);
277 result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T),
279 assertEquals("all but smallest", numDocs - 1, result.length);
281 result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F),
283 assertEquals("all but extremes", numDocs - 2, result.length);
// Open-ended ranges: null is passed for the side that is left unbounded.
287 result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F),
289 assertEquals("smallest and up", numDocs, result.length);
291 result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T),
293 assertEquals("biggest and down", numDocs, result.length);
295 result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F),
297 assertEquals("not smallest, but up", numDocs - 1, result.length);
299 result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F),
301 assertEquals("not biggest, but down", numDocs - 1, result.length);
// Single-point ranges: with both bounds on the same value, exclusive flags
// are expected to match nothing and inclusive flags exactly one document.
305 result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F),
307 assertEquals("min,min,F,F", 0, result.length);
308 result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F),
310 assertEquals("max,max,F,F", 0, result.length);
312 result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T),
314 assertEquals("min,min,T,T", 1, result.length);
315 result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T),
317 assertEquals("nul,min,F,T", 1, result.length);
319 result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T),
321 assertEquals("max,max,T,T", 1, result.length);
// NOTE(review): the label below says "T,T" but the filter is built with
// (T, F); the upper bound is null, so the flag presumably has no effect --
// confirm and align the label.
322 result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F),
324 assertEquals("max,nul,T,T", 1, result.length);
330 public void testRangeFilterRandCollating() throws IOException {
// Runs the "rand" range checks with an English Collator passed as the last
// TermRangeFilter argument, reading counts from totalHits of a top-1000
// search. Unlike the non-collating variant, this one reads the unsigned
// index and takes minR/maxR from unsignedIndexDir.
// T and F are presumably the true/false shorthands inherited from
// BaseTestRangeFilter (not visible here) -- TODO confirm.
332 // using the unsigned index because collation seems to ignore hyphens
333 IndexReader reader = unsignedIndexReader;
334 IndexSearcher search = newSearcher(reader);
336 Collator c = Collator.getInstance(Locale.ENGLISH);
// minR/maxR are presumably the smallest and largest values written to the
// "rand" field when the index was built -- TODO confirm in the base class.
338 String minRP = pad(unsignedIndexDir.minR);
339 String maxRP = pad(unsignedIndexDir.maxR);
341 int numDocs = reader.numDocs();
// Sanity check that the index holds one document per id in [minId, maxId].
// NOTE(review): JUnit's order is (message, expected, actual); the arguments
// here are in the opposite roles, which only affects the failure message.
343 assertEquals("num of docs", numDocs, 1 + maxId - minId);
345 Query q = new TermQuery(new Term("body", "body"));
347 // test extremes, bounded on both ends
349 int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T,
350 T, c), 1000).totalHits;
351 assertEquals("find all", numDocs, numHits);
353 numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F,
355 assertEquals("all but biggest", numDocs - 1, numHits);
357 numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T,
359 assertEquals("all but smallest", numDocs - 1, numHits);
361 numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F,
363 assertEquals("all but extremes", numDocs - 2, numHits);
// Open-ended ranges: null is passed for the side that is left unbounded.
367 numHits = search.search(q,
368 new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
369 assertEquals("smallest and up", numDocs, numHits);
371 numHits = search.search(q,
372 new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
373 assertEquals("biggest and down", numDocs, numHits);
375 numHits = search.search(q,
376 new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
377 assertEquals("not smallest, but up", numDocs - 1, numHits);
379 numHits = search.search(q,
380 new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
381 assertEquals("not biggest, but down", numDocs - 1, numHits);
// Single-point ranges: with both bounds on the same value, exclusive flags
// are expected to match nothing and inclusive flags exactly one document.
385 numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F,
387 assertEquals("min,min,F,F", 0, numHits);
388 numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F,
390 assertEquals("max,max,F,F", 0, numHits);
392 numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T,
394 assertEquals("min,min,T,T", 1, numHits);
395 numHits = search.search(q,
396 new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
397 assertEquals("nul,min,F,T", 1, numHits);
399 numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T,
401 assertEquals("max,max,T,T", 1, numHits);
// NOTE(review): the label below says "T,T" but the filter is built with
// (T, F); the upper bound is null, so the flag presumably has no effect --
// confirm and align the label.
402 numHits = search.search(q,
403 new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
404 assertEquals("max,nul,T,T", 1, numHits);
410 public void testFarsi() throws Exception {
// Checks that a TermRangeFilter given a Collator decides range membership by
// collation order rather than by Unicode code point order. A fresh index is
// built with a single document whose "content" term is the three-character
// string U+0633 U+0627 U+0628, plus the "body" field the query matches on.
413 Directory farsiIndex = newDirectory();
414 RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
415 Document doc = new Document();
416 doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
417 Field.Index.NOT_ANALYZED));
419 .add(newField("body", "body", Field.Store.YES,
420 Field.Index.NOT_ANALYZED));
421 writer.addDocument(doc);
423 IndexReader reader = writer.getReader();
426 IndexSearcher search = newSearcher(reader);
427 Query q = new TermQuery(new Term("body", "body"));
429 // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
430 // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
431 // characters properly.
432 Collator collator = Collator.getInstance(new Locale("ar"));
434 // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
435 // orders the U+0698 character before the U+0633 character, so the single
436 // index Term below should NOT be returned by a TermRangeFilter with a Farsi
437 // Collator (or an Arabic one for the case when Farsi is not supported).
438 int numHits = search.search(q, new TermRangeFilter("content", "\u062F",
439 "\u0698", T, T, collator), 1000).totalHits;
440 assertEquals("The index Term should not be included.", 0, numHits);
// Positive counterpart: an inclusive range from U+0633 to U+0638 is expected
// to contain the indexed term, so the single document must be returned.
442 numHits = search.search(q, new TermRangeFilter("content", "\u0633",
443 "\u0638", T, T, collator), 1000).totalHits;
444 assertEquals("The index Term should be included.", 1, numHits);
451 public void testDanish() throws Exception {
// Checks collation-aware range filtering with a Danish Collator. A fresh
// index is built with one document per entry of the words array below; each
// document carries the word in "content" and the "body" field the query
// matches on.
454 Directory danishIndex = newDirectory();
455 RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
456 // Danish collation orders the words below in the given order
457 // (example taken from TestSort.testInternationalSort() ).
458 String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
459 for (int docnum = 0; docnum < words.length; ++docnum) {
460 Document doc = new Document();
461 doc.add(newField("content", words[docnum], Field.Store.YES,
462 Field.Index.NOT_ANALYZED));
463 doc.add(newField("body", "body", Field.Store.YES,
464 Field.Index.NOT_ANALYZED));
465 writer.addDocument(doc);
467 IndexReader reader = writer.getReader();
470 IndexSearcher search = newSearcher(reader);
471 Query q = new TermQuery(new Term("body", "body"));
473 Collator collator = Collator.getInstance(new Locale("da", "dk"));
// Both ranges below are exclusive at each end (F, F) and share the upper
// bound "MAND"; only the lower bound changes between the two searches.
475 // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
476 // but Danish collation does.
477 int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T",
478 "MAND", F, F, collator), 1000).totalHits;
479 assertEquals("The index Term should be included.", 1, numHits);
// With the second word as the exclusive lower bound, no indexed word is
// expected to fall strictly between the bounds.
481 numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T",
482 "MAND", F, F, collator), 1000).totalHits;
483 assertEquals("The index Term should not be included.", 0, numHits);