1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.document.Document;
21 import org.apache.lucene.document.Field;
22 import org.apache.lucene.index.IndexReader;
23 import org.apache.lucene.index.IndexWriter;
24 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
25 import org.apache.lucene.store.Directory;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.MockAnalyzer;
28 import org.apache.lucene.analysis.MockTokenizer;
29 import org.apache.lucene.analysis.TokenStream;
30 import org.apache.lucene.analysis.Tokenizer;
31 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
33 import org.apache.lucene.util.LuceneTestCase;
34 import java.io.IOException;
35 import java.io.Reader;
36 import java.util.Locale;
38 import java.util.HashSet;
39 import java.util.Arrays;
40 import java.text.Collator;
43 public class TestTermRangeQuery extends LuceneTestCase {
45 private int docCount = 0;
46 private Directory dir;
49 public void setUp() throws Exception {
55 public void tearDown() throws Exception {
/**
 * Verifies that a {@link TermRangeQuery} on "content" from "A" to "C", built with
 * both inclusion flags set to false, yields exactly one hit for each index state
 * that is checked below.
 */
60 public void testExclusive() throws Exception {
61 Query query = new TermRangeQuery("content", "A", "C", false, false);
// Index holds A, B, C and D; per the assertion message only B is expected to match.
62 initializeIndex(new String[] {"A", "B", "C", "D"});
63 IndexReader reader = IndexReader.open(dir);
64 IndexSearcher searcher = new IndexSearcher(reader);
65 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
66 assertEquals("A,B,C,D, only B in range", 1, hits.length);
// Rebuild the index without C; the expected hit count stays at one.
70 initializeIndex(new String[] {"A", "B", "D"});
71 reader = IndexReader.open(dir);
72 searcher = new IndexSearcher(reader);
73 hits = searcher.search(query, null, 1000).scoreDocs;
74 assertEquals("A,B,D, only B in range", 1, hits.length);
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
79 reader = IndexReader.open(dir);
80 searcher = new IndexSearcher(reader);
81 hits = searcher.search(query, null, 1000).scoreDocs;
82 assertEquals("C added, still only B in range", 1, hits.length);
/**
 * Verifies that a {@link TermRangeQuery} on "content" from "A" to "C", built with
 * both inclusion flags set to true, matches the bounds themselves as well as the
 * terms between them.
 */
87 public void testInclusive() throws Exception {
88 Query query = new TermRangeQuery("content", "A", "C", true, true);
// Index holds A, B, C and D; A, B and C are expected to match.
90 initializeIndex(new String[]{"A", "B", "C", "D"});
91 IndexReader reader = IndexReader.open(dir);
92 IndexSearcher searcher = new IndexSearcher(reader);
93 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
94 assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
// Rebuild the index without C; only A and B are expected to match.
98 initializeIndex(new String[]{"A", "B", "D"});
99 reader = IndexReader.open(dir);
100 searcher = new IndexSearcher(reader);
101 hits = searcher.search(query, null, 1000).scoreDocs;
102 assertEquals("A,B,D - A and B in range", 2, hits.length);
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
107 reader = IndexReader.open(dir);
108 searcher = new IndexSearcher(reader);
109 hits = searcher.search(query, null, 1000).scoreDocs;
110 assertEquals("C added - A, B, C in range", 3, hits.length);
115 /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE)
116 * with constant score and checks, that only the lower end of terms is put into the range */
117 public void testTopTermsRewrite() throws Exception {
// Eleven single-letter terms, A through K.
118 initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
120 IndexReader reader = IndexReader.open(dir);
121 IndexSearcher searcher = new IndexSearcher(reader);
122 TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
// With the current clause limit, all nine terms B..J are expected in the rewritten query.
123 checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
// Remember the static clause limit so it can be put back after the second check.
125 final int savedClauseCount = BooleanQuery.getMaxClauseCount();
// With the limit lowered to 3, only the first three terms (B, C, D) are expected.
127 BooleanQuery.setMaxClauseCount(3);
128 checkBooleanTerms(searcher, query, "B", "C", "D");
// NOTE(review): the lines surrounding this restore are not visible in this extract;
// confirm it runs even when the check above fails (e.g. from a finally block).
130 BooleanQuery.setMaxClauseCount(savedClauseCount);
/**
 * Rewrites the given range query with a
 * {@code MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)} and asserts that the
 * result is a {@link BooleanQuery} whose clauses are {@link TermQuery} instances
 * covering exactly the expected terms, each once.
 *
 * @param searcher searcher used to rewrite the query
 * @param query range query to rewrite; its rewrite method is replaced by this call
 * @param terms the term texts the rewritten query must contain
 * @throws IOException declared by the signature; the visible body's only searcher call
 *         is the rewrite
 */
136 private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
137 query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
// The cast doubles as a check: a non-BooleanQuery rewrite result fails here.
138 final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
139 final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
// Clause count must equal the number of distinct expected terms.
140 assertEquals(allowedTerms.size(), bq.clauses().size());
141 for (BooleanClause c : bq.clauses()) {
142 assertTrue(c.getQuery() instanceof TermQuery);
143 final TermQuery tq = (TermQuery) c.getQuery();
144 final String term = tq.getTerm().text();
145 assertTrue("invalid term: "+ term, allowedTerms.contains(term));
146 allowedTerms.remove(term); // remove to fail on double terms
// Every expected term must have been consumed by some clause.
148 assertEquals(0, allowedTerms.size());
/**
 * Exercises {@code equals} and {@code hashCode} of {@link TermRangeQuery}: equal
 * queries must compare equal and share a hash code, while queries differing in boost,
 * field, lower term, upper term, null bounds, inclusiveness or collator must not be
 * equal.
 */
151 public void testEqualsHashcode() {
// Baseline: two queries built from identical arguments and identical boost.
152 Query query = new TermRangeQuery("content", "A", "C", true, true);
154 query.setBoost(1.0f);
155 Query other = new TermRangeQuery("content", "A", "C", true, true);
156 other.setBoost(1.0f);
158 assertEquals("query equals itself is true", query, query);
159 assertEquals("equivalent queries are equal", query, other);
160 assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
// Each of the following varies a single property relative to the baseline query.
162 other.setBoost(2.0f);
163 assertFalse("Different boost queries are not equal", query.equals(other));
165 other = new TermRangeQuery("notcontent", "A", "C", true, true);
166 assertFalse("Different fields are not equal", query.equals(other));
168 other = new TermRangeQuery("content", "X", "C", true, true);
169 assertFalse("Different lower terms are not equal", query.equals(other));
171 other = new TermRangeQuery("content", "A", "Z", true, true);
172 assertFalse("Different upper terms are not equal", query.equals(other));
// Null lower bound on both sides: still equal with matching hash codes.
174 query = new TermRangeQuery("content", null, "C", true, true);
175 other = new TermRangeQuery("content", null, "C", true, true);
176 assertEquals("equivalent queries with null lowerterms are equal()", query, other);
177 assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
// Null upper bound on both sides: still equal with matching hash codes.
179 query = new TermRangeQuery("content", "C", null, true, true);
180 other = new TermRangeQuery("content", "C", null, true, true);
181 assertEquals("equivalent queries with null upperterms are equal()", query, other);
182 assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
// The same term "C" used as upper bound in one query and as lower bound in the other.
184 query = new TermRangeQuery("content", null, "C", true, true);
185 other = new TermRangeQuery("content", "C", null, true, true);
186 assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
// Same bounds, different inclusion flags.
188 query = new TermRangeQuery("content", "A", "C", false, false);
189 other = new TermRangeQuery("content", "A", "C", true, true);
190 assertFalse("queries with different inclusive are not equal", query.equals(other));
// Same bounds and flags, but only one query carries a Collator.
192 query = new TermRangeQuery("content", "A", "C", false, false);
193 other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
194 assertFalse("a query with a collator is not equal to one without", query.equals(other));
/**
 * Same scenarios as the exclusive "A".."C" range check, but the query is built with
 * an English {@link Collator}; one hit is expected in every index state.
 */
197 public void testExclusiveCollating() throws Exception {
198 Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
// Index holds A, B, C and D; only B is expected to match.
199 initializeIndex(new String[] {"A", "B", "C", "D"});
200 IndexSearcher searcher = new IndexSearcher(dir, true);
201 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
202 assertEquals("A,B,C,D, only B in range", 1, hits.length);
// Rebuild the index without C; still one expected hit.
205 initializeIndex(new String[] {"A", "B", "D"});
206 searcher = new IndexSearcher(dir, true);
207 hits = searcher.search(query, null, 1000).scoreDocs;
208 assertEquals("A,B,D, only B in range", 1, hits.length);
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
212 searcher = new IndexSearcher(dir, true);
213 hits = searcher.search(query, null, 1000).scoreDocs;
214 assertEquals("C added, still only B in range", 1, hits.length);
/**
 * Same scenarios as the inclusive "A".."C" range check, but the query is built with
 * an English {@link Collator}; the bounds themselves are expected to match.
 */
218 public void testInclusiveCollating() throws Exception {
219 Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
// Index holds A, B, C and D; A, B and C are expected to match.
221 initializeIndex(new String[]{"A", "B", "C", "D"});
222 IndexSearcher searcher = new IndexSearcher(dir, true);
223 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
224 assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
// Rebuild the index without C; only A and B are expected to match.
227 initializeIndex(new String[]{"A", "B", "D"});
228 searcher = new IndexSearcher(dir, true);
229 hits = searcher.search(query, null, 1000).scoreDocs;
230 assertEquals("A,B,D - A and B in range", 2, hits.length);
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
234 searcher = new IndexSearcher(dir, true);
235 hits = searcher.search(query, null, 1000).scoreDocs;
236 assertEquals("C added - A, B, C in range", 3, hits.length);
/**
 * Checks collator-driven range matching with an Arabic-locale {@link Collator}
 * against an index containing a single term: one range must exclude the term and a
 * second range must include it.
 */
240 public void testFarsi() throws Exception {
241 // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
242 // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
243 // characters properly.
244 Collator collator = Collator.getInstance(new Locale("ar"));
245 Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
246 // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
247 // orders the U+0698 character before the U+0633 character, so the single
248 // index Term below should NOT be returned by a TermRangeQuery with a Farsi
249 // Collator (or an Arabic one for the case when Farsi is not supported).
250 initializeIndex(new String[]{ "\u0633\u0627\u0628"});
251 IndexSearcher searcher = new IndexSearcher(dir, true);
252 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
253 assertEquals("The index Term should not be included.", 0, hits.length);
// Second range starts at U+0633, the first character of the indexed term, so a hit is expected.
255 query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
256 hits = searcher.search(query, null, 1000).scoreDocs;
257 assertEquals("The index Term should be included.", 1, hits.length);
/**
 * Checks collator-driven range matching with a Danish {@link Collator}: an exclusive
 * range between two of the three indexed words must match the third, and a range
 * that starts at that third word must match nothing.
 */
261 public void testDanish() throws Exception {
262 Collator collator = Collator.getInstance(new Locale("da", "dk"));
263 // Danish collation orders the words below in the given order (example taken
264 // from TestSort.testInternationalSort() ).
265 String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
266 Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
268 // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
269 // but Danish collation does.
270 initializeIndex(words);
271 IndexSearcher searcher = new IndexSearcher(dir, true);
272 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
273 assertEquals("The index Term should be included.", 1, hits.length);
// Both bounds are exclusive and are themselves indexed words, so no hit is expected here.
275 query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
276 hits = searcher.search(query, null, 1000).scoreDocs;
277 assertEquals("The index Term should not be included.", 0, hits.length);
/**
 * Test-only {@link Analyzer} whose token streams are produced by the nested
 * {@code SingleCharTokenizer}.
 * NOTE(review): large parts of this class (constructor body, branch logic of
 * incrementToken, body of reset, return statements) are not visible in this extract;
 * the comments below describe only the visible statements.
 */
281 private static class SingleCharAnalyzer extends Analyzer {
// Tokenizer backed by a one-character buffer and a CharTermAttribute.
283 private static class SingleCharTokenizer extends Tokenizer {
284 char[] buffer = new char[1];
285 boolean done = false;
286 CharTermAttribute termAtt;
288 public SingleCharTokenizer(Reader r) {
290 termAtt = addAttribute(CharTermAttribute.class);
294 public boolean incrementToken() throws IOException {
// Reads from the input into the one-char buffer, then copies one char into the term.
298 int count = input.read(buffer);
302 termAtt.copyBuffer(buffer, 0, 1);
309 public final void reset(Reader reader) throws IOException {
// Reuses the previously stored tokenizer when there is one; otherwise creates and stores a new one.
316 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
317 Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
318 if (tokenizer == null) {
319 tokenizer = new SingleCharTokenizer(reader);
320 setPreviousTokenStream(tokenizer);
322 tokenizer.reset(reader);
// Non-reusing variant: always returns a fresh tokenizer over the reader.
327 public TokenStream tokenStream(String fieldName, Reader reader) {
328 return new SingleCharTokenizer(reader);
/**
 * Builds the index from the given values using a whitespace {@link MockAnalyzer};
 * delegates to the two-argument overload.
 *
 * @param values content strings, one document per entry
 */
332 private void initializeIndex(String[] values) throws IOException {
333 initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
/**
 * Opens an {@link IndexWriter} on {@code dir} in {@code OpenMode.CREATE} with the
 * given analyzer and inserts one document per value.
 * NOTE(review): the lines that close the writer are not visible in this extract.
 *
 * @param values content strings, one document per entry
 * @param analyzer analyzer passed to the writer configuration
 */
336 private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
337 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
338 TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
339 for (int i = 0; i < values.length; i++) {
340 insertDoc(writer, values[i]);
345 // shouldn't create an analyzer for every doc?
// Appends a single document with the given content to the existing index: opens a
// writer on dir in OpenMode.APPEND with a fresh whitespace MockAnalyzer.
// NOTE(review): the lines that close the writer are not visible in this extract.
346 private void addDoc(String content) throws IOException {
347 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
348 insertDoc(writer, content);
/**
 * Adds one document to the writer with a stored, non-analyzed "id" field derived
 * from {@code docCount} and an analyzed, non-stored "content" field.
 * NOTE(review): any update of docCount is not visible in this extract.
 *
 * @param writer open writer that receives the document
 * @param content text for the "content" field
 */
352 private void insertDoc(IndexWriter writer, String content) throws IOException {
353 Document doc = new Document();
355 doc.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
356 doc.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
358 writer.addDocument(doc);
/**
 * Range with a null lower bound and upper bound "C", indexed through
 * {@code SingleCharAnalyzer}. Per the assertion messages, A, B and the empty string
 * are expected to match (three hits) in every index state. The "until Lucene-38 is
 * fixed" alternatives are commented out; the active assertions are the post-fix
 * expectations.
 */
363 public void testExclusiveLowerNull() throws Exception {
364 Analyzer analyzer = new SingleCharAnalyzer();
365 //http://issues.apache.org/jira/browse/LUCENE-38
// NOTE(review): the remaining constructor arguments sit on a line that is not
// visible in this extract.
366 Query query = new TermRangeQuery("content", null, "C",
368 initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
369 IndexReader reader = IndexReader.open(dir);
370 IndexSearcher searcher = new IndexSearcher(reader);
371 int numHits = searcher.search(query, null, 1000).totalHits;
372 // When Lucene-38 is fixed, use the assert on the next line:
373 assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
374 // until Lucene-38 is fixed, use this assert:
375 //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length());
// Rebuild the index without C; the expected hit count is unchanged.
379 initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
380 reader = IndexReader.open(dir);
381 searcher = new IndexSearcher(reader);
382 numHits = searcher.search(query, null, 1000).totalHits;
383 // When Lucene-38 is fixed, use the assert on the next line:
384 assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
385 // until Lucene-38 is fixed, use this assert:
386 //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length());
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
390 reader = IndexReader.open(dir);
391 searcher = new IndexSearcher(reader);
392 numHits = searcher.search(query, null, 1000).totalHits;
393 // When Lucene-38 is fixed, use the assert on the next line:
394 assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
395 // until Lucene-38 is fixed, use this assert
396 //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length());
/**
 * Range with a null lower bound and upper bound "C" with both inclusion flags true,
 * indexed through {@code SingleCharAnalyzer}. Per the assertion messages, A, B, the
 * empty string and (when indexed) C are expected to match. The "until Lucene-38 is
 * fixed" alternatives are commented out; the active assertions are the post-fix
 * expectations.
 */
402 public void testInclusiveLowerNull() throws Exception {
403 //http://issues.apache.org/jira/browse/LUCENE-38
404 Analyzer analyzer = new SingleCharAnalyzer();
405 Query query = new TermRangeQuery("content", null, "C", true, true);
// Index holds A, B, the empty string, C and D; four hits expected.
406 initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
407 IndexReader reader = IndexReader.open(dir);
408 IndexSearcher searcher = new IndexSearcher(reader);
409 int numHits = searcher.search(query, null, 1000).totalHits;
410 // When Lucene-38 is fixed, use the assert on the next line:
411 assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
412 // until Lucene-38 is fixed, use this assert
413 //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length());
// Rebuild the index without C; three hits expected.
416 initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
417 reader = IndexReader.open(dir);
418 searcher = new IndexSearcher(reader);
419 numHits = searcher.search(query, null, 1000).totalHits;
420 // When Lucene-38 is fixed, use the assert on the next line:
421 assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
422 // until Lucene-38 is fixed, use this assert
423 //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length());
// NOTE(review): the statement that adds "C" to the index is not visible in this
// extract (presumably an addDoc call) - confirm against the full file.
427 reader = IndexReader.open(dir);
428 searcher = new IndexSearcher(reader);
429 numHits = searcher.search(query, null, 1000).totalHits;
430 // When Lucene-38 is fixed, use the assert on the next line:
431 assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
432 // until Lucene-38 is fixed, use this assert
433 //assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());