1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.document.Document;
21 import org.apache.lucene.document.Field;
22 import org.apache.lucene.index.IndexWriter;
23 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
24 import org.apache.lucene.store.Directory;
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.analysis.MockTokenizer;
28 import org.apache.lucene.analysis.TokenStream;
29 import org.apache.lucene.analysis.Tokenizer;
30 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
32 import org.apache.lucene.util.LuceneTestCase;
33 import java.io.IOException;
34 import java.io.Reader;
35 import java.util.Locale;
37 import java.util.HashSet;
38 import java.util.Arrays;
39 import java.text.Collator;
/**
 * Tests for TermRangeQuery: inclusive and exclusive bounds, null lower bounds,
 * Collator-based ranges, equals()/hashCode() and rewriting to a BooleanQuery of terms.
 */
42 public class TestTermRangeQuery extends LuceneTestCase {
// Appended to "id" to form the stored id field of each inserted document; starts at 0.
44 private int docCount = 0;
// Directory that the index writers write to and the searchers read from.
45 private Directory dir;
48 public void setUp() throws Exception {
54 public void tearDown() throws Exception {
/**
 * Checks a range query on "content" from "A" to "C" with both bounds exclusive:
 * exactly one document (B) may match, whether or not C is present in the index.
 */
59 public void testExclusive() throws Exception {
60 Query query = new TermRangeQuery("content", "A", "C", false, false);
61 initializeIndex(new String[] {"A", "B", "C", "D"});
62 IndexSearcher searcher = new IndexSearcher(dir, true);
63 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
64 assertEquals("A,B,C,D, only B in range", 1, hits.length);
// Recreate the index without C; the hit count must not change.
67 initializeIndex(new String[] {"A", "B", "D"});
68 searcher = new IndexSearcher(dir, true);
69 hits = searcher.search(query, null, 1000).scoreDocs;
70 assertEquals("A,B,D, only B in range", 1, hits.length);
// Per the assertion message C has been added again by now; it sits on the
// exclusive upper bound and must therefore still be left out.
74 searcher = new IndexSearcher(dir, true);
75 hits = searcher.search(query, null, 1000).scoreDocs;
76 assertEquals("C added, still only B in range", 1, hits.length);
/**
 * Checks a range query on "content" from "A" to "C" with both bounds inclusive:
 * A, B and C match when present, D never does.
 */
80 public void testInclusive() throws Exception {
81 Query query = new TermRangeQuery("content", "A", "C", true, true);
83 initializeIndex(new String[]{"A", "B", "C", "D"});
84 IndexSearcher searcher = new IndexSearcher(dir, true);
85 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
86 assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
// Recreate the index without C; only A and B remain in range.
89 initializeIndex(new String[]{"A", "B", "D"});
90 searcher = new IndexSearcher(dir, true);
91 hits = searcher.search(query, null, 1000).scoreDocs;
92 assertEquals("A,B,D - A and B in range", 2, hits.length);
// Per the assertion message C has been added again by now; as an inclusive
// upper bound it counts as a hit.
96 searcher = new IndexSearcher(dir, true);
97 hits = searcher.search(query, null, 1000).scoreDocs;
98 assertEquals("C added - A, B, C in range", 3, hits.length);
102 /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE)
103 * with constant score and checks that only the lower end of the terms is put into the rewritten query. */
104 public void testTopTermsRewrite() throws Exception {
105 initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
107 IndexSearcher searcher = new IndexSearcher(dir, true);
108 TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
// With the clause limit untouched, every indexed term from B through J is expected.
109 checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
// Remember the global clause limit so it can be put back after being lowered.
111 final int savedClauseCount = BooleanQuery.getMaxClauseCount();
// With the limit lowered to 3, only the three lowest terms are expected.
113 BooleanQuery.setMaxClauseCount(3);
114 checkBooleanTerms(searcher, query, "B", "C", "D");
// Put the saved limit back.
116 BooleanQuery.setMaxClauseCount(savedClauseCount);
/**
 * Rewrites the query with a TopTermsScoringBooleanQueryRewrite (constructed with 50) and
 * asserts that the resulting BooleanQuery holds exactly one TermQuery clause per expected term.
 *
 * @param searcher searcher used to rewrite the query
 * @param query range query to rewrite; this call replaces its rewrite method
 * @param terms term texts the rewritten clauses must contain, each exactly once
 * @throws IOException declared by this helper
 */
121 private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
122 query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
// The cast fails the test if the rewrite produced anything other than a BooleanQuery.
123 final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
124 final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
// One clause per distinct expected term.
125 assertEquals(allowedTerms.size(), bq.clauses().size());
126 for (BooleanClause c : bq.clauses()) {
127 assertTrue(c.getQuery() instanceof TermQuery);
128 final TermQuery tq = (TermQuery) c.getQuery();
129 final String term = tq.getTerm().text();
130 assertTrue("invalid term: "+ term, allowedTerms.contains(term));
131 allowedTerms.remove(term); // remove to fail on double terms
// Nothing left over: every expected term was seen.
133 assertEquals(0, allowedTerms.size());
/**
 * Exercises equals() and hashCode() of TermRangeQuery: identically built queries must be
 * equal and share a hash code, while a difference in boost, field, either bound,
 * inclusiveness or collator must make them unequal.
 */
136 public void testEqualsHashcode() {
137 Query query = new TermRangeQuery("content", "A", "C", true, true);
139 query.setBoost(1.0f);
140 Query other = new TermRangeQuery("content", "A", "C", true, true);
141 other.setBoost(1.0f);
// Same arguments and boost: equal, with matching hash codes.
143 assertEquals("query equals itself is true", query, query);
144 assertEquals("equivalent queries are equal", query, other);
145 assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
// Each of the following varies the boost or one constructor argument of "other".
147 other.setBoost(2.0f);
148 assertFalse("Different boost queries are not equal", query.equals(other));
150 other = new TermRangeQuery("notcontent", "A", "C", true, true);
151 assertFalse("Different fields are not equal", query.equals(other));
153 other = new TermRangeQuery("content", "X", "C", true, true);
154 assertFalse("Different lower terms are not equal", query.equals(other));
156 other = new TermRangeQuery("content", "A", "Z", true, true);
157 assertFalse("Different upper terms are not equal", query.equals(other));
// Null bounds: two queries with a null bound on the same side must still be equal.
159 query = new TermRangeQuery("content", null, "C", true, true);
160 other = new TermRangeQuery("content", null, "C", true, true);
161 assertEquals("equivalent queries with null lowerterms are equal()", query, other);
162 assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
164 query = new TermRangeQuery("content", "C", null, true, true);
165 other = new TermRangeQuery("content", "C", null, true, true);
166 assertEquals("equivalent queries with null upperterms are equal()", query, other);
167 assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
// A null lower bound is not interchangeable with a null upper bound.
169 query = new TermRangeQuery("content", null, "C", true, true);
170 other = new TermRangeQuery("content", "C", null, true, true);
171 assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
// Exclusive versus inclusive bounds.
173 query = new TermRangeQuery("content", "A", "C", false, false);
174 other = new TermRangeQuery("content", "A", "C", true, true);
175 assertFalse("queries with different inclusive are not equal", query.equals(other));
// With versus without a Collator.
177 query = new TermRangeQuery("content", "A", "C", false, false);
178 other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
179 assertFalse("a query with a collator is not equal to one without", query.equals(other));
/**
 * Collator variant of the exclusive-bounds check: an exclusive "A" to "C" range built
 * with an English Collator must match exactly one document (B) in every index state.
 */
182 public void testExclusiveCollating() throws Exception {
183 Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
184 initializeIndex(new String[] {"A", "B", "C", "D"});
185 IndexSearcher searcher = new IndexSearcher(dir, true);
186 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
187 assertEquals("A,B,C,D, only B in range", 1, hits.length);
// Recreate the index without C; the hit count must not change.
190 initializeIndex(new String[] {"A", "B", "D"});
191 searcher = new IndexSearcher(dir, true);
192 hits = searcher.search(query, null, 1000).scoreDocs;
193 assertEquals("A,B,D, only B in range", 1, hits.length);
// Per the assertion message C has been added again by now; the exclusive
// upper bound must still keep it out.
197 searcher = new IndexSearcher(dir, true);
198 hits = searcher.search(query, null, 1000).scoreDocs;
199 assertEquals("C added, still only B in range", 1, hits.length);
/**
 * Collator variant of the inclusive-bounds check: an inclusive "A" to "C" range built
 * with an English Collator must match A, B and C when present, and never D.
 */
203 public void testInclusiveCollating() throws Exception {
204 Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
206 initializeIndex(new String[]{"A", "B", "C", "D"});
207 IndexSearcher searcher = new IndexSearcher(dir, true);
208 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
209 assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
// Recreate the index without C; only A and B remain in range.
212 initializeIndex(new String[]{"A", "B", "D"});
213 searcher = new IndexSearcher(dir, true);
214 hits = searcher.search(query, null, 1000).scoreDocs;
215 assertEquals("A,B,D - A and B in range", 2, hits.length);
// Per the assertion message C has been added again by now; as an inclusive
// upper bound it counts as a hit.
219 searcher = new IndexSearcher(dir, true);
220 hits = searcher.search(query, null, 1000).scoreDocs;
221 assertEquals("C added - A, B, C in range", 3, hits.length);
/**
 * Checks that a range query built with a Collator follows the collator's ordering rather
 * than Unicode order, using Farsi characters and an Arabic-locale Collator.
 */
225 public void testFarsi() throws Exception {
226 // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
227 // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
228 // characters properly.
229 Collator collator = Collator.getInstance(new Locale("ar"));
230 Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
231 // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
232 // orders the U+0698 character before the U+0633 character, so the single
233 // index Term below should NOT be returned by a TermRangeQuery with a Farsi
234 // Collator (or an Arabic one for the case when Farsi is not supported).
235 initializeIndex(new String[]{ "\u0633\u0627\u0628"});
236 IndexSearcher searcher = new IndexSearcher(dir, true);
237 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
238 assertEquals("The index Term should not be included.", 0, hits.length);
// Control case on the same index: a range whose lower bound is U+0633 must find the term.
240 query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
241 hits = searcher.search(query, null, 1000).scoreDocs;
242 assertEquals("The index Term should be included.", 1, hits.length);
/**
 * Checks that a range query built with a Danish Collator uses Danish ordering: with both
 * bounds exclusive, the range from the first to the last of the three words must contain
 * the middle word, and the range from the middle to the last word must be empty.
 */
246 public void testDanish() throws Exception {
247 Collator collator = Collator.getInstance(new Locale("da", "dk"));
248 // Danish collation orders the words below in the given order (example taken
249 // from TestSort.testInternationalSort() ).
250 String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
251 Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
253 // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
254 // but Danish collation does.
255 initializeIndex(words);
256 IndexSearcher searcher = new IndexSearcher(dir, true);
257 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
258 assertEquals("The index Term should be included.", 1, hits.length);
// With the middle word as the exclusive lower bound, no indexed word is strictly inside the range.
260 query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
261 hits = searcher.search(query, null, 1000).scoreDocs;
262 assertEquals("The index Term should not be included.", 0, hits.length);
/**
 * Analyzer used by the null-lower-bound tests. Both of its token stream factory methods
 * hand out a SingleCharTokenizer.
 */
266 private static class SingleCharAnalyzer extends Analyzer {
// Tokenizer that reads from its input into a one-char buffer and copies that char into the term attribute.
// NOTE(review): presumably this lets an empty-string field value still produce a term - confirm.
268 private static class SingleCharTokenizer extends Tokenizer {
269 char[] buffer = new char[1];
270 boolean done = false;
271 CharTermAttribute termAtt;
273 public SingleCharTokenizer(Reader r) {
275 termAtt = addAttribute(CharTermAttribute.class);
279 public boolean incrementToken() throws IOException {
283 int count = input.read(buffer);
// Always copies exactly one char from the buffer into the term.
287 termAtt.copyBuffer(buffer, 0, 1);
294 public final void reset(Reader reader) throws IOException {
// Looks up the tokenizer stored via setPreviousTokenStream; when none is stored yet,
// a new one is created and stored.
301 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
302 Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
303 if (tokenizer == null) {
304 tokenizer = new SingleCharTokenizer(reader);
305 setPreviousTokenStream(tokenizer);
307 tokenizer.reset(reader);
// Non-reusing variant: a new tokenizer for every call.
312 public TokenStream tokenStream(String fieldName, Reader reader) {
313 return new SingleCharTokenizer(reader);
/**
 * Builds the index from the given values, delegating to the two-argument overload with a
 * MockAnalyzer configured with MockTokenizer.WHITESPACE.
 *
 * @param values content of the documents to index, one document per element
 */
317 private void initializeIndex(String[] values) throws IOException {
318 initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
/**
 * Opens an IndexWriter on dir in CREATE mode with the given analyzer and inserts one
 * document per value, in array order.
 *
 * @param values content of the documents to index, one document per element
 * @param analyzer analyzer passed to the writer configuration
 */
321 private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
322 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
323 TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
324 for (int i = 0; i < values.length; i++) {
325 insertDoc(writer, values[i]);
/**
 * Opens an IndexWriter on dir in APPEND mode and inserts a single document with the
 * given content, leaving the documents already in the index in place.
 *
 * @param content value of the new document's "content" field
 */
330 // Shouldn't we avoid creating an analyzer for every doc?
331 private void addDoc(String content) throws IOException {
332 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
333 insertDoc(writer, content);
/**
 * Adds one document to the writer with two fields: a stored, non-analyzed "id"
 * ("id" followed by the current docCount) and an analyzed, unstored "content".
 *
 * @param writer writer the document is added to
 * @param content value of the "content" field
 */
337 private void insertDoc(IndexWriter writer, String content) throws IOException {
338 Document doc = new Document();
340 doc.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
341 doc.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
343 writer.addDocument(doc);
/**
 * Range on "content" with a null lower bound and "C" as upper bound, indexed with
 * SingleCharAnalyzer. Per the assertions, A, B and the empty-string value are hits and
 * C is not, in every index state. The active assertions are the ones the comments below
 * reserve for the state after LUCENE-38 is fixed.
 */
348 public void testExclusiveLowerNull() throws Exception {
349 Analyzer analyzer = new SingleCharAnalyzer();
350 //http://issues.apache.org/jira/browse/LUCENE-38
351 Query query = new TermRangeQuery("content", null, "C",
353 initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
354 IndexSearcher searcher = new IndexSearcher(dir, true);
355 int numHits = searcher.search(query, null, 1000).totalHits;
356 // When Lucene-38 is fixed, use the assert on the next line:
357 assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
358 // until Lucene-38 is fixed, use this assert:
359 //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length());
// Recreate the index without C; the same three values stay in range.
362 initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
363 searcher = new IndexSearcher(dir, true);
364 numHits = searcher.search(query, null, 1000).totalHits;
365 // When Lucene-38 is fixed, use the assert on the next line:
366 assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
367 // until Lucene-38 is fixed, use this assert:
368 //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length());
// Per the assertion message C has been added again by now and must still not count.
371 searcher = new IndexSearcher(dir, true);
372 numHits = searcher.search(query, null, 1000).totalHits;
373 // When Lucene-38 is fixed, use the assert on the next line:
374 assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
375 // until Lucene-38 is fixed, use this assert
376 //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length());
/**
 * Range on "content" with a null lower bound and an inclusive "C" upper bound, indexed
 * with SingleCharAnalyzer. A, B and the empty-string value are always hits, and C is a
 * hit whenever it is in the index. The active assertions are the ones the comments below
 * reserve for the state after LUCENE-38 is fixed.
 */
381 public void testInclusiveLowerNull() throws Exception {
382 //http://issues.apache.org/jira/browse/LUCENE-38
383 Analyzer analyzer = new SingleCharAnalyzer();
384 Query query = new TermRangeQuery("content", null, "C", true, true);
385 initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
386 IndexSearcher searcher = new IndexSearcher(dir, true);
387 int numHits = searcher.search(query, null, 1000).totalHits;
388 // When Lucene-38 is fixed, use the assert on the next line:
389 assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
390 // until Lucene-38 is fixed, use this assert
391 //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length());
// Recreate the index without C; three values remain in range.
393 initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
394 searcher = new IndexSearcher(dir, true);
395 numHits = searcher.search(query, null, 1000).totalHits;
396 // When Lucene-38 is fixed, use the assert on the next line:
397 assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
398 // until Lucene-38 is fixed, use this assert
399 //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length());
// Per the assertion message C has been added again by now; the inclusive bound counts it.
402 searcher = new IndexSearcher(dir, true);
403 numHits = searcher.search(query, null, 1000).totalHits;
404 // When Lucene-38 is fixed, use the assert on the next line:
405 assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
406 // until Lucene-38 is fixed, use this assert
407 //assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());