lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.document.Document;
  21 import org.apache.lucene.document.Field;
  22 import org.apache.lucene.index.IndexReader;
  23 import org.apache.lucene.index.IndexWriter;
  24 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  25 import org.apache.lucene.store.Directory;
  26 import org.apache.lucene.analysis.Analyzer;
  27 import org.apache.lucene.analysis.MockAnalyzer;
  28 import org.apache.lucene.analysis.MockTokenizer;
  29 import org.apache.lucene.analysis.TokenStream;
  30 import org.apache.lucene.analysis.Tokenizer;
  31 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  32
  33 import org.apache.lucene.util.LuceneTestCase;
  34 import java.io.IOException;
  35 import java.io.Reader;
  36 import java.util.Locale;
  37 import java.util.Set;
  38 import java.util.HashSet;
  39 import java.util.Arrays;
  40 import java.text.Collator;
  41
  42
  43 public class TestTermRangeQuery extends LuceneTestCase {
  44
  45   private int docCount = 0; // documents inserted so far; insertDoc appends it to "id" and then increments it
  46   private Directory dir; // index directory: assigned in setUp(), closed in tearDown()
  47
  48   @Override
  49   public void setUp() throws Exception {
  50     super.setUp();
  51     dir = newDirectory();
  52   }
  53
  54   @Override
  55   public void tearDown() throws Exception {
  56     dir.close();
  57     super.tearDown();
  58   }
  59
  60   public void testExclusive() throws Exception {
  61     Query query = new TermRangeQuery("content", "A", "C", false, false);
  62     initializeIndex(new String[] {"A", "B", "C", "D"});
  63     IndexReader reader = IndexReader.open(dir);
  64     IndexSearcher searcher = new IndexSearcher(reader);
  65     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  66     assertEquals("A,B,C,D, only B in range", 1, hits.length);
  67     searcher.close();
  68     reader.close();
  69
  70     initializeIndex(new String[] {"A", "B", "D"});
  71     reader = IndexReader.open(dir);
  72     searcher = new IndexSearcher(reader);
  73     hits = searcher.search(query, null, 1000).scoreDocs;
  74     assertEquals("A,B,D, only B in range", 1, hits.length);
  75     searcher.close();
  76     reader.close();
  77
  78     addDoc("C");
  79     reader = IndexReader.open(dir);
  80     searcher = new IndexSearcher(reader);
  81     hits = searcher.search(query, null, 1000).scoreDocs;
  82     assertEquals("C added, still only B in range", 1, hits.length);
  83     searcher.close();
  84     reader.close();
  85   }
  86
  87   public void testInclusive() throws Exception {
  88     Query query = new TermRangeQuery("content", "A", "C", true, true);
  89
  90     initializeIndex(new String[]{"A", "B", "C", "D"});
  91     IndexReader reader = IndexReader.open(dir);
  92     IndexSearcher searcher = new IndexSearcher(reader);
  93     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  94     assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
  95     searcher.close();
  96     reader.close();
  97
  98     initializeIndex(new String[]{"A", "B", "D"});
  99     reader = IndexReader.open(dir);
 100     searcher = new IndexSearcher(reader);
 101     hits = searcher.search(query, null, 1000).scoreDocs;
 102     assertEquals("A,B,D - A and B in range", 2, hits.length);
 103     searcher.close();
 104     reader.close();
 105
 106     addDoc("C");
 107     reader = IndexReader.open(dir);
 108     searcher = new IndexSearcher(reader);
 109     hits = searcher.search(query, null, 1000).scoreDocs;
 110     assertEquals("C added - A, B, C in range", 3, hits.length);
 111     searcher.close();
 112     reader.close();
 113   }
 114
 115   /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE)
 116    * with constant score and checks, that only the lower end of terms is put into the range */
 117   public void testTopTermsRewrite() throws Exception {
 118     initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
 119
 120     IndexReader reader = IndexReader.open(dir);
 121     IndexSearcher searcher = new IndexSearcher(reader);
 122     TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
 123     checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
 124
 125     final int savedClauseCount = BooleanQuery.getMaxClauseCount();
 126     try {
 127       BooleanQuery.setMaxClauseCount(3);
 128       checkBooleanTerms(searcher, query, "B", "C", "D");
 129     } finally {
 130       BooleanQuery.setMaxClauseCount(savedClauseCount);
 131     }
 132     searcher.close();
 133     reader.close();
 134   }
 135
 136   private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
 137     query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
 138     final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
 139     final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
 140     assertEquals(allowedTerms.size(), bq.clauses().size());
 141     for (BooleanClause c : bq.clauses()) {
 142       assertTrue(c.getQuery() instanceof TermQuery);
 143       final TermQuery tq = (TermQuery) c.getQuery();
 144       final String term = tq.getTerm().text();
 145       assertTrue("invalid term: "+ term, allowedTerms.contains(term));
 146       allowedTerms.remove(term); // remove to fail on double terms
 147     }
 148     assertEquals(0, allowedTerms.size());
 149   }
 150
 151   public void testEqualsHashcode() {
 152     Query query = new TermRangeQuery("content", "A", "C", true, true);
 153
 154     query.setBoost(1.0f);
 155     Query other = new TermRangeQuery("content", "A", "C", true, true);
 156     other.setBoost(1.0f);
 157
 158     assertEquals("query equals itself is true", query, query);
 159     assertEquals("equivalent queries are equal", query, other);
 160     assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
 161
 162     other.setBoost(2.0f);
 163     assertFalse("Different boost queries are not equal", query.equals(other));
 164
 165     other = new TermRangeQuery("notcontent", "A", "C", true, true);
 166     assertFalse("Different fields are not equal", query.equals(other));
 167
 168     other = new TermRangeQuery("content", "X", "C", true, true);
 169     assertFalse("Different lower terms are not equal", query.equals(other));
 170
 171     other = new TermRangeQuery("content", "A", "Z", true, true);
 172     assertFalse("Different upper terms are not equal", query.equals(other));
 173
 174     query = new TermRangeQuery("content", null, "C", true, true);
 175     other = new TermRangeQuery("content", null, "C", true, true);
 176     assertEquals("equivalent queries with null lowerterms are equal()", query, other);
 177     assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
 178
 179     query = new TermRangeQuery("content", "C", null, true, true);
 180     other = new TermRangeQuery("content", "C", null, true, true);
 181     assertEquals("equivalent queries with null upperterms are equal()", query, other);
 182     assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
 183
 184     query = new TermRangeQuery("content", null, "C", true, true);
 185     other = new TermRangeQuery("content", "C", null, true, true);
 186     assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
 187
 188     query = new TermRangeQuery("content", "A", "C", false, false);
 189     other = new TermRangeQuery("content", "A", "C", true, true);
 190     assertFalse("queries with different inclusive are not equal", query.equals(other));
 191
 192     query = new TermRangeQuery("content", "A", "C", false, false);
 193     other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
 194     assertFalse("a query with a collator is not equal to one without", query.equals(other));
 195   }
 196
 197   public void testExclusiveCollating() throws Exception {
 198     Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
 199     initializeIndex(new String[] {"A", "B", "C", "D"});
 200     IndexSearcher searcher = new IndexSearcher(dir, true);
 201     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 202     assertEquals("A,B,C,D, only B in range", 1, hits.length);
 203     searcher.close();
 204
 205     initializeIndex(new String[] {"A", "B", "D"});
 206     searcher = new IndexSearcher(dir, true);
 207     hits = searcher.search(query, null, 1000).scoreDocs;
 208     assertEquals("A,B,D, only B in range", 1, hits.length);
 209     searcher.close();
 210
 211     addDoc("C");
 212     searcher = new IndexSearcher(dir, true);
 213     hits = searcher.search(query, null, 1000).scoreDocs;
 214     assertEquals("C added, still only B in range", 1, hits.length);
 215     searcher.close();
 216   }
 217
 218   public void testInclusiveCollating() throws Exception {
 219     Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
 220
 221     initializeIndex(new String[]{"A", "B", "C", "D"});
 222     IndexSearcher searcher = new IndexSearcher(dir, true);
 223     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 224     assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
 225     searcher.close();
 226
 227     initializeIndex(new String[]{"A", "B", "D"});
 228     searcher = new IndexSearcher(dir, true);
 229     hits = searcher.search(query, null, 1000).scoreDocs;
 230     assertEquals("A,B,D - A and B in range", 2, hits.length);
 231     searcher.close();
 232
 233     addDoc("C");
 234     searcher = new IndexSearcher(dir, true);
 235     hits = searcher.search(query, null, 1000).scoreDocs;
 236     assertEquals("C added - A, B, C in range", 3, hits.length);
 237     searcher.close();
 238   }
 239
 240   public void testFarsi() throws Exception {
 241     // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
 242     // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
 243     // characters properly.
 244     Collator collator = Collator.getInstance(new Locale("ar"));
 245     Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
 246     // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
 247     // orders the U+0698 character before the U+0633 character, so the single
 248     // index Term below should NOT be returned by a TermRangeQuery with a Farsi
 249     // Collator (or an Arabic one for the case when Farsi is not supported).
 250     initializeIndex(new String[]{ "\u0633\u0627\u0628"});
 251     IndexSearcher searcher = new IndexSearcher(dir, true);
 252     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 253     assertEquals("The index Term should not be included.", 0, hits.length);
 254
 255     query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
 256     hits = searcher.search(query, null, 1000).scoreDocs;
 257     assertEquals("The index Term should be included.", 1, hits.length);
 258     searcher.close();
 259   }
 260
 261   public void testDanish() throws Exception {
 262     Collator collator = Collator.getInstance(new Locale("da", "dk"));
 263     // Danish collation orders the words below in the given order (example taken
 264     // from TestSort.testInternationalSort() ).
 265     String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
 266     Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
 267
 268     // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
 269     // but Danish collation does.
 270     initializeIndex(words);
 271     IndexSearcher searcher = new IndexSearcher(dir, true);
 272     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 273     assertEquals("The index Term should be included.", 1, hits.length);
 274
 275     query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
 276     hits = searcher.search(query, null, 1000).scoreDocs;
 277     assertEquals("The index Term should not be included.", 0, hits.length);
 278     searcher.close();
 279   }
 280
 281   private static class SingleCharAnalyzer extends Analyzer {
 282
 283     private static class SingleCharTokenizer extends Tokenizer {
 284       char[] buffer = new char[1];
 285       boolean done = false;
 286       CharTermAttribute termAtt;
 287
 288       public SingleCharTokenizer(Reader r) {
 289         super(r);
 290         termAtt = addAttribute(CharTermAttribute.class);
 291       }
 292
 293       @Override
 294       public boolean incrementToken() throws IOException {
 295         if (done)
 296           return false;
 297         else {
 298           int count = input.read(buffer);
 299           clearAttributes();
 300           done = true;
 301           if (count == 1) {
 302             termAtt.copyBuffer(buffer, 0, 1);
 303           }
 304           return true;
 305         }
 306       }
 307
 308       @Override
 309       public final void reset(Reader reader) throws IOException {
 310         super.reset(reader);
 311         done = false;
 312       }
 313     }
 314
 315     @Override
 316     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
 317       Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
 318       if (tokenizer == null) {
 319         tokenizer = new SingleCharTokenizer(reader);
 320         setPreviousTokenStream(tokenizer);
 321       } else
 322         tokenizer.reset(reader);
 323       return tokenizer;
 324     }
 325
 326     @Override
 327     public TokenStream tokenStream(String fieldName, Reader reader) {
 328       return new SingleCharTokenizer(reader);
 329     }
 330   }
 331
 332   private void initializeIndex(String[] values) throws IOException {
 333     initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
 334   }
 335
 336   private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
 337     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
 338         TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
 339     for (int i = 0; i < values.length; i++) {
 340       insertDoc(writer, values[i]);
 341     }
 342     writer.close();
 343   }
 344
 345   // shouldnt create an analyzer for every doc?
 346   private void addDoc(String content) throws IOException {
 347     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
 348     insertDoc(writer, content);
 349     writer.close();
 350   }
 351
 352   private void insertDoc(IndexWriter writer, String content) throws IOException {
 353     Document doc = new Document();
 354
 355     doc.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
 356     doc.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
 357
 358     writer.addDocument(doc);
 359     docCount++;
 360   }
 361
 362   // LUCENE-38
 363   public void testExclusiveLowerNull() throws Exception {
 364     Analyzer analyzer = new SingleCharAnalyzer();
 365     //http://issues.apache.org/jira/browse/LUCENE-38
 366     Query query = new TermRangeQuery("content", null, "C",
 367                                  false, false);
 368     initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
 369     IndexReader reader = IndexReader.open(dir);
 370     IndexSearcher searcher = new IndexSearcher(reader);
 371     int numHits = searcher.search(query, null, 1000).totalHits;
 372     // When Lucene-38 is fixed, use the assert on the next line:
 373     assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
 374     // until Lucene-38 is fixed, use this assert:
 375     //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length());
 376
 377     searcher.close();
 378     reader.close();
 379     initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
 380     reader = IndexReader.open(dir);
 381     searcher = new IndexSearcher(reader);
 382     numHits = searcher.search(query, null, 1000).totalHits;
 383     // When Lucene-38 is fixed, use the assert on the next line:
 384     assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
 385     // until Lucene-38 is fixed, use this assert:
 386     //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length());
 387     searcher.close();
 388     reader.close();
 389     addDoc("C");
 390     reader = IndexReader.open(dir);
 391     searcher = new IndexSearcher(reader);
 392     numHits = searcher.search(query, null, 1000).totalHits;
 393     // When Lucene-38 is fixed, use the assert on the next line:
 394     assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
 395     // until Lucene-38 is fixed, use this assert
 396     //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length());
 397     searcher.close();
 398     reader.close();
 399   }
 400
 401   // LUCENE-38
 402   public void testInclusiveLowerNull() throws Exception {
 403     //http://issues.apache.org/jira/browse/LUCENE-38
 404     Analyzer analyzer = new SingleCharAnalyzer();
 405     Query query = new TermRangeQuery("content", null, "C", true, true);
 406     initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
 407     IndexReader reader = IndexReader.open(dir);
 408     IndexSearcher searcher = new IndexSearcher(reader);
 409     int numHits = searcher.search(query, null, 1000).totalHits;
 410     // When Lucene-38 is fixed, use the assert on the next line:
 411     assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
 412     // until Lucene-38 is fixed, use this assert
 413     //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length());
 414     searcher.close();
 415     reader.close();
 416     initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
 417     reader = IndexReader.open(dir);
 418     searcher = new IndexSearcher(reader);
 419     numHits = searcher.search(query, null, 1000).totalHits;
 420     // When Lucene-38 is fixed, use the assert on the next line:
 421     assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
 422     // until Lucene-38 is fixed, use this assert
 423     //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length());
 424     searcher.close();
 425     reader.close();
 426     addDoc("C");
 427     reader = IndexReader.open(dir);
 428     searcher = new IndexSearcher(reader);
 429     numHits = searcher.search(query, null, 1000).totalHits;
 430     // When Lucene-38 is fixed, use the assert on the next line:
 431     assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
 432     // until Lucene-38 is fixed, use this assert
 433     //assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());
 434      searcher.close();
 435      reader.close();
 436   }
 437 }