lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.document.Document;
  21 import org.apache.lucene.document.Field;
  22 import org.apache.lucene.index.IndexWriter;
  23 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  24 import org.apache.lucene.store.Directory;
  25 import org.apache.lucene.analysis.Analyzer;
  26 import org.apache.lucene.analysis.MockAnalyzer;
  27 import org.apache.lucene.analysis.MockTokenizer;
  28 import org.apache.lucene.analysis.TokenStream;
  29 import org.apache.lucene.analysis.Tokenizer;
  30 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  31
  32 import org.apache.lucene.util.LuceneTestCase;
  33 import java.io.IOException;
  34 import java.io.Reader;
  35 import java.util.Locale;
  36 import java.util.Set;
  37 import java.util.HashSet;
  38 import java.util.Arrays;
  39 import java.text.Collator;
  40
  41
  42 public class TestTermRangeQuery extends LuceneTestCase {
  43
  44   private int docCount = 0;
  45   private Directory dir;
  46
  47   @Override
  48   public void setUp() throws Exception {
  49     super.setUp();
  50     dir = newDirectory();
  51   }
  52
  53   @Override
  54   public void tearDown() throws Exception {
  55     dir.close();
  56     super.tearDown();
  57   }
  58
  59   public void testExclusive() throws Exception {
  60     Query query = new TermRangeQuery("content", "A", "C", false, false);
  61     initializeIndex(new String[] {"A", "B", "C", "D"});
  62     IndexSearcher searcher = new IndexSearcher(dir, true);
  63     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  64     assertEquals("A,B,C,D, only B in range", 1, hits.length);
  65     searcher.close();
  66
  67     initializeIndex(new String[] {"A", "B", "D"});
  68     searcher = new IndexSearcher(dir, true);
  69     hits = searcher.search(query, null, 1000).scoreDocs;
  70     assertEquals("A,B,D, only B in range", 1, hits.length);
  71     searcher.close();
  72
  73     addDoc("C");
  74     searcher = new IndexSearcher(dir, true);
  75     hits = searcher.search(query, null, 1000).scoreDocs;
  76     assertEquals("C added, still only B in range", 1, hits.length);
  77     searcher.close();
  78   }
  79
  80   public void testInclusive() throws Exception {
  81     Query query = new TermRangeQuery("content", "A", "C", true, true);
  82
  83     initializeIndex(new String[]{"A", "B", "C", "D"});
  84     IndexSearcher searcher = new IndexSearcher(dir, true);
  85     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  86     assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
  87     searcher.close();
  88
  89     initializeIndex(new String[]{"A", "B", "D"});
  90     searcher = new IndexSearcher(dir, true);
  91     hits = searcher.search(query, null, 1000).scoreDocs;
  92     assertEquals("A,B,D - A and B in range", 2, hits.length);
  93     searcher.close();
  94
  95     addDoc("C");
  96     searcher = new IndexSearcher(dir, true);
  97     hits = searcher.search(query, null, 1000).scoreDocs;
  98     assertEquals("C added - A, B, C in range", 3, hits.length);
  99     searcher.close();
 100   }
 101
 102   /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE)
 103    * with constant score and checks, that only the lower end of terms is put into the range */
 104   public void testTopTermsRewrite() throws Exception {
 105     initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
 106
 107     IndexSearcher searcher = new IndexSearcher(dir, true);
 108     TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
 109     checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
 110
 111     final int savedClauseCount = BooleanQuery.getMaxClauseCount();
 112     try {
 113       BooleanQuery.setMaxClauseCount(3);
 114       checkBooleanTerms(searcher, query, "B", "C", "D");
 115     } finally {
 116       BooleanQuery.setMaxClauseCount(savedClauseCount);
 117     }
 118     searcher.close();
 119   }
 120
 121   private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
 122     query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
 123     final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
 124     final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
 125     assertEquals(allowedTerms.size(), bq.clauses().size());
 126     for (BooleanClause c : bq.clauses()) {
 127       assertTrue(c.getQuery() instanceof TermQuery);
 128       final TermQuery tq = (TermQuery) c.getQuery();
 129       final String term = tq.getTerm().text();
 130       assertTrue("invalid term: "+ term, allowedTerms.contains(term));
 131       allowedTerms.remove(term); // remove to fail on double terms
 132     }
 133     assertEquals(0, allowedTerms.size());
 134   }
 135
 136   public void testEqualsHashcode() {
 137     Query query = new TermRangeQuery("content", "A", "C", true, true);
 138
 139     query.setBoost(1.0f);
 140     Query other = new TermRangeQuery("content", "A", "C", true, true);
 141     other.setBoost(1.0f);
 142
 143     assertEquals("query equals itself is true", query, query);
 144     assertEquals("equivalent queries are equal", query, other);
 145     assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
 146
 147     other.setBoost(2.0f);
 148     assertFalse("Different boost queries are not equal", query.equals(other));
 149
 150     other = new TermRangeQuery("notcontent", "A", "C", true, true);
 151     assertFalse("Different fields are not equal", query.equals(other));
 152
 153     other = new TermRangeQuery("content", "X", "C", true, true);
 154     assertFalse("Different lower terms are not equal", query.equals(other));
 155
 156     other = new TermRangeQuery("content", "A", "Z", true, true);
 157     assertFalse("Different upper terms are not equal", query.equals(other));
 158
 159     query = new TermRangeQuery("content", null, "C", true, true);
 160     other = new TermRangeQuery("content", null, "C", true, true);
 161     assertEquals("equivalent queries with null lowerterms are equal()", query, other);
 162     assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
 163
 164     query = new TermRangeQuery("content", "C", null, true, true);
 165     other = new TermRangeQuery("content", "C", null, true, true);
 166     assertEquals("equivalent queries with null upperterms are equal()", query, other);
 167     assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
 168
 169     query = new TermRangeQuery("content", null, "C", true, true);
 170     other = new TermRangeQuery("content", "C", null, true, true);
 171     assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
 172
 173     query = new TermRangeQuery("content", "A", "C", false, false);
 174     other = new TermRangeQuery("content", "A", "C", true, true);
 175     assertFalse("queries with different inclusive are not equal", query.equals(other));
 176
 177     query = new TermRangeQuery("content", "A", "C", false, false);
 178     other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
 179     assertFalse("a query with a collator is not equal to one without", query.equals(other));
 180   }
 181
 182   public void testExclusiveCollating() throws Exception {
 183     Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
 184     initializeIndex(new String[] {"A", "B", "C", "D"});
 185     IndexSearcher searcher = new IndexSearcher(dir, true);
 186     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 187     assertEquals("A,B,C,D, only B in range", 1, hits.length);
 188     searcher.close();
 189
 190     initializeIndex(new String[] {"A", "B", "D"});
 191     searcher = new IndexSearcher(dir, true);
 192     hits = searcher.search(query, null, 1000).scoreDocs;
 193     assertEquals("A,B,D, only B in range", 1, hits.length);
 194     searcher.close();
 195
 196     addDoc("C");
 197     searcher = new IndexSearcher(dir, true);
 198     hits = searcher.search(query, null, 1000).scoreDocs;
 199     assertEquals("C added, still only B in range", 1, hits.length);
 200     searcher.close();
 201   }
 202
 203   public void testInclusiveCollating() throws Exception {
 204     Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
 205
 206     initializeIndex(new String[]{"A", "B", "C", "D"});
 207     IndexSearcher searcher = new IndexSearcher(dir, true);
 208     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 209     assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
 210     searcher.close();
 211
 212     initializeIndex(new String[]{"A", "B", "D"});
 213     searcher = new IndexSearcher(dir, true);
 214     hits = searcher.search(query, null, 1000).scoreDocs;
 215     assertEquals("A,B,D - A and B in range", 2, hits.length);
 216     searcher.close();
 217
 218     addDoc("C");
 219     searcher = new IndexSearcher(dir, true);
 220     hits = searcher.search(query, null, 1000).scoreDocs;
 221     assertEquals("C added - A, B, C in range", 3, hits.length);
 222     searcher.close();
 223   }
 224
 225   public void testFarsi() throws Exception {
 226     // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
 227     // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
 228     // characters properly.
 229     Collator collator = Collator.getInstance(new Locale("ar"));
 230     Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
 231     // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
 232     // orders the U+0698 character before the U+0633 character, so the single
 233     // index Term below should NOT be returned by a TermRangeQuery with a Farsi
 234     // Collator (or an Arabic one for the case when Farsi is not supported).
 235     initializeIndex(new String[]{ "\u0633\u0627\u0628"});
 236     IndexSearcher searcher = new IndexSearcher(dir, true);
 237     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 238     assertEquals("The index Term should not be included.", 0, hits.length);
 239
 240     query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
 241     hits = searcher.search(query, null, 1000).scoreDocs;
 242     assertEquals("The index Term should be included.", 1, hits.length);
 243     searcher.close();
 244   }
 245
 246   public void testDanish() throws Exception {
 247     Collator collator = Collator.getInstance(new Locale("da", "dk"));
 248     // Danish collation orders the words below in the given order (example taken
 249     // from TestSort.testInternationalSort() ).
 250     String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
 251     Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
 252
 253     // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
 254     // but Danish collation does.
 255     initializeIndex(words);
 256     IndexSearcher searcher = new IndexSearcher(dir, true);
 257     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 258     assertEquals("The index Term should be included.", 1, hits.length);
 259
 260     query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
 261     hits = searcher.search(query, null, 1000).scoreDocs;
 262     assertEquals("The index Term should not be included.", 0, hits.length);
 263     searcher.close();
 264   }
 265
 266   private static class SingleCharAnalyzer extends Analyzer {
 267
 268     private static class SingleCharTokenizer extends Tokenizer {
 269       char[] buffer = new char[1];
 270       boolean done = false;
 271       CharTermAttribute termAtt;
 272
 273       public SingleCharTokenizer(Reader r) {
 274         super(r);
 275         termAtt = addAttribute(CharTermAttribute.class);
 276       }
 277
 278       @Override
 279       public boolean incrementToken() throws IOException {
 280         if (done)
 281           return false;
 282         else {
 283           int count = input.read(buffer);
 284           clearAttributes();
 285           done = true;
 286           if (count == 1) {
 287             termAtt.copyBuffer(buffer, 0, 1);
 288           }
 289           return true;
 290         }
 291       }
 292
 293       @Override
 294       public final void reset(Reader reader) throws IOException {
 295         super.reset(reader);
 296         done = false;
 297       }
 298     }
 299
 300     @Override
 301     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
 302       Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
 303       if (tokenizer == null) {
 304         tokenizer = new SingleCharTokenizer(reader);
 305         setPreviousTokenStream(tokenizer);
 306       } else
 307         tokenizer.reset(reader);
 308       return tokenizer;
 309     }
 310
 311     @Override
 312     public TokenStream tokenStream(String fieldName, Reader reader) {
 313       return new SingleCharTokenizer(reader);
 314     }
 315   }
 316
 317   private void initializeIndex(String[] values) throws IOException {
 318     initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
 319   }
 320
 321   private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
 322     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
 323         TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
 324     for (int i = 0; i < values.length; i++) {
 325       insertDoc(writer, values[i]);
 326     }
 327     writer.close();
 328   }
 329
 330   // shouldnt create an analyzer for every doc?
 331   private void addDoc(String content) throws IOException {
 332     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
 333     insertDoc(writer, content);
 334     writer.close();
 335   }
 336
 337   private void insertDoc(IndexWriter writer, String content) throws IOException {
 338     Document doc = new Document();
 339
 340     doc.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
 341     doc.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
 342
 343     writer.addDocument(doc);
 344     docCount++;
 345   }
 346
 347   // LUCENE-38
 348   public void testExclusiveLowerNull() throws Exception {
 349     Analyzer analyzer = new SingleCharAnalyzer();
 350     //http://issues.apache.org/jira/browse/LUCENE-38
 351     Query query = new TermRangeQuery("content", null, "C",
 352                                  false, false);
 353     initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
 354     IndexSearcher searcher = new IndexSearcher(dir, true);
 355     int numHits = searcher.search(query, null, 1000).totalHits;
 356     // When Lucene-38 is fixed, use the assert on the next line:
 357     assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
 358     // until Lucene-38 is fixed, use this assert:
 359     //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length());
 360
 361     searcher.close();
 362     initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
 363     searcher = new IndexSearcher(dir, true);
 364     numHits = searcher.search(query, null, 1000).totalHits;
 365     // When Lucene-38 is fixed, use the assert on the next line:
 366     assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
 367     // until Lucene-38 is fixed, use this assert:
 368     //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length());
 369     searcher.close();
 370     addDoc("C");
 371     searcher = new IndexSearcher(dir, true);
 372     numHits = searcher.search(query, null, 1000).totalHits;
 373     // When Lucene-38 is fixed, use the assert on the next line:
 374     assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
 375     // until Lucene-38 is fixed, use this assert
 376     //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length());
 377     searcher.close();
 378   }
 379
 380   // LUCENE-38
 381   public void testInclusiveLowerNull() throws Exception {
 382     //http://issues.apache.org/jira/browse/LUCENE-38
 383     Analyzer analyzer = new SingleCharAnalyzer();
 384     Query query = new TermRangeQuery("content", null, "C", true, true);
 385     initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
 386     IndexSearcher searcher = new IndexSearcher(dir, true);
 387     int numHits = searcher.search(query, null, 1000).totalHits;
 388     // When Lucene-38 is fixed, use the assert on the next line:
 389     assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
 390     // until Lucene-38 is fixed, use this assert
 391     //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length());
 392     searcher.close();
 393     initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
 394     searcher = new IndexSearcher(dir, true);
 395     numHits = searcher.search(query, null, 1000).totalHits;
 396     // When Lucene-38 is fixed, use the assert on the next line:
 397     assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
 398     // until Lucene-38 is fixed, use this assert
 399     //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length());
 400     searcher.close();
 401     addDoc("C");
 402     searcher = new IndexSearcher(dir, true);
 403     numHits = searcher.search(query, null, 1000).totalHits;
 404     // When Lucene-38 is fixed, use the assert on the next line:
 405     assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
 406     // until Lucene-38 is fixed, use this assert
 407     //assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());
 408      searcher.close();
 409   }
 410 }