lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.index.IndexWriterConfig;
  21 import org.apache.lucene.index.RandomIndexWriter;
  22 import org.apache.lucene.index.Term;
  23 import org.apache.lucene.index.TermEnum;
  24 import org.apache.lucene.index.IndexReader;
  25 import org.apache.lucene.queryParser.ParseException;
  26 import org.apache.lucene.queryParser.QueryParser;
  27 import org.apache.lucene.search.Explanation.IDFExplanation;
  28 import org.apache.lucene.store.Directory;
  29 import org.apache.lucene.analysis.Analyzer;
  30 import org.apache.lucene.analysis.SimpleAnalyzer;
  31 import org.apache.lucene.analysis.TokenStream;
  32 import org.apache.lucene.analysis.Tokenizer;
  33 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  34 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  35 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  36 import org.apache.lucene.document.Document;
  37 import org.apache.lucene.document.Field;
  38 import org.apache.lucene.index.IndexWriter;
  39 import org.apache.lucene.search.IndexSearcher;
  40 import org.apache.lucene.store.RAMDirectory;
  41 import org.apache.lucene.util.LuceneTestCase;
  42
  43 import java.io.IOException;
  44 import java.util.Collection;
  45 import java.util.LinkedList;
  46 import java.io.Reader;
  47
  48 /**
  49  * This class tests the MultiPhraseQuery class.
  50  *
  51  *
  52  */
  53 public class TestMultiPhraseQuery extends LuceneTestCase {
  54
  55   public void testPhrasePrefix() throws IOException {
  56     Directory indexStore = newDirectory();
  57     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
  58     add("blueberry pie", writer);
  59     add("blueberry strudel", writer);
  60     add("blueberry pizza", writer);
  61     add("blueberry chewing gum", writer);
  62     add("bluebird pizza", writer);
  63     add("bluebird foobar pizza", writer);
  64     add("piccadilly circus", writer);
  65
  66     IndexReader reader = writer.getReader();
  67     IndexSearcher searcher = newSearcher(reader);
  68
  69     // search for "blueberry pi*":
  70     MultiPhraseQuery query1 = new MultiPhraseQuery();
  71     // search for "strawberry pi*":
  72     MultiPhraseQuery query2 = new MultiPhraseQuery();
  73     query1.add(new Term("body", "blueberry"));
  74     query2.add(new Term("body", "strawberry"));
  75
  76     LinkedList<Term> termsWithPrefix = new LinkedList<Term>();
  77     IndexReader ir = reader;
  78
  79     // this TermEnum gives "piccadilly", "pie" and "pizza".
  80     String prefix = "pi";
  81     TermEnum te = ir.terms(new Term("body", prefix));
  82     do {
  83         if (te.term().text().startsWith(prefix))
  84         {
  85             termsWithPrefix.add(te.term());
  86         }
  87     } while (te.next());
  88
  89     query1.add(termsWithPrefix.toArray(new Term[0]));
  90     assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
  91     query2.add(termsWithPrefix.toArray(new Term[0]));
  92     assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2.toString());
  93
  94     ScoreDoc[] result;
  95     result = searcher.search(query1, null, 1000).scoreDocs;
  96     assertEquals(2, result.length);
  97     result = searcher.search(query2, null, 1000).scoreDocs;
  98     assertEquals(0, result.length);
  99
 100     // search for "blue* pizza":
 101     MultiPhraseQuery query3 = new MultiPhraseQuery();
 102     termsWithPrefix.clear();
 103     prefix = "blue";
 104     te = ir.terms(new Term("body", prefix));
 105     do {
 106         if (te.term().text().startsWith(prefix))
 107         {
 108             termsWithPrefix.add(te.term());
 109         }
 110     } while (te.next());
 111     query3.add(termsWithPrefix.toArray(new Term[0]));
 112     query3.add(new Term("body", "pizza"));
 113
 114     result = searcher.search(query3, null, 1000).scoreDocs;
 115     assertEquals(2, result.length); // blueberry pizza, bluebird pizza
 116     assertEquals("body:\"(blueberry bluebird) pizza\"", query3.toString());
 117
 118     // test slop:
 119     query3.setSlop(1);
 120     result = searcher.search(query3, null, 1000).scoreDocs;
 121
 122     // just make sure no exc:
 123     searcher.explain(query3, 0);
 124
 125     assertEquals(3, result.length); // blueberry pizza, bluebird pizza, bluebird foobar pizza
 126
 127     MultiPhraseQuery query4 = new MultiPhraseQuery();
 128     try {
 129       query4.add(new Term("field1", "foo"));
 130       query4.add(new Term("field2", "foobar"));
 131       fail();
 132     } catch(IllegalArgumentException e) {
 133       // okay, all terms must belong to the same field
 134     }
 135
 136     writer.close();
 137     searcher.close();
 138     reader.close();
 139     indexStore.close();
 140   }
 141
 142   // LUCENE-2580
 143   public void testTall() throws IOException {
 144     Directory indexStore = newDirectory();
 145     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 146     add("blueberry chocolate pie", writer);
 147     add("blueberry chocolate tart", writer);
 148     IndexReader r = writer.getReader();
 149     writer.close();
 150
 151     IndexSearcher searcher = newSearcher(r);
 152     MultiPhraseQuery q = new MultiPhraseQuery();
 153     q.add(new Term("body", "blueberry"));
 154     q.add(new Term("body", "chocolate"));
 155     q.add(new Term[] {new Term("body", "pie"), new Term("body", "tart")});
 156     assertEquals(2, searcher.search(q, 1).totalHits);
 157     searcher.close();
 158     r.close();
 159     indexStore.close();
 160   }
 161
 162   private void add(String s, RandomIndexWriter writer) throws IOException {
 163     Document doc = new Document();
 164     doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED));
 165     writer.addDocument(doc);
 166   }
 167
 168   public void testBooleanQueryContainingSingleTermPrefixQuery()
 169       throws IOException {
 170     // this tests against bug 33161 (now fixed)
 171     // In order to cause the bug, the outer query must have more than one term
 172     // and all terms required.
 173     // The contained PhraseMultiQuery must contain exactly one term array.
 174     Directory indexStore = newDirectory();
 175     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 176     add("blueberry pie", writer);
 177     add("blueberry chewing gum", writer);
 178     add("blue raspberry pie", writer);
 179
 180     IndexReader reader = writer.getReader();
 181     IndexSearcher searcher = newSearcher(reader);
 182     // This query will be equivalent to +body:pie +body:"blue*"
 183     BooleanQuery q = new BooleanQuery();
 184     q.add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST);
 185
 186     MultiPhraseQuery trouble = new MultiPhraseQuery();
 187     trouble.add(new Term[] {new Term("body", "blueberry"),
 188         new Term("body", "blue")});
 189     q.add(trouble, BooleanClause.Occur.MUST);
 190
 191     // exception will be thrown here without fix
 192     ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
 193
 194     assertEquals("Wrong number of hits", 2, hits.length);
 195
 196     // just make sure no exc:
 197     searcher.explain(q, 0);
 198
 199     writer.close();
 200     searcher.close();
 201     reader.close();
 202     indexStore.close();
 203   }
 204
 205   public void testPhrasePrefixWithBooleanQuery() throws IOException {
 206     Directory indexStore = newDirectory();
 207     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 208     add("This is a test", "object", writer);
 209     add("a note", "note", writer);
 210
 211     IndexReader reader = writer.getReader();
 212     IndexSearcher searcher = newSearcher(reader);
 213
 214     // This query will be equivalent to +type:note +body:"a t*"
 215     BooleanQuery q = new BooleanQuery();
 216     q.add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
 217
 218     MultiPhraseQuery trouble = new MultiPhraseQuery();
 219     trouble.add(new Term("body", "a"));
 220     trouble
 221         .add(new Term[] {new Term("body", "test"), new Term("body", "this")});
 222     q.add(trouble, BooleanClause.Occur.MUST);
 223
 224     // exception will be thrown here without fix for #35626:
 225     ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
 226     assertEquals("Wrong number of hits", 0, hits.length);
 227     writer.close();
 228     searcher.close();
 229     reader.close();
 230     indexStore.close();
 231   }
 232
 233   public void testNoDocs() throws Exception {
 234     Directory indexStore = newDirectory();
 235     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 236     add("a note", "note", writer);
 237
 238     IndexReader reader = writer.getReader();
 239     IndexSearcher searcher = newSearcher(reader);
 240
 241     MultiPhraseQuery q = new MultiPhraseQuery();
 242     q.add(new Term("body", "a"));
 243     q.add(new Term[] {new Term("body", "nope"), new Term("body", "nope")});
 244     assertEquals("Wrong number of hits", 0,
 245         searcher.search(q, null, 1).totalHits);
 246
 247     // just make sure no exc:
 248     searcher.explain(q, 0);
 249
 250     writer.close();
 251     searcher.close();
 252     reader.close();
 253     indexStore.close();
 254   }
 255
 256   public void testHashCodeAndEquals() {
 257     MultiPhraseQuery query1 = new MultiPhraseQuery();
 258     MultiPhraseQuery query2 = new MultiPhraseQuery();
 259
 260     assertEquals(query1.hashCode(), query2.hashCode());
 261     assertEquals(query1, query2);
 262
 263     Term term1 = new Term("someField", "someText");
 264
 265     query1.add(term1);
 266     query2.add(term1);
 267
 268     assertEquals(query1.hashCode(), query2.hashCode());
 269     assertEquals(query1, query2);
 270
 271     Term term2 = new Term("someField", "someMoreText");
 272
 273     query1.add(term2);
 274
 275     assertFalse(query1.hashCode() == query2.hashCode());
 276     assertFalse(query1.equals(query2));
 277
 278     query2.add(term2);
 279
 280     assertEquals(query1.hashCode(), query2.hashCode());
 281     assertEquals(query1, query2);
 282   }
 283
 284   private void add(String s, String type, RandomIndexWriter writer)
 285       throws IOException {
 286     Document doc = new Document();
 287     doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED));
 288     doc.add(newField("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED));
 289     writer.addDocument(doc);
 290   }
 291
 292   // LUCENE-2526
 293   public void testEmptyToString() {
 294     new MultiPhraseQuery().toString();
 295   }
 296
 297   public void testCustomIDF() throws Exception {
 298     Directory indexStore = newDirectory();
 299     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
 300     add("This is a test", "object", writer);
 301     add("a note", "note", writer);
 302
 303     IndexReader reader = writer.getReader();
 304     IndexSearcher searcher = newSearcher(reader);
 305     searcher.setSimilarity(new DefaultSimilarity() {
 306
 307       @Override
 308       public IDFExplanation idfExplain(Collection<Term> terms,
 309           Searcher searcher) throws IOException {
 310         return new IDFExplanation() {
 311
 312           @Override
 313           public float getIdf() {
 314             return 10f;
 315           }
 316
 317           @Override
 318           public String explain() {
 319             return "just a test";
 320           }
 321
 322         };
 323       }
 324     });
 325
 326     MultiPhraseQuery query = new MultiPhraseQuery();
 327     query.add(new Term[] { new Term("body", "this"), new Term("body", "that") });
 328     query.add(new Term("body", "is"));
 329     Weight weight = query.createWeight(searcher);
 330     assertEquals(10f * 10f, weight.sumOfSquaredWeights(), 0.001f);
 331
 332     writer.close();
 333     searcher.close();
 334     reader.close();
 335     indexStore.close();
 336   }
 337
 338   private static class TokenAndPos {
 339     public final String token;
 340     public final int pos;
 341     public TokenAndPos(String token, int pos) {
 342       this.token = token;
 343       this.pos = pos;
 344     }
 345   }
 346
 347   private static class CannedAnalyzer extends Analyzer {
 348     private final TokenAndPos[] tokens;
 349
 350     public CannedAnalyzer(TokenAndPos[] tokens) {
 351       this.tokens = tokens;
 352     }
 353
 354     @Override
 355     public TokenStream tokenStream(String fieldName, Reader reader) {
 356       return new CannedTokenizer(tokens);
 357     }
 358   }
 359
 360   private static class CannedTokenizer extends Tokenizer {
 361     private final TokenAndPos[] tokens;
 362     private int upto = 0;
 363     private int lastPos = 0;
 364     private final TermAttribute termAtt = addAttribute(TermAttribute.class);
 365     private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 366
 367     public CannedTokenizer(TokenAndPos[] tokens) {
 368       this.tokens = tokens;
 369     }
 370
 371     @Override
 372     public final boolean incrementToken() throws IOException {
 373       clearAttributes();
 374       if (upto < tokens.length) {
 375         final TokenAndPos token = tokens[upto++];
 376         termAtt.setTermBuffer(token.token);
 377         posIncrAtt.setPositionIncrement(token.pos - lastPos);
 378         lastPos = token.pos;
 379         return true;
 380       } else {
 381         return false;
 382       }
 383     }
 384
 385     @Override
 386     public void reset() throws IOException {
 387       super.reset();
 388       this.upto = 0;
 389       this.lastPos = 0;
 390     }
 391   }
 392
 393   public void testZeroPosIncr() throws IOException {
 394     Directory dir = new RAMDirectory();
 395     final TokenAndPos[] tokens = new TokenAndPos[3];
 396     tokens[0] = new TokenAndPos("a", 0);
 397     tokens[1] = new TokenAndPos("b", 0);
 398     tokens[2] = new TokenAndPos("c", 0);
 399
 400     IndexWriter writer = new IndexWriter(dir, new CannedAnalyzer(tokens), true, IndexWriter.MaxFieldLength.LIMITED);
 401     Document doc = new Document();
 402     doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
 403     writer.addDocument(doc);
 404     writer.addDocument(doc);
 405     IndexReader r = writer.getReader();
 406     writer.close();
 407     IndexSearcher s = new IndexSearcher(r);
 408     MultiPhraseQuery mpq = new MultiPhraseQuery();
 409     //mpq.setSlop(1);
 410
 411     // NOTE: not great that if we do the else clause here we
 412     // get different scores!  MultiPhraseQuery counts that
 413     // phrase as occurring twice per doc (it should be 1, I
 414     // think?).  This is because MultipleTermPositions is able to
 415     // return the same position more than once (0, in this
 416     // case):
 417     if (true) {
 418       mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
 419       mpq.add(new Term[] {new Term("field", "a")}, 0);
 420     } else {
 421       mpq.add(new Term[] {new Term("field", "a")}, 0);
 422       mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
 423     }
 424     TopDocs hits = s.search(mpq, 2);
 425     assertEquals(2, hits.totalHits);
 426     assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
 427     /*
 428     for(int hit=0;hit<hits.totalHits;hit++) {
 429       ScoreDoc sd = hits.scoreDocs[hit];
 430       System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
 431     }
 432     */
 433     r.close();
 434     dir.close();
 435   }
 436
 437   private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
 438       new TokenAndPos("x", 0),
 439       new TokenAndPos("a", 1),
 440       new TokenAndPos("1", 1),
 441       new TokenAndPos("m", 2), // not existing, relying on slop=2
 442       new TokenAndPos("b", 3),
 443       new TokenAndPos("1", 3),
 444       new TokenAndPos("n", 4), // not existing, relying on slop=2
 445       new TokenAndPos("c", 5),
 446       new TokenAndPos("y", 6)
 447   };
 448
 449   private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
 450       new TokenAndPos("a", 0),
 451       new TokenAndPos("1", 0),
 452       new TokenAndPos("b", 1),
 453       new TokenAndPos("1", 1),
 454       new TokenAndPos("c", 2)
 455   };
 456
 457   private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
 458       { new TokenAndPos("a", 0) },
 459       { new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
 460       { new TokenAndPos("b", 1) },
 461       { new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
 462       { new TokenAndPos("c", 2) }
 463   };
 464
 465   private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
 466       { new TokenAndPos("x", 0) },
 467       { new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
 468       { new TokenAndPos("x", 1) },
 469       { new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
 470       { new TokenAndPos("c", 2) }
 471   };
 472
 473   /**
 474    * using query parser, MPQ will be created, and will not be strict about having all query terms
 475    * in each position - one of each position is sufficient (OR logic)
 476    */
 477   public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
 478     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
 479     final Query q = qp.parse("\"this text is acually ignored\"");
 480     assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
 481     doTestZeroPosIncrSloppy(q, 0);
 482     ((MultiPhraseQuery) q).setSlop(1);
 483     doTestZeroPosIncrSloppy(q, 0);
 484     ((MultiPhraseQuery) q).setSlop(2);
 485     doTestZeroPosIncrSloppy(q, 1);
 486   }
 487
 488   private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
 489     Directory dir = newDirectory(); // random dir
 490     IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
 491     IndexWriter writer = new IndexWriter(dir, cfg);
 492     Document doc = new Document();
 493     doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
 494     writer.addDocument(doc);
 495     IndexReader r = IndexReader.open(writer,false);
 496     writer.close();
 497     IndexSearcher s = new IndexSearcher(r);
 498
 499     if (VERBOSE) {
 500       System.out.println("QUERY=" + q);
 501     }
 502
 503     TopDocs hits = s.search(q, 1);
 504     assertEquals("wrong number of results", nExpected, hits.totalHits);
 505
 506     if (VERBOSE) {
 507       for(int hit=0;hit<hits.totalHits;hit++) {
 508         ScoreDoc sd = hits.scoreDocs[hit];
 509         System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
 510       }
 511     }
 512
 513     r.close();
 514     dir.close();
 515   }
 516
 517   /**
 518    * PQ AND Mode - Manually creating a phrase query
 519    */
 520   public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
 521     final PhraseQuery pq = new PhraseQuery();
 522     for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
 523       pq.add(new Term("field",tap.token), tap.pos);
 524     }
 525     doTestZeroPosIncrSloppy(pq, 0);
 526     pq.setSlop(1);
 527     doTestZeroPosIncrSloppy(pq, 0);
 528     pq.setSlop(2);
 529     doTestZeroPosIncrSloppy(pq, 1);
 530   }
 531
 532   /**
 533    * MPQ AND Mode - Manually creating a multiple phrase query
 534    */
 535   public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
 536     final MultiPhraseQuery mpq = new MultiPhraseQuery();
 537     for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
 538       mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
 539     }
 540     doTestZeroPosIncrSloppy(mpq, 0);
 541     mpq.setSlop(1);
 542     doTestZeroPosIncrSloppy(mpq, 0);
 543     mpq.setSlop(2);
 544     doTestZeroPosIncrSloppy(mpq, 1);
 545   }
 546
 547   /**
 548    * MPQ Combined AND OR Mode - Manually creating a multiple phrase query
 549    */
 550   public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
 551     final MultiPhraseQuery mpq = new MultiPhraseQuery();
 552     for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
 553       Term[] terms = tapTerms(tap);
 554       final int pos = tap[0].pos;
 555       mpq.add(terms, pos); //AND logic in pos, OR across lines
 556     }
 557     doTestZeroPosIncrSloppy(mpq, 0);
 558     mpq.setSlop(1);
 559     doTestZeroPosIncrSloppy(mpq, 0);
 560     mpq.setSlop(2);
 561     doTestZeroPosIncrSloppy(mpq, 1);
 562   }
 563
 564   /**
 565    * MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
 566    */
 567   public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
 568     final MultiPhraseQuery mpq = new MultiPhraseQuery();
 569     for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
 570       Term[] terms = tapTerms(tap);
 571       final int pos = tap[0].pos;
 572       mpq.add(terms, pos); //AND logic in pos, OR across lines
 573     }
 574     doTestZeroPosIncrSloppy(mpq, 0);
 575     mpq.setSlop(2);
 576     doTestZeroPosIncrSloppy(mpq, 0);
 577   }
 578
 579   private Term[] tapTerms(TokenAndPos[] tap) {
 580     Term[] terms = new Term[tap.length];
 581     for (int i=0; i<terms.length; i++) {
 582       terms[i] = new Term("field",tap[i].token);
 583     }
 584     return terms;
 585   }
 586
 587 }
 588