lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhraseQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.util.LuceneTestCase;
  21 import org.apache.lucene.analysis.*;
  22 import org.apache.lucene.analysis.tokenattributes.*;
  23 import org.apache.lucene.document.*;
  24 import org.apache.lucene.index.*;
  25 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  26 import org.apache.lucene.queryParser.QueryParser;
  27 import org.apache.lucene.store.*;
  28 import org.apache.lucene.util.Version;
  29 import org.apache.lucene.util._TestUtil;
  30 import org.junit.AfterClass;
  31 import org.junit.BeforeClass;
  32
  33 import java.io.IOException;
  34 import java.io.Reader;
  35 import java.io.StringReader;
  36 import java.util.List;
  37 import java.util.ArrayList;
  38 import java.util.Random;
  39
  40 /**
  41  * Tests {@link PhraseQuery}.
  42  *
  43  * @see TestPositionIncrement
  44  */
  45 public class TestPhraseQuery extends LuceneTestCase {
  46
  47   /** threshold for comparing floats */
  48   public static final float SCORE_COMP_THRESH = 1e-6f;
  49
  50   private static IndexSearcher searcher;
  51   private static IndexReader reader;
  52   private PhraseQuery query;
  53   private static Directory directory;
  54
  55   @BeforeClass
  56   public static void beforeClass() throws Exception {
  57     directory = newDirectory();
  58     Analyzer analyzer = new Analyzer() {
  59       @Override
  60       public TokenStream tokenStream(String fieldName, Reader reader) {
  61         return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
  62       }
  63
  64       @Override
  65       public int getPositionIncrementGap(String fieldName) {
  66         return 100;
  67       }
  68     };
  69     RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer);
  70
  71     Document doc = new Document();
  72     doc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
  73     doc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
  74     Fieldable repeatedField = newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
  75     doc.add(repeatedField);
  76     doc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
  77     writer.addDocument(doc);
  78
  79     doc = new Document();
  80     doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
  81     writer.addDocument(doc);
  82
  83     doc = new Document();
  84     doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
  85     writer.addDocument(doc);
  86
  87     reader = writer.getReader();
  88     writer.close();
  89
  90     searcher = newSearcher(reader);
  91   }
  92
  93   @Override
  94   public void setUp() throws Exception {
  95     super.setUp();
  96     query = new PhraseQuery();
  97   }
  98
  99   @AfterClass
 100   public static void afterClass() throws Exception {
 101     searcher.close();
 102     searcher = null;
 103     reader.close();
 104     reader = null;
 105     directory.close();
 106     directory = null;
 107   }
 108
 109   public void testNotCloseEnough() throws Exception {
 110     query.setSlop(2);
 111     query.add(new Term("field", "one"));
 112     query.add(new Term("field", "five"));
 113     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 114     assertEquals(0, hits.length);
 115     QueryUtils.check(random, query,searcher);
 116   }
 117
 118   public void testBarelyCloseEnough() throws Exception {
 119     query.setSlop(3);
 120     query.add(new Term("field", "one"));
 121     query.add(new Term("field", "five"));
 122     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 123     assertEquals(1, hits.length);
 124     QueryUtils.check(random, query,searcher);
 125   }
 126
 127   /**
 128    * Ensures slop of 0 works for exact matches, but not reversed
 129    */
 130   public void testExact() throws Exception {
 131     // slop is zero by default
 132     query.add(new Term("field", "four"));
 133     query.add(new Term("field", "five"));
 134     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 135     assertEquals("exact match", 1, hits.length);
 136     QueryUtils.check(random, query,searcher);
 137
 138
 139     query = new PhraseQuery();
 140     query.add(new Term("field", "two"));
 141     query.add(new Term("field", "one"));
 142     hits = searcher.search(query, null, 1000).scoreDocs;
 143     assertEquals("reverse not exact", 0, hits.length);
 144     QueryUtils.check(random, query,searcher);
 145   }
 146
 147   public void testSlop1() throws Exception {
 148     // Ensures slop of 1 works with terms in order.
 149     query.setSlop(1);
 150     query.add(new Term("field", "one"));
 151     query.add(new Term("field", "two"));
 152     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 153     assertEquals("in order", 1, hits.length);
 154     QueryUtils.check(random, query,searcher);
 155
 156
 157     // Ensures slop of 1 does not work for phrases out of order;
 158     // must be at least 2.
 159     query = new PhraseQuery();
 160     query.setSlop(1);
 161     query.add(new Term("field", "two"));
 162     query.add(new Term("field", "one"));
 163     hits = searcher.search(query, null, 1000).scoreDocs;
 164     assertEquals("reversed, slop not 2 or more", 0, hits.length);
 165     QueryUtils.check(random, query,searcher);
 166   }
 167
 168   /**
 169    * As long as slop is at least 2, terms can be reversed
 170    */
 171   public void testOrderDoesntMatter() throws Exception {
 172     query.setSlop(2); // must be at least two for reverse order match
 173     query.add(new Term("field", "two"));
 174     query.add(new Term("field", "one"));
 175     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 176     assertEquals("just sloppy enough", 1, hits.length);
 177     QueryUtils.check(random, query,searcher);
 178
 179
 180     query = new PhraseQuery();
 181     query.setSlop(2);
 182     query.add(new Term("field", "three"));
 183     query.add(new Term("field", "one"));
 184     hits = searcher.search(query, null, 1000).scoreDocs;
 185     assertEquals("not sloppy enough", 0, hits.length);
 186     QueryUtils.check(random, query,searcher);
 187
 188   }
 189
 190   /**
 191    * slop is the total number of positional moves allowed
 192    * to line up a phrase
 193    */
 194   public void testMulipleTerms() throws Exception {
 195     query.setSlop(2);
 196     query.add(new Term("field", "one"));
 197     query.add(new Term("field", "three"));
 198     query.add(new Term("field", "five"));
 199     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 200     assertEquals("two total moves", 1, hits.length);
 201     QueryUtils.check(random, query,searcher);
 202
 203
 204     query = new PhraseQuery();
 205     query.setSlop(5); // it takes six moves to match this phrase
 206     query.add(new Term("field", "five"));
 207     query.add(new Term("field", "three"));
 208     query.add(new Term("field", "one"));
 209     hits = searcher.search(query, null, 1000).scoreDocs;
 210     assertEquals("slop of 5 not close enough", 0, hits.length);
 211     QueryUtils.check(random, query,searcher);
 212
 213
 214     query.setSlop(6);
 215     hits = searcher.search(query, null, 1000).scoreDocs;
 216     assertEquals("slop of 6 just right", 1, hits.length);
 217     QueryUtils.check(random, query,searcher);
 218
 219   }
 220
 221   public void testPhraseQueryWithStopAnalyzer() throws Exception {
 222     Directory directory = newDirectory();
 223     StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
 224     RandomIndexWriter writer = new RandomIndexWriter(random, directory,
 225         newIndexWriterConfig( Version.LUCENE_24, stopAnalyzer));
 226     Document doc = new Document();
 227     doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
 228     writer.addDocument(doc);
 229     IndexReader reader = writer.getReader();
 230     writer.close();
 231
 232     IndexSearcher searcher = newSearcher(reader);
 233
 234     // valid exact phrase query
 235     PhraseQuery query = new PhraseQuery();
 236     query.add(new Term("field","stop"));
 237     query.add(new Term("field","words"));
 238     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 239     assertEquals(1, hits.length);
 240     QueryUtils.check(random, query,searcher);
 241
 242
 243     // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
 244     query = new PhraseQuery();
 245     query.add(new Term("field", "words"));
 246     query.add(new Term("field", "here"));
 247     hits = searcher.search(query, null, 1000).scoreDocs;
 248     assertEquals(1, hits.length);
 249     QueryUtils.check(random, query,searcher);
 250
 251
 252     searcher.close();
 253     reader.close();
 254     directory.close();
 255   }
 256
 257   public void testPhraseQueryInConjunctionScorer() throws Exception {
 258     Directory directory = newDirectory();
 259     RandomIndexWriter writer = new RandomIndexWriter(random, directory);
 260
 261     Document doc = new Document();
 262     doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
 263     writer.addDocument(doc);
 264
 265     doc = new Document();
 266     doc.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
 267     doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
 268     writer.addDocument(doc);
 269
 270     IndexReader reader = writer.getReader();
 271     writer.close();
 272
 273     IndexSearcher searcher = newSearcher(reader);
 274
 275     PhraseQuery phraseQuery = new PhraseQuery();
 276     phraseQuery.add(new Term("source", "marketing"));
 277     phraseQuery.add(new Term("source", "info"));
 278     ScoreDoc[] hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
 279     assertEquals(2, hits.length);
 280     QueryUtils.check(random, phraseQuery,searcher);
 281
 282
 283     TermQuery termQuery = new TermQuery(new Term("contents","foobar"));
 284     BooleanQuery booleanQuery = new BooleanQuery();
 285     booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
 286     booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
 287     hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
 288     assertEquals(1, hits.length);
 289     QueryUtils.check(random, termQuery,searcher);
 290
 291
 292     searcher.close();
 293     reader.close();
 294
 295     writer = new RandomIndexWriter(random, directory,
 296         newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
 297     doc = new Document();
 298     doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
 299     writer.addDocument(doc);
 300
 301     doc = new Document();
 302     doc.add(newField("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
 303     writer.addDocument(doc);
 304
 305     doc = new Document();
 306     doc.add(newField("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
 307     writer.addDocument(doc);
 308
 309     reader = writer.getReader();
 310     writer.close();
 311
 312     searcher = newSearcher(reader);
 313
 314     termQuery = new TermQuery(new Term("contents","woo"));
 315     phraseQuery = new PhraseQuery();
 316     phraseQuery.add(new Term("contents","map"));
 317     phraseQuery.add(new Term("contents","entry"));
 318
 319     hits = searcher.search(termQuery, null, 1000).scoreDocs;
 320     assertEquals(3, hits.length);
 321     hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
 322     assertEquals(2, hits.length);
 323
 324
 325     booleanQuery = new BooleanQuery();
 326     booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
 327     booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
 328     hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
 329     assertEquals(2, hits.length);
 330
 331     booleanQuery = new BooleanQuery();
 332     booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
 333     booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
 334     hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
 335     assertEquals(2, hits.length);
 336     QueryUtils.check(random, booleanQuery,searcher);
 337
 338
 339     searcher.close();
 340     reader.close();
 341     directory.close();
 342   }
 343
 344   public void testSlopScoring() throws IOException {
 345     Directory directory = newDirectory();
 346     RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
 347
 348     Document doc = new Document();
 349     doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
 350     writer.addDocument(doc);
 351
 352     Document doc2 = new Document();
 353     doc2.add(newField("field", "foo firstname zzz lastname foo", Field.Store.YES, Field.Index.ANALYZED));
 354     writer.addDocument(doc2);
 355
 356     Document doc3 = new Document();
 357     doc3.add(newField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
 358     writer.addDocument(doc3);
 359
 360     IndexReader reader = writer.getReader();
 361     writer.close();
 362
 363     IndexSearcher searcher = newSearcher(reader);
 364     PhraseQuery query = new PhraseQuery();
 365     query.add(new Term("field", "firstname"));
 366     query.add(new Term("field", "lastname"));
 367     query.setSlop(Integer.MAX_VALUE);
 368     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 369     assertEquals(3, hits.length);
 370     // Make sure that those matches where the terms appear closer to
 371     // each other get a higher score:
 372     assertEquals(0.71, hits[0].score, 0.01);
 373     assertEquals(0, hits[0].doc);
 374     assertEquals(0.44, hits[1].score, 0.01);
 375     assertEquals(1, hits[1].doc);
 376     assertEquals(0.31, hits[2].score, 0.01);
 377     assertEquals(2, hits[2].doc);
 378     QueryUtils.check(random, query,searcher);
 379     searcher.close();
 380     reader.close();
 381     directory.close();
 382   }
 383
 384   public void testToString() throws Exception {
 385     StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
 386     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
 387     qp.setEnablePositionIncrements(true);
 388     PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
 389     assertEquals("field:\"? hi ? ? ? test\"", q.toString());
 390     q.add(new Term("field", "hello"), 1);
 391     assertEquals("field:\"? hi|hello ? ? ? test\"", q.toString());
 392   }
 393
 394   public void testWrappedPhrase() throws IOException {
 395     query.add(new Term("repeated", "first"));
 396     query.add(new Term("repeated", "part"));
 397     query.add(new Term("repeated", "second"));
 398     query.add(new Term("repeated", "part"));
 399     query.setSlop(100);
 400
 401     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 402     assertEquals("slop of 100 just right", 1, hits.length);
 403     QueryUtils.check(random, query,searcher);
 404
 405     query.setSlop(99);
 406
 407     hits = searcher.search(query, null, 1000).scoreDocs;
 408     assertEquals("slop of 99 not enough", 0, hits.length);
 409     QueryUtils.check(random, query,searcher);
 410   }
 411
 412   // work on two docs like this: "phrase exist notexist exist found"
 413   public void testNonExistingPhrase() throws IOException {
 414     // phrase without repetitions that exists in 2 docs
 415     query.add(new Term("nonexist", "phrase"));
 416     query.add(new Term("nonexist", "notexist"));
 417     query.add(new Term("nonexist", "found"));
 418     query.setSlop(2); // would be found this way
 419
 420     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 421     assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length);
 422     QueryUtils.check(random, query,searcher);
 423
 424     // phrase with repetitions that exists in 2 docs
 425     query = new PhraseQuery();
 426     query.add(new Term("nonexist", "phrase"));
 427     query.add(new Term("nonexist", "exist"));
 428     query.add(new Term("nonexist", "exist"));
 429     query.setSlop(1); // would be found
 430
 431     hits = searcher.search(query, null, 1000).scoreDocs;
 432     assertEquals("phrase with repetitions exists in two docs", 2, hits.length);
 433     QueryUtils.check(random, query,searcher);
 434
 435     // phrase I with repetitions that does not exist in any doc
 436     query = new PhraseQuery();
 437     query.add(new Term("nonexist", "phrase"));
 438     query.add(new Term("nonexist", "notexist"));
 439     query.add(new Term("nonexist", "phrase"));
 440     query.setSlop(1000); // would not be found no matter how high the slop is
 441
 442     hits = searcher.search(query, null, 1000).scoreDocs;
 443     assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
 444     QueryUtils.check(random, query,searcher);
 445
 446     // phrase II with repetitions that does not exist in any doc
 447     query = new PhraseQuery();
 448     query.add(new Term("nonexist", "phrase"));
 449     query.add(new Term("nonexist", "exist"));
 450     query.add(new Term("nonexist", "exist"));
 451     query.add(new Term("nonexist", "exist"));
 452     query.setSlop(1000); // would not be found no matter how high the slop is
 453
 454     hits = searcher.search(query, null, 1000).scoreDocs;
 455     assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
 456     QueryUtils.check(random, query,searcher);
 457
 458   }
 459
 460   /**
 461    * Working on a 2 fields like this:
 462    *    Field("field", "one two three four five")
 463    *    Field("palindrome", "one two three two one")
 464    * Phrase of size 2 occuriong twice, once in order and once in reverse,
 465    * because doc is a palyndrome, is counted twice.
 466    * Also, in this case order in query does not matter.
 467    * Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
 468    */
 469   public void testPalyndrome2() throws Exception {
 470
 471     // search on non palyndrome, find phrase with no slop, using exact phrase scorer
 472     query.setSlop(0); // to use exact phrase scorer
 473     query.add(new Term("field", "two"));
 474     query.add(new Term("field", "three"));
 475     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 476     assertEquals("phrase found with exact phrase scorer", 1, hits.length);
 477     float score0 = hits[0].score;
 478     //System.out.println("(exact) field: two three: "+score0);
 479     QueryUtils.check(random, query,searcher);
 480
 481     // search on non palyndrome, find phrase with slop 2, though no slop required here.
 482     query.setSlop(2); // to use sloppy scorer
 483     hits = searcher.search(query, null, 1000).scoreDocs;
 484     assertEquals("just sloppy enough", 1, hits.length);
 485     float score1 = hits[0].score;
 486     //System.out.println("(sloppy) field: two three: "+score1);
 487     assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
 488     QueryUtils.check(random, query,searcher);
 489
 490     // search ordered in palyndrome, find it twice
 491     query = new PhraseQuery();
 492     query.setSlop(2); // must be at least two for both ordered and reversed to match
 493     query.add(new Term("palindrome", "two"));
 494     query.add(new Term("palindrome", "three"));
 495     hits = searcher.search(query, null, 1000).scoreDocs;
 496     assertEquals("just sloppy enough", 1, hits.length);
 497     //float score2 = hits[0].score;
 498     //System.out.println("palindrome: two three: "+score2);
 499     QueryUtils.check(random, query,searcher);
 500
 501     //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
 502     //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
 503
 504     // search reveresed in palyndrome, find it twice
 505     query = new PhraseQuery();
 506     query.setSlop(2); // must be at least two for both ordered and reversed to match
 507     query.add(new Term("palindrome", "three"));
 508     query.add(new Term("palindrome", "two"));
 509     hits = searcher.search(query, null, 1000).scoreDocs;
 510     assertEquals("just sloppy enough", 1, hits.length);
 511     //float score3 = hits[0].score;
 512     //System.out.println("palindrome: three two: "+score3);
 513     QueryUtils.check(random, query,searcher);
 514
 515     //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
 516     //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
 517     //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
 518   }
 519
 520   /**
 521    * Working on a 2 fields like this:
 522    *    Field("field", "one two three four five")
 523    *    Field("palindrome", "one two three two one")
 524    * Phrase of size 3 occuriong twice, once in order and once in reverse,
 525    * because doc is a palyndrome, is counted twice.
 526    * Also, in this case order in query does not matter.
 527    * Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
 528    */
 529   public void testPalyndrome3() throws Exception {
 530
 531     // search on non palyndrome, find phrase with no slop, using exact phrase scorer
 532     query.setSlop(0); // to use exact phrase scorer
 533     query.add(new Term("field", "one"));
 534     query.add(new Term("field", "two"));
 535     query.add(new Term("field", "three"));
 536     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
 537     assertEquals("phrase found with exact phrase scorer", 1, hits.length);
 538     float score0 = hits[0].score;
 539     //System.out.println("(exact) field: one two three: "+score0);
 540     QueryUtils.check(random, query,searcher);
 541
 542     // just make sure no exc:
 543     searcher.explain(query, 0);
 544
 545     // search on non palyndrome, find phrase with slop 3, though no slop required here.
 546     query.setSlop(4); // to use sloppy scorer
 547     hits = searcher.search(query, null, 1000).scoreDocs;
 548     assertEquals("just sloppy enough", 1, hits.length);
 549     float score1 = hits[0].score;
 550     //System.out.println("(sloppy) field: one two three: "+score1);
 551     assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
 552     QueryUtils.check(random, query,searcher);
 553
 554     // search ordered in palyndrome, find it twice
 555     query = new PhraseQuery();
 556     query.setSlop(4); // must be at least four for both ordered and reversed to match
 557     query.add(new Term("palindrome", "one"));
 558     query.add(new Term("palindrome", "two"));
 559     query.add(new Term("palindrome", "three"));
 560     hits = searcher.search(query, null, 1000).scoreDocs;
 561
 562     // just make sure no exc:
 563     searcher.explain(query, 0);
 564
 565     assertEquals("just sloppy enough", 1, hits.length);
 566     //float score2 = hits[0].score;
 567     //System.out.println("palindrome: one two three: "+score2);
 568     QueryUtils.check(random, query,searcher);
 569
 570     //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
 571     //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
 572
 573     // search reveresed in palyndrome, find it twice
 574     query = new PhraseQuery();
 575     query.setSlop(4); // must be at least four for both ordered and reversed to match
 576     query.add(new Term("palindrome", "three"));
 577     query.add(new Term("palindrome", "two"));
 578     query.add(new Term("palindrome", "one"));
 579     hits = searcher.search(query, null, 1000).scoreDocs;
 580     assertEquals("just sloppy enough", 1, hits.length);
 581     //float score3 = hits[0].score;
 582     //System.out.println("palindrome: three two one: "+score3);
 583     QueryUtils.check(random, query,searcher);
 584
 585     //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
 586     //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
 587     //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
 588   }
 589
 590   // LUCENE-1280
 591   public void testEmptyPhraseQuery() throws Throwable {
 592     final BooleanQuery q2 = new BooleanQuery();
 593     q2.add(new PhraseQuery(), BooleanClause.Occur.MUST);
 594     q2.toString();
 595   }
 596
 597   /* test that a single term is rewritten to a term query */
 598   public void testRewrite() throws IOException {
 599     PhraseQuery pq = new PhraseQuery();
 600     pq.add(new Term("foo", "bar"));
 601     Query rewritten = pq.rewrite(searcher.getIndexReader());
 602     assertTrue(rewritten instanceof TermQuery);
 603   }
 604
 605   public void testRandomPhrases() throws Exception {
 606     Directory dir = newDirectory();
 607     Analyzer analyzer = new MockAnalyzer(random);
 608
 609     RandomIndexWriter w  = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy()));
 610     List<List<String>> docs = new ArrayList<List<String>>();
 611     Document d = new Document();
 612     Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
 613     d.add(f);
 614
 615     Random r = random;
 616
 617     int NUM_DOCS = atLeast(10);
 618     for (int i = 0; i < NUM_DOCS; i++) {
 619       // must be > 4096 so it spans multiple chunks
 620       int termCount = _TestUtil.nextInt(random, 4097, 8200);
 621
 622       List<String> doc = new ArrayList<String>();
 623
 624       StringBuilder sb = new StringBuilder();
 625       while(doc.size() < termCount) {
 626         if (r.nextInt(5) == 1 || docs.size() == 0) {
 627           // make new non-empty-string term
 628           String term;
 629           while(true) {
 630             term = _TestUtil.randomUnicodeString(r);
 631             if (term.length() > 0) {
 632               break;
 633             }
 634           }
 635           TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
 636           CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
 637           ts.reset();
 638           while(ts.incrementToken()) {
 639             String text = termAttr.toString();
 640             doc.add(text);
 641             sb.append(text).append(' ');
 642           }
 643           ts.end();
 644           ts.close();
 645         } else {
 646           // pick existing sub-phrase
 647           List<String> lastDoc = docs.get(r.nextInt(docs.size()));
 648           int len = _TestUtil.nextInt(r, 1, 10);
 649           int start = r.nextInt(lastDoc.size()-len);
 650           for(int k=start;k<start+len;k++) {
 651             String t = lastDoc.get(k);
 652             doc.add(t);
 653             sb.append(t).append(' ');
 654           }
 655         }
 656       }
 657       docs.add(doc);
 658       f.setValue(sb.toString());
 659       w.addDocument(d);
 660     }
 661
 662     IndexReader reader = w.getReader();
 663     IndexSearcher s = newSearcher(reader);
 664     w.close();
 665
 666     // now search
 667     int num = atLeast(10);
 668     for(int i=0;i<num;i++) {
 669       int docID = r.nextInt(docs.size());
 670       List<String> doc = docs.get(docID);
 671
 672       final int numTerm = _TestUtil.nextInt(r, 2, 20);
 673       final int start = r.nextInt(doc.size()-numTerm);
 674       PhraseQuery pq = new PhraseQuery();
 675       StringBuilder sb = new StringBuilder();
 676       for(int t=start;t<start+numTerm;t++) {
 677         pq.add(new Term("f", doc.get(t)));
 678         sb.append(doc.get(t)).append(' ');
 679       }
 680
 681       TopDocs hits = s.search(pq, NUM_DOCS);
 682       boolean found = false;
 683       for(int j=0;j<hits.scoreDocs.length;j++) {
 684         if (hits.scoreDocs[j].doc == docID) {
 685           found = true;
 686           break;
 687         }
 688       }
 689
 690       assertTrue("phrase '" + sb + "' not found; start=" + start, found);
 691     }
 692
 693     reader.close();
 694     s.close();
 695     dir.close();
 696   }
 697 }