1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.util.LuceneTestCase;
21 import org.apache.lucene.analysis.*;
22 import org.apache.lucene.analysis.tokenattributes.*;
23 import org.apache.lucene.document.*;
24 import org.apache.lucene.index.*;
25 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
26 import org.apache.lucene.queryParser.QueryParser;
27 import org.apache.lucene.store.*;
28 import org.apache.lucene.util.Version;
29 import org.apache.lucene.util._TestUtil;
30 import org.junit.AfterClass;
31 import org.junit.BeforeClass;
33 import java.io.IOException;
34 import java.io.Reader;
35 import java.io.StringReader;
36 import java.util.List;
37 import java.util.ArrayList;
38 import java.util.Random;
41 * Tests {@link PhraseQuery}.
43 * @see TestPositionIncrement
45 public class TestPhraseQuery extends LuceneTestCase {
// Tolerance passed as the delta when two scores are compared with assertEquals
// (see testPalyndrome2 / testPalyndrome3).
47 /** threshold for comparing floats */
48 public static final float SCORE_COMP_THRESH = 1e-6f;
// Static fixtures: directory, reader and searcher are assigned in beforeClass().
50 private static IndexSearcher searcher;
51 private static IndexReader reader;
// Instance field: setUp() assigns a new PhraseQuery to it.
52 private PhraseQuery query;
53 private static Directory directory;
// Builds the shared index used by the tests that rely on the static searcher:
// a document with the fields "field", "repeated" (added twice) and "palindrome",
// followed by two more addDocument calls involving the "nonexist" field.
// Finally opens the shared reader and searcher on that index.
56 public static void beforeClass() throws Exception {
57 directory = newDirectory();
// Custom analyzer: whitespace tokenization plus an overridden
// getPositionIncrementGap (its return value is not visible in this excerpt).
58 Analyzer analyzer = new Analyzer() {
60 public TokenStream tokenStream(String fieldName, Reader reader) {
61 return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
65 public int getPositionIncrementGap(String fieldName) {
69 RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer);
71 Document doc = new Document();
72 doc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
// "repeated" is added twice so that phrases can span both instances of the field
// (exercised by testWrappedPhrase).
73 doc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
74 Fieldable repeatedField = newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
75 doc.add(repeatedField);
76 doc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
77 writer.addDocument(doc);
// NOTE(review): the lines between the addDocument calls are not visible here;
// presumably a fresh Document is created for each "nonexist" entry — confirm.
// testNonExistingPhrase expects this content to be found in 2 docs.
80 doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
81 writer.addDocument(doc);
84 doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
85 writer.addDocument(doc);
87 reader = writer.getReader();
90 searcher = newSearcher(reader);
// Assigns a new, empty PhraseQuery to the query field, so tests can add terms
// to it directly.
94 public void setUp() throws Exception {
96 query = new PhraseQuery();
// Static counterpart to beforeClass().
// NOTE(review): the body is not visible in this excerpt — presumably it releases
// the shared searcher, reader and directory; confirm against the full file.
100 public static void afterClass() throws Exception {
// Searches "field" ("one two three four five") for the phrase [one, five] and
// expects no hits.
// NOTE(review): the slop applied to the query is not visible in this excerpt;
// the test name implies it is too small to bridge the terms in between.
109 public void testNotCloseEnough() throws Exception {
111 query.add(new Term("field", "one"));
112 query.add(new Term("field", "five"));
113 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
114 assertEquals(0, hits.length);
115 QueryUtils.check(random, query,searcher);
// Same phrase as testNotCloseEnough ([one, five] on "field"), but here exactly
// one hit is expected.
// NOTE(review): the slop applied to the query is not visible in this excerpt;
// the test name implies it is just large enough for the phrase to match.
118 public void testBarelyCloseEnough() throws Exception {
120 query.add(new Term("field", "one"));
121 query.add(new Term("field", "five"));
122 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
123 assertEquals(1, hits.length);
124 QueryUtils.check(random, query,searcher);
128 * Ensures slop of 0 works for exact matches, but not reversed
// Exact phrase matching on "field": the adjacent in-order pair [four, five]
// matches once, while the reversed adjacent pair [two, one] does not match.
130 public void testExact() throws Exception {
131 // slop is zero by default
132 query.add(new Term("field", "four"));
133 query.add(new Term("field", "five"));
134 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
135 assertEquals("exact match", 1, hits.length);
136 QueryUtils.check(random, query,searcher);
// start over with an empty query so the terms added above do not carry over
139 query = new PhraseQuery();
140 query.add(new Term("field", "two"));
141 query.add(new Term("field", "one"));
142 hits = searcher.search(query, null, 1000).scoreDocs;
143 assertEquals("reverse not exact", 0, hits.length);
144 QueryUtils.check(random, query,searcher);
// Per the inline comments, checks a slop of 1: the in-order pair [one, two]
// matches once, the reversed pair [two, one] does not match.
// NOTE(review): the setSlop calls themselves are not visible in this excerpt.
147 public void testSlop1() throws Exception {
148 // Ensures slop of 1 works with terms in order.
150 query.add(new Term("field", "one"));
151 query.add(new Term("field", "two"));
152 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
153 assertEquals("in order", 1, hits.length);
154 QueryUtils.check(random, query,searcher);
157 // Ensures slop of 1 does not work for phrases out of order;
158 // must be at least 2.
159 query = new PhraseQuery();
161 query.add(new Term("field", "two"));
162 query.add(new Term("field", "one"));
163 hits = searcher.search(query, null, 1000).scoreDocs;
164 assertEquals("reversed, slop not 2 or more", 0, hits.length);
165 QueryUtils.check(random, query,searcher);
169 * As long as slop is at least 2, terms can be reversed
// With slop 2 the reversed adjacent pair [two, one] matches once; the reversed
// pair [three, one] is then expected not to match.
// NOTE(review): the slop used for the second query is not visible in this
// excerpt; the assertion message says it is "not sloppy enough".
171 public void testOrderDoesntMatter() throws Exception {
172 query.setSlop(2); // must be at least two for reverse order match
173 query.add(new Term("field", "two"));
174 query.add(new Term("field", "one"));
175 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
176 assertEquals("just sloppy enough", 1, hits.length);
177 QueryUtils.check(random, query,searcher);
180 query = new PhraseQuery();
182 query.add(new Term("field", "three"));
183 query.add(new Term("field", "one"));
184 hits = searcher.search(query, null, 1000).scoreDocs;
185 assertEquals("not sloppy enough", 0, hits.length);
186 QueryUtils.check(random, query,searcher);
191 * slop is the total number of positional moves allowed
192 * to line up a phrase
// Three-term phrases on "field" ("one two three four five").
// [one, three, five] is expected to match once ("two total moves"); the reversed
// [five, three, one] must not match at slop 5 and is expected to match on the
// final search, whose assertion message cites a slop of 6.
// NOTE(review): the setSlop calls for the first and the last search are not
// visible in this excerpt.
194 public void testMulipleTerms() throws Exception {
196 query.add(new Term("field", "one"));
197 query.add(new Term("field", "three"));
198 query.add(new Term("field", "five"));
199 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
200 assertEquals("two total moves", 1, hits.length);
201 QueryUtils.check(random, query,searcher);
204 query = new PhraseQuery();
205 query.setSlop(5); // it takes six moves to match this phrase
206 query.add(new Term("field", "five"));
207 query.add(new Term("field", "three"));
208 query.add(new Term("field", "one"));
209 hits = searcher.search(query, null, 1000).scoreDocs;
210 assertEquals("slop of 5 not close enough", 0, hits.length);
211 QueryUtils.check(random, query,searcher);
// same query object searched again; only its slop is expected to differ
215 hits = searcher.search(query, null, 1000).scoreDocs;
216 assertEquals("slop of 6 just right", 1, hits.length);
217 QueryUtils.check(random, query,searcher);
// Indexes "the stop words are here" with a StopAnalyzer at Version.LUCENE_24
// into a private index and checks that both [stop, words] and [words, here]
// match as exact phrases.
// The locals directory, reader, searcher and query declared here shadow the
// class-level fields of the same names, so the shared index is not touched.
221 public void testPhraseQueryWithStopAnalyzer() throws Exception {
222 Directory directory = newDirectory();
223 StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
224 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
225 newIndexWriterConfig( Version.LUCENE_24, stopAnalyzer));
226 Document doc = new Document();
227 doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
228 writer.addDocument(doc);
229 IndexReader reader = writer.getReader();
232 IndexSearcher searcher = newSearcher(reader);
234 // valid exact phrase query
235 PhraseQuery query = new PhraseQuery();
236 query.add(new Term("field","stop"));
237 query.add(new Term("field","words"));
238 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
239 assertEquals(1, hits.length);
240 QueryUtils.check(random, query,searcher);
// "are" sits between "words" and "here" in the indexed text; the match below
// relies on the behaviour described in the next comment.
243 // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
244 query = new PhraseQuery();
245 query.add(new Term("field", "words"));
246 query.add(new Term("field", "here"));
247 hits = searcher.search(query, null, 1000).scoreDocs;
248 assertEquals(1, hits.length);
249 QueryUtils.check(random, query,searcher);
// Checks a PhraseQuery used as a MUST clause of a BooleanQuery next to a
// TermQuery, on two private indexes built one after the other in the same
// directory.
//  - Index 1: phrase [marketing, info] on "source" hits both docs; combined with
//    the required term contents:foobar only one doc remains.
//  - Index 2: term "woo" hits 3 docs, phrase [map, entry] hits 2; the conjunction
//    hits 2 regardless of the order in which the clauses are added.
257 public void testPhraseQueryInConjunctionScorer() throws Exception {
258 Directory directory = newDirectory();
259 RandomIndexWriter writer = new RandomIndexWriter(random, directory);
261 Document doc = new Document();
262 doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
263 writer.addDocument(doc);
265 doc = new Document();
266 doc.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
267 doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
268 writer.addDocument(doc);
270 IndexReader reader = writer.getReader();
273 IndexSearcher searcher = newSearcher(reader);
275 PhraseQuery phraseQuery = new PhraseQuery();
276 phraseQuery.add(new Term("source", "marketing"));
277 phraseQuery.add(new Term("source", "info"));
278 ScoreDoc[] hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
279 assertEquals(2, hits.length);
280 QueryUtils.check(random, phraseQuery,searcher);
283 TermQuery termQuery = new TermQuery(new Term("contents","foobar"));
284 BooleanQuery booleanQuery = new BooleanQuery();
285 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
286 booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
287 hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
288 assertEquals(1, hits.length);
// NOTE(review): this checks termQuery, not the booleanQuery that was just searched.
289 QueryUtils.check(random, termQuery,searcher);
// second index: a new writer on the same directory, opened with OpenMode.CREATE
295 writer = new RandomIndexWriter(random, directory,
296 newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
297 doc = new Document();
298 doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
299 writer.addDocument(doc);
301 doc = new Document();
302 doc.add(newField("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
303 writer.addDocument(doc);
// "map" and "entry" are not adjacent in this doc, so the exact phrase is
// expected to miss it (2 of 3 docs match below)
305 doc = new Document();
306 doc.add(newField("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
307 writer.addDocument(doc);
309 reader = writer.getReader();
312 searcher = newSearcher(reader);
314 termQuery = new TermQuery(new Term("contents","woo"));
315 phraseQuery = new PhraseQuery();
316 phraseQuery.add(new Term("contents","map"));
317 phraseQuery.add(new Term("contents","entry"));
319 hits = searcher.search(termQuery, null, 1000).scoreDocs;
320 assertEquals(3, hits.length);
321 hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
322 assertEquals(2, hits.length);
// conjunction with the term clause added first ...
325 booleanQuery = new BooleanQuery();
326 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
327 booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
328 hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
329 assertEquals(2, hits.length);
// ... and with the phrase clause added first: same expected hit count
331 booleanQuery = new BooleanQuery();
332 booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST);
333 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
334 hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
335 assertEquals(2, hits.length);
336 QueryUtils.check(random, booleanQuery,searcher);
// Indexes three docs in which "firstname" and "lastname" are separated by zero,
// one and two other terms, searches [firstname, lastname] with slop
// Integer.MAX_VALUE, and asserts that all three match and are returned in order
// of increasing distance (docs 0, 1, 2) with scores of about 0.71, 0.44 and 0.31.
344 public void testSlopScoring() throws IOException {
345 Directory directory = newDirectory();
// NOTE(review): the log merge policy is presumably chosen so doc IDs follow
// insertion order, which the doc-ID assertions below depend on — confirm.
346 RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
348 Document doc = new Document();
349 doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
350 writer.addDocument(doc);
352 Document doc2 = new Document();
353 doc2.add(newField("field", "foo firstname zzz lastname foo", Field.Store.YES, Field.Index.ANALYZED));
354 writer.addDocument(doc2);
356 Document doc3 = new Document();
357 doc3.add(newField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
358 writer.addDocument(doc3);
360 IndexReader reader = writer.getReader();
363 IndexSearcher searcher = newSearcher(reader);
364 PhraseQuery query = new PhraseQuery();
365 query.add(new Term("field", "firstname"));
366 query.add(new Term("field", "lastname"));
367 query.setSlop(Integer.MAX_VALUE);
368 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
369 assertEquals(3, hits.length);
370 // Make sure that those matches where the terms appear closer to
371 // each other get a higher score:
// scores are compared with a tolerance of 0.01
372 assertEquals(0.71, hits[0].score, 0.01);
373 assertEquals(0, hits[0].doc);
374 assertEquals(0.44, hits[1].score, 0.01);
375 assertEquals(1, hits[1].doc);
376 assertEquals(0.31, hits[2].score, 0.01);
377 assertEquals(2, hits[2].doc);
378 QueryUtils.check(random, query,searcher);
// Checks PhraseQuery.toString() for a phrase parsed through a StopAnalyzer with
// position increments enabled: the expected string shows "?" at positions that
// hold no query term (presumably where stop words were removed), and "hi|hello"
// once a second term has been added at position 1.
384 public void testToString() throws Exception {
385 StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
386 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
387 qp.setEnablePositionIncrements(true);
388 PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
389 assertEquals("field:\"? hi ? ? ? test\"", q.toString());
// add a second term at position 1, the position already holding "hi"
390 q.add(new Term("field", "hello"), 1);
391 assertEquals("field:\"? hi|hello ? ? ? test\"", q.toString());
// Searches the twice-added "repeated" field for [first, part, second, part],
// a phrase that spans both instances of the field. Per the assertion messages,
// one hit is expected at slop 100 and none at slop 99.
// NOTE(review): the setSlop calls are not visible in this excerpt; the threshold
// presumably follows from the analyzer's position-increment gap — confirm.
394 public void testWrappedPhrase() throws IOException {
395 query.add(new Term("repeated", "first"));
396 query.add(new Term("repeated", "part"));
397 query.add(new Term("repeated", "second"));
398 query.add(new Term("repeated", "part"));
401 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
402 assertEquals("slop of 100 just right", 1, hits.length);
403 QueryUtils.check(random, query,searcher);
407 hits = searcher.search(query, null, 1000).scoreDocs;
408 assertEquals("slop of 99 not enough", 0, hits.length);
409 QueryUtils.check(random, query,searcher);
412 // work on two docs like this: "phrase exist notexist exist found"
// Sloppy phrases with repeated terms on the "nonexist" field
// ("phrase exist notexist exist found"). Four cases:
//  1. no repeated term, slop 2            -> 2 hits
//  2. "exist" twice, slop 1               -> 2 hits
//  3. "phrase" twice, slop 1000           -> 0 hits ("phrase" occurs only once)
//  4. "exist" three times, slop 1000      -> 0 hits ("exist" occurs only twice)
413 public void testNonExistingPhrase() throws IOException {
414 // phrase without repetitions that exists in 2 docs
415 query.add(new Term("nonexist", "phrase"));
416 query.add(new Term("nonexist", "notexist"));
417 query.add(new Term("nonexist", "found"));
418 query.setSlop(2); // would be found this way
420 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
421 assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length);
422 QueryUtils.check(random, query,searcher);
424 // phrase with repetitions that exists in 2 docs
425 query = new PhraseQuery();
426 query.add(new Term("nonexist", "phrase"));
427 query.add(new Term("nonexist", "exist"));
428 query.add(new Term("nonexist", "exist"));
429 query.setSlop(1); // would be found
431 hits = searcher.search(query, null, 1000).scoreDocs;
432 assertEquals("phrase with repetitions exists in two docs", 2, hits.length);
433 QueryUtils.check(random, query,searcher);
435 // phrase I with repetitions that does not exist in any doc
436 query = new PhraseQuery();
437 query.add(new Term("nonexist", "phrase"));
438 query.add(new Term("nonexist", "notexist"));
439 query.add(new Term("nonexist", "phrase"));
440 query.setSlop(1000); // would not be found no matter how high the slop is
442 hits = searcher.search(query, null, 1000).scoreDocs;
443 assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
444 QueryUtils.check(random, query,searcher);
446 // phrase II with repetitions that does not exist in any doc
447 query = new PhraseQuery();
448 query.add(new Term("nonexist", "phrase"));
449 query.add(new Term("nonexist", "exist"));
450 query.add(new Term("nonexist", "exist"));
451 query.add(new Term("nonexist", "exist"));
452 query.setSlop(1000); // would not be found no matter how high the slop is
454 hits = searcher.search(query, null, 1000).scoreDocs;
455 assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
456 QueryUtils.check(random, query,searcher);
461 * Working on 2 fields like this:
462 * Field("field", "one two three four five")
463 * Field("palindrome", "one two three two one")
464 * Phrase of size 2 occurring twice, once in order and once in reverse,
465 * because doc is a palindrome, is counted twice.
466 * Also, in this case order in query does not matter.
467 * Also, when an exact match is found, both sloppy scorer and exact scorer score the same.
// Two-term phrases against the "field" and "palindrome" fields.
//  - [two, three] on "field": one hit at slop 0 and at slop 2, and both searches
//    must produce the same score (within SCORE_COMP_THRESH).
//  - [two, three] and [three, two] on "palindrome" with slop 2: one hit each.
// The score comparisons between ordered and reversed matches are disabled
// (see the commented-out assertions).
469 public void testPalyndrome2() throws Exception {
471 // search on non-palindrome, find phrase with no slop, using exact phrase scorer
472 query.setSlop(0); // to use exact phrase scorer
473 query.add(new Term("field", "two"));
474 query.add(new Term("field", "three"));
475 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
476 assertEquals("phrase found with exact phrase scorer", 1, hits.length);
477 float score0 = hits[0].score;
478 //System.out.println("(exact) field: two three: "+score0);
479 QueryUtils.check(random, query,searcher);
481 // search on non-palindrome, find phrase with slop 2, though no slop required here.
482 query.setSlop(2); // to use sloppy scorer
483 hits = searcher.search(query, null, 1000).scoreDocs;
484 assertEquals("just sloppy enough", 1, hits.length);
485 float score1 = hits[0].score;
486 //System.out.println("(sloppy) field: two three: "+score1);
487 assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
488 QueryUtils.check(random, query,searcher);
490 // search ordered in palindrome, find it twice
491 query = new PhraseQuery();
492 query.setSlop(2); // must be at least two for both ordered and reversed to match
493 query.add(new Term("palindrome", "two"));
494 query.add(new Term("palindrome", "three"));
495 hits = searcher.search(query, null, 1000).scoreDocs;
496 assertEquals("just sloppy enough", 1, hits.length);
497 //float score2 = hits[0].score;
498 //System.out.println("palindrome: two three: "+score2);
499 QueryUtils.check(random, query,searcher);
501 //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
502 //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
504 // search reversed in palindrome, find it twice
505 query = new PhraseQuery();
506 query.setSlop(2); // must be at least two for both ordered and reversed to match
507 query.add(new Term("palindrome", "three"));
508 query.add(new Term("palindrome", "two"));
509 hits = searcher.search(query, null, 1000).scoreDocs;
510 assertEquals("just sloppy enough", 1, hits.length);
511 //float score3 = hits[0].score;
512 //System.out.println("palindrome: three two: "+score3);
513 QueryUtils.check(random, query,searcher);
515 //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
516 //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
517 //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
521 * Working on 2 fields like this:
522 * Field("field", "one two three four five")
523 * Field("palindrome", "one two three two one")
524 * Phrase of size 3 occurring twice, once in order and once in reverse,
525 * because doc is a palindrome, is counted twice.
526 * Also, in this case order in query does not matter.
527 * Also, when an exact match is found, both sloppy scorer and exact scorer score the same.
// Three-term variant of testPalyndrome2.
//  - [one, two, three] on "field": one hit at slop 0 and at slop 4, and both
//    searches must produce the same score (within SCORE_COMP_THRESH).
//  - [one, two, three] and [three, two, one] on "palindrome" with slop 4: one hit
//    each.
// Also calls searcher.explain(...) for doc 0 to make sure it does not throw.
// The score comparisons between ordered and reversed matches are disabled
// (see the commented-out assertions).
529 public void testPalyndrome3() throws Exception {
531 // search on non-palindrome, find phrase with no slop, using exact phrase scorer
532 query.setSlop(0); // to use exact phrase scorer
533 query.add(new Term("field", "one"));
534 query.add(new Term("field", "two"));
535 query.add(new Term("field", "three"));
536 ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
537 assertEquals("phrase found with exact phrase scorer", 1, hits.length);
538 float score0 = hits[0].score;
539 //System.out.println("(exact) field: one two three: "+score0);
540 QueryUtils.check(random, query,searcher);
542 // just make sure no exc:
543 searcher.explain(query, 0);
545 // search on non-palindrome, find phrase with slop 4, though no slop required here.
546 query.setSlop(4); // to use sloppy scorer
547 hits = searcher.search(query, null, 1000).scoreDocs;
548 assertEquals("just sloppy enough", 1, hits.length);
549 float score1 = hits[0].score;
550 //System.out.println("(sloppy) field: one two three: "+score1);
551 assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
552 QueryUtils.check(random, query,searcher);
554 // search ordered in palindrome, find it twice
555 query = new PhraseQuery();
556 query.setSlop(4); // must be at least four for both ordered and reversed to match
557 query.add(new Term("palindrome", "one"));
558 query.add(new Term("palindrome", "two"));
559 query.add(new Term("palindrome", "three"));
560 hits = searcher.search(query, null, 1000).scoreDocs;
562 // just make sure no exc:
563 searcher.explain(query, 0);
565 assertEquals("just sloppy enough", 1, hits.length);
566 //float score2 = hits[0].score;
567 //System.out.println("palindrome: one two three: "+score2);
568 QueryUtils.check(random, query,searcher);
570 //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
571 //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
573 // search reversed in palindrome, find it twice
574 query = new PhraseQuery();
575 query.setSlop(4); // must be at least four for both ordered and reversed to match
576 query.add(new Term("palindrome", "three"));
577 query.add(new Term("palindrome", "two"));
578 query.add(new Term("palindrome", "one"));
579 hits = searcher.search(query, null, 1000).scoreDocs;
580 assertEquals("just sloppy enough", 1, hits.length);
581 //float score3 = hits[0].score;
582 //System.out.println("palindrome: three two one: "+score3);
583 QueryUtils.check(random, query,searcher);
585 //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
586 //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
587 //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
// Builds a BooleanQuery whose only visible clause is an empty PhraseQuery
// (no terms) added as MUST.
// NOTE(review): the rest of the body is not visible in this excerpt; presumably
// the query is then searched to verify that an empty phrase does not throw.
591 public void testEmptyPhraseQuery() throws Throwable {
592 final BooleanQuery q2 = new BooleanQuery();
593 q2.add(new PhraseQuery(), BooleanClause.Occur.MUST);
597 /* test that a single term is rewritten to a term query */
// Rewrites a PhraseQuery holding the single term foo:bar against the shared
// searcher's IndexReader and asserts that the result is a TermQuery.
598 public void testRewrite() throws IOException {
599 PhraseQuery pq = new PhraseQuery();
600 pq.add(new Term("foo", "bar"));
601 Query rewritten = pq.rewrite(searcher.getIndexReader());
602 assertTrue(rewritten instanceof TermQuery);
605 public void testRandomPhrases() throws Exception {
606 Directory dir = newDirectory();
607 Analyzer analyzer = new MockAnalyzer(random);
609 RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy()));
610 List<List<String>> docs = new ArrayList<List<String>>();
611 Document d = new Document();
612 Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
617 int NUM_DOCS = atLeast(10);
618 for (int i = 0; i < NUM_DOCS; i++) {
619 // must be > 4096 so it spans multiple chunks
620 int termCount = _TestUtil.nextInt(random, 4097, 8200);
622 List<String> doc = new ArrayList<String>();
624 StringBuilder sb = new StringBuilder();
625 while(doc.size() < termCount) {
626 if (r.nextInt(5) == 1 || docs.size() == 0) {
627 // make new non-empty-string term
630 term = _TestUtil.randomUnicodeString(r);
631 if (term.length() > 0) {
635 TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
636 CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
638 while(ts.incrementToken()) {
639 String text = termAttr.toString();
641 sb.append(text).append(' ');
646 // pick existing sub-phrase
647 List<String> lastDoc = docs.get(r.nextInt(docs.size()));
648 int len = _TestUtil.nextInt(r, 1, 10);
649 int start = r.nextInt(lastDoc.size()-len);
650 for(int k=start;k<start+len;k++) {
651 String t = lastDoc.get(k);
653 sb.append(t).append(' ');
658 f.setValue(sb.toString());
662 IndexReader reader = w.getReader();
663 IndexSearcher s = newSearcher(reader);
667 int num = atLeast(10);
668 for(int i=0;i<num;i++) {
669 int docID = r.nextInt(docs.size());
670 List<String> doc = docs.get(docID);
672 final int numTerm = _TestUtil.nextInt(r, 2, 20);
673 final int start = r.nextInt(doc.size()-numTerm);
674 PhraseQuery pq = new PhraseQuery();
675 StringBuilder sb = new StringBuilder();
676 for(int t=start;t<start+numTerm;t++) {
677 pq.add(new Term("f", doc.get(t)));
678 sb.append(doc.get(t)).append(' ');
681 TopDocs hits = s.search(pq, NUM_DOCS);
682 boolean found = false;
683 for(int j=0;j<hits.scoreDocs.length;j++) {
684 if (hits.scoreDocs[j].doc == docID) {
690 assertTrue("phrase '" + sb + "' not found; start=" + start, found);