+++ /dev/null
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.index.*;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.store.*;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util._TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Random;
-
-/**
- * Tests {@link PhraseQuery}.
- *
- * @see TestPositionIncrement
- */
-public class TestPhraseQuery extends LuceneTestCase {
-
- /** Tolerance used when two float scores are compared for equality. */
- public static final float SCORE_COMP_THRESH = 1e-6f;
-
- // Class-level fixtures: assigned in beforeClass() and released in afterClass().
- private static Directory directory;
- private static IndexReader reader;
- private static IndexSearcher searcher;
-
- /** Query under construction for the current test; setUp() assigns a fresh instance. */
- private PhraseQuery query;
-
- /**
-  * Builds the three-document index shared by the tests of this class and
-  * opens the static reader and searcher on it.
-  */
- @BeforeClass
- public static void beforeClass() throws Exception {
-   directory = newDirectory();
-
-   // Whitespace tokenizer for every field; reports a position increment gap of 100.
-   Analyzer whitespaceWithGap = new Analyzer() {
-     @Override
-     public TokenStream tokenStream(String fieldName, Reader input) {
-       return new WhitespaceTokenizer(TEST_VERSION_CURRENT, input);
-     }
-
-     @Override
-     public int getPositionIncrementGap(String fieldName) {
-       return 100;
-     }
-   };
-   RandomIndexWriter indexWriter = new RandomIndexWriter(random, directory, whitespaceWithGap);
-
-   // First document: a plain field, a "repeated" field added twice, and a palindrome.
-   Document multiFieldDoc = new Document();
-   multiFieldDoc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
-   multiFieldDoc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
-   multiFieldDoc.add(newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED));
-   multiFieldDoc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
-   indexWriter.addDocument(multiFieldDoc);
-
-   // Second and third documents: two identical "nonexist" documents.
-   for (int copy = 0; copy < 2; copy++) {
-     Document nonExistDoc = new Document();
-     nonExistDoc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
-     indexWriter.addDocument(nonExistDoc);
-   }
-
-   reader = indexWriter.getReader();
-   indexWriter.close();
-   searcher = newSearcher(reader);
- }
-
- /** Runs the superclass set-up, then gives the test a fresh, empty {@link PhraseQuery}. */
- @Override
- public void setUp() throws Exception {
-   super.setUp();
-   this.query = new PhraseQuery();
- }
-
- /**
-  * Releases the class-level fixtures: closes the searcher, the reader and the
-  * directory in that order, clearing each static reference right after its
-  * resource has been closed.
-  */
- @AfterClass
- public static void afterClass() throws Exception {
- searcher.close();
- searcher = null;
- reader.close();
- reader = null;
- directory.close();
- directory = null;
- }
-
- public void testNotCloseEnough() throws Exception {
- query.setSlop(2);
- query.add(new Term("field", "one"));
- query.add(new Term("field", "five"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals(0, hits.length);
- QueryUtils.check(random, query,searcher);
- }
-
- public void testBarelyCloseEnough() throws Exception {
- query.setSlop(3);
- query.add(new Term("field", "one"));
- query.add(new Term("field", "five"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals(1, hits.length);
- QueryUtils.check(random, query,searcher);
- }
-
- /**
-  * With the default slop of zero, adjacent terms in document order match,
-  * while the same kind of pair in reversed order does not.
-  */
- public void testExact() throws Exception {
-   // No setSlop() call here: slop is zero by default.
-   query.add(new Term("field", "four"));
-   query.add(new Term("field", "five"));
-   int inOrderHits = searcher.search(query, null, 1000).scoreDocs.length;
-   assertEquals("exact match", 1, inOrderHits);
-   QueryUtils.check(random, query, searcher);
-
-   // Reversed pair, still with zero slop.
-   query = new PhraseQuery();
-   query.add(new Term("field", "two"));
-   query.add(new Term("field", "one"));
-   int reversedHits = searcher.search(query, null, 1000).scoreDocs.length;
-   assertEquals("reverse not exact", 0, reversedHits);
-   QueryUtils.check(random, query, searcher);
- }
-
- public void testSlop1() throws Exception {
- // Ensures slop of 1 works with terms in order.
- query.setSlop(1);
- query.add(new Term("field", "one"));
- query.add(new Term("field", "two"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("in order", 1, hits.length);
- QueryUtils.check(random, query,searcher);
-
-
- // Ensures slop of 1 does not work for phrases out of order;
- // must be at least 2.
- query = new PhraseQuery();
- query.setSlop(1);
- query.add(new Term("field", "two"));
- query.add(new Term("field", "one"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("reversed, slop not 2 or more", 0, hits.length);
- QueryUtils.check(random, query,searcher);
- }
-
- /**
- * As long as slop is at least 2, terms can be reversed
- */
- public void testOrderDoesntMatter() throws Exception {
- query.setSlop(2); // must be at least two for reverse order match
- query.add(new Term("field", "two"));
- query.add(new Term("field", "one"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- QueryUtils.check(random, query,searcher);
-
-
- query = new PhraseQuery();
- query.setSlop(2);
- query.add(new Term("field", "three"));
- query.add(new Term("field", "one"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("not sloppy enough", 0, hits.length);
- QueryUtils.check(random, query,searcher);
-
- }
-
- /**
- * slop is the total number of positional moves allowed
- * to line up a phrase
- */
- public void testMulipleTerms() throws Exception {
- query.setSlop(2);
- query.add(new Term("field", "one"));
- query.add(new Term("field", "three"));
- query.add(new Term("field", "five"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("two total moves", 1, hits.length);
- QueryUtils.check(random, query,searcher);
-
-
- query = new PhraseQuery();
- query.setSlop(5); // it takes six moves to match this phrase
- query.add(new Term("field", "five"));
- query.add(new Term("field", "three"));
- query.add(new Term("field", "one"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("slop of 5 not close enough", 0, hits.length);
- QueryUtils.check(random, query,searcher);
-
-
- query.setSlop(6);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("slop of 6 just right", 1, hits.length);
- QueryUtils.check(random, query,searcher);
-
- }
-
- /**
-  * Indexes "the stop words are here" through a {@link StopAnalyzer} created
-  * for {@code Version.LUCENE_24} and runs two exact (slop 0) phrase queries
-  * against the result.
-  * <p>
-  * The searcher, reader and directory are closed in a {@code finally} block so
-  * that a failing assertion does not leave them open.
-  *
-  * @throws Exception if indexing or searching fails
-  */
- public void testPhraseQueryWithStopAnalyzer() throws Exception {
-   Directory dir = newDirectory();
-   StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
-   RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-       newIndexWriterConfig(Version.LUCENE_24, stopAnalyzer));
-   Document doc = new Document();
-   doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
-   writer.addDocument(doc);
-   IndexReader stopReader = writer.getReader();
-   writer.close();
-
-   IndexSearcher stopSearcher = newSearcher(stopReader);
-   try {
-     // valid exact phrase query
-     PhraseQuery phrase = new PhraseQuery();
-     phrase.add(new Term("field", "stop"));
-     phrase.add(new Term("field", "words"));
-     ScoreDoc[] hits = stopSearcher.search(phrase, null, 1000).scoreDocs;
-     assertEquals(1, hits.length);
-     QueryUtils.check(random, phrase, stopSearcher);
-
-     // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
-     phrase = new PhraseQuery();
-     phrase.add(new Term("field", "words"));
-     phrase.add(new Term("field", "here"));
-     hits = stopSearcher.search(phrase, null, 1000).scoreDocs;
-     assertEquals(1, hits.length);
-     QueryUtils.check(random, phrase, stopSearcher);
-   } finally {
-     // Close order: searcher, then reader, then directory.
-     stopSearcher.close();
-     stopReader.close();
-     dir.close();
-   }
- }
-
- /**
-  * Combines a {@link PhraseQuery} and a {@link TermQuery} as required clauses
-  * of a {@link BooleanQuery}, over two indexes written one after the other
-  * into the same directory.
-  */
- public void testPhraseQueryInConjunctionScorer() throws Exception {
-   Directory dir = newDirectory();
-
-   // ---- First index: both documents contain the phrase, one contains the term. ----
-   RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir);
-
-   Document sourceOnly = new Document();
-   sourceOnly.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
-   indexWriter.addDocument(sourceOnly);
-
-   Document sourceAndContents = new Document();
-   sourceAndContents.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
-   sourceAndContents.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
-   indexWriter.addDocument(sourceAndContents);
-
-   IndexReader indexReader = indexWriter.getReader();
-   indexWriter.close();
-   IndexSearcher indexSearcher = newSearcher(indexReader);
-
-   PhraseQuery phrase = new PhraseQuery();
-   phrase.add(new Term("source", "marketing"));
-   phrase.add(new Term("source", "info"));
-   TopDocs found = indexSearcher.search(phrase, null, 1000);
-   assertEquals(2, found.scoreDocs.length);
-   QueryUtils.check(random, phrase, indexSearcher);
-
-   TermQuery term = new TermQuery(new Term("contents", "foobar"));
-   BooleanQuery conjunction = new BooleanQuery();
-   conjunction.add(term, BooleanClause.Occur.MUST);
-   conjunction.add(phrase, BooleanClause.Occur.MUST);
-   found = indexSearcher.search(conjunction, null, 1000);
-   assertEquals(1, found.scoreDocs.length);
-   // NOTE(review): the term query is checked here, not the boolean query that was
-   // just searched - possibly intentional, confirm before changing.
-   QueryUtils.check(random, term, indexSearcher);
-
-   indexSearcher.close();
-   indexReader.close();
-
-   // ---- Second index: same directory, writer opened with OpenMode.CREATE. ----
-   indexWriter = new RandomIndexWriter(random, dir,
-       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
-
-   String[] contents = {
-     "map entry woo",
-     "woo map entry",
-     "map foobarword entry woo"
-   };
-   for (String text : contents) {
-     Document contentDoc = new Document();
-     contentDoc.add(newField("contents", text, Field.Store.YES, Field.Index.ANALYZED));
-     indexWriter.addDocument(contentDoc);
-   }
-
-   indexReader = indexWriter.getReader();
-   indexWriter.close();
-   indexSearcher = newSearcher(indexReader);
-
-   term = new TermQuery(new Term("contents", "woo"));
-   phrase = new PhraseQuery();
-   phrase.add(new Term("contents", "map"));
-   phrase.add(new Term("contents", "entry"));
-
-   // Each clause on its own first.
-   found = indexSearcher.search(term, null, 1000);
-   assertEquals(3, found.scoreDocs.length);
-   found = indexSearcher.search(phrase, null, 1000);
-   assertEquals(2, found.scoreDocs.length);
-
-   // Term clause first, phrase clause second.
-   conjunction = new BooleanQuery();
-   conjunction.add(term, BooleanClause.Occur.MUST);
-   conjunction.add(phrase, BooleanClause.Occur.MUST);
-   found = indexSearcher.search(conjunction, null, 1000);
-   assertEquals(2, found.scoreDocs.length);
-
-   // Phrase clause first, term clause second.
-   conjunction = new BooleanQuery();
-   conjunction.add(phrase, BooleanClause.Occur.MUST);
-   conjunction.add(term, BooleanClause.Occur.MUST);
-   found = indexSearcher.search(conjunction, null, 1000);
-   assertEquals(2, found.scoreDocs.length);
-   QueryUtils.check(random, conjunction, indexSearcher);
-
-   indexSearcher.close();
-   indexReader.close();
-   dir.close();
- }
-
- /**
-  * Searches three documents in which "firstname" and "lastname" are separated
-  * by zero, one and two other terms, using the largest possible slop, and
-  * verifies that the documents whose terms are closer together score higher.
-  * <p>
-  * The searcher, reader and directory are closed in a {@code finally} block so
-  * that a failing assertion does not leave them open.
-  *
-  * @throws IOException if indexing or searching fails
-  */
- public void testSlopScoring() throws IOException {
-   Directory dir = newDirectory();
-   RandomIndexWriter writer = new RandomIndexWriter(random, dir,
-       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
-
-   // One document per text, added in this order.
-   String[] texts = {
-     "foo firstname lastname foo",
-     "foo firstname zzz lastname foo",
-     "foo firstname zzz yyy lastname foo"
-   };
-   for (String text : texts) {
-     Document doc = new Document();
-     doc.add(newField("field", text, Field.Store.YES, Field.Index.ANALYZED));
-     writer.addDocument(doc);
-   }
-
-   IndexReader slopReader = writer.getReader();
-   writer.close();
-
-   IndexSearcher slopSearcher = newSearcher(slopReader);
-   try {
-     PhraseQuery nameQuery = new PhraseQuery();
-     nameQuery.add(new Term("field", "firstname"));
-     nameQuery.add(new Term("field", "lastname"));
-     nameQuery.setSlop(Integer.MAX_VALUE);
-     ScoreDoc[] hits = slopSearcher.search(nameQuery, null, 1000).scoreDocs;
-     assertEquals(3, hits.length);
-     // Make sure that those matches where the terms appear closer to
-     // each other get a higher score:
-     assertEquals(0.71, hits[0].score, 0.01);
-     assertEquals(0, hits[0].doc);
-     assertEquals(0.44, hits[1].score, 0.01);
-     assertEquals(1, hits[1].doc);
-     assertEquals(0.31, hits[2].score, 0.01);
-     assertEquals(2, hits[2].doc);
-     QueryUtils.check(random, nameQuery, slopSearcher);
-   } finally {
-     // Close order: searcher, then reader, then directory.
-     slopSearcher.close();
-     slopReader.close();
-     dir.close();
-   }
- }
-
- /**
-  * Checks {@code PhraseQuery.toString()} for a query parsed with position
-  * increments enabled: the expected strings show {@code ?} for positions
-  * without a term and {@code |} between terms that share a position.
-  */
- public void testToString() throws Exception {
-   QueryParser parser =
-       new QueryParser(TEST_VERSION_CURRENT, "field", new StopAnalyzer(TEST_VERSION_CURRENT));
-   parser.setEnablePositionIncrements(true);
-
-   Query parsed = parser.parse("\"this hi this is a test is\"");
-   PhraseQuery phrase = (PhraseQuery) parsed;
-   assertEquals("field:\"? hi ? ? ? test\"", phrase.toString());
-
-   // Add a second term at position 1, next to "hi".
-   phrase.add(new Term("field", "hello"), 1);
-   assertEquals("field:\"? hi|hello ? ? ? test\"", phrase.toString());
- }
-
- public void testWrappedPhrase() throws IOException {
- query.add(new Term("repeated", "first"));
- query.add(new Term("repeated", "part"));
- query.add(new Term("repeated", "second"));
- query.add(new Term("repeated", "part"));
- query.setSlop(100);
-
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("slop of 100 just right", 1, hits.length);
- QueryUtils.check(random, query,searcher);
-
- query.setSlop(99);
-
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("slop of 99 not enough", 0, hits.length);
- QueryUtils.check(random, query,searcher);
- }
-
- // work on two docs like this: "phrase exist notexist exist found"
- public void testNonExistingPhrase() throws IOException {
- // phrase without repetitions that exists in 2 docs
- query.add(new Term("nonexist", "phrase"));
- query.add(new Term("nonexist", "notexist"));
- query.add(new Term("nonexist", "found"));
- query.setSlop(2); // would be found this way
-
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length);
- QueryUtils.check(random, query,searcher);
-
- // phrase with repetitions that exists in 2 docs
- query = new PhraseQuery();
- query.add(new Term("nonexist", "phrase"));
- query.add(new Term("nonexist", "exist"));
- query.add(new Term("nonexist", "exist"));
- query.setSlop(1); // would be found
-
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("phrase with repetitions exists in two docs", 2, hits.length);
- QueryUtils.check(random, query,searcher);
-
- // phrase I with repetitions that does not exist in any doc
- query = new PhraseQuery();
- query.add(new Term("nonexist", "phrase"));
- query.add(new Term("nonexist", "notexist"));
- query.add(new Term("nonexist", "phrase"));
- query.setSlop(1000); // would not be found no matter how high the slop is
-
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
- QueryUtils.check(random, query,searcher);
-
- // phrase II with repetitions that does not exist in any doc
- query = new PhraseQuery();
- query.add(new Term("nonexist", "phrase"));
- query.add(new Term("nonexist", "exist"));
- query.add(new Term("nonexist", "exist"));
- query.add(new Term("nonexist", "exist"));
- query.setSlop(1000); // would not be found no matter how high the slop is
-
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
- QueryUtils.check(random, query,searcher);
-
- }
-
- /**
- * Working on a 2 fields like this:
- * Field("field", "one two three four five")
- * Field("palindrome", "one two three two one")
- * Phrase of size 2 occuriong twice, once in order and once in reverse,
- * because doc is a palyndrome, is counted twice.
- * Also, in this case order in query does not matter.
- * Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
- */
- public void testPalyndrome2() throws Exception {
-
- // search on non palyndrome, find phrase with no slop, using exact phrase scorer
- query.setSlop(0); // to use exact phrase scorer
- query.add(new Term("field", "two"));
- query.add(new Term("field", "three"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("phrase found with exact phrase scorer", 1, hits.length);
- float score0 = hits[0].score;
- //System.out.println("(exact) field: two three: "+score0);
- QueryUtils.check(random, query,searcher);
-
- // search on non palyndrome, find phrase with slop 2, though no slop required here.
- query.setSlop(2); // to use sloppy scorer
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- float score1 = hits[0].score;
- //System.out.println("(sloppy) field: two three: "+score1);
- assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
- QueryUtils.check(random, query,searcher);
-
- // search ordered in palyndrome, find it twice
- query = new PhraseQuery();
- query.setSlop(2); // must be at least two for both ordered and reversed to match
- query.add(new Term("palindrome", "two"));
- query.add(new Term("palindrome", "three"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- //float score2 = hits[0].score;
- //System.out.println("palindrome: two three: "+score2);
- QueryUtils.check(random, query,searcher);
-
- //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
- //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
-
- // search reveresed in palyndrome, find it twice
- query = new PhraseQuery();
- query.setSlop(2); // must be at least two for both ordered and reversed to match
- query.add(new Term("palindrome", "three"));
- query.add(new Term("palindrome", "two"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- //float score3 = hits[0].score;
- //System.out.println("palindrome: three two: "+score3);
- QueryUtils.check(random, query,searcher);
-
- //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
- //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
- //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
- }
-
- /**
- * Working on a 2 fields like this:
- * Field("field", "one two three four five")
- * Field("palindrome", "one two three two one")
- * Phrase of size 3 occuriong twice, once in order and once in reverse,
- * because doc is a palyndrome, is counted twice.
- * Also, in this case order in query does not matter.
- * Also, when an exact match is found, both sloppy scorer and exact scorer scores the same.
- */
- public void testPalyndrome3() throws Exception {
-
- // search on non palyndrome, find phrase with no slop, using exact phrase scorer
- query.setSlop(0); // to use exact phrase scorer
- query.add(new Term("field", "one"));
- query.add(new Term("field", "two"));
- query.add(new Term("field", "three"));
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("phrase found with exact phrase scorer", 1, hits.length);
- float score0 = hits[0].score;
- //System.out.println("(exact) field: one two three: "+score0);
- QueryUtils.check(random, query,searcher);
-
- // just make sure no exc:
- searcher.explain(query, 0);
-
- // search on non palyndrome, find phrase with slop 3, though no slop required here.
- query.setSlop(4); // to use sloppy scorer
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- float score1 = hits[0].score;
- //System.out.println("(sloppy) field: one two three: "+score1);
- assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH);
- QueryUtils.check(random, query,searcher);
-
- // search ordered in palyndrome, find it twice
- query = new PhraseQuery();
- query.setSlop(4); // must be at least four for both ordered and reversed to match
- query.add(new Term("palindrome", "one"));
- query.add(new Term("palindrome", "two"));
- query.add(new Term("palindrome", "three"));
- hits = searcher.search(query, null, 1000).scoreDocs;
-
- // just make sure no exc:
- searcher.explain(query, 0);
-
- assertEquals("just sloppy enough", 1, hits.length);
- //float score2 = hits[0].score;
- //System.out.println("palindrome: one two three: "+score2);
- QueryUtils.check(random, query,searcher);
-
- //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
- //assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);
-
- // search reveresed in palyndrome, find it twice
- query = new PhraseQuery();
- query.setSlop(4); // must be at least four for both ordered and reversed to match
- query.add(new Term("palindrome", "three"));
- query.add(new Term("palindrome", "two"));
- query.add(new Term("palindrome", "one"));
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("just sloppy enough", 1, hits.length);
- //float score3 = hits[0].score;
- //System.out.println("palindrome: three two one: "+score3);
- QueryUtils.check(random, query,searcher);
-
- //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
- //assertTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
- //assertEquals("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
- }
-
- /**
-  * LUCENE-1280: calls {@code toString()} on a {@link BooleanQuery} whose only
-  * clause is an empty {@link PhraseQuery}; the test passes if nothing is thrown.
-  */
- public void testEmptyPhraseQuery() throws Throwable {
-   final PhraseQuery emptyPhrase = new PhraseQuery();
-   final BooleanQuery wrapper = new BooleanQuery();
-   wrapper.add(emptyPhrase, BooleanClause.Occur.MUST);
-   // The returned string is deliberately ignored.
-   wrapper.toString();
- }
-
- /** Tests that a phrase query holding a single term is rewritten to a {@link TermQuery}. */
- public void testRewrite() throws IOException {
-   PhraseQuery singleTerm = new PhraseQuery();
-   singleTerm.add(new Term("foo", "bar"));
-   IndexReader sharedReader = searcher.getIndexReader();
-   assertTrue(singleTerm.rewrite(sharedReader) instanceof TermQuery);
- }
-
- /**
-  * Randomized round-trip test: indexes atLeast(10) long documents made of
-  * random terms and of sub-phrases copied from earlier documents, then takes
-  * runs of consecutive terms from randomly chosen documents and asserts that
-  * an exact PhraseQuery built from each run returns the document it came from.
-  */
- public void testRandomPhrases() throws Exception {
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
-
- RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy()));
- // docs.get(i) holds the analyzed terms of the i-th document added, in order.
- List<List<String>> docs = new ArrayList<List<String>>();
- // A single Document/Field pair is reused; only the field value changes per document.
- Document d = new Document();
- Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f);
-
- // Alias for the shared test Random: 'r' and 'random' are the same instance.
- Random r = random;
-
- int NUM_DOCS = atLeast(10);
- for (int i = 0; i < NUM_DOCS; i++) {
- // must be > 4096 so it spans multiple chunks
- int termCount = _TestUtil.nextInt(random, 4097, 8200);
-
- List<String> doc = new ArrayList<String>();
-
- StringBuilder sb = new StringBuilder();
- // Each pass appends one or more terms, so doc may end up longer than termCount.
- while(doc.size() < termCount) {
- // Always generate new terms while no earlier document exists to copy from.
- if (r.nextInt(5) == 1 || docs.size() == 0) {
- // make new non-empty-string term
- String term;
- while(true) {
- term = _TestUtil.randomUnicodeString(r);
- if (term.length() > 0) {
- break;
- }
- }
- // Run the random string through the analyzer and record every token it
- // produces, so the recorded terms are the analyzer's output.
- TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
- CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
- ts.reset();
- while(ts.incrementToken()) {
- String text = termAttr.toString();
- doc.add(text);
- sb.append(text).append(' ');
- }
- ts.end();
- ts.close();
- } else {
- // pick existing sub-phrase
- List<String> lastDoc = docs.get(r.nextInt(docs.size()));
- int len = _TestUtil.nextInt(r, 1, 10);
- // start + len stays strictly below lastDoc.size().
- int start = r.nextInt(lastDoc.size()-len);
- for(int k=start;k<start+len;k++) {
- String t = lastDoc.get(k);
- doc.add(t);
- sb.append(t).append(' ');
- }
- }
- }
- docs.add(doc);
- // The indexed text is the recorded terms joined by single spaces.
- f.setValue(sb.toString());
- w.addDocument(d);
- }
-
- // The reader and searcher are obtained before the writer is closed.
- IndexReader reader = w.getReader();
- IndexSearcher s = newSearcher(reader);
- w.close();
-
- // now search
- int num = atLeast(10);
- for(int i=0;i<num;i++) {
- // NOTE(review): the index into 'docs' is compared with the hit's doc id below,
- // which presumes doc ids follow insertion order - confirm.
- int docID = r.nextInt(docs.size());
- List<String> doc = docs.get(docID);
-
- // Build an exact phrase from numTerm consecutive terms of the chosen document.
- final int numTerm = _TestUtil.nextInt(r, 2, 20);
- final int start = r.nextInt(doc.size()-numTerm);
- PhraseQuery pq = new PhraseQuery();
- // sb collects the phrase text for the failure message only.
- StringBuilder sb = new StringBuilder();
- for(int t=start;t<start+numTerm;t++) {
- pq.add(new Term("f", doc.get(t)));
- sb.append(doc.get(t)).append(' ');
- }
-
- // Ask for up to NUM_DOCS hits and look for the source document among them.
- TopDocs hits = s.search(pq, NUM_DOCS);
- boolean found = false;
- for(int j=0;j<hits.scoreDocs.length;j++) {
- if (hits.scoreDocs[j].doc == docID) {
- found = true;
- break;
- }
- }
-
- assertTrue("phrase '" + sb + "' not found; start=" + start, found);
- }
-
- reader.close();
- s.close();
- dir.close();
- }
-}