1 package org.apache.lucene.search.vectorhighlight;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.search.BooleanQuery;
20 import org.apache.lucene.search.BooleanClause.Occur;
22 public class FieldPhraseListTest extends AbstractTestCase {
24 public void test1TermIndex() throws Exception {
27 FieldQuery fq = new FieldQuery( tq( "a" ), true, true );
28 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
29 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
30 assertEquals( 1, fpl.phraseList.size() );
31 assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );
33 fq = new FieldQuery( tq( "b" ), true, true );
34 stack = new FieldTermStack( reader, 0, F, fq );
35 fpl = new FieldPhraseList( stack, fq );
36 assertEquals( 0, fpl.phraseList.size() );
39 public void test2TermsIndex() throws Exception {
40 make1d1fIndex( "a a" );
42 FieldQuery fq = new FieldQuery( tq( "a" ), true, true );
43 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
44 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
45 assertEquals( 2, fpl.phraseList.size() );
46 assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );
47 assertEquals( "a(1.0)((2,3))", fpl.phraseList.get( 1 ).toString() );
50 public void test1PhraseIndex() throws Exception {
51 make1d1fIndex( "a b" );
53 FieldQuery fq = new FieldQuery( pqF( "a", "b" ), true, true );
54 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
55 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
56 assertEquals( 1, fpl.phraseList.size() );
57 assertEquals( "ab(1.0)((0,3))", fpl.phraseList.get( 0 ).toString() );
59 fq = new FieldQuery( tq( "b" ), true, true );
60 stack = new FieldTermStack( reader, 0, F, fq );
61 fpl = new FieldPhraseList( stack, fq );
62 assertEquals( 1, fpl.phraseList.size() );
63 assertEquals( "b(1.0)((2,3))", fpl.phraseList.get( 0 ).toString() );
66 public void test1PhraseIndexB() throws Exception {
67 // 01 12 23 34 45 56 67 78 (offsets)
68 // bb|bb|ba|ac|cb|ba|ab|bc
69 // 0 1 2 3 4 5 6 7 (positions)
70 make1d1fIndexB( "bbbacbabc" );
72 FieldQuery fq = new FieldQuery( pqF( "ba", "ac" ), true, true );
73 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
74 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
75 assertEquals( 1, fpl.phraseList.size() );
76 assertEquals( "baac(1.0)((2,5))", fpl.phraseList.get( 0 ).toString() );
79 public void test2ConcatTermsIndexB() throws Exception {
83 make1d1fIndexB( "abab" );
85 FieldQuery fq = new FieldQuery( tq( "ab" ), true, true );
86 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
87 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
88 assertEquals( 2, fpl.phraseList.size() );
89 assertEquals( "ab(1.0)((0,2))", fpl.phraseList.get( 0 ).toString() );
90 assertEquals( "ab(1.0)((2,4))", fpl.phraseList.get( 1 ).toString() );
93 public void test2Terms1PhraseIndex() throws Exception {
94 make1d1fIndex( "c a a b" );
96 // phraseHighlight = true
97 FieldQuery fq = new FieldQuery( pqF( "a", "b" ), true, true );
98 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
99 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
100 assertEquals( 1, fpl.phraseList.size() );
101 assertEquals( "ab(1.0)((4,7))", fpl.phraseList.get( 0 ).toString() );
103 // phraseHighlight = false
104 fq = new FieldQuery( pqF( "a", "b" ), false, true );
105 stack = new FieldTermStack( reader, 0, F, fq );
106 fpl = new FieldPhraseList( stack, fq );
107 assertEquals( 2, fpl.phraseList.size() );
108 assertEquals( "a(1.0)((2,3))", fpl.phraseList.get( 0 ).toString() );
109 assertEquals( "ab(1.0)((4,7))", fpl.phraseList.get( 1 ).toString() );
112 public void testPhraseSlop() throws Exception {
113 make1d1fIndex( "c a a b c" );
115 FieldQuery fq = new FieldQuery( pqF( 2F, 1, "a", "c" ), true, true );
116 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
117 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
118 assertEquals( 1, fpl.phraseList.size() );
119 assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() );
120 assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
121 assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
124 public void test2PhrasesOverlap() throws Exception {
125 make1d1fIndex( "d a b c d" );
127 BooleanQuery query = new BooleanQuery();
128 query.add( pqF( "a", "b" ), Occur.SHOULD );
129 query.add( pqF( "b", "c" ), Occur.SHOULD );
130 FieldQuery fq = new FieldQuery( query, true, true );
131 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
132 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
133 assertEquals( 1, fpl.phraseList.size() );
134 assertEquals( "abc(1.0)((2,7))", fpl.phraseList.get( 0 ).toString() );
137 public void test3TermsPhrase() throws Exception {
138 make1d1fIndex( "d a b a b c d" );
140 FieldQuery fq = new FieldQuery( pqF( "a", "b", "c" ), true, true );
141 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
142 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
143 assertEquals( 1, fpl.phraseList.size() );
144 assertEquals( "abc(1.0)((6,11))", fpl.phraseList.get( 0 ).toString() );
147 public void testSearchLongestPhrase() throws Exception {
148 make1d1fIndex( "d a b d c a b c" );
150 BooleanQuery query = new BooleanQuery();
151 query.add( pqF( "a", "b" ), Occur.SHOULD );
152 query.add( pqF( "a", "b", "c" ), Occur.SHOULD );
153 FieldQuery fq = new FieldQuery( query, true, true );
154 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
155 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
156 assertEquals( 2, fpl.phraseList.size() );
157 assertEquals( "ab(1.0)((2,5))", fpl.phraseList.get( 0 ).toString() );
158 assertEquals( "abc(1.0)((10,15))", fpl.phraseList.get( 1 ).toString() );
161 public void test1PhraseShortMV() throws Exception {
164 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
165 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
166 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
167 assertEquals( 1, fpl.phraseList.size() );
168 assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
171 public void test1PhraseLongMV() throws Exception {
174 FieldQuery fq = new FieldQuery( pqF( "search", "engines" ), true, true );
175 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
176 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
177 assertEquals( 2, fpl.phraseList.size() );
178 assertEquals( "searchengines(1.0)((102,116))", fpl.phraseList.get( 0 ).toString() );
179 assertEquals( "searchengines(1.0)((157,171))", fpl.phraseList.get( 1 ).toString() );
182 public void test1PhraseLongMVB() throws Exception {
185 FieldQuery fq = new FieldQuery( pqF( "sp", "pe", "ee", "ed" ), true, true ); // "speed" -(2gram)-> "sp","pe","ee","ed"
186 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
187 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
188 assertEquals( 1, fpl.phraseList.size() );
189 assertEquals( "sppeeeed(1.0)((88,93))", fpl.phraseList.get( 0 ).toString() );
192 /* This test shows a big speedup from limiting the number of analyzed phrases in
193 * this bad case for FieldPhraseList */
194 /* But it is not reliable as a unit test since it is timing-dependent
195 public void testManyRepeatedTerms() throws Exception {
196 long t = System.currentTimeMillis();
197 testManyTermsWithLimit (-1);
198 long t1 = System.currentTimeMillis();
199 testManyTermsWithLimit (1);
200 long t2 = System.currentTimeMillis();
201 assertTrue (t2-t1 * 1000 < t1-t);
203 private void testManyTermsWithLimit (int limit) throws Exception {
204 StringBuilder buf = new StringBuilder ();
205 for (int i = 0; i < 16000; i++) {
206 buf.append("a b c ");
208 make1d1fIndex( buf.toString());
210 Query query = tq("a");
211 FieldQuery fq = new FieldQuery( query, true, true );
212 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
213 FieldPhraseList fpl = new FieldPhraseList( stack, fq, limit);
214 if (limit < 0 || limit > 16000)
215 assertEquals( 16000, fpl.phraseList.size() );
217 assertEquals( limit, fpl.phraseList.size() );
218 assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );