1 package org.apache.lucene.search.vectorhighlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.HashSet;
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.analysis.Token;
27 import org.apache.lucene.analysis.TokenStream;
28 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
29 import org.apache.lucene.search.BooleanQuery;
30 import org.apache.lucene.search.BooleanClause.Occur;
31 import org.apache.lucene.util.AttributeImpl;
33 public class IndexTimeSynonymTest extends AbstractTestCase {
// Checks the FieldTermStack produced for a single-term query on "Mac".
// Expects exactly one stack entry whose string form is "Mac(11,20,3)";
// 11 and 20 match the offsets given to the "Mac" token in makeIndex1w.
// NOTE(review): the index setup call is not visible in this view; the method
// name suggests the index from makeIndex1w is used - confirm.
35 public void testFieldTermStackIndex1wSearch1term() throws Exception {
38 FieldQuery fq = new FieldQuery( tq( "Mac" ), true, true );
39 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
40 assertEquals( 1, stack.termList.size() );
41 assertEquals( "Mac(11,20,3)", stack.pop().toString() );
// Checks the FieldTermStack for a BooleanQuery with two SHOULD clauses,
// the terms "Mac" and "MacBook". Expects two stack entries; the pop order
// is not pinned, each popped entry only has to be a member of the expected set.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w is used - confirm.
44 public void testFieldTermStackIndex1wSearch2terms() throws Exception {
47 BooleanQuery bq = new BooleanQuery();
48 bq.add( tq( "Mac" ), Occur.SHOULD );
49 bq.add( tq( "MacBook" ), Occur.SHOULD );
50 FieldQuery fq = new FieldQuery( bq, true, true );
51 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
52 assertEquals( 2, stack.termList.size() );
53 Set<String> expectedSet = new HashSet<String>();
54 expectedSet.add( "Mac(11,20,3)" );
55 expectedSet.add( "MacBook(11,20,3)" );
// NOTE(review): two contains() checks also pass if both pops return the same
// string; using remove() instead would assert that the entries are distinct.
56 assertTrue( expectedSet.contains( stack.pop().toString() ) );
57 assertTrue( expectedSet.contains( stack.pop().toString() ) );
// Checks the FieldTermStack for a single-term query on "pc".
// Expects one stack entry rendered as "pc(3,5,1)"; 3 and 5 match the offsets
// given to the "pc" token in makeIndex1w2w.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
60 public void testFieldTermStackIndex1w2wSearch1term() throws Exception {
63 FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
64 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
65 assertEquals( 1, stack.termList.size() );
66 assertEquals( "pc(3,5,1)", stack.pop().toString() );
// Checks the FieldTermStack for the query built by pqF( "personal", "computer" ).
// Expects two stack entries popped in this order: "personal(3,5,1)" and then
// "computer(3,5,2)"; both carry the same offsets 3 and 5.
// NOTE(review): pqF is defined outside this view (presumably a phrase query
// helper), and the index setup call is not visible here - confirm both.
69 public void testFieldTermStackIndex1w2wSearch1phrase() throws Exception {
72 FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
73 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
74 assertEquals( 2, stack.termList.size() );
75 assertEquals( "personal(3,5,1)", stack.pop().toString() );
76 assertEquals( "computer(3,5,2)", stack.pop().toString() );
// Checks the FieldTermStack for a single-term query on "computer", i.e. only
// one of the two synonym words. Expects one entry rendered as "computer(3,5,2)".
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
79 public void testFieldTermStackIndex1w2wSearch1partial() throws Exception {
82 FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
83 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
84 assertEquals( 1, stack.termList.size() );
85 assertEquals( "computer(3,5,2)", stack.pop().toString() );
// Checks the FieldTermStack for a BooleanQuery combining the term "pc" and
// the query from pqF( "personal", "computer" ), both as SHOULD clauses.
// Expects three stack entries: the first two pops may come in either order
// ("pc(3,5,1)" or "personal(3,5,1)"), the third must be "computer(3,5,2)".
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
88 public void testFieldTermStackIndex1w2wSearch1term1phrase() throws Exception {
91 BooleanQuery bq = new BooleanQuery();
92 bq.add( tq( "pc" ), Occur.SHOULD );
93 bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
94 FieldQuery fq = new FieldQuery( bq, true, true );
95 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
96 assertEquals( 3, stack.termList.size() );
97 Set<String> expectedSet = new HashSet<String>();
98 expectedSet.add( "pc(3,5,1)" );
99 expectedSet.add( "personal(3,5,1)" );
// NOTE(review): two contains() checks also pass if both pops return the same
// string; using remove() instead would assert that the entries are distinct.
100 assertTrue( expectedSet.contains( stack.pop().toString() ) );
101 assertTrue( expectedSet.contains( stack.pop().toString() ) );
102 assertEquals( "computer(3,5,2)", stack.pop().toString() );
// Checks the FieldTermStack for a single-term query on "pc".
// Expects one stack entry rendered as "pc(3,20,1)"; 3 and 20 match the
// offsets given to the "pc" token in makeIndex2w1w.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
105 public void testFieldTermStackIndex2w1wSearch1term() throws Exception {
108 FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
109 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
110 assertEquals( 1, stack.termList.size() );
111 assertEquals( "pc(3,20,1)", stack.pop().toString() );
// Checks the FieldTermStack for the query built by pqF( "personal", "computer" ).
// Expects two stack entries popped in this order: "personal(3,20,1)" and then
// "computer(3,20,2)"; both carry the same offsets 3 and 20.
// NOTE(review): pqF is defined outside this view (presumably a phrase query
// helper), and the index setup call is not visible here - confirm both.
114 public void testFieldTermStackIndex2w1wSearch1phrase() throws Exception {
117 FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
118 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
119 assertEquals( 2, stack.termList.size() );
120 assertEquals( "personal(3,20,1)", stack.pop().toString() );
121 assertEquals( "computer(3,20,2)", stack.pop().toString() );
// Checks the FieldTermStack for a single-term query on "computer", i.e. only
// one word of the two-word text. Expects one entry rendered as "computer(3,20,2)".
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
124 public void testFieldTermStackIndex2w1wSearch1partial() throws Exception {
127 FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
128 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
129 assertEquals( 1, stack.termList.size() );
130 assertEquals( "computer(3,20,2)", stack.pop().toString() );
// Checks the FieldTermStack for a BooleanQuery combining the term "pc" and
// the query from pqF( "personal", "computer" ), both as SHOULD clauses.
// Expects three stack entries: the first two pops may come in either order
// ("pc(3,20,1)" or "personal(3,20,1)"), the third must be "computer(3,20,2)".
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
133 public void testFieldTermStackIndex2w1wSearch1term1phrase() throws Exception {
136 BooleanQuery bq = new BooleanQuery();
137 bq.add( tq( "pc" ), Occur.SHOULD );
138 bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
139 FieldQuery fq = new FieldQuery( bq, true, true );
140 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
141 assertEquals( 3, stack.termList.size() );
142 Set<String> expectedSet = new HashSet<String>();
143 expectedSet.add( "pc(3,20,1)" );
144 expectedSet.add( "personal(3,20,1)" );
// NOTE(review): two contains() checks also pass if both pops return the same
// string; using remove() instead would assert that the entries are distinct.
145 assertTrue( expectedSet.contains( stack.pop().toString() ) );
146 assertTrue( expectedSet.contains( stack.pop().toString() ) );
147 assertEquals( "computer(3,20,2)", stack.pop().toString() );
// Checks the FieldPhraseList built from the term stack for the query
// pqF( "personal", "computer" ). Expects a single phrase entry rendered as
// "personalcomputer(1.0)((3,5))" with start offset 3 and end offset 5.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
150 public void testFieldPhraseListIndex1w2wSearch1phrase() throws Exception {
153 FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
154 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
155 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
156 assertEquals( 1, fpl.phraseList.size() );
157 assertEquals( "personalcomputer(1.0)((3,5))", fpl.phraseList.get( 0 ).toString() );
158 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
159 assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList built from the term stack for a single-term
// query on "computer". Expects a single phrase entry rendered as
// "computer(1.0)((3,5))" with start offset 3 and end offset 5.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
162 public void testFieldPhraseListIndex1w2wSearch1partial() throws Exception {
165 FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
166 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
167 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
168 assertEquals( 1, fpl.phraseList.size() );
169 assertEquals( "computer(1.0)((3,5))", fpl.phraseList.get( 0 ).toString() );
170 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
171 assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList for a BooleanQuery combining the term "pc" and
// the query from pqF( "personal", "computer" ), both as SHOULD clauses.
// Expects a single phrase entry spanning offsets 3 to 5.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex1w2w is used - confirm.
174 public void testFieldPhraseListIndex1w2wSearch1term1phrase() throws Exception {
177 BooleanQuery bq = new BooleanQuery();
178 bq.add( tq( "pc" ), Occur.SHOULD );
179 bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
180 FieldQuery fq = new FieldQuery( bq, true, true );
181 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
182 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
183 assertEquals( 1, fpl.phraseList.size() );
// Only the "(1.0)((3,5))" part is pinned, and it must appear after at least one
// leading character (index > 0); the matched text itself is not asserted.
184 assertTrue( fpl.phraseList.get( 0 ).toString().indexOf( "(1.0)((3,5))" ) > 0 );
185 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
186 assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList built from the term stack for a single-term
// query on "pc". Expects a single phrase entry rendered as "pc(1.0)((3,20))"
// with start offset 3 and end offset 20.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
189 public void testFieldPhraseListIndex2w1wSearch1term() throws Exception {
192 FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
193 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
194 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
195 assertEquals( 1, fpl.phraseList.size() );
196 assertEquals( "pc(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
197 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
198 assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList built from the term stack for the query
// pqF( "personal", "computer" ). Expects a single phrase entry rendered as
// "personalcomputer(1.0)((3,20))" with start offset 3 and end offset 20.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
201 public void testFieldPhraseListIndex2w1wSearch1phrase() throws Exception {
204 FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
205 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
206 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
207 assertEquals( 1, fpl.phraseList.size() );
208 assertEquals( "personalcomputer(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
209 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
210 assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList built from the term stack for a single-term
// query on "computer". Expects a single phrase entry rendered as
// "computer(1.0)((3,20))" with start offset 3 and end offset 20.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
213 public void testFieldPhraseListIndex2w1wSearch1partial() throws Exception {
216 FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
217 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
218 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
219 assertEquals( 1, fpl.phraseList.size() );
220 assertEquals( "computer(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
221 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
222 assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
// Checks the FieldPhraseList for a BooleanQuery combining the term "pc" and
// the query from pqF( "personal", "computer" ), both as SHOULD clauses.
// Expects a single phrase entry spanning offsets 3 to 20.
// NOTE(review): the index setup call is not visible in this view; presumably
// the index from makeIndex2w1w is used - confirm.
225 public void testFieldPhraseListIndex2w1wSearch1term1phrase() throws Exception {
228 BooleanQuery bq = new BooleanQuery();
229 bq.add( tq( "pc" ), Occur.SHOULD );
230 bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
231 FieldQuery fq = new FieldQuery( bq, true, true );
232 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
233 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
234 assertEquals( 1, fpl.phraseList.size() );
// Only the "(1.0)((3,20))" part is pinned, and it must appear after at least one
// leading character (index > 0); the matched text itself is not asserted.
235 assertTrue( fpl.phraseList.get( 0 ).toString().indexOf( "(1.0)((3,20))" ) > 0 );
236 assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
237 assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
// Builds the test index for the text "I'll buy a Macintosh" via makeSynonymIndex.
// The visible tokens place "Mac" and "MacBook" at the same offsets (11,20) as
// "Macintosh", each with position increment 0, i.e. stacked on the same position.
// NOTE(review): part of the argument list is not visible in this view; presumably
// tokens for the preceding words come before the ones shown - confirm.
240 private void makeIndex1w() throws Exception {
242 // 012345678901234567890
243 // I'll buy a Macintosh
247 makeSynonymIndex( "I'll buy a Macintosh",
251 t("Macintosh",11,20),t("Mac",11,20,0),t("MacBook",11,20,0));
// Builds the test index for the text "My pc was broken" via makeSynonymIndex.
// The visible tokens all share offsets (3,5): "pc" with the default increment,
// "personal" with position increment 0 (same position as "pc"), and "computer"
// with the default increment (next position).
// NOTE(review): the argument list is cut off after the trailing comma in this
// view; the remaining tokens are not shown.
254 private void makeIndex1w2w() throws Exception {
260 makeSynonymIndex( "My pc was broken",
262 t("pc",3,5),t("personal",3,5,0),t("computer",3,5),
// Builds the test index for the text "My personal computer was broken" via
// makeSynonymIndex. The visible tokens all share offsets (3,20): "personal" with
// the default increment, "pc" with position increment 0 (same position as
// "personal"), and "computer" with the default increment (next position).
// NOTE(review): the argument list is cut off after the trailing comma in this
// view; the remaining tokens are not shown.
267 private void makeIndex2w1w() throws Exception {
268 // 1111111111222222222233
269 // 01234567890123456789012345678901
270 // My personal computer was broken
273 makeSynonymIndex( "My personal computer was broken",
275 t("personal",3,20),t("pc",3,20,0),t("computer",3,20),
// Wraps the given tokens in a TokenArrayAnalyzer and passes that analyzer,
// together with the field value, to make1dmfIndex.
// NOTE(review): make1dmfIndex is defined outside this view (presumably in
// AbstractTestCase); what it indexes and how is not shown here.
280 void makeSynonymIndex( String value, Token... tokens ) throws Exception {
281 Analyzer analyzer = new TokenArrayAnalyzer( tokens );
282 make1dmfIndex( analyzer, value );
// Convenience overload: creates a token with the given text and offsets and a
// position increment of 1 by delegating to the four-argument t().
285 public static Token t( String text, int startOffset, int endOffset ){
286 return t( text, startOffset, endOffset, 1 );
// Creates a Token with the given text and start/end offsets and sets its
// position increment to positionIncrement. The index builders in this file pass
// 0 for tokens that share offsets with the preceding token.
// NOTE(review): the return statement is not visible in this view; presumably
// the configured token is returned - confirm.
289 public static Token t( String text, int startOffset, int endOffset, int positionIncrement ){
290 Token token = new Token( text, startOffset, endOffset );
291 token.setPositionIncrement( positionIncrement );
295 public static final class TokenArrayAnalyzer extends Analyzer {
297 public TokenArrayAnalyzer( Token... tokens ){
298 this.tokens = tokens;
302 public TokenStream tokenStream(String fieldName, Reader reader) {
303 TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
304 final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
308 public boolean incrementToken() throws IOException {
309 if( p >= tokens.length ) return false;
311 tokens[p++].copyTo(reusableToken);