1 package org.apache.lucene.search.vectorhighlight;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.document.Document;
21 import org.apache.lucene.document.Field;
22 import org.apache.lucene.document.Field.Index;
23 import org.apache.lucene.document.Field.Store;
24 import org.apache.lucene.document.Field.TermVector;
25 import org.apache.lucene.index.IndexReader;
26 import org.apache.lucene.index.IndexWriter;
27 import org.apache.lucene.index.IndexWriterConfig;
28 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
29 import org.apache.lucene.search.Query;
30 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
32 public class SimpleFragmentsBuilderTest extends AbstractTestCase {
34 public void test1TermIndex() throws Exception {
35 FieldFragList ffl = ffl( "a", "a" );
36 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
37 assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
40 sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
41 assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
44 public void test2Frags() throws Exception {
45 FieldFragList ffl = ffl( "a", "a b b b b b b b b b b b a b a b" );
46 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
47 String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
48 // 3 snippets requested, but should be 2
49 assertEquals( 2, f.length );
50 assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
51 assertEquals( "b b <b>a</b> b <b>a</b> b ", f[1] );
54 public void test3Frags() throws Exception {
55 FieldFragList ffl = ffl( "a c", "a b b b b b b b b b b b a b a b b b b b c a a b b" );
56 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
57 String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
58 assertEquals( 3, f.length );
59 assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
60 assertEquals( "b b <b>a</b> b <b>a</b> b b b b b ", f[1] );
61 assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[2] );
64 public void testTagsAndEncoder() throws Exception {
65 FieldFragList ffl = ffl( "a", "<h1> a </h1>" );
66 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
67 String[] preTags = { "[" };
68 String[] postTags = { "]" };
69 assertEquals( "<h1> [a] </h1> ",
70 sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
73 private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
74 make1d1fIndex( indexValue );
75 Query query = paW.parse( queryValue );
76 FieldQuery fq = new FieldQuery( query, true, true );
77 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
78 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
79 return new SimpleFragListBuilder().createFieldFragList( fpl, 20 );
82 public void test1PhraseShortMV() throws Exception {
85 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
86 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
87 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
88 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
89 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
90 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
91 assertEquals( " b c <b>d</b> e ", sfb.createFragment( reader, 0, F, ffl ) );
94 public void test1PhraseLongMV() throws Exception {
97 FieldQuery fq = new FieldQuery( pqF( "search", "engines" ), true, true );
98 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
99 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
100 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
101 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
102 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
103 assertEquals( " most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use t",
104 sfb.createFragment( reader, 0, F, ffl ) );
107 public void test1PhraseLongMVB() throws Exception {
110 FieldQuery fq = new FieldQuery( pqF( "sp", "pe", "ee", "ed" ), true, true ); // "speed" -(2gram)-> "sp","pe","ee","ed"
111 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
112 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
113 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
114 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
115 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
116 assertEquals( "ssing <b>speed</b>, the ", sfb.createFragment( reader, 0, F, ffl ) );
119 public void testUnstoredField() throws Exception {
122 FieldQuery fq = new FieldQuery( tq( "aaa" ), true, true );
123 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
124 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
125 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
126 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
127 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
128 assertNull( sfb.createFragment( reader, 0, F, ffl ) );
131 protected void makeUnstoredIndex() throws Exception {
132 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
133 TEST_VERSION_CURRENT, analyzerW).setOpenMode(OpenMode.CREATE));
134 Document doc = new Document();
135 doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
136 writer.addDocument( doc );
138 if (reader != null) reader.close();
139 reader = IndexReader.open( dir, true );
142 public void test1StrMV() throws Exception {
145 FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
146 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
147 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
148 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
149 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
150 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
151 // '/' separator doesn't effect the snippet because of NOT_ANALYZED field
152 sfb.setMultiValuedSeparator( '/' );
153 assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
156 public void testMVSeparator() throws Exception {
159 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
160 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
161 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
162 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
163 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
164 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
165 sfb.setMultiValuedSeparator( '/' );
166 assertEquals( " b c//<b>d</b> e/", sfb.createFragment( reader, 0, F, ffl ) );