1 package org.apache.lucene.search.vectorhighlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.List;
24 import org.apache.lucene.document.Document;
25 import org.apache.lucene.document.Field;
26 import org.apache.lucene.document.MapFieldSelector;
27 import org.apache.lucene.index.IndexReader;
28 import org.apache.lucene.search.highlight.DefaultEncoder;
29 import org.apache.lucene.search.highlight.Encoder;
30 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
31 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
32 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
34 public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
/** Opening and closing highlight tags; a tag is selected by sequence number modulo the array length. */
protected String[] preTags, postTags;

/** Ready-made opening tags, each with a different background colour (20 entries). */
public static final String[] COLORED_PRE_TAGS = {
  "<b style=\"background:yellow\">", "<b style=\"background:lawngreen\">", "<b style=\"background:aquamarine\">",
  "<b style=\"background:magenta\">", "<b style=\"background:palegreen\">", "<b style=\"background:coral\">",
  "<b style=\"background:wheat\">", "<b style=\"background:khaki\">", "<b style=\"background:lime\">",
  "<b style=\"background:deepskyblue\">", "<b style=\"background:deeppink\">", "<b style=\"background:salmon\">",
  "<b style=\"background:peachpuff\">", "<b style=\"background:violet\">", "<b style=\"background:mediumpurple\">",
  "<b style=\"background:palegoldenrod\">", "<b style=\"background:darkkhaki\">", "<b style=\"background:springgreen\">",
  "<b style=\"background:turquoise\">", "<b style=\"background:powderblue\">"
};

/** Single closing tag that terminates any entry of {@link #COLORED_PRE_TAGS}. */
public static final String[] COLORED_POST_TAGS = { "</b>" };

/** Character appended after a field value when values are concatenated into the fragment source buffer. */
private char multiValuedSeparator = ' ';
/**
 * Creates a builder that wraps each highlighted term in {@code <b>} and {@code </b>}.
 */
protected BaseFragmentsBuilder(){
  this( new String[]{ "<b>" }, new String[]{ "</b>" } );
}
/**
 * Creates a builder that uses the supplied tags.
 * The arrays are stored by reference; they are not copied.
 *
 * @param preTags opening tags placed before a highlighted term
 * @param postTags closing tags placed after a highlighted term
 */
protected BaseFragmentsBuilder( String[] preTags, String[] postTags ){
  this.preTags = preTags;
  this.postTags = postTags;
}
58 static Object checkTagsArgument( Object tags ){
59 if( tags instanceof String ) return tags;
60 else if( tags instanceof String[] ) return tags;
61 throw new IllegalArgumentException( "type of preTags/postTags must be a String or String[]" );
/**
 * Returns the fragment infos to render, derived from {@code src}.
 * {@code createFragments} walks the returned list from index 0 and stops after
 * {@code maxNumFragments} entries, so the order of the returned list decides
 * which fragments are produced.
 *
 * @param src the fragment infos taken from the {@code FieldFragList}
 * @return the list of fragment infos to render, in rendering order
 */
public abstract List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src );

// Encoder passed along by the overloads that take no explicit Encoder argument.
private static final Encoder NULL_ENCODER = new DefaultEncoder();
68 public String createFragment( IndexReader reader, int docId,
69 String fieldName, FieldFragList fieldFragList ) throws IOException {
70 return createFragment( reader, docId, fieldName, fieldFragList,
71 preTags, postTags, NULL_ENCODER );
74 public String[] createFragments( IndexReader reader, int docId,
75 String fieldName, FieldFragList fieldFragList, int maxNumFragments )
77 return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
78 preTags, postTags, NULL_ENCODER );
81 public String createFragment( IndexReader reader, int docId,
82 String fieldName, FieldFragList fieldFragList, String[] preTags, String[] postTags,
83 Encoder encoder ) throws IOException {
84 String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1,
85 preTags, postTags, encoder );
86 if( fragments == null || fragments.length == 0 ) return null;
90 public String[] createFragments( IndexReader reader, int docId,
91 String fieldName, FieldFragList fieldFragList, int maxNumFragments,
92 String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
93 if( maxNumFragments < 0 )
94 throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
96 List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
98 List<String> fragments = new ArrayList<String>( maxNumFragments );
99 Field[] values = getFields( reader, docId, fieldName );
100 if( values.length == 0 ) return null;
101 StringBuilder buffer = new StringBuilder();
102 int[] nextValueIndex = { 0 };
103 for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){
104 WeightedFragInfo fragInfo = fragInfos.get( n );
105 fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) );
107 return fragments.toArray( new String[fragments.size()] );
111 protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
112 Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
113 return doc.getValues( fieldName ); // according to Document class javadoc, this never returns null
116 protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
117 // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
118 Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
119 return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
123 protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
124 final int s = fragInfo.startOffset;
125 return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s,
126 preTags, postTags, NULL_ENCODER );
129 protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
130 String[] preTags, String[] postTags, Encoder encoder ){
131 final int s = fragInfo.startOffset;
132 return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s,
133 preTags, postTags, encoder );
136 private String makeFragment( WeightedFragInfo fragInfo, String src, int s,
137 String[] preTags, String[] postTags, Encoder encoder ){
138 StringBuilder fragment = new StringBuilder();
140 for( SubInfo subInfo : fragInfo.subInfos ){
141 for( Toffs to : subInfo.termsOffsets ){
143 .append( encoder.encodeText( src.substring( srcIndex, to.startOffset - s ) ) )
144 .append( getPreTag( preTags, subInfo.seqnum ) )
145 .append( encoder.encodeText( src.substring( to.startOffset - s, to.endOffset - s ) ) )
146 .append( getPostTag( postTags, subInfo.seqnum ) );
147 srcIndex = to.endOffset - s;
150 fragment.append( encoder.encodeText( src.substring( srcIndex ) ) );
151 return fragment.toString();
155 protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
156 int startOffset, int endOffset ){
157 while( buffer.length() < endOffset && index[0] < values.length ){
158 buffer.append( values[index[0]] );
159 buffer.append( multiValuedSeparator );
162 int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
163 return buffer.substring( startOffset, eo );
166 protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
167 int startOffset, int endOffset ){
168 while( buffer.length() < endOffset && index[0] < values.length ){
169 buffer.append( values[index[0]].stringValue() );
170 if( values[index[0]].isTokenized() )
171 buffer.append( multiValuedSeparator );
174 int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
175 return buffer.substring( startOffset, eo );
178 public void setMultiValuedSeparator( char separator ){
179 multiValuedSeparator = separator;
182 public char getMultiValuedSeparator(){
183 return multiValuedSeparator;
186 protected String getPreTag( int num ){
187 return getPreTag( preTags, num );
190 protected String getPostTag( int num ){
191 return getPostTag( postTags, num );
194 protected String getPreTag( String[] preTags, int num ){
195 int n = num % preTags.length;
199 protected String getPostTag( String[] postTags, int num ){
200 int n = num % postTags.length;