1 package org.apache.lucene.search.vectorhighlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.util.ArrayList;
21 import java.util.Iterator;
22 import java.util.List;
24 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
27 * A simple implementation of {@link FragListBuilder}.
// NOTE(review): every line of this listing carries a leading number and the numbering has gaps,
// so some original lines (braces, loop headers, a few statements) are not visible here.
// The comments below describe only what the visible lines show.
//
// FragListBuilder implementation that fills a FieldFragList with (start, end, phrases)
// entries computed from the phrases of a FieldPhraseList.
29 public class SimpleFragListBuilder implements FragListBuilder {
// Margin passed to the one-argument constructor by the no-argument constructor.
31 public static final int MARGIN_DEFAULT = 6;
// Multiplier applied to the margin when deriving minFragCharSize in the constructor.
32 public static final int MIN_FRAG_CHAR_SIZE_FACTOR = 3;
// Smallest fragCharSize that createFieldFragList accepts; assigned in the constructor.
35 final int minFragCharSize;
// Creates a builder for the given margin.
// NOTE(review): the condition guarding the throw below is not visible in this listing; the
// message suggests it rejects negative margins -- confirm. The declaration and assignment of
// the `margin` field read by createFieldFragList are likewise not visible.
37 public SimpleFragListBuilder( int margin ){
39 throw new IllegalArgumentException( "margin(" + margin + ") is too small. It must be 0 or higher." );
// Math.max keeps the minimum at 1 or more, so a margin of 0 still yields a positive minimum.
42 this.minFragCharSize = Math.max( 1, margin * MIN_FRAG_CHAR_SIZE_FACTOR );
// Creates a builder that uses MARGIN_DEFAULT as its margin.
45 public SimpleFragListBuilder(){
46 this( MARGIN_DEFAULT );
// Builds a FieldFragList of size fragCharSize from the phrases in fieldPhraseList.
// Throws IllegalArgumentException when fragCharSize is smaller than minFragCharSize.
49 public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
50 if( fragCharSize < minFragCharSize )
51 throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " +
52 minFragCharSize + " or higher." );
54 FieldFragList ffl = new FieldFragList( fragCharSize );
// wpil collects the phrases that are passed to ffl.add together with a fragment's offsets.
56 List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
57 Iterator<WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
58 WeightedPhraseInfo phraseInfo = null;
// NOTE(review): no visible line reads or writes `taken` after this declaration; its role
// depends on lines missing from this listing.
60 boolean taken = false;
// NOTE(review): the break/continue statements below imply an enclosing loop whose header is
// not visible; `startOffset` is also declared on a line that is not shown.
// Stop once the phrase iterator is exhausted; otherwise fetch the next phrase.
63 if( !ite.hasNext() ) break;
64 phraseInfo = ite.next();
67 if( phraseInfo == null ) break;
69 // if the phrase violates the border of previous fragment, discard it and try next phrase
70 if( phraseInfo.getStartOffset() < startOffset ) continue;
73 wpil.add( phraseInfo );
// Fragment start: `margin` before the phrase's start offset, but never below startOffset.
74 int st = phraseInfo.getStartOffset() - margin < startOffset ?
75 startOffset : phraseInfo.getStartOffset() - margin;
// Fragment end: st + fragCharSize, stretched to the phrase's end offset when the phrase
// reaches past that point.
76 int en = st + fragCharSize;
77 if( phraseInfo.getEndOffset() > en )
78 en = phraseInfo.getEndOffset();
// Advance to the following phrase.
// NOTE(review): any hasNext() check or loop header guarding this call is not visible here.
83 phraseInfo = ite.next();
85 if( phraseInfo == null ) break;
// A following phrase is added to the current fragment only if it ends at or before en.
89 if( phraseInfo.getEndOffset() <= en )
90 wpil.add( phraseInfo );
// Record the fragment's start offset, end offset and collected phrases.
// NOTE(review): the update of startOffset, any reset of wpil, and the method's return
// statement are not visible in this listing.
94 ffl.add( st, en, wpil );