1 package org.apache.lucene.search.vectorhighlight;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 import java.util.ArrayList;
20 import java.util.LinkedList;
21 import java.util.List;
23 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
24 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
27 * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
28 * to create a FieldFragList object.
30 public class FieldPhraseList {
32 LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
35 * create a FieldPhraseList that has no limit on the number of phrases to analyze
37 * @param fieldTermStack FieldTermStack object
38 * @param fieldQuery FieldQuery object
40 public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
41 this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
47 * @param fieldTermStack FieldTermStack object
48 * @param fieldQuery FieldQuery object
49 * @param phraseLimit maximum size of phraseList
51 public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
52 final String field = fieldTermStack.getFieldName();
54 LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
55 QueryPhraseMap currMap = null;
56 QueryPhraseMap nextMap = null;
57 while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
59 phraseCandidate.clear();
61 TermInfo ti = fieldTermStack.pop();
62 currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
64 // if not found, discard top TermInfo from stack, then try next element
65 if( currMap == null ) continue;
67 // if found, search the longest phrase
68 phraseCandidate.add( ti );
70 ti = fieldTermStack.pop();
73 nextMap = currMap.getTermMap( ti.getText() );
74 if( ti == null || nextMap == null ){
76 fieldTermStack.push( ti );
77 if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
78 addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
81 while( phraseCandidate.size() > 1 ){
82 fieldTermStack.push( phraseCandidate.removeLast() );
83 currMap = fieldQuery.searchPhrase( field, phraseCandidate );
84 if( currMap != null ){
85 addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
93 phraseCandidate.add( ti );
100 void addIfNoOverlap( WeightedPhraseInfo wpi ){
101 for( WeightedPhraseInfo existWpi : phraseList ){
102 if( existWpi.isOffsetOverlap( wpi ) ) return;
104 phraseList.add( wpi );
107 public static class WeightedPhraseInfo {
109 String text; // unnecessary member, just exists for debugging purpose
110 List<Toffs> termsOffsets; // usually termsOffsets.size() == 1,
111 // but if position-gap > 1 and slop > 0 then size() could be greater than 1
112 float boost; // query boost
115 public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
116 this( terms, boost, 0 );
119 public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
121 this.seqnum = number;
122 termsOffsets = new ArrayList<Toffs>( terms.size() );
123 TermInfo ti = terms.get( 0 );
124 termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
125 if( terms.size() == 1 ){
129 StringBuilder sb = new StringBuilder();
130 sb.append( ti.getText() );
131 int pos = ti.getPosition();
132 for( int i = 1; i < terms.size(); i++ ){
134 sb.append( ti.getText() );
135 if( ti.getPosition() - pos == 1 ){
136 Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
137 to.setEndOffset( ti.getEndOffset() );
140 termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
142 pos = ti.getPosition();
144 text = sb.toString();
147 public int getStartOffset(){
148 return termsOffsets.get( 0 ).startOffset;
151 public int getEndOffset(){
152 return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
155 public boolean isOffsetOverlap( WeightedPhraseInfo other ){
156 int so = getStartOffset();
157 int eo = getEndOffset();
158 int oso = other.getStartOffset();
159 int oeo = other.getEndOffset();
160 if( so <= oso && oso < eo ) return true;
161 if( so < oeo && oeo <= eo ) return true;
162 if( oso <= so && so < oeo ) return true;
163 if( oso < eo && eo <= oeo ) return true;
168 public String toString(){
169 StringBuilder sb = new StringBuilder();
170 sb.append( text ).append( '(' ).append( boost ).append( ")(" );
171 for( Toffs to : termsOffsets ){
175 return sb.toString();
178 public static class Toffs {
181 public Toffs( int startOffset, int endOffset ){
182 this.startOffset = startOffset;
183 this.endOffset = endOffset;
185 public void setEndOffset( int endOffset ){
186 this.endOffset = endOffset;
188 public int getStartOffset(){
191 public int getEndOffset(){
195 public String toString(){
196 StringBuilder sb = new StringBuilder();
197 sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
198 return sb.toString();