+++ /dev/null
-package org.apache.lucene.search.vectorhighlight;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
-import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
-
-/**
- * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
- * to create a FieldFragList object.
- */
-public class FieldPhraseList {
-
- LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
-
- /**
- * create a FieldPhraseList that has no limit on the number of phrases to analyze
- *
- * @param fieldTermStack FieldTermStack object
- * @param fieldQuery FieldQuery object
- */
- public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
- this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
- }
-
- /**
- * a constructor.
- *
- * @param fieldTermStack FieldTermStack object
- * @param fieldQuery FieldQuery object
- * @param phraseLimit maximum size of phraseList
- */
- public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
- final String field = fieldTermStack.getFieldName();
-
- LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
- QueryPhraseMap currMap = null;
- QueryPhraseMap nextMap = null;
- while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
- {
- phraseCandidate.clear();
-
- TermInfo ti = fieldTermStack.pop();
- currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
-
- // if not found, discard top TermInfo from stack, then try next element
- if( currMap == null ) continue;
-
- // if found, search the longest phrase
- phraseCandidate.add( ti );
- while( true ){
- ti = fieldTermStack.pop();
- nextMap = null;
- if( ti != null )
- nextMap = currMap.getTermMap( ti.getText() );
- if( ti == null || nextMap == null ){
- if( ti != null )
- fieldTermStack.push( ti );
- if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
- addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
- }
- else{
- while( phraseCandidate.size() > 1 ){
- fieldTermStack.push( phraseCandidate.removeLast() );
- currMap = fieldQuery.searchPhrase( field, phraseCandidate );
- if( currMap != null ){
- addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
- break;
- }
- }
- }
- break;
- }
- else{
- phraseCandidate.add( ti );
- currMap = nextMap;
- }
- }
- }
- }
-
- void addIfNoOverlap( WeightedPhraseInfo wpi ){
- for( WeightedPhraseInfo existWpi : phraseList ){
- if( existWpi.isOffsetOverlap( wpi ) ) return;
- }
- phraseList.add( wpi );
- }
-
- public static class WeightedPhraseInfo {
-
- String text; // unnecessary member, just exists for debugging purpose
- List<Toffs> termsOffsets; // usually termsOffsets.size() == 1,
- // but if position-gap > 1 and slop > 0 then size() could be greater than 1
- float boost; // query boost
- int seqnum;
-
- public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
- this( terms, boost, 0 );
- }
-
- public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
- this.boost = boost;
- this.seqnum = number;
- termsOffsets = new ArrayList<Toffs>( terms.size() );
- TermInfo ti = terms.get( 0 );
- termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
- if( terms.size() == 1 ){
- text = ti.getText();
- return;
- }
- StringBuilder sb = new StringBuilder();
- sb.append( ti.getText() );
- int pos = ti.getPosition();
- for( int i = 1; i < terms.size(); i++ ){
- ti = terms.get( i );
- sb.append( ti.getText() );
- if( ti.getPosition() - pos == 1 ){
- Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
- to.setEndOffset( ti.getEndOffset() );
- }
- else{
- termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
- }
- pos = ti.getPosition();
- }
- text = sb.toString();
- }
-
- public int getStartOffset(){
- return termsOffsets.get( 0 ).startOffset;
- }
-
- public int getEndOffset(){
- return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
- }
-
- public boolean isOffsetOverlap( WeightedPhraseInfo other ){
- int so = getStartOffset();
- int eo = getEndOffset();
- int oso = other.getStartOffset();
- int oeo = other.getEndOffset();
- if( so <= oso && oso < eo ) return true;
- if( so < oeo && oeo <= eo ) return true;
- if( oso <= so && so < oeo ) return true;
- if( oso < eo && eo <= oeo ) return true;
- return false;
- }
-
- @Override
- public String toString(){
- StringBuilder sb = new StringBuilder();
- sb.append( text ).append( '(' ).append( boost ).append( ")(" );
- for( Toffs to : termsOffsets ){
- sb.append( to );
- }
- sb.append( ')' );
- return sb.toString();
- }
-
- public static class Toffs {
- int startOffset;
- int endOffset;
- public Toffs( int startOffset, int endOffset ){
- this.startOffset = startOffset;
- this.endOffset = endOffset;
- }
- public void setEndOffset( int endOffset ){
- this.endOffset = endOffset;
- }
- public int getStartOffset(){
- return startOffset;
- }
- public int getEndOffset(){
- return endOffset;
- }
- @Override
- public String toString(){
- StringBuilder sb = new StringBuilder();
- sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
- return sb.toString();
- }
- }
- }
-}