lucene-java-3.5.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java

   1 package org.apache.lucene.search.vectorhighlight;
   2 /**
   3  * Licensed to the Apache Software Foundation (ASF) under one or more
   4  * contributor license agreements.  See the NOTICE file distributed with
   5  * this work for additional information regarding copyright ownership.
   6  * The ASF licenses this file to You under the Apache License, Version 2.0
   7  * (the "License"); you may not use this file except in compliance with
   8  * the License.  You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import java.util.ArrayList;
  20 import java.util.LinkedList;
  21 import java.util.List;
  22
  23 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
  24 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
  25
  26 /**
  27  * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
  28  * to create a FieldFragList object.
  29  */
  30 public class FieldPhraseList {
  31
  32   LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
  33
  34   /**
  35    * create a FieldPhraseList that has no limit on the number of phrases to analyze
  36    *
  37    * @param fieldTermStack FieldTermStack object
  38    * @param fieldQuery FieldQuery object
  39    */
  40   public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
  41       this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
  42   }
  43
  44   /**
  45    * a constructor.
  46    *
  47    * @param fieldTermStack FieldTermStack object
  48    * @param fieldQuery FieldQuery object
  49    * @param phraseLimit maximum size of phraseList
  50    */
  51   public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){
  52     final String field = fieldTermStack.getFieldName();
  53
  54     LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
  55     QueryPhraseMap currMap = null;
  56     QueryPhraseMap nextMap = null;
  57     while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
  58     {
  59       phraseCandidate.clear();
  60
  61       TermInfo ti = fieldTermStack.pop();
  62       currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
  63
  64       // if not found, discard top TermInfo from stack, then try next element
  65       if( currMap == null ) continue;
  66
  67       // if found, search the longest phrase
  68       phraseCandidate.add( ti );
  69       while( true ){
  70         ti = fieldTermStack.pop();
  71         nextMap = null;
  72         if( ti != null )
  73           nextMap = currMap.getTermMap( ti.getText() );
  74         if( ti == null || nextMap == null ){
  75           if( ti != null )
  76             fieldTermStack.push( ti );
  77           if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
  78             addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
  79           }
  80           else{
  81             while( phraseCandidate.size() > 1 ){
  82               fieldTermStack.push( phraseCandidate.removeLast() );
  83               currMap = fieldQuery.searchPhrase( field, phraseCandidate );
  84               if( currMap != null ){
  85                 addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
  86                 break;
  87               }
  88             }
  89           }
  90           break;
  91         }
  92         else{
  93           phraseCandidate.add( ti );
  94           currMap = nextMap;
  95         }
  96       }
  97     }
  98   }
  99
 100   void addIfNoOverlap( WeightedPhraseInfo wpi ){
 101     for( WeightedPhraseInfo existWpi : phraseList ){
 102       if( existWpi.isOffsetOverlap( wpi ) ) return;
 103     }
 104     phraseList.add( wpi );
 105   }
 106
 107   public static class WeightedPhraseInfo {
 108
 109     String text;  // unnecessary member, just exists for debugging purpose
 110     List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
 111                             // but if position-gap > 1 and slop > 0 then size() could be greater than 1
 112     float boost;  // query boost
 113     int seqnum;
 114
 115     public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
 116       this( terms, boost, 0 );
 117     }
 118
 119     public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
 120       this.boost = boost;
 121       this.seqnum = number;
 122       termsOffsets = new ArrayList<Toffs>( terms.size() );
 123       TermInfo ti = terms.get( 0 );
 124       termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
 125       if( terms.size() == 1 ){
 126         text = ti.getText();
 127         return;
 128       }
 129       StringBuilder sb = new StringBuilder();
 130       sb.append( ti.getText() );
 131       int pos = ti.getPosition();
 132       for( int i = 1; i < terms.size(); i++ ){
 133         ti = terms.get( i );
 134         sb.append( ti.getText() );
 135         if( ti.getPosition() - pos == 1 ){
 136           Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
 137           to.setEndOffset( ti.getEndOffset() );
 138         }
 139         else{
 140           termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
 141         }
 142         pos = ti.getPosition();
 143       }
 144       text = sb.toString();
 145     }
 146
 147     public int getStartOffset(){
 148       return termsOffsets.get( 0 ).startOffset;
 149     }
 150
 151     public int getEndOffset(){
 152       return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
 153     }
 154
 155     public boolean isOffsetOverlap( WeightedPhraseInfo other ){
 156       int so = getStartOffset();
 157       int eo = getEndOffset();
 158       int oso = other.getStartOffset();
 159       int oeo = other.getEndOffset();
 160       if( so <= oso && oso < eo ) return true;
 161       if( so < oeo && oeo <= eo ) return true;
 162       if( oso <= so && so < oeo ) return true;
 163       if( oso < eo && eo <= oeo ) return true;
 164       return false;
 165     }
 166
 167     @Override
 168     public String toString(){
 169       StringBuilder sb = new StringBuilder();
 170       sb.append( text ).append( '(' ).append( boost ).append( ")(" );
 171       for( Toffs to : termsOffsets ){
 172         sb.append( to );
 173       }
 174       sb.append( ')' );
 175       return sb.toString();
 176     }
 177
 178     public static class Toffs {
 179       int startOffset;
 180       int endOffset;
 181       public Toffs( int startOffset, int endOffset ){
 182         this.startOffset = startOffset;
 183         this.endOffset = endOffset;
 184       }
 185       public void setEndOffset( int endOffset ){
 186         this.endOffset = endOffset;
 187       }
 188       public int getStartOffset(){
 189         return startOffset;
 190       }
 191       public int getEndOffset(){
 192         return endOffset;
 193       }
 194       @Override
 195       public String toString(){
 196         StringBuilder sb = new StringBuilder();
 197         sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
 198         return sb.toString();
 199       }
 200     }
 201   }
 202 }