lucene-java-3.5.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java

   1 package org.apache.lucene.search.vectorhighlight;
   2 /**
   3  * Licensed to the Apache Software Foundation (ASF) under one or more
   4  * contributor license agreements.  See the NOTICE file distributed with
   5  * this work for additional information regarding copyright ownership.
   6  * The ASF licenses this file to You under the Apache License, Version 2.0
   7  * (the "License"); you may not use this file except in compliance with
   8  * the License.  You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import java.io.IOException;
  20 import java.util.Collections;
  21 import java.util.LinkedList;
  22 import java.util.Set;
  23
  24 import org.apache.lucene.analysis.Analyzer;
  25 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  26 import org.apache.lucene.document.Document;
  27 import org.apache.lucene.document.Field;
  28 import org.apache.lucene.document.Field.Index;
  29 import org.apache.lucene.document.Field.Store;
  30 import org.apache.lucene.document.Field.TermVector;
  31 import org.apache.lucene.index.IndexReader;
  32 import org.apache.lucene.index.IndexWriter;
  33 import org.apache.lucene.index.IndexWriterConfig;
  34 import org.apache.lucene.index.TermFreqVector;
  35 import org.apache.lucene.index.TermPositionVector;
  36 import org.apache.lucene.index.TermVectorOffsetInfo;
  37 import org.apache.lucene.queryParser.QueryParser;
  38 import org.apache.lucene.search.Query;
  39 import org.apache.lucene.store.Directory;
  40 import org.apache.lucene.store.RAMDirectory;
  41 import org.apache.lucene.util.Version;
  42
  43 /**
  44  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
  45  * of the document to be highlighted.
  46  */
  47 public class FieldTermStack {
  48
  49   private final String fieldName;
  50   LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
  51
  52   public static void main( String[] args ) throws Exception {
  53     Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
  54     QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
  55     Query query = parser.parse( "a x:b" );
  56     FieldQuery fieldQuery = new FieldQuery( query, true, false );
  57
  58     Directory dir = new RAMDirectory();
  59     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
  60     Document doc = new Document();
  61     doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
  62     doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
  63     writer.addDocument( doc );
  64     writer.close();
  65
  66     IndexReader reader = IndexReader.open( dir, true );
  67     new FieldTermStack( reader, 0, "f", fieldQuery );
  68     reader.close();
  69   }
  70
  71   /**
  72    * a constructor.
  73    *
  74    * @param reader IndexReader of the index
  75    * @param docId document id to be highlighted
  76    * @param fieldName field of the document to be highlighted
  77    * @param fieldQuery FieldQuery object
  78    * @throws IOException
  79    */
  80   public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
  81     this.fieldName = fieldName;
  82
  83     Set<String> termSet = fieldQuery.getTermSet( fieldName );
  84     // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
  85     if( termSet == null ) return;
  86
  87     TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
  88     if( tfv == null ) return; // just return to make null snippets
  89     TermPositionVector tpv = null;
  90     try{
  91       tpv = (TermPositionVector)tfv;
  92     }
  93     catch( ClassCastException e ){
  94       return; // just return to make null snippets
  95     }
  96
  97
  98     for( String term : tpv.getTerms() ){
  99       if( !termSet.contains( term ) ) continue;
 100       int index = tpv.indexOf( term );
 101       TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
 102       if( tvois == null ) return; // just return to make null snippets
 103       int[] poss = tpv.getTermPositions( index );
 104       if( poss == null ) return; // just return to make null snippets
 105       for( int i = 0; i < tvois.length; i++ )
 106         termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
 107     }
 108
 109     // sort by position
 110     Collections.sort( termList );
 111   }
 112
 113   /**
 114    * @return field name
 115    */
 116   public String getFieldName(){
 117     return fieldName;
 118   }
 119
 120   /**
 121    * @return the top TermInfo object of the stack
 122    */
 123   public TermInfo pop(){
 124     return termList.poll();
 125   }
 126
 127   /**
 128    * @param termInfo the TermInfo object to be put on the top of the stack
 129    */
 130   public void push( TermInfo termInfo ){
 131     // termList.push( termInfo );  // avoid Java 1.6 feature
 132     termList.addFirst( termInfo );
 133   }
 134
 135   /**
 136    * to know whether the stack is empty
 137    *
 138    * @return true if the stack is empty, false if not
 139    */
 140   public boolean isEmpty(){
 141     return termList == null || termList.size() == 0;
 142   }
 143
 144   public static class TermInfo implements Comparable<TermInfo>{
 145
 146     final String text;
 147     final int startOffset;
 148     final int endOffset;
 149     final int position;
 150
 151     TermInfo( String text, int startOffset, int endOffset, int position ){
 152       this.text = text;
 153       this.startOffset = startOffset;
 154       this.endOffset = endOffset;
 155       this.position = position;
 156     }
 157
 158     public String getText(){ return text; }
 159     public int getStartOffset(){ return startOffset; }
 160     public int getEndOffset(){ return endOffset; }
 161     public int getPosition(){ return position; }
 162
 163     @Override
 164     public String toString(){
 165       StringBuilder sb = new StringBuilder();
 166       sb.append( text ).append( '(' ).append(startOffset).append( ',' ).append( endOffset ).append( ',' ).append( position ).append( ')' );
 167       return sb.toString();
 168     }
 169
 170     public int compareTo( TermInfo o ) {
 171       return ( this.position - o.position );
 172     }
 173   }
 174 }