1 package org.apache.lucene.search.vectorhighlight;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 import java.io.IOException;
20 import java.util.Collections;
21 import java.util.LinkedList;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.WhitespaceAnalyzer;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.Field;
28 import org.apache.lucene.document.Field.Index;
29 import org.apache.lucene.document.Field.Store;
30 import org.apache.lucene.document.Field.TermVector;
31 import org.apache.lucene.index.IndexReader;
32 import org.apache.lucene.index.IndexWriter;
33 import org.apache.lucene.index.IndexWriterConfig;
34 import org.apache.lucene.index.TermFreqVector;
35 import org.apache.lucene.index.TermPositionVector;
36 import org.apache.lucene.index.TermVectorOffsetInfo;
37 import org.apache.lucene.queryParser.QueryParser;
38 import org.apache.lucene.search.Query;
39 import org.apache.lucene.store.Directory;
40 import org.apache.lucene.store.RAMDirectory;
41 import org.apache.lucene.util.Version;
44 * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
45 * of the document to be highlighted.
47 public class FieldTermStack {
49 private final String fieldName;
50 LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
52 public static void main( String[] args ) throws Exception {
53 Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
54 QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
55 Query query = parser.parse( "a x:b" );
56 FieldQuery fieldQuery = new FieldQuery( query, true, false );
58 Directory dir = new RAMDirectory();
59 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
60 Document doc = new Document();
61 doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
62 doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
63 writer.addDocument( doc );
66 IndexReader reader = IndexReader.open( dir, true );
67 new FieldTermStack( reader, 0, "f", fieldQuery );
74 * @param reader IndexReader of the index
75 * @param docId document id to be highlighted
76 * @param fieldName field of the document to be highlighted
77 * @param fieldQuery FieldQuery object
80 public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
81 this.fieldName = fieldName;
83 TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
84 if( tfv == null ) return; // just return to make null snippets
85 TermPositionVector tpv = null;
87 tpv = (TermPositionVector)tfv;
89 catch( ClassCastException e ){
90 return; // just return to make null snippets
93 Set<String> termSet = fieldQuery.getTermSet( fieldName );
94 // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
95 if( termSet == null ) return;
97 for( String term : tpv.getTerms() ){
98 if( !termSet.contains( term ) ) continue;
99 int index = tpv.indexOf( term );
100 TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
101 if( tvois == null ) return; // just return to make null snippets
102 int[] poss = tpv.getTermPositions( index );
103 if( poss == null ) return; // just return to make null snippets
104 for( int i = 0; i < tvois.length; i++ )
105 termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
109 Collections.sort( termList );
115 public String getFieldName(){
120 * @return the top TermInfo object of the stack
122 public TermInfo pop(){
123 return termList.poll();
127 * @param termInfo the TermInfo object to be put on the top of the stack
129 public void push( TermInfo termInfo ){
130 // termList.push( termInfo ); // avoid Java 1.6 feature
131 termList.addFirst( termInfo );
135 * to know whether the stack is empty
137 * @return true if the stack is empty, false if not
139 public boolean isEmpty(){
140 return termList == null || termList.size() == 0;
143 public static class TermInfo implements Comparable<TermInfo>{
146 final int startOffset;
150 TermInfo( String text, int startOffset, int endOffset, int position ){
152 this.startOffset = startOffset;
153 this.endOffset = endOffset;
154 this.position = position;
157 public String getText(){ return text; }
158 public int getStartOffset(){ return startOffset; }
159 public int getEndOffset(){ return endOffset; }
160 public int getPosition(){ return position; }
163 public String toString(){
164 StringBuilder sb = new StringBuilder();
165 sb.append( text ).append( '(' ).append(startOffset).append( ',' ).append( endOffset ).append( ',' ).append( position ).append( ')' );
166 return sb.toString();
169 public int compareTo( TermInfo o ) {
170 return ( this.position - o.position );