lucene-java-3.4.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java

   1 package org.apache.lucene.search.highlight;
   2
   3
   4 /**
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 import java.util.List;
  21
  22 import org.apache.lucene.analysis.TokenStream;
  23 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  24 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  25 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  26 import org.apache.lucene.search.spans.Spans;
  27
  28
  29 /**
  30  * {@link Fragmenter} implementation which breaks text up into same-size
  31  * fragments but does not split up {@link Spans}. This is a simple sample class.
  32  */
  33 public class SimpleSpanFragmenter implements Fragmenter {
  34   private static final int DEFAULT_FRAGMENT_SIZE = 100;
  35   private int fragmentSize;
  36   private int currentNumFrags;
  37   private int position = -1;
  38   private QueryScorer queryScorer;
  39   private int waitForPos = -1;
  40   private int textSize;
  41   private CharTermAttribute termAtt;
  42   private PositionIncrementAttribute posIncAtt;
  43   private OffsetAttribute offsetAtt;
  44
  45   /**
  46    * @param queryScorer QueryScorer that was used to score hits
  47    */
  48   public SimpleSpanFragmenter(QueryScorer queryScorer) {
  49     this(queryScorer, DEFAULT_FRAGMENT_SIZE);
  50   }
  51
  52   /**
  53    * @param queryScorer QueryScorer that was used to score hits
  54    * @param fragmentSize size in bytes of each fragment
  55    */
  56   public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
  57     this.fragmentSize = fragmentSize;
  58     this.queryScorer = queryScorer;
  59   }
  60
  61   /* (non-Javadoc)
  62    * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment()
  63    */
  64   public boolean isNewFragment() {
  65     position += posIncAtt.getPositionIncrement();
  66
  67     if (waitForPos == position) {
  68       waitForPos = -1;
  69     } else if (waitForPos != -1) {
  70       return false;
  71     }
  72
  73     WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.toString());
  74
  75     if (wSpanTerm != null) {
  76       List<PositionSpan> positionSpans = wSpanTerm.getPositionSpans();
  77
  78       for (int i = 0; i < positionSpans.size(); i++) {
  79         if (positionSpans.get(i).start == position) {
  80           waitForPos = positionSpans.get(i).end + 1;
  81           break;
  82         }
  83       }
  84     }
  85
  86     boolean isNewFrag = offsetAtt.endOffset() >= (fragmentSize * currentNumFrags)
  87         && (textSize - offsetAtt.endOffset()) >= (fragmentSize >>> 1);
  88
  89     if (isNewFrag) {
  90       currentNumFrags++;
  91     }
  92
  93     return isNewFrag;
  94   }
  95
  96
  97   /* (non-Javadoc)
  98    * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String, org.apache.lucene.analysis.TokenStream)
  99    */
 100   public void start(String originalText, TokenStream tokenStream) {
 101     position = -1;
 102     currentNumFrags = 1;
 103     textSize = originalText.length();
 104     termAtt = tokenStream.addAttribute(CharTermAttribute.class);
 105     posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
 106     offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
 107   }
 108 }