1 package org.apache.lucene.search.highlight;
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements. See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
20 import java.util.List;
22 import org.apache.lucene.analysis.TokenStream;
23 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
24 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
25 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
26 import org.apache.lucene.search.spans.Spans;
30 * {@link Fragmenter} implementation which breaks text up into same-size
31 * fragments but does not split up {@link Spans}. This is a simple sample class.
33 public class SimpleSpanFragmenter implements Fragmenter {
34 private static final int DEFAULT_FRAGMENT_SIZE = 100;
35 private int fragmentSize;
36 private int currentNumFrags;
37 private int position = -1;
38 private QueryScorer queryScorer;
39 private int waitForPos = -1;
41 private CharTermAttribute termAtt;
42 private PositionIncrementAttribute posIncAtt;
43 private OffsetAttribute offsetAtt;
46 * @param queryScorer QueryScorer that was used to score hits
48 public SimpleSpanFragmenter(QueryScorer queryScorer) {
49 this(queryScorer, DEFAULT_FRAGMENT_SIZE);
53 * @param queryScorer QueryScorer that was used to score hits
54 * @param fragmentSize size in characters of each fragment
56 public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
57 this.fragmentSize = fragmentSize;
58 this.queryScorer = queryScorer;
62 * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment()
64 public boolean isNewFragment() {
65 position += posIncAtt.getPositionIncrement();
67 if (waitForPos == position) {
69 } else if (waitForPos != -1) {
73 WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.toString());
75 if (wSpanTerm != null) {
76 List<PositionSpan> positionSpans = wSpanTerm.getPositionSpans();
78 for (int i = 0; i < positionSpans.size(); i++) {
79 if (positionSpans.get(i).start == position) {
80 waitForPos = positionSpans.get(i).end + 1;
86 boolean isNewFrag = offsetAtt.endOffset() >= (fragmentSize * currentNumFrags)
87 && (textSize - offsetAtt.endOffset()) >= (fragmentSize >>> 1);
98 * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String, org.apache.lucene.analysis.TokenStream)
100 public void start(String originalText, TokenStream tokenStream) {
103 textSize = originalText.length();
104 termAtt = tokenStream.addAttribute(CharTermAttribute.class);
105 posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
106 offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);