1 package org.apache.lucene.search.highlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.analysis.Token;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
23 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
26 * One, or several overlapping tokens, along with the score(s) and the scope of
29 public class TokenGroup {
// Fixed capacity of the tokens/scores arrays below; addToken only stores a
// token while numTokens is still below this value.
31 private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
// Parallel arrays: addToken writes a token and its score at the same index.
32 Token [] tokens=new Token[MAX_NUM_TOKENS_PER_GROUP];
33 float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
// Second offset range maintained by addToken next to startOffset/endOffset.
// NOTE(review): presumably the span of the scoring ("matching") tokens only -
// confirm against the conditions in addToken.
38 int matchStartOffset, matchEndOffset;
// Attributes obtained from the TokenStream handed to the constructor; addToken
// and isDistinct read the current token's offsets and term text through them.
40 private OffsetAttribute offsetAtt;
41 private CharTermAttribute termAtt;
// Creates a group bound to the given stream: keeps the stream's
// OffsetAttribute and CharTermAttribute (as returned by addAttribute) so that
// addToken() and isDistinct() can later read the stream's current token.
43 public TokenGroup(TokenStream tokenStream) {
44 offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
45 termAtt = tokenStream.addAttribute(CharTermAttribute.class);
// Records the token currently exposed by offsetAtt/termAtt in this group,
// together with the given score, and updates the group's offset ranges.
// The caller passes only the score; offsets and term text are read from the
// attributes captured in the constructor.
48 void addToken(float score) {
// Only store while there is room in the fixed-size arrays.
49 if (numTokens < MAX_NUM_TOKENS_PER_GROUP) {
50 int termStartOffset = offsetAtt.startOffset();
51 int termEndOffset = offsetAtt.endOffset();
// Both ranges start out as exactly this token's offsets.
// NOTE(review): presumably the "first token in the group" case - confirm
// against the guarding condition.
53 startOffset = matchStartOffset = termStartOffset;
54 endOffset = matchEndOffset = termEndOffset;
// Widen the group range so that it also covers this token.
57 startOffset = Math.min(startOffset, termStartOffset);
58 endOffset = Math.max(endOffset, termEndOffset);
// Restart the match range at this token (re-reads the offsets from offsetAtt
// instead of using the locals read above).
// NOTE(review): presumably taken when no scoring token has been seen yet -
// confirm against the guarding condition.
61 matchStartOffset = offsetAtt.startOffset();
62 matchEndOffset = offsetAtt.endOffset();
// Widen the match range so that it also covers this token.
64 matchStartOffset = Math.min(matchStartOffset, termStartOffset);
65 matchEndOffset = Math.max(matchEndOffset, termEndOffset);
// Build a new Token carrying this token's offsets, then clear its term and
// append the contents of termAtt to it.
70 Token token = new Token(termStartOffset, termEndOffset);
71 token.setEmpty().append(termAtt);
// Token and score go into the same slot of the two parallel arrays.
72 tokens[numTokens] = token;
73 scores[numTokens] = score;
// Returns true when the start offset currently reported by offsetAtt is at or
// beyond this group's endOffset.
// NOTE(review): presumably used to decide that the stream's current token does
// not overlap this group - confirm against the caller.
78 boolean isDistinct() {
79 return offsetAtt.startOffset() >= endOffset;
88 * @param index a value between 0 and numTokens - 1
89 * @return the token at position {@code index}
91 public Token getToken(int index)
98 * @param index a value between 0 and numTokens - 1
99 * @return the score at position {@code index}
101 public float getScore(int index) {
// Direct read of the scores array; index is not checked against numTokens
// here, so any index inside the array's fixed length is returned as stored.
102 return scores[index];
106 * @return the end position in the original text
108 public int getEndOffset() {
113 * @return the number of tokens in this group
115 public int getNumTokens() {
120 * @return the start position in the original text
122 public int getStartOffset() {
127 * @return all tokens' scores summed up
129 public float getTotalScore() {