1 package org.apache.lucene.search.highlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 import java.io.IOException;
20 import java.util.ArrayList;
21 import java.util.Comparator;
22 import java.util.Iterator;
23 import java.util.List;
25 import org.apache.lucene.analysis.Token;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
28 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
29 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
30 import org.apache.lucene.index.TermPositionVector;
31 import org.apache.lucene.index.TermVectorOffsetInfo;
32 import org.apache.lucene.util.CollectionUtil;
34 public final class TokenStreamFromTermPositionVector extends TokenStream {
36 private final List<Token> positionedTokens = new ArrayList<Token>();
38 private Iterator<Token> tokensAtCurrentPosition;
40 private CharTermAttribute termAttribute;
42 private PositionIncrementAttribute positionIncrementAttribute;
44 private OffsetAttribute offsetAttribute;
49 * @param termPositionVector TermPositionVector that contains the data for
50 * creating the TokenStream. Must have positions and offsets.
52 public TokenStreamFromTermPositionVector(
53 final TermPositionVector termPositionVector) {
54 termAttribute = addAttribute(CharTermAttribute.class);
55 positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
56 offsetAttribute = addAttribute(OffsetAttribute.class);
57 final String[] terms = termPositionVector.getTerms();
58 for (int i = 0; i < terms.length; i++) {
59 final TermVectorOffsetInfo[] offsets = termPositionVector.getOffsets(i);
60 final int[] termPositions = termPositionVector.getTermPositions(i);
61 for (int j = 0; j < termPositions.length; j++) {
63 if (offsets != null) {
64 token = new Token(terms[i].toCharArray(), 0, terms[i].length(),
65 offsets[j].getStartOffset(), offsets[j].getEndOffset());
68 token.setEmpty().append(terms[i]);
70 // Yes - this is the position, not the increment! This is for
71 // sorting. This value
72 // will be corrected before use.
73 token.setPositionIncrement(termPositions[j]);
74 this.positionedTokens.add(token);
77 CollectionUtil.mergeSort(this.positionedTokens, tokenComparator);
78 int lastPosition = -1;
79 for (final Token token : this.positionedTokens) {
80 int thisPosition = token.getPositionIncrement();
81 token.setPositionIncrement(thisPosition - lastPosition);
82 lastPosition = thisPosition;
84 this.tokensAtCurrentPosition = this.positionedTokens.iterator();
87 private static final Comparator<Token> tokenComparator = new Comparator<Token>() {
88 public int compare(final Token o1, final Token o2) {
89 return o1.getPositionIncrement() - o2.getPositionIncrement();
94 public boolean incrementToken() throws IOException {
95 if (this.tokensAtCurrentPosition.hasNext()) {
96 final Token next = this.tokensAtCurrentPosition.next();
98 termAttribute.setEmpty().append(next);
99 positionIncrementAttribute.setPositionIncrement(next
100 .getPositionIncrement());
101 offsetAttribute.setOffset(next.startOffset(), next.endOffset());
108 public void reset() throws IOException {
109 this.tokensAtCurrentPosition = this.positionedTokens.iterator();