1 package org.apache.lucene.analysis.miscellaneous;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.analysis.Token;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
23 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
24 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
25 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
26 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
27 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
28 import org.apache.lucene.index.Payload;
30 import java.io.IOException;
34 * Joins two token streams and leaves the last token of the first stream available
35 * to be used when updating the token values in the second stream based on that token.
37 * The default implementation adds last prefix token end offset to the suffix token start and end offsets.
39 * <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
40 * the ones located in org.apache.lucene.analysis.tokenattributes.
42 public class PrefixAwareTokenFilter extends TokenStream {
44 private TokenStream prefix;
45 private TokenStream suffix;
47 private CharTermAttribute termAtt;
48 private PositionIncrementAttribute posIncrAtt;
49 private PayloadAttribute payloadAtt;
50 private OffsetAttribute offsetAtt;
51 private TypeAttribute typeAtt;
52 private FlagsAttribute flagsAtt;
54 private CharTermAttribute p_termAtt;
55 private PositionIncrementAttribute p_posIncrAtt;
56 private PayloadAttribute p_payloadAtt;
57 private OffsetAttribute p_offsetAtt;
58 private TypeAttribute p_typeAtt;
59 private FlagsAttribute p_flagsAtt;
61 public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
65 prefixExhausted = false;
67 termAtt = addAttribute(CharTermAttribute.class);
68 posIncrAtt = addAttribute(PositionIncrementAttribute.class);
69 payloadAtt = addAttribute(PayloadAttribute.class);
70 offsetAtt = addAttribute(OffsetAttribute.class);
71 typeAtt = addAttribute(TypeAttribute.class);
72 flagsAtt = addAttribute(FlagsAttribute.class);
74 p_termAtt = prefix.addAttribute(CharTermAttribute.class);
75 p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
76 p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
77 p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
78 p_typeAtt = prefix.addAttribute(TypeAttribute.class);
79 p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
82 private Token previousPrefixToken = new Token();
83 private Token reusableToken = new Token();
85 private boolean prefixExhausted;
88 public final boolean incrementToken() throws IOException {
89 if (!prefixExhausted) {
90 Token nextToken = getNextPrefixInputToken(reusableToken);
91 if (nextToken == null) {
92 prefixExhausted = true;
94 previousPrefixToken.reinit(nextToken);
95 // Make it a deep copy
96 Payload p = previousPrefixToken.getPayload();
98 previousPrefixToken.setPayload((Payload) p.clone());
100 setCurrentToken(nextToken);
105 Token nextToken = getNextSuffixInputToken(reusableToken);
106 if (nextToken == null) {
110 nextToken = updateSuffixToken(nextToken, previousPrefixToken);
111 setCurrentToken(nextToken);
115 private void setCurrentToken(Token token) {
116 if (token == null) return;
118 termAtt.copyBuffer(token.buffer(), 0, token.length());
119 posIncrAtt.setPositionIncrement(token.getPositionIncrement());
120 flagsAtt.setFlags(token.getFlags());
121 offsetAtt.setOffset(token.startOffset(), token.endOffset());
122 typeAtt.setType(token.type());
123 payloadAtt.setPayload(token.getPayload());
126 private Token getNextPrefixInputToken(Token token) throws IOException {
127 if (!prefix.incrementToken()) return null;
128 token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
129 token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
130 token.setFlags(p_flagsAtt.getFlags());
131 token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
132 token.setType(p_typeAtt.type());
133 token.setPayload(p_payloadAtt.getPayload());
137 private Token getNextSuffixInputToken(Token token) throws IOException {
138 if (!suffix.incrementToken()) return null;
139 token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
140 token.setPositionIncrement(posIncrAtt.getPositionIncrement());
141 token.setFlags(flagsAtt.getFlags());
142 token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
143 token.setType(typeAtt.type());
144 token.setPayload(payloadAtt.getPayload());
149 * The default implementation adds last prefix token end offset to the suffix token start and end offsets.
151 * @param suffixToken a token from the suffix stream
152 * @param lastPrefixToken the last token from the prefix stream
153 * @return consumer token
155 public Token updateSuffixToken(Token suffixToken, Token lastPrefixToken) {
156 suffixToken.setStartOffset(lastPrefixToken.endOffset() + suffixToken.startOffset());
157 suffixToken.setEndOffset(lastPrefixToken.endOffset() + suffixToken.endOffset());
162 public void end() throws IOException {
168 public void close() throws IOException {
174 public void reset() throws IOException {
176 if (prefix != null) {
177 prefixExhausted = false;
180 if (suffix != null) {
187 public TokenStream getPrefix() {
191 public void setPrefix(TokenStream prefix) {
192 this.prefix = prefix;
195 public TokenStream getSuffix() {
199 public void setSuffix(TokenStream suffix) {
200 this.suffix = suffix;