1 package org.apache.lucene.queryParser.standard.processors;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.StringReader;
22 import java.util.ArrayList;
23 import java.util.LinkedList;
24 import java.util.List;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.CachingTokenFilter;
28 import org.apache.lucene.analysis.TokenStream;
29 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
30 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
31 import org.apache.lucene.queryParser.core.QueryNodeException;
32 import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
33 import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
34 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
35 import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
36 import org.apache.lucene.queryParser.core.nodes.NoTokenFoundQueryNode;
37 import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
38 import org.apache.lucene.queryParser.core.nodes.QueryNode;
39 import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
40 import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
41 import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
42 import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
43 import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
44 import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
45 import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
46 import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
49 * This processor verifies if {@link ConfigurationKeys#ANALYZER}
50 * is defined in the {@link QueryConfigHandler}. If it is and the analyzer is
51 * not <code>null</code>, it looks for every {@link FieldQueryNode} that is not
52 * {@link WildcardQueryNode}, {@link FuzzyQueryNode} or
53 * {@link ParametricQueryNode} contained in the query node tree, then it applies
54 * the analyzer to that {@link FieldQueryNode} object. <br/>
56 * If the analyzer returns only one term, the returned term is set on the
57 * {@link FieldQueryNode} and that node is returned. <br/>
59 * If the analyzer returns more than one term, a {@link TokenizedPhraseQueryNode}
60 * or {@link MultiPhraseQueryNode} is created, depending on whether more than one
61 * term occupies the same position, and it's returned. <br/>
63 * If no term is returned by the analyzer a {@link NoTokenFoundQueryNode} object
66 * @see ConfigurationKeys#ANALYZER
70 public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
// Analyzer used by postProcessNode() to tokenize a node's text. It is assigned in
// process() from ConfigurationKeys.ANALYZER when that value is non-null.
72 private Analyzer analyzer;
// Selects which position value is written onto generated term nodes in
// postProcessNode(). process() resets it to false and then copies
// ConfigurationKeys.ENABLE_POSITION_INCREMENTS when that value is non-null.
74 private boolean positionIncrementsEnabled;
// No-argument constructor.
76 public AnalyzerQueryNodeProcessor() {
// Entry point: reads the analyzer and the position-increment setting from the
// query configuration, stores both on this processor, and delegates to the
// inherited tree processing when an analyzer is available.
//
// @param queryTree root of the query node tree to process
// @return the result of super.process(queryTree) when an analyzer is set
// @throws QueryNodeException declared by the signature; propagated from the superclass call
81 public QueryNode process(QueryNode queryTree) throws QueryNodeException {
82 Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
84 if (analyzer != null) {
85 this.analyzer = analyzer;
// Reset to false first; only overwritten below when the configured value is non-null.
86 this.positionIncrementsEnabled = false;
// Held as a boxed Boolean so it can be null-checked before being unboxed into the field.
87 Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
89 if (positionIncrementsEnabled != null) {
90 this.positionIncrementsEnabled = positionIncrementsEnabled;
// NOTE(review): if this check sits inside the `analyzer != null` branch above, the field
// was just assigned from a non-null local and the check is redundant - confirm.
93 if (this.analyzer != null) {
94 return super.process(queryTree);
// Runs the configured analyzer over the text of a textable node (wildcard, fuzzy and
// parametric nodes are excluded) and builds a replacement based on how many tokens the
// analyzer produced and how they are positioned:
//   - no term attribute or zero tokens: a NoTokenFoundQueryNode is returned;
//   - exactly one token: the term is written back into the original field node;
//   - several tokens that are unquoted or share a single position: a
//     StandardBooleanQueryNode over one FieldQueryNode per token (wrapped in a
//     GroupQueryNode when positionCount == 1);
//   - quoted text with tokens stacked at the same position: a MultiPhraseQueryNode is built;
//   - otherwise a TokenizedPhraseQueryNode is built.
//
// @param node the node being visited
// @throws QueryNodeException declared by the signature
104 protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
106 if (node instanceof TextableQueryNode
107 && !(node instanceof WildcardQueryNode)
108 && !(node instanceof FuzzyQueryNode)
109 && !(node instanceof ParametricQueryNode)) {
// NOTE(review): the guard tests TextableQueryNode but the cast is to FieldQueryNode;
// confirm every TextableQueryNode that reaches here is a FieldQueryNode, otherwise this
// cast throws ClassCastException.
111 FieldQueryNode fieldNode = ((FieldQueryNode) node);
112 String text = fieldNode.getTextAsString();
113 String field = fieldNode.getFieldAsString();
// Tokenize the node's text with the analyzer for this field.
117 source = this.analyzer.reusableTokenStream(field, new StringReader(text));
// An I/O failure while obtaining the stream is rethrown unchecked with the cause preserved.
119 } catch (IOException e1) {
120 throw new RuntimeException(e1);
// The tokens are iterated twice through this wrapper: once below to count them and
// their positions, and again later to read the terms. The existing "rewind the buffer
// stream" comment marks where the wrapper is reset between the two passes.
122 CachingTokenFilter buffer = new CachingTokenFilter(source);
// Stays null when the stream exposes no position-increment attribute; later code then
// treats every token as advancing by one position.
124 PositionIncrementAttribute posIncrAtt = null;
126 int positionCount = 0;
127 boolean severalTokensAtSamePosition = false;
129 if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
130 posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
// First pass: walk every token and tally positions.
135 while (buffer.incrementToken()) {
137 int positionIncrement = (posIncrAtt != null) ? posIncrAtt
138 .getPositionIncrement() : 1;
// Non-zero increments are accumulated into positionCount.
139 if (positionIncrement != 0) {
140 positionCount += positionIncrement;
// Records that tokens are stacked at one position (presumably the zero-increment
// case, as the flag name suggests - confirm).
143 severalTokensAtSamePosition = true;
148 } catch (IOException e) {
153 // rewind the buffer stream
156 // close original stream - all tokens buffered
158 } catch (IOException e) {
// Without a term attribute there is no term text to read, so report "no token".
162 if (!buffer.hasAttribute(CharTermAttribute.class)) {
163 return new NoTokenFoundQueryNode();
166 CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
// numTokens is presumably the token count gathered during the first pass - confirm.
168 if (numTokens == 0) {
169 return new NoTokenFoundQueryNode();
171 } else if (numTokens == 1) {
// incrementToken() is invoked outside the assert, so the stream still advances when
// assertions are disabled.
175 hasNext = buffer.incrementToken();
176 assert hasNext == true;
177 term = termAtt.toString();
179 } catch (IOException e) {
180 // safe to ignore, because we know the number of tokens
// Single token: reuse the original node and replace its text with the analyzed term.
183 fieldNode.setText(term);
// Several tokens, and either some share a position or the text was not quoted.
187 } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
// Everything at one position, or unquoted text: combine the terms in a boolean node.
188 if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
190 LinkedList<QueryNode> children = new LinkedList<QueryNode>();
// Second pass: one FieldQueryNode per token; -1, -1 are passed for the last two
// constructor arguments.
192 for (int i = 0; i < numTokens; i++) {
195 boolean hasNext = buffer.incrementToken();
196 assert hasNext == true;
197 term = termAtt.toString();
199 } catch (IOException e) {
200 // safe to ignore, because we know the number of tokens
203 children.add(new FieldQueryNode(field, term, -1, -1));
// Only the single-position case is wrapped in a GroupQueryNode, and the two cases pass
// different boolean flags (true vs. false) to StandardBooleanQueryNode.
// NOTE(review): the flag is presumably disableCoord - verify against StandardBooleanQueryNode.
206 if (positionCount == 1)
207 return new GroupQueryNode(
208 new StandardBooleanQueryNode(children, true));
210 return new StandardBooleanQueryNode(children, false);
// Quoted text whose tokens span more than one position with some stacked: build a
// multi-phrase node, grouping terms that share a position.
214 MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
// Terms collected for the position currently being assembled.
216 List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
219 int termGroupCount = 0;
220 for (; i < numTokens; i++) {
// Defaults to 1 when no position-increment attribute is available.
222 int positionIncrement = 1;
224 boolean hasNext = buffer.incrementToken();
225 assert hasNext == true;
226 term = termAtt.toString();
227 if (posIncrAtt != null) {
228 positionIncrement = posIncrAtt.getPositionIncrement();
231 } catch (IOException e) {
232 // safe to ignore, because we know the number of tokens
// A positive increment with pending terms means the previous position is complete:
// stamp the pending terms before this token is added below.
235 if (positionIncrement > 0 && multiTerms.size() > 0) {
237 for (FieldQueryNode termNode : multiTerms) {
// With position increments enabled the running position is used; the other
// assignment uses the ordinal of the term group.
239 if (this.positionIncrementsEnabled) {
240 termNode.setPositionIncrement(position);
242 termNode.setPositionIncrement(termGroupCount);
249 // Only increment once for each "group" of
250 // terms that were in the same position:
// Advance the running position, then queue this token's term for its group.
257 position += positionIncrement;
258 multiTerms.add(new FieldQueryNode(field, term, -1, -1));
// Stamp the terms still pending after the last token, using the same rule as above.
262 for (FieldQueryNode termNode : multiTerms) {
264 if (this.positionIncrementsEnabled) {
265 termNode.setPositionIncrement(position);
268 termNode.setPositionIncrement(termGroupCount);
// Phrase case: one FieldQueryNode per token, added in order to a TokenizedPhraseQueryNode.
281 TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
285 for (int i = 0; i < numTokens; i++) {
// Defaults to 1 when no position-increment attribute is available.
287 int positionIncrement = 1;
290 boolean hasNext = buffer.incrementToken();
291 assert hasNext == true;
292 term = termAtt.toString();
294 if (posIncrAtt != null) {
295 positionIncrement = posIncrAtt.getPositionIncrement();
298 } catch (IOException e) {
299 // safe to ignore, because we know the number of tokens
302 FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
// With position increments enabled the node receives the running sum of increments;
// the other assignment uses the token index i.
304 if (this.positionIncrementsEnabled) {
305 position += positionIncrement;
306 newFieldNode.setPositionIncrement(position);
309 newFieldNode.setPositionIncrement(i);
312 pq.add(newFieldNode);
327 protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
334 protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
335 throws QueryNodeException {