lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java

   1 package org.apache.lucene.queryParser.standard.processors;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.StringReader;
  22 import java.util.ArrayList;
  23 import java.util.LinkedList;
  24 import java.util.List;
  25
  26 import org.apache.lucene.analysis.Analyzer;
  27 import org.apache.lucene.analysis.CachingTokenFilter;
  28 import org.apache.lucene.analysis.TokenStream;
  29 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  30 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  31 import org.apache.lucene.queryParser.core.QueryNodeException;
  32 import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
  33 import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
  34 import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
  35 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
  36 import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
  37 import org.apache.lucene.queryParser.core.nodes.NoTokenFoundQueryNode;
  38 import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
  39 import org.apache.lucene.queryParser.core.nodes.QueryNode;
  40 import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
  41 import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
  42 import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
  43 import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
  44 import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
  45 import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
  46 import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
  47 import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
  48
  49 /**
  50  * This processor verifies if {@link ConfigurationKeys#ANALYZER}
  51  * is defined in the {@link QueryConfigHandler}. If it is and the analyzer is
  52  * not <code>null</code>, it looks for every {@link FieldQueryNode} that is not
  53  * {@link WildcardQueryNode}, {@link FuzzyQueryNode} or
  54  * {@link ParametricQueryNode} contained in the query node tree, then it applies
  55  * the analyzer to that {@link FieldQueryNode} object. <br/>
  56  * <br/>
  57  * If the analyzer return only one term, the returned term is set to the
  58  * {@link FieldQueryNode} and it's returned. <br/>
  59  * <br/>
  60  * If the analyzer return more than one term, a {@link TokenizedPhraseQueryNode}
  61  * or {@link MultiPhraseQueryNode} is created, whether there is one or more
  62  * terms at the same position, and it's returned. <br/>
  63  * <br/>
  64  * If no term is returned by the analyzer a {@link NoTokenFoundQueryNode} object
  65  * is returned. <br/>
  66  *
  67  * @see ConfigurationKeys#ANALYZER
  68  * @see Analyzer
  69  * @see TokenStream
  70  */
  71 public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
  72
  73   private Analyzer analyzer;
  74
  75   private boolean positionIncrementsEnabled;
  76
  77   public AnalyzerQueryNodeProcessor() {
  78     // empty constructor
  79   }
  80
  81   @Override
  82   public QueryNode process(QueryNode queryTree) throws QueryNodeException {
  83     Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
  84
  85     if (analyzer != null) {
  86       this.analyzer = analyzer;
  87       this.positionIncrementsEnabled = false;
  88       Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
  89
  90       if (positionIncrementsEnabled != null) {
  91           this.positionIncrementsEnabled = positionIncrementsEnabled;
  92       }
  93
  94       if (this.analyzer != null) {
  95         return super.process(queryTree);
  96       }
  97
  98     }
  99
 100     return queryTree;
 101
 102   }
 103
 104   @Override
 105   protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
 106
 107     if (node instanceof TextableQueryNode
 108         && !(node instanceof WildcardQueryNode)
 109         && !(node instanceof FuzzyQueryNode)
 110         && !(node instanceof ParametricQueryNode)) {
 111
 112       FieldQueryNode fieldNode = ((FieldQueryNode) node);
 113       String text = fieldNode.getTextAsString();
 114       String field = fieldNode.getFieldAsString();
 115
 116       TokenStream source;
 117       try {
 118         source = this.analyzer.reusableTokenStream(field, new StringReader(text));
 119         source.reset();
 120       } catch (IOException e1) {
 121         throw new RuntimeException(e1);
 122       }
 123       CachingTokenFilter buffer = new CachingTokenFilter(source);
 124
 125       PositionIncrementAttribute posIncrAtt = null;
 126       int numTokens = 0;
 127       int positionCount = 0;
 128       boolean severalTokensAtSamePosition = false;
 129
 130       if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
 131         posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
 132       }
 133
 134       try {
 135
 136         while (buffer.incrementToken()) {
 137           numTokens++;
 138           int positionIncrement = (posIncrAtt != null) ? posIncrAtt
 139               .getPositionIncrement() : 1;
 140           if (positionIncrement != 0) {
 141             positionCount += positionIncrement;
 142
 143           } else {
 144             severalTokensAtSamePosition = true;
 145           }
 146
 147         }
 148
 149       } catch (IOException e) {
 150         // ignore
 151       }
 152
 153       try {
 154         // rewind the buffer stream
 155         buffer.reset();
 156
 157         // close original stream - all tokens buffered
 158         source.close();
 159       } catch (IOException e) {
 160         // ignore
 161       }
 162
 163       if (!buffer.hasAttribute(CharTermAttribute.class)) {
 164         return new NoTokenFoundQueryNode();
 165       }
 166
 167       CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
 168
 169       if (numTokens == 0) {
 170         return new NoTokenFoundQueryNode();
 171
 172       } else if (numTokens == 1) {
 173         String term = null;
 174         try {
 175           boolean hasNext;
 176           hasNext = buffer.incrementToken();
 177           assert hasNext == true;
 178           term = termAtt.toString();
 179
 180         } catch (IOException e) {
 181           // safe to ignore, because we know the number of tokens
 182         }
 183
 184         fieldNode.setText(term);
 185
 186         return fieldNode;
 187
 188       } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
 189         if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
 190           // no phrase query:
 191           LinkedList<QueryNode> children = new LinkedList<QueryNode>();
 192
 193           for (int i = 0; i < numTokens; i++) {
 194             String term = null;
 195             try {
 196               boolean hasNext = buffer.incrementToken();
 197               assert hasNext == true;
 198               term = termAtt.toString();
 199
 200             } catch (IOException e) {
 201               // safe to ignore, because we know the number of tokens
 202             }
 203
 204             children.add(new FieldQueryNode(field, term, -1, -1));
 205
 206           }
 207           return new GroupQueryNode(
 208               new StandardBooleanQueryNode(children, positionCount==1));
 209         } else {
 210           // phrase query:
 211           MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
 212
 213           List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
 214           int position = -1;
 215           int i = 0;
 216           int termGroupCount = 0;
 217           for (; i < numTokens; i++) {
 218             String term = null;
 219             int positionIncrement = 1;
 220             try {
 221               boolean hasNext = buffer.incrementToken();
 222               assert hasNext == true;
 223               term = termAtt.toString();
 224               if (posIncrAtt != null) {
 225                 positionIncrement = posIncrAtt.getPositionIncrement();
 226               }
 227
 228             } catch (IOException e) {
 229               // safe to ignore, because we know the number of tokens
 230             }
 231
 232             if (positionIncrement > 0 && multiTerms.size() > 0) {
 233
 234               for (FieldQueryNode termNode : multiTerms) {
 235
 236                 if (this.positionIncrementsEnabled) {
 237                   termNode.setPositionIncrement(position);
 238                 } else {
 239                   termNode.setPositionIncrement(termGroupCount);
 240                 }
 241
 242                 mpq.add(termNode);
 243
 244               }
 245
 246               // Only increment once for each "group" of
 247               // terms that were in the same position:
 248               termGroupCount++;
 249
 250               multiTerms.clear();
 251
 252             }
 253
 254             position += positionIncrement;
 255             multiTerms.add(new FieldQueryNode(field, term, -1, -1));
 256
 257           }
 258
 259           for (FieldQueryNode termNode : multiTerms) {
 260
 261             if (this.positionIncrementsEnabled) {
 262               termNode.setPositionIncrement(position);
 263
 264             } else {
 265               termNode.setPositionIncrement(termGroupCount);
 266             }
 267
 268             mpq.add(termNode);
 269
 270           }
 271
 272           return mpq;
 273
 274         }
 275
 276       } else {
 277
 278         TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
 279
 280         int position = -1;
 281
 282         for (int i = 0; i < numTokens; i++) {
 283           String term = null;
 284           int positionIncrement = 1;
 285
 286           try {
 287             boolean hasNext = buffer.incrementToken();
 288             assert hasNext == true;
 289             term = termAtt.toString();
 290
 291             if (posIncrAtt != null) {
 292               positionIncrement = posIncrAtt.getPositionIncrement();
 293             }
 294
 295           } catch (IOException e) {
 296             // safe to ignore, because we know the number of tokens
 297           }
 298
 299           FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
 300
 301           if (this.positionIncrementsEnabled) {
 302             position += positionIncrement;
 303             newFieldNode.setPositionIncrement(position);
 304
 305           } else {
 306             newFieldNode.setPositionIncrement(i);
 307           }
 308
 309           pq.add(newFieldNode);
 310
 311         }
 312
 313         return pq;
 314
 315       }
 316
 317     }
 318
 319     return node;
 320
 321   }
 322
 323   @Override
 324   protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
 325
 326     return node;
 327
 328   }
 329
 330   @Override
 331   protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
 332       throws QueryNodeException {
 333
 334     return children;
 335
 336   }
 337
 338 }