lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java

   1 package org.apache.lucene.queryParser.standard.processors;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.StringReader;
  22 import java.util.ArrayList;
  23 import java.util.LinkedList;
  24 import java.util.List;
  25
  26 import org.apache.lucene.analysis.Analyzer;
  27 import org.apache.lucene.analysis.CachingTokenFilter;
  28 import org.apache.lucene.analysis.TokenStream;
  29 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  30 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  31 import org.apache.lucene.queryParser.core.QueryNodeException;
  32 import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
  33 import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
  34 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
  35 import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
  36 import org.apache.lucene.queryParser.core.nodes.NoTokenFoundQueryNode;
  37 import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
  38 import org.apache.lucene.queryParser.core.nodes.QueryNode;
  39 import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
  40 import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
  41 import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
  42 import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
  43 import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
  44 import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
  45 import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
  46 import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
  47
  48 /**
  49  * This processor verifies if {@link ConfigurationKeys#ANALYZER}
  50  * is defined in the {@link QueryConfigHandler}. If it is and the analyzer is
  51  * not <code>null</code>, it looks for every {@link FieldQueryNode} that is not
  52  * {@link WildcardQueryNode}, {@link FuzzyQueryNode} or
  53  * {@link ParametricQueryNode} contained in the query node tree, then it applies
  54  * the analyzer to that {@link FieldQueryNode} object. <br/>
  55  * <br/>
  56  * If the analyzer return only one term, the returned term is set to the
  57  * {@link FieldQueryNode} and it's returned. <br/>
  58  * <br/>
  59  * If the analyzer return more than one term, a {@link TokenizedPhraseQueryNode}
  60  * or {@link MultiPhraseQueryNode} is created, whether there is one or more
  61  * terms at the same position, and it's returned. <br/>
  62  * <br/>
  63  * If no term is returned by the analyzer a {@link NoTokenFoundQueryNode} object
  64  * is returned. <br/>
  65  *
  66  * @see ConfigurationKeys#ANALYZER
  67  * @see Analyzer
  68  * @see TokenStream
  69  */
  70 public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
  71
  72   private Analyzer analyzer;
  73
  74   private boolean positionIncrementsEnabled;
  75
  76   public AnalyzerQueryNodeProcessor() {
  77     // empty constructor
  78   }
  79
  80   @Override
  81   public QueryNode process(QueryNode queryTree) throws QueryNodeException {
  82     Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
  83
  84     if (analyzer != null) {
  85       this.analyzer = analyzer;
  86       this.positionIncrementsEnabled = false;
  87       Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
  88
  89       if (positionIncrementsEnabled != null) {
  90           this.positionIncrementsEnabled = positionIncrementsEnabled;
  91       }
  92
  93       if (this.analyzer != null) {
  94         return super.process(queryTree);
  95       }
  96
  97     }
  98
  99     return queryTree;
 100
 101   }
 102
 103   @Override
 104   protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
 105
 106     if (node instanceof TextableQueryNode
 107         && !(node instanceof WildcardQueryNode)
 108         && !(node instanceof FuzzyQueryNode)
 109         && !(node instanceof ParametricQueryNode)) {
 110
 111       FieldQueryNode fieldNode = ((FieldQueryNode) node);
 112       String text = fieldNode.getTextAsString();
 113       String field = fieldNode.getFieldAsString();
 114
 115       TokenStream source;
 116       try {
 117         source = this.analyzer.reusableTokenStream(field, new StringReader(text));
 118         source.reset();
 119       } catch (IOException e1) {
 120         throw new RuntimeException(e1);
 121       }
 122       CachingTokenFilter buffer = new CachingTokenFilter(source);
 123
 124       PositionIncrementAttribute posIncrAtt = null;
 125       int numTokens = 0;
 126       int positionCount = 0;
 127       boolean severalTokensAtSamePosition = false;
 128
 129       if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
 130         posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
 131       }
 132
 133       try {
 134
 135         while (buffer.incrementToken()) {
 136           numTokens++;
 137           int positionIncrement = (posIncrAtt != null) ? posIncrAtt
 138               .getPositionIncrement() : 1;
 139           if (positionIncrement != 0) {
 140             positionCount += positionIncrement;
 141
 142           } else {
 143             severalTokensAtSamePosition = true;
 144           }
 145
 146         }
 147
 148       } catch (IOException e) {
 149         // ignore
 150       }
 151
 152       try {
 153         // rewind the buffer stream
 154         buffer.reset();
 155
 156         // close original stream - all tokens buffered
 157         source.close();
 158       } catch (IOException e) {
 159         // ignore
 160       }
 161
 162       if (!buffer.hasAttribute(CharTermAttribute.class)) {
 163         return new NoTokenFoundQueryNode();
 164       }
 165
 166       CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
 167
 168       if (numTokens == 0) {
 169         return new NoTokenFoundQueryNode();
 170
 171       } else if (numTokens == 1) {
 172         String term = null;
 173         try {
 174           boolean hasNext;
 175           hasNext = buffer.incrementToken();
 176           assert hasNext == true;
 177           term = termAtt.toString();
 178
 179         } catch (IOException e) {
 180           // safe to ignore, because we know the number of tokens
 181         }
 182
 183         fieldNode.setText(term);
 184
 185         return fieldNode;
 186
 187       } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
 188         if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
 189           // no phrase query:
 190           LinkedList<QueryNode> children = new LinkedList<QueryNode>();
 191
 192           for (int i = 0; i < numTokens; i++) {
 193             String term = null;
 194             try {
 195               boolean hasNext = buffer.incrementToken();
 196               assert hasNext == true;
 197               term = termAtt.toString();
 198
 199             } catch (IOException e) {
 200               // safe to ignore, because we know the number of tokens
 201             }
 202
 203             children.add(new FieldQueryNode(field, term, -1, -1));
 204
 205           }
 206           if (positionCount == 1)
 207             return new GroupQueryNode(
 208               new StandardBooleanQueryNode(children, true));
 209           else
 210             return new StandardBooleanQueryNode(children, false);
 211
 212         } else {
 213           // phrase query:
 214           MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
 215
 216           List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
 217           int position = -1;
 218           int i = 0;
 219           int termGroupCount = 0;
 220           for (; i < numTokens; i++) {
 221             String term = null;
 222             int positionIncrement = 1;
 223             try {
 224               boolean hasNext = buffer.incrementToken();
 225               assert hasNext == true;
 226               term = termAtt.toString();
 227               if (posIncrAtt != null) {
 228                 positionIncrement = posIncrAtt.getPositionIncrement();
 229               }
 230
 231             } catch (IOException e) {
 232               // safe to ignore, because we know the number of tokens
 233             }
 234
 235             if (positionIncrement > 0 && multiTerms.size() > 0) {
 236
 237               for (FieldQueryNode termNode : multiTerms) {
 238
 239                 if (this.positionIncrementsEnabled) {
 240                   termNode.setPositionIncrement(position);
 241                 } else {
 242                   termNode.setPositionIncrement(termGroupCount);
 243                 }
 244
 245                 mpq.add(termNode);
 246
 247               }
 248
 249               // Only increment once for each "group" of
 250               // terms that were in the same position:
 251               termGroupCount++;
 252
 253               multiTerms.clear();
 254
 255             }
 256
 257             position += positionIncrement;
 258             multiTerms.add(new FieldQueryNode(field, term, -1, -1));
 259
 260           }
 261
 262           for (FieldQueryNode termNode : multiTerms) {
 263
 264             if (this.positionIncrementsEnabled) {
 265               termNode.setPositionIncrement(position);
 266
 267             } else {
 268               termNode.setPositionIncrement(termGroupCount);
 269             }
 270
 271             mpq.add(termNode);
 272
 273           }
 274
 275           return mpq;
 276
 277         }
 278
 279       } else {
 280
 281         TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
 282
 283         int position = -1;
 284
 285         for (int i = 0; i < numTokens; i++) {
 286           String term = null;
 287           int positionIncrement = 1;
 288
 289           try {
 290             boolean hasNext = buffer.incrementToken();
 291             assert hasNext == true;
 292             term = termAtt.toString();
 293
 294             if (posIncrAtt != null) {
 295               positionIncrement = posIncrAtt.getPositionIncrement();
 296             }
 297
 298           } catch (IOException e) {
 299             // safe to ignore, because we know the number of tokens
 300           }
 301
 302           FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
 303
 304           if (this.positionIncrementsEnabled) {
 305             position += positionIncrement;
 306             newFieldNode.setPositionIncrement(position);
 307
 308           } else {
 309             newFieldNode.setPositionIncrement(i);
 310           }
 311
 312           pq.add(newFieldNode);
 313
 314         }
 315
 316         return pq;
 317
 318       }
 319
 320     }
 321
 322     return node;
 323
 324   }
 325
 326   @Override
 327   protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
 328
 329     return node;
 330
 331   }
 332
 333   @Override
 334   protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
 335       throws QueryNodeException {
 336
 337     return children;
 338
 339   }
 340
 341 }