lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/NGramPhraseQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21
  22 import org.apache.lucene.index.IndexReader;
  23 import org.apache.lucene.index.Term;
  24
  25 /**
  26  * This is a {@link PhraseQuery} which is optimized for n-gram phrase query.
  27  * For example, when you query "ABCD" on a 2-gram field, you may want to use
  28  * NGramPhraseQuery rather than {@link PhraseQuery}, because NGramPhraseQuery
  29  * will {@link #rewrite(IndexReader)} the query to "AB/0 CD/2", while {@link PhraseQuery}
  30  * will query "AB/0 BC/1 CD/2" (where term/position).
  31  *
  32  */
  33 public class NGramPhraseQuery extends PhraseQuery {
  34   private final int n;
  35
  36   /**
  37    * Constructor that takes gram size.
  38    * @param n
  39    */
  40   public NGramPhraseQuery(int n){
  41     super();
  42     this.n = n;
  43   }
  44
  45   @Override
  46   public Query rewrite(IndexReader reader) throws IOException {
  47     if(getSlop() != 0) return super.rewrite(reader);
  48
  49     // check whether optimizable or not
  50     if(n < 2 || // non-overlap n-gram cannot be optimized
  51         getTerms().length < 3)  // too short to optimize
  52       return super.rewrite(reader);
  53
  54     // check all posIncrement is 1
  55     // if not, cannot optimize
  56     int[] positions = getPositions();
  57     Term[] terms = getTerms();
  58     int prevPosition = positions[0];
  59     for(int i = 1; i < positions.length; i++){
  60       int pos = positions[i];
  61       if(prevPosition + 1 != pos) return super.rewrite(reader);
  62       prevPosition = pos;
  63     }
  64
  65     // now create the new optimized phrase query for n-gram
  66     PhraseQuery optimized = new PhraseQuery();
  67     int pos = 0;
  68     final int lastPos = terms.length - 1;
  69     for(int i = 0; i < terms.length; i++){
  70       if(pos % n == 0 || pos >= lastPos){
  71         optimized.add(terms[i], positions[i]);
  72       }
  73       pos++;
  74     }
  75
  76     return optimized;
  77   }
  78
  79   /** Returns true iff <code>o</code> is equal to this. */
  80   @Override
  81   public boolean equals(Object o) {
  82     if (!(o instanceof NGramPhraseQuery))
  83       return false;
  84     NGramPhraseQuery other = (NGramPhraseQuery)o;
  85     if(this.n != other.n) return false;
  86     return super.equals(other);
  87   }
  88
  89   /** Returns a hash code value for this object.*/
  90   @Override
  91   public int hashCode() {
  92     return Float.floatToIntBits(getBoost())
  93       ^ getSlop()
  94       ^ getTerms().hashCode()
  95       ^ getPositions().hashCode()
  96       ^ n;
  97   }
  98 }