--- /dev/null
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+
+/** Implements the fuzzy search query. The similarity measurement
+ * is based on the Levenshtein (edit distance) algorithm.
+ *
+ * <p><em>Warning:</em> this query is not very scalable with its default prefix
+ * length of 0 - in this case, *every* term will be enumerated and
+ * cause an edit score calculation.
+ *
+ * <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
+ * as default. So terms will be collected and scored according to their
+ * edit distance. Only the top terms are used for building the {@link BooleanQuery}.
+ * It is not recommended to change the rewrite mode for fuzzy queries.
+ */
+public class FuzzyQuery extends MultiTermQuery {
+
+ public final static float defaultMinSimilarity = 0.5f;
+ public final static int defaultPrefixLength = 0;
+ public final static int defaultMaxExpansions = Integer.MAX_VALUE;
+
+ private float minimumSimilarity;
+ private int prefixLength;
+ private boolean termLongEnough = false;
+
+ protected Term term;
+
+ /**
+ * Create a new FuzzyQuery that will match terms with a similarity
+ * of at least <code>minimumSimilarity</code> to <code>term</code>.
+ * If a <code>prefixLength</code> > 0 is specified, a common prefix
+ * of that length is also required.
+ *
+ * @param term the term to search for
+ * @param minimumSimilarity a value between 0 and 1 to set the required similarity
+ * between the query term and the matching terms. For example, for a
+ * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
+ * as the query term is considered similar to the query term if the edit distance
+ * between both terms is less than <code>length(term)*0.5</code>
+ * @param prefixLength length of common (non-fuzzy) prefix
+ * @param maxExpansions the maximum number of terms to match. If this number is
+ * greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
+ * then the maxClauseCount will be used instead.
+ * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0
+ * or if prefixLength < 0
+ */
+ public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
+ int maxExpansions) {
+ this.term = term;
+
+ if (minimumSimilarity >= 1.0f)
+ throw new IllegalArgumentException("minimumSimilarity >= 1");
+ else if (minimumSimilarity < 0.0f)
+ throw new IllegalArgumentException("minimumSimilarity < 0");
+ if (prefixLength < 0)
+ throw new IllegalArgumentException("prefixLength < 0");
+ if (maxExpansions < 0)
+ throw new IllegalArgumentException("maxExpansions < 0");
+
+ setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
+
+ if (term.text().length() > 1.0f / (1.0f - minimumSimilarity)) {
+ this.termLongEnough = true;
+ }
+
+ this.minimumSimilarity = minimumSimilarity;
+ this.prefixLength = prefixLength;
+ }
+
+ /**
+ * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, prefixLength, Integer.MAX_VALUE)}.
+ */
+ public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
+ this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
+ }
+
+ /**
+ * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0, Integer.MAX_VALUE)}.
+ */
+ public FuzzyQuery(Term term, float minimumSimilarity) {
+ this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
+ }
+
+ /**
+ * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0, Integer.MAX_VALUE)}.
+ */
+ public FuzzyQuery(Term term) {
+ this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);
+ }
+
+ /**
+ * Returns the minimum similarity that is required for this query to match.
+ * @return float value between 0.0 and 1.0
+ */
+ public float getMinSimilarity() {
+ return minimumSimilarity;
+ }
+
+ /**
+ * Returns the non-fuzzy prefix length. This is the number of characters at the start
+ * of a term that must be identical (not fuzzy) to the query term if the query
+ * is to match that term.
+ */
+ public int getPrefixLength() {
+ return prefixLength;
+ }
+
+ @Override
+ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ if (!termLongEnough) { // can only match if it's exact
+ return new SingleTermEnum(reader, term);
+ }
+ return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
+ }
+
+ /**
+ * Returns the pattern term.
+ */
+ public Term getTerm() {
+ return term;
+ }
+
+ @Override
+ public String toString(String field) {
+ final StringBuilder buffer = new StringBuilder();
+ if (!term.field().equals(field)) {
+ buffer.append(term.field());
+ buffer.append(":");
+ }
+ buffer.append(term.text());
+ buffer.append('~');
+ buffer.append(Float.toString(minimumSimilarity));
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + Float.floatToIntBits(minimumSimilarity);
+ result = prime * result + prefixLength;
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ FuzzyQuery other = (FuzzyQuery) obj;
+ if (Float.floatToIntBits(minimumSimilarity) != Float
+ .floatToIntBits(other.minimumSimilarity))
+ return false;
+ if (prefixLength != other.prefixLength)
+ return false;
+ if (term == null) {
+ if (other.term != null)
+ return false;
+ } else if (!term.equals(other.term))
+ return false;
+ return true;
+ }
+
+
+}