1 package org.apache.lucene.search.spell;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.util.Arrays;
22 public class JaroWinklerDistance implements StringDistance {
24 private float threshold = 0.7f;
26 private int[] matches(String s1, String s2) {
28 if (s1.length() > s2.length()) {
35 int range = Math.max(max.length() / 2 - 1, 0);
36 int[] matchIndexes = new int[min.length()];
37 Arrays.fill(matchIndexes, -1);
38 boolean[] matchFlags = new boolean[max.length()];
40 for (int mi = 0; mi < min.length(); mi++) {
41 char c1 = min.charAt(mi);
42 for (int xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max
43 .length()); xi < xn; xi++) {
44 if (!matchFlags[xi] && c1 == max.charAt(xi)) {
45 matchIndexes[mi] = xi;
46 matchFlags[xi] = true;
52 char[] ms1 = new char[matches];
53 char[] ms2 = new char[matches];
54 for (int i = 0, si = 0; i < min.length(); i++) {
55 if (matchIndexes[i] != -1) {
56 ms1[si] = min.charAt(i);
60 for (int i = 0, si = 0; i < max.length(); i++) {
62 ms2[si] = max.charAt(i);
66 int transpositions = 0;
67 for (int mi = 0; mi < ms1.length; mi++) {
68 if (ms1[mi] != ms2[mi]) {
73 for (int mi = 0; mi < min.length(); mi++) {
74 if (s1.charAt(mi) == s2.charAt(mi)) {
80 return new int[] { matches, transpositions / 2, prefix, max.length() };
83 public float getDistance(String s1, String s2) {
84 int[] mtp = matches(s1, s2);
89 float j = ((m / s1.length() + m / s2.length() + (m - mtp[1]) / m)) / 3;
90 float jw = j < getThreshold() ? j : j + Math.min(0.1f, 1f / mtp[3]) * mtp[2]
96 * Sets the threshold used to determine when Winkler bonus should be used.
97 * Set to a negative value to get the Jaro distance.
98 * @param threshold the new value of the threshold
100 public void setThreshold(float threshold) {
101 this.threshold = threshold;
105 * Returns the current value of the threshold used for adding the Winkler bonus.
106 * The default value is 0.7.
107 * @return the current value of the threshold
109 public float getThreshold() {