2 Egothor Software License version 1.00
3 Copyright (C) 1997-2004 Leo Galambos.
4 Copyright (C) 2002-2004 "Egothor developers"
5 on behalf of the Egothor Project.
8 This software is copyrighted by the "Egothor developers". If this
9 license applies to a single file or document, the "Egothor developers"
10 are the people or entities mentioned as copyright holders in that file
11 or document. If this license applies to the Egothor project as a
12 whole, the copyright holders are the people or entities mentioned in
13 the file CREDITS. This file can be found in the same location as this
14 license in the distribution.
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions are
19 1. Redistributions of source code must retain the above copyright
20 notice, the list of contributors, this list of conditions, and the
22 2. Redistributions in binary form must reproduce the above copyright
23 notice, the list of contributors, this list of conditions, and the
24 disclaimer that follows these conditions in the documentation
25 and/or other materials provided with the distribution.
26 3. The name "Egothor" must not be used to endorse or promote products
27 derived from this software without prior written permission. For
28 written permission, please contact Leo.G@seznam.cz
29 4. Products derived from this software may not be called "Egothor",
30 nor may "Egothor" appear in their name, without prior written
31 permission from Leo.G@seznam.cz.
33 In addition, we request that you include in the end-user documentation
34 provided with the redistribution and/or in the software itself an
35 acknowledgement equivalent to the following:
36 "This product includes software developed by the Egothor Project.
37 http://egothor.sf.net/"
39 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
41 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
42 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
43 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
44 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
45 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
46 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
47 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
48 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
49 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 This software consists of voluntary contributions made by many
52 individuals on behalf of the Egothor Project and was originally
53 created by Leo Galambos (Leo.G@seznam.cz).
55 package org.egothor.stemmer;
58 * The Diff object generates a patch string.
60 * A patch string is actually a command to a stemmer telling it how to reduce a
61 * word to its root. For example, to reduce the word teacher to its root teach
62 * the patch string Db would be generated. This command tells the stemmer to
63 * delete the last 2 characters from the word teacher to reach the stem (the
64 * patch commands are applied starting from the last character in order to save
78 * Constructor for the Diff object.
85 * Constructor for the Diff object
87 * @param ins Description of the Parameter
88 * @param del Description of the Parameter
89 * @param rep Description of the Parameter
90 * @param noop Description of the Parameter
92 public Diff(int ins, int del, int rep, int noop) {
100 * Apply the given patch string <tt>diff</tt> to the given string <tt>
103 * @param dest Destination string
104 * @param diff Patch string
106 public static void apply(StringBuilder dest, CharSequence diff) {
113 int pos = dest.length() - 1;
118 for (int i = 0; i < diff.length() / 2; i++) {
119 char cmd = diff.charAt(2 * i);
120 char param = diff.charAt(2 * i + 1);
121 int par_num = (param - 'a' + 1);
124 pos = pos - par_num + 1;
127 dest.setCharAt(pos, param);
133 * delete par_num chars from index pos
135 // String s = orig.toString();
136 // s = s.substring( 0, pos ) + s.substring( o + 1 );
137 // orig = new StringBuffer( s );
138 dest.delete(pos, o + 1);
141 dest.insert(pos += 1, param);
146 } catch (StringIndexOutOfBoundsException x) {
147 // x.printStackTrace();
148 } catch (ArrayIndexOutOfBoundsException x) {
149 // x.printStackTrace();
154 * Construct a patch string that transforms a to b.
156 * @param a String 1st string
157 * @param b String 2nd string
160 public synchronized String exec(String a, String b) {
161 if (a == null || b == null) {
169 int go[] = new int[4];
176 * setup memory if needed => processing speed up
178 maxx = a.length() + 1;
179 maxy = b.length() + 1;
180 if ((maxx >= sizex) || (maxy >= sizey)) {
183 net = new int[sizex][sizey];
184 way = new int[sizex][sizey];
190 for (x = 0; x < maxx; x++) {
191 for (y = 0; y < maxy; y++) {
197 * set known persistent values
199 for (x = 1; x < maxx; x++) {
203 for (y = 1; y < maxy; y++) {
208 for (x = 1; x < maxx; x++) {
209 for (y = 1; y < maxy; y++) {
210 go[X] = net[x - 1][y] + DELETE;
211 // way on x costs 1 unit
212 go[Y] = net[x][y - 1] + INSERT;
213 // way on y costs 1 unit
214 go[R] = net[x - 1][y - 1] + REPLACE;
215 go[D] = net[x - 1][y - 1]
216 + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100);
217 // diagonal costs 0, when no change
219 if (go[min] >= go[X]) {
222 if (go[min] > go[Y]) {
225 if (go[min] > go[R]) {
229 net[x][y] = (short) go[min];
233 // read the patch string
234 StringBuffer result = new StringBuffer();
235 final char base = 'a' - 1;
238 for (x = maxx - 1, y = maxy - 1; x + y != 0;) {
241 if (equals != base) {
242 result.append("-" + (equals));
250 if (deletes != base) {
251 result.append("D" + (deletes));
254 if (equals != base) {
255 result.append("-" + (equals));
259 result.append(b.charAt(--y));
263 if (deletes != base) {
264 result.append("D" + (deletes));
267 if (equals != base) {
268 result.append("-" + (equals));
272 result.append(b.charAt(--y));
277 if (deletes != base) {
278 result.append("D" + (deletes));
288 if (deletes != base) {
289 result.append("D" + (deletes));
293 return result.toString();