-/*
- Egothor Software License version 1.00
- Copyright (C) 1997-2004 Leo Galambos.
- Copyright (C) 2002-2004 "Egothor developers"
- on behalf of the Egothor Project.
- All rights reserved.
-
- This software is copyrighted by the "Egothor developers". If this
- license applies to a single file or document, the "Egothor developers"
- are the people or entities mentioned as copyright holders in that file
- or document. If this license applies to the Egothor project as a
- whole, the copyright holders are the people or entities mentioned in
- the file CREDITS. This file can be found in the same location as this
- license in the distribution.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- 1. Redistributions of source code must retain the above copyright
- notice, the list of contributors, this list of conditions, and the
- following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, the list of contributors, this list of conditions, and the
- disclaimer that follows these conditions in the documentation
- and/or other materials provided with the distribution.
- 3. The name "Egothor" must not be used to endorse or promote products
- derived from this software without prior written permission. For
- written permission, please contact Leo.G@seznam.cz
- 4. Products derived from this software may not be called "Egothor",
- nor may "Egothor" appear in their name, without prior written
- permission from Leo.G@seznam.cz.
-
- In addition, we request that you include in the end-user documentation
- provided with the redistribution and/or in the software itself an
- acknowledgement equivalent to the following:
- "This product includes software developed by the Egothor Project.
- http://egothor.sf.net/"
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
- IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- This software consists of voluntary contributions made by many
- individuals on behalf of the Egothor Project and was originally
- created by Leo Galambos (Leo.G@seznam.cz).
- */
-package org.egothor.stemmer;
-
-/**
- * The Diff object generates a patch string.
- * <p>
- * A patch string is actually a command to a stemmer telling it how to reduce a
- * word to its root. For example, to reduce the word teacher to its root teach
- * the patch string Db would be generated. This command tells the stemmer to
- * delete the last 2 characters from the word teacher to reach the stem (the
- * patch commands are applied starting from the last character in order to save
- */
-public class Diff {
- int sizex = 0;
- int sizey = 0;
- int net[][];
- int way[][];
-
- int INSERT;
- int DELETE;
- int REPLACE;
- int NOOP;
-
- /**
- * Constructor for the Diff object.
- */
- public Diff() {
- this(1, 1, 1, 0);
- }
-
- /**
- * Constructor for the Diff object
- *
- * @param ins Description of the Parameter
- * @param del Description of the Parameter
- * @param rep Description of the Parameter
- * @param noop Description of the Parameter
- */
- public Diff(int ins, int del, int rep, int noop) {
- INSERT = ins;
- DELETE = del;
- REPLACE = rep;
- NOOP = noop;
- }
-
- /**
- * Apply the given patch string <tt>diff</tt> to the given string <tt>
- * dest</tt>.
- *
- * @param dest Destination string
- * @param diff Patch string
- */
- public static void apply(StringBuilder dest, CharSequence diff) {
- try {
-
- if (diff == null) {
- return;
- }
-
- int pos = dest.length() - 1;
- if (pos < 0) {
- return;
- }
- // orig == ""
- for (int i = 0; i < diff.length() / 2; i++) {
- char cmd = diff.charAt(2 * i);
- char param = diff.charAt(2 * i + 1);
- int par_num = (param - 'a' + 1);
- switch (cmd) {
- case '-':
- pos = pos - par_num + 1;
- break;
- case 'R':
- dest.setCharAt(pos, param);
- break;
- case 'D':
- int o = pos;
- pos -= par_num - 1;
- /*
- * delete par_num chars from index pos
- */
- // String s = orig.toString();
- // s = s.substring( 0, pos ) + s.substring( o + 1 );
- // orig = new StringBuffer( s );
- dest.delete(pos, o + 1);
- break;
- case 'I':
- dest.insert(pos += 1, param);
- break;
- }
- pos--;
- }
- } catch (StringIndexOutOfBoundsException x) {
- // x.printStackTrace();
- } catch (ArrayIndexOutOfBoundsException x) {
- // x.printStackTrace();
- }
- }
-
- /**
- * Construct a patch string that transforms a to b.
- *
- * @param a String 1st string
- * @param b String 2nd string
- * @return String
- */
- public synchronized String exec(String a, String b) {
- if (a == null || b == null) {
- return null;
- }
-
- int x;
- int y;
- int maxx;
- int maxy;
- int go[] = new int[4];
- final int X = 1;
- final int Y = 2;
- final int R = 3;
- final int D = 0;
-
- /*
- * setup memory if needed => processing speed up
- */
- maxx = a.length() + 1;
- maxy = b.length() + 1;
- if ((maxx >= sizex) || (maxy >= sizey)) {
- sizex = maxx + 8;
- sizey = maxy + 8;
- net = new int[sizex][sizey];
- way = new int[sizex][sizey];
- }
-
- /*
- * clear the network
- */
- for (x = 0; x < maxx; x++) {
- for (y = 0; y < maxy; y++) {
- net[x][y] = 0;
- }
- }
-
- /*
- * set known persistent values
- */
- for (x = 1; x < maxx; x++) {
- net[x][0] = x;
- way[x][0] = X;
- }
- for (y = 1; y < maxy; y++) {
- net[0][y] = y;
- way[0][y] = Y;
- }
-
- for (x = 1; x < maxx; x++) {
- for (y = 1; y < maxy; y++) {
- go[X] = net[x - 1][y] + DELETE;
- // way on x costs 1 unit
- go[Y] = net[x][y - 1] + INSERT;
- // way on y costs 1 unit
- go[R] = net[x - 1][y - 1] + REPLACE;
- go[D] = net[x - 1][y - 1]
- + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100);
- // diagonal costs 0, when no change
- short min = D;
- if (go[min] >= go[X]) {
- min = X;
- }
- if (go[min] > go[Y]) {
- min = Y;
- }
- if (go[min] > go[R]) {
- min = R;
- }
- way[x][y] = min;
- net[x][y] = (short) go[min];
- }
- }
-
- // read the patch string
- StringBuffer result = new StringBuffer();
- final char base = 'a' - 1;
- char deletes = base;
- char equals = base;
- for (x = maxx - 1, y = maxy - 1; x + y != 0;) {
- switch (way[x][y]) {
- case X:
- if (equals != base) {
- result.append("-" + (equals));
- equals = base;
- }
- deletes++;
- x--;
- break;
- // delete
- case Y:
- if (deletes != base) {
- result.append("D" + (deletes));
- deletes = base;
- }
- if (equals != base) {
- result.append("-" + (equals));
- equals = base;
- }
- result.append('I');
- result.append(b.charAt(--y));
- break;
- // insert
- case R:
- if (deletes != base) {
- result.append("D" + (deletes));
- deletes = base;
- }
- if (equals != base) {
- result.append("-" + (equals));
- equals = base;
- }
- result.append('R');
- result.append(b.charAt(--y));
- x--;
- break;
- // replace
- case D:
- if (deletes != base) {
- result.append("D" + (deletes));
- deletes = base;
- }
- equals++;
- x--;
- y--;
- break;
- // no change
- }
- }
- if (deletes != base) {
- result.append("D" + (deletes));
- deletes = base;
- }
-
- return result.toString();
- }
-}