--- /dev/null
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This software is copyrighted by the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or document. If this license applies to the Egothor project as a
+ whole, the copyright holders are the people or entities mentioned in
+ the file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer that follows these conditions in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived from this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products derived from this software may not be called "Egothor",
+ nor may "Egothor" appear in their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided with the redistribution and/or in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many
+ individuals on behalf of the Egothor Project and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The MultiTrie is a Trie of Tries.
+ * <p>
+ * It stores words and their associated patch commands. The MultiTrie handles
+ * patch commmands broken into their constituent parts, as a MultiTrie does, but
+ * the commands are delimited by the skip command.
+ */
+public class MultiTrie2 extends MultiTrie {
+ /**
+ * Constructor for the MultiTrie object.
+ *
+ * @param is the input stream
+ * @exception IOException if an I/O error occurs
+ */
+ public MultiTrie2(DataInput is) throws IOException {
+ super(is);
+ }
+
+ /**
+ * Constructor for the MultiTrie2 object
+ *
+ * @param forward set to <tt>true</tt> if the elements should be read left to
+ * right
+ */
+ public MultiTrie2(boolean forward) {
+ super(forward);
+ }
+
+ /**
+ * Return the element that is stored in a cell associated with the given key.
+ *
+ * @param key the key to the cell holding the desired element
+ * @return the element
+ */
+ @Override
+ public CharSequence getFully(CharSequence key) {
+ StringBuilder result = new StringBuilder(tries.size() * 2);
+ try {
+ CharSequence lastkey = key;
+ CharSequence p[] = new CharSequence[tries.size()];
+ char lastch = ' ';
+ for (int i = 0; i < tries.size(); i++) {
+ CharSequence r = tries.get(i).getFully(lastkey);
+ if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+ return result;
+ }
+ if (cannotFollow(lastch, r.charAt(0))) {
+ return result;
+ } else {
+ lastch = r.charAt(r.length() - 2);
+ }
+ // key=key.substring(lengthPP(r));
+ p[i] = r;
+ if (p[i].charAt(0) == '-') {
+ if (i > 0) {
+ key = skip(key, lengthPP(p[i - 1]));
+ }
+ key = skip(key, lengthPP(p[i]));
+ }
+ // key = skip(key, lengthPP(r));
+ result.append(r);
+ if (key.length() != 0) {
+ lastkey = key;
+ }
+ }
+ } catch (IndexOutOfBoundsException x) {}
+ return result;
+ }
+
+ /**
+ * Return the element that is stored as last on a path belonging to the given
+ * key.
+ *
+ * @param key the key associated with the desired element
+ * @return the element that is stored as last on a path
+ */
+ @Override
+ public CharSequence getLastOnPath(CharSequence key) {
+ StringBuilder result = new StringBuilder(tries.size() * 2);
+ try {
+ CharSequence lastkey = key;
+ CharSequence p[] = new CharSequence[tries.size()];
+ char lastch = ' ';
+ for (int i = 0; i < tries.size(); i++) {
+ CharSequence r = tries.get(i).getLastOnPath(lastkey);
+ if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+ return result;
+ }
+ // System.err.println("LP:"+key+" last:"+lastch+" new:"+r);
+ if (cannotFollow(lastch, r.charAt(0))) {
+ return result;
+ } else {
+ lastch = r.charAt(r.length() - 2);
+ }
+ // key=key.substring(lengthPP(r));
+ p[i] = r;
+ if (p[i].charAt(0) == '-') {
+ if (i > 0) {
+ key = skip(key, lengthPP(p[i - 1]));
+ }
+ key = skip(key, lengthPP(p[i]));
+ }
+ // key = skip(key, lengthPP(r));
+ result.append(r);
+ if (key.length() != 0) {
+ lastkey = key;
+ }
+ }
+ } catch (IndexOutOfBoundsException x) {}
+ return result;
+ }
+
+ /**
+ * Write this data structure to the given output stream.
+ *
+ * @param os the output stream
+ * @exception IOException if an I/O error occurs
+ */
+ @Override
+ public void store(DataOutput os) throws IOException {
+ super.store(os);
+ }
+
+ /**
+ * Add an element to this structure consisting of the given key and patch
+ * command.
+ * <p>
+ * This method will return without executing if the <tt>cmd</tt>
+ * parameter's length is 0.
+ *
+ * @param key the key
+ * @param cmd the patch command
+ */
+ @Override
+ public void add(CharSequence key, CharSequence cmd) {
+ if (cmd.length() == 0) {
+ return;
+ }
+ // System.err.println( cmd );
+ CharSequence p[] = decompose(cmd);
+ int levels = p.length;
+ // System.err.println("levels "+key+" cmd "+cmd+"|"+levels);
+ while (levels >= tries.size()) {
+ tries.add(new Trie(forward));
+ }
+ CharSequence lastkey = key;
+ for (int i = 0; i < levels; i++) {
+ if (key.length() > 0) {
+ tries.get(i).add(key, p[i]);
+ lastkey = key;
+ } else {
+ tries.get(i).add(lastkey, p[i]);
+ }
+ // System.err.println("-"+key+" "+p[i]+"|"+key.length());
+ /*
+ * key=key.substring(lengthPP(p[i]));
+ */
+ if (p[i].length() > 0 && p[i].charAt(0) == '-') {
+ if (i > 0) {
+ key = skip(key, lengthPP(p[i - 1]));
+ }
+ key = skip(key, lengthPP(p[i]));
+ }
+ // System.err.println("--->"+key);
+ }
+ if (key.length() > 0) {
+ tries.get(levels).add(key, EOM_NODE);
+ } else {
+ tries.get(levels).add(lastkey, EOM_NODE);
+ }
+ }
+
+ /**
+ * Break the given patch command into its constituent pieces. The pieces are
+ * delimited by NOOP commands.
+ *
+ * @param cmd the patch command
+ * @return an array containing the pieces of the command
+ */
+ public CharSequence[] decompose(CharSequence cmd) {
+ int parts = 0;
+
+ for (int i = 0; 0 <= i && i < cmd.length();) {
+ int next = dashEven(cmd, i);
+ if (i == next) {
+ parts++;
+ i = next + 2;
+ } else {
+ parts++;
+ i = next;
+ }
+ }
+
+ CharSequence part[] = new CharSequence[parts];
+ int x = 0;
+
+ for (int i = 0; 0 <= i && i < cmd.length();) {
+ int next = dashEven(cmd, i);
+ if (i == next) {
+ part[x++] = cmd.subSequence(i, i + 2);
+ i = next + 2;
+ } else {
+ part[x++] = (next < 0) ? cmd.subSequence(i, cmd.length()) : cmd.subSequence(i, next);
+ i = next;
+ }
+ }
+ return part;
+ }
+
+ /**
+ * Remove empty rows from the given Trie and return the newly reduced Trie.
+ *
+ * @param by the Trie to reduce
+ * @return the newly reduced Trie
+ */
+ @Override
+ public Trie reduce(Reduce by) {
+ List<Trie> h = new ArrayList<Trie>();
+ for (Trie trie : tries)
+ h.add(trie.reduce(by));
+
+ MultiTrie2 m = new MultiTrie2(forward);
+ m.tries = h;
+ return m;
+ }
+
+ private boolean cannotFollow(char after, char goes) {
+ switch (after) {
+ case '-':
+ case 'D':
+ return after == goes;
+ }
+ return false;
+ }
+
+ private CharSequence skip(CharSequence in, int count) {
+ if (forward) {
+ return in.subSequence(count, in.length());
+ } else {
+ return in.subSequence(0, in.length() - count);
+ }
+ }
+
+ private int dashEven(CharSequence in, int from) {
+ while (from < in.length()) {
+ if (in.charAt(from) == '-') {
+ return from;
+ } else {
+ from += 2;
+ }
+ }
+ return -1;
+ }
+
+ @SuppressWarnings("fallthrough")
+ private int lengthPP(CharSequence cmd) {
+ int len = 0;
+ for (int i = 0; i < cmd.length(); i++) {
+ switch (cmd.charAt(i++)) {
+ case '-':
+ case 'D':
+ len += cmd.charAt(i) - 'a' + 1;
+ break;
+ case 'R':
+ len++; /* intentional fallthrough */
+ case 'I':
+ break;
+ }
+ }
+ return len;
+ }
+}