+++ /dev/null
-/*
- Egothor Software License version 1.00
- Copyright (C) 1997-2004 Leo Galambos.
- Copyright (C) 2002-2004 "Egothor developers"
- on behalf of the Egothor Project.
- All rights reserved.
-
- This software is copyrighted by the "Egothor developers". If this
- license applies to a single file or document, the "Egothor developers"
- are the people or entities mentioned as copyright holders in that file
- or document. If this license applies to the Egothor project as a
- whole, the copyright holders are the people or entities mentioned in
- the file CREDITS. This file can be found in the same location as this
- license in the distribution.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- 1. Redistributions of source code must retain the above copyright
- notice, the list of contributors, this list of conditions, and the
- following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, the list of contributors, this list of conditions, and the
- disclaimer that follows these conditions in the documentation
- and/or other materials provided with the distribution.
- 3. The name "Egothor" must not be used to endorse or promote products
- derived from this software without prior written permission. For
- written permission, please contact Leo.G@seznam.cz
- 4. Products derived from this software may not be called "Egothor",
- nor may "Egothor" appear in their name, without prior written
- permission from Leo.G@seznam.cz.
-
- In addition, we request that you include in the end-user documentation
- provided with the redistribution and/or in the software itself an
- acknowledgement equivalent to the following:
- "This product includes software developed by the Egothor Project.
- http://egothor.sf.net/"
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
- IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- This software consists of voluntary contributions made by many
- individuals on behalf of the Egothor Project and was originally
- created by Leo Galambos (Leo.G@seznam.cz).
- */
-package org.egothor.stemmer;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * The MultiTrie is a Trie of Tries.
- * <p>
- * It stores words and their associated patch commands. The MultiTrie handles
- * patch commmands broken into their constituent parts, as a MultiTrie does, but
- * the commands are delimited by the skip command.
- */
-public class MultiTrie2 extends MultiTrie {
- /**
- * Constructor for the MultiTrie object.
- *
- * @param is the input stream
- * @exception IOException if an I/O error occurs
- */
- public MultiTrie2(DataInput is) throws IOException {
- super(is);
- }
-
- /**
- * Constructor for the MultiTrie2 object
- *
- * @param forward set to <tt>true</tt> if the elements should be read left to
- * right
- */
- public MultiTrie2(boolean forward) {
- super(forward);
- }
-
- /**
- * Return the element that is stored in a cell associated with the given key.
- *
- * @param key the key to the cell holding the desired element
- * @return the element
- */
- @Override
- public CharSequence getFully(CharSequence key) {
- StringBuilder result = new StringBuilder(tries.size() * 2);
- try {
- CharSequence lastkey = key;
- CharSequence p[] = new CharSequence[tries.size()];
- char lastch = ' ';
- for (int i = 0; i < tries.size(); i++) {
- CharSequence r = tries.get(i).getFully(lastkey);
- if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
- return result;
- }
- if (cannotFollow(lastch, r.charAt(0))) {
- return result;
- } else {
- lastch = r.charAt(r.length() - 2);
- }
- // key=key.substring(lengthPP(r));
- p[i] = r;
- if (p[i].charAt(0) == '-') {
- if (i > 0) {
- key = skip(key, lengthPP(p[i - 1]));
- }
- key = skip(key, lengthPP(p[i]));
- }
- // key = skip(key, lengthPP(r));
- result.append(r);
- if (key.length() != 0) {
- lastkey = key;
- }
- }
- } catch (IndexOutOfBoundsException x) {}
- return result;
- }
-
- /**
- * Return the element that is stored as last on a path belonging to the given
- * key.
- *
- * @param key the key associated with the desired element
- * @return the element that is stored as last on a path
- */
- @Override
- public CharSequence getLastOnPath(CharSequence key) {
- StringBuilder result = new StringBuilder(tries.size() * 2);
- try {
- CharSequence lastkey = key;
- CharSequence p[] = new CharSequence[tries.size()];
- char lastch = ' ';
- for (int i = 0; i < tries.size(); i++) {
- CharSequence r = tries.get(i).getLastOnPath(lastkey);
- if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
- return result;
- }
- // System.err.println("LP:"+key+" last:"+lastch+" new:"+r);
- if (cannotFollow(lastch, r.charAt(0))) {
- return result;
- } else {
- lastch = r.charAt(r.length() - 2);
- }
- // key=key.substring(lengthPP(r));
- p[i] = r;
- if (p[i].charAt(0) == '-') {
- if (i > 0) {
- key = skip(key, lengthPP(p[i - 1]));
- }
- key = skip(key, lengthPP(p[i]));
- }
- // key = skip(key, lengthPP(r));
- result.append(r);
- if (key.length() != 0) {
- lastkey = key;
- }
- }
- } catch (IndexOutOfBoundsException x) {}
- return result;
- }
-
- /**
- * Write this data structure to the given output stream.
- *
- * @param os the output stream
- * @exception IOException if an I/O error occurs
- */
- @Override
- public void store(DataOutput os) throws IOException {
- super.store(os);
- }
-
- /**
- * Add an element to this structure consisting of the given key and patch
- * command.
- * <p>
- * This method will return without executing if the <tt>cmd</tt>
- * parameter's length is 0.
- *
- * @param key the key
- * @param cmd the patch command
- */
- @Override
- public void add(CharSequence key, CharSequence cmd) {
- if (cmd.length() == 0) {
- return;
- }
- // System.err.println( cmd );
- CharSequence p[] = decompose(cmd);
- int levels = p.length;
- // System.err.println("levels "+key+" cmd "+cmd+"|"+levels);
- while (levels >= tries.size()) {
- tries.add(new Trie(forward));
- }
- CharSequence lastkey = key;
- for (int i = 0; i < levels; i++) {
- if (key.length() > 0) {
- tries.get(i).add(key, p[i]);
- lastkey = key;
- } else {
- tries.get(i).add(lastkey, p[i]);
- }
- // System.err.println("-"+key+" "+p[i]+"|"+key.length());
- /*
- * key=key.substring(lengthPP(p[i]));
- */
- if (p[i].length() > 0 && p[i].charAt(0) == '-') {
- if (i > 0) {
- key = skip(key, lengthPP(p[i - 1]));
- }
- key = skip(key, lengthPP(p[i]));
- }
- // System.err.println("--->"+key);
- }
- if (key.length() > 0) {
- tries.get(levels).add(key, EOM_NODE);
- } else {
- tries.get(levels).add(lastkey, EOM_NODE);
- }
- }
-
- /**
- * Break the given patch command into its constituent pieces. The pieces are
- * delimited by NOOP commands.
- *
- * @param cmd the patch command
- * @return an array containing the pieces of the command
- */
- public CharSequence[] decompose(CharSequence cmd) {
- int parts = 0;
-
- for (int i = 0; 0 <= i && i < cmd.length();) {
- int next = dashEven(cmd, i);
- if (i == next) {
- parts++;
- i = next + 2;
- } else {
- parts++;
- i = next;
- }
- }
-
- CharSequence part[] = new CharSequence[parts];
- int x = 0;
-
- for (int i = 0; 0 <= i && i < cmd.length();) {
- int next = dashEven(cmd, i);
- if (i == next) {
- part[x++] = cmd.subSequence(i, i + 2);
- i = next + 2;
- } else {
- part[x++] = (next < 0) ? cmd.subSequence(i, cmd.length()) : cmd.subSequence(i, next);
- i = next;
- }
- }
- return part;
- }
-
- /**
- * Remove empty rows from the given Trie and return the newly reduced Trie.
- *
- * @param by the Trie to reduce
- * @return the newly reduced Trie
- */
- @Override
- public Trie reduce(Reduce by) {
- List<Trie> h = new ArrayList<Trie>();
- for (Trie trie : tries)
- h.add(trie.reduce(by));
-
- MultiTrie2 m = new MultiTrie2(forward);
- m.tries = h;
- return m;
- }
-
- private boolean cannotFollow(char after, char goes) {
- switch (after) {
- case '-':
- case 'D':
- return after == goes;
- }
- return false;
- }
-
- private CharSequence skip(CharSequence in, int count) {
- if (forward) {
- return in.subSequence(count, in.length());
- } else {
- return in.subSequence(0, in.length() - count);
- }
- }
-
- private int dashEven(CharSequence in, int from) {
- while (from < in.length()) {
- if (in.charAt(from) == '-') {
- return from;
- } else {
- from += 2;
- }
- }
- return -1;
- }
-
- @SuppressWarnings("fallthrough")
- private int lengthPP(CharSequence cmd) {
- int len = 0;
- for (int i = 0; i < cmd.length(); i++) {
- switch (cmd.charAt(i++)) {
- case '-':
- case 'D':
- len += cmd.charAt(i) - 'a' + 1;
- break;
- case 'R':
- len++; /* intentional fallthrough */
- case 'I':
- break;
- }
- }
- return len;
- }
-}