+++ /dev/null
-/*
- Egothor Software License version 1.00
- Copyright (C) 1997-2004 Leo Galambos.
- Copyright (C) 2002-2004 "Egothor developers"
- on behalf of the Egothor Project.
- All rights reserved.
-
- This software is copyrighted by the "Egothor developers". If this
- license applies to a single file or document, the "Egothor developers"
- are the people or entities mentioned as copyright holders in that file
- or document. If this license applies to the Egothor project as a
- whole, the copyright holders are the people or entities mentioned in
- the file CREDITS. This file can be found in the same location as this
- license in the distribution.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- 1. Redistributions of source code must retain the above copyright
- notice, the list of contributors, this list of conditions, and the
- following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
- notice, the list of contributors, this list of conditions, and the
- disclaimer that follows these conditions in the documentation
- and/or other materials provided with the distribution.
- 3. The name "Egothor" must not be used to endorse or promote products
- derived from this software without prior written permission. For
- written permission, please contact Leo.G@seznam.cz
- 4. Products derived from this software may not be called "Egothor",
- nor may "Egothor" appear in their name, without prior written
- permission from Leo.G@seznam.cz.
-
- In addition, we request that you include in the end-user documentation
- provided with the redistribution and/or in the software itself an
- acknowledgement equivalent to the following:
- "This product includes software developed by the Egothor Project.
- http://egothor.sf.net/"
-
- THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
- IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- This software consists of voluntary contributions made by many
- individuals on behalf of the Egothor Project and was originally
- created by Leo Galambos (Leo.G@seznam.cz).
- */
-package org.egothor.stemmer;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A Trie is used to store a dictionary of words and their stems.
- * <p>
- * Actually, what is stored are words with their respective patch commands. A
- * trie can be termed forward (keys read from left to right) or backward (keys
- * read from right to left). This property will vary depending on the language
- * for which a Trie is constructed.
- */
-public class Trie {
- List<Row> rows = new ArrayList<Row>();
- List<CharSequence> cmds = new ArrayList<CharSequence>();
- int root;
-
- boolean forward = false;
-
- /**
- * Constructor for the Trie object.
- *
- * @param is the input stream
- * @exception IOException if an I/O error occurs
- */
- public Trie(DataInput is) throws IOException {
- forward = is.readBoolean();
- root = is.readInt();
- for (int i = is.readInt(); i > 0; i--) {
- cmds.add(is.readUTF());
- }
- for (int i = is.readInt(); i > 0; i--) {
- rows.add(new Row(is));
- }
- }
-
- /**
- * Constructor for the Trie object.
- *
- * @param forward set to <tt>true</tt>
- */
- public Trie(boolean forward) {
- rows.add(new Row());
- root = 0;
- this.forward = forward;
- }
-
- /**
- * Constructor for the Trie object.
- *
- * @param forward <tt>true</tt> if read left to right, <tt>false</tt> if read
- * right to left
- * @param root index of the row that is the root node
- * @param cmds the patch commands to store
- * @param rows a Vector of Vectors. Each inner Vector is a node of this Trie
- */
- public Trie(boolean forward, int root, List<CharSequence> cmds, List<Row> rows) {
- this.rows = rows;
- this.cmds = cmds;
- this.root = root;
- this.forward = forward;
- }
-
- /**
- * Gets the all attribute of the Trie object
- *
- * @param key Description of the Parameter
- * @return The all value
- */
- public CharSequence[] getAll(CharSequence key) {
- int res[] = new int[key.length()];
- int resc = 0;
- Row now = getRow(root);
- int w;
- StrEnum e = new StrEnum(key, forward);
- boolean br = false;
-
- for (int i = 0; i < key.length() - 1; i++) {
- Character ch = new Character(e.next());
- w = now.getCmd(ch);
- if (w >= 0) {
- int n = w;
- for (int j = 0; j < resc; j++) {
- if (n == res[j]) {
- n = -1;
- break;
- }
- }
- if (n >= 0) {
- res[resc++] = n;
- }
- }
- w = now.getRef(ch);
- if (w >= 0) {
- now = getRow(w);
- } else {
- br = true;
- break;
- }
- }
- if (br == false) {
- w = now.getCmd(new Character(e.next()));
- if (w >= 0) {
- int n = w;
- for (int j = 0; j < resc; j++) {
- if (n == res[j]) {
- n = -1;
- break;
- }
- }
- if (n >= 0) {
- res[resc++] = n;
- }
- }
- }
-
- if (resc < 1) {
- return null;
- }
- CharSequence R[] = new CharSequence[resc];
- for (int j = 0; j < resc; j++) {
- R[j] = cmds.get(res[j]);
- }
- return R;
- }
-
- /**
- * Return the number of cells in this Trie object.
- *
- * @return the number of cells
- */
- public int getCells() {
- int size = 0;
- for (Row row : rows)
- size += row.getCells();
- return size;
- }
-
- /**
- * Gets the cellsPnt attribute of the Trie object
- *
- * @return The cellsPnt value
- */
- public int getCellsPnt() {
- int size = 0;
- for (Row row : rows)
- size += row.getCellsPnt();
- return size;
- }
-
- /**
- * Gets the cellsVal attribute of the Trie object
- *
- * @return The cellsVal value
- */
- public int getCellsVal() {
- int size = 0;
- for (Row row : rows)
- size += row.getCellsVal();
- return size;
- }
-
- /**
- * Return the element that is stored in a cell associated with the given key.
- *
- * @param key the key
- * @return the associated element
- */
- public CharSequence getFully(CharSequence key) {
- Row now = getRow(root);
- int w;
- Cell c;
- int cmd = -1;
- StrEnum e = new StrEnum(key, forward);
- Character ch = null;
- Character aux = null;
-
- for (int i = 0; i < key.length();) {
- ch = new Character(e.next());
- i++;
-
- c = now.at(ch);
- if (c == null) {
- return null;
- }
-
- cmd = c.cmd;
-
- for (int skip = c.skip; skip > 0; skip--) {
- if (i < key.length()) {
- aux = new Character(e.next());
- } else {
- return null;
- }
- i++;
- }
-
- w = now.getRef(ch);
- if (w >= 0) {
- now = getRow(w);
- } else if (i < key.length()) {
- return null;
- }
- }
- return (cmd == -1) ? null : cmds.get(cmd);
- }
-
- /**
- * Return the element that is stored as last on a path associated with the
- * given key.
- *
- * @param key the key associated with the desired element
- * @return the last on path element
- */
- public CharSequence getLastOnPath(CharSequence key) {
- Row now = getRow(root);
- int w;
- CharSequence last = null;
- StrEnum e = new StrEnum(key, forward);
-
- for (int i = 0; i < key.length() - 1; i++) {
- Character ch = new Character(e.next());
- w = now.getCmd(ch);
- if (w >= 0) {
- last = cmds.get(w);
- }
- w = now.getRef(ch);
- if (w >= 0) {
- now = getRow(w);
- } else {
- return last;
- }
- }
- w = now.getCmd(new Character(e.next()));
- return (w >= 0) ? cmds.get(w) : last;
- }
-
- /**
- * Return the Row at the given index.
- *
- * @param index the index containing the desired Row
- * @return the Row
- */
- private Row getRow(int index) {
- if (index < 0 || index >= rows.size()) {
- return null;
- }
- return rows.get(index);
- }
-
- /**
- * Write this Trie to the given output stream.
- *
- * @param os the output stream
- * @exception IOException if an I/O error occurs
- */
- public void store(DataOutput os) throws IOException {
- os.writeBoolean(forward);
- os.writeInt(root);
- os.writeInt(cmds.size());
- for (CharSequence cmd : cmds)
- os.writeUTF(cmd.toString());
-
- os.writeInt(rows.size());
- for (Row row : rows)
- row.store(os);
- }
-
- /**
- * Add the given key associated with the given patch command. If either
- * parameter is null this method will return without executing.
- *
- * @param key the key
- * @param cmd the patch command
- */
- public void add(CharSequence key, CharSequence cmd) {
- if (key == null || cmd == null) {
- return;
- }
- if (cmd.length() == 0) {
- return;
- }
- int id_cmd = cmds.indexOf(cmd);
- if (id_cmd == -1) {
- id_cmd = cmds.size();
- cmds.add(cmd);
- }
-
- int node = root;
- Row r = getRow(node);
-
- StrEnum e = new StrEnum(key, forward);
-
- for (int i = 0; i < e.length() - 1; i++) {
- Character ch = new Character(e.next());
- node = r.getRef(ch);
- if (node >= 0) {
- r = getRow(node);
- } else {
- node = rows.size();
- Row n;
- rows.add(n = new Row());
- r.setRef(ch, node);
- r = n;
- }
- }
- r.setCmd(new Character(e.next()), id_cmd);
- }
-
- /**
- * Remove empty rows from the given Trie and return the newly reduced Trie.
- *
- * @param by the Trie to reduce
- * @return the newly reduced Trie
- */
- public Trie reduce(Reduce by) {
- return by.optimize(this);
- }
-
- public void printInfo(CharSequence prefix) {
- System.out.println(prefix + "nds " + rows.size() + " cmds " + cmds.size()
- + " cells " + getCells() + " valcells " + getCellsVal() + " pntcells "
- + getCellsPnt());
- }
-
- /**
- * This class is part of the Egothor Project
- */
- class StrEnum {
- CharSequence s;
- int from;
- int by;
-
- /**
- * Constructor for the StrEnum object
- *
- * @param s Description of the Parameter
- * @param up Description of the Parameter
- */
- StrEnum(CharSequence s, boolean up) {
- this.s = s;
- if (up) {
- from = 0;
- by = 1;
- } else {
- from = s.length() - 1;
- by = -1;
- }
- }
-
- int length() {
- return s.length();
- }
-
- char next() {
- char ch = s.charAt(from);
- from += by;
- return ch;
- }
- }
-}