--- /dev/null
+package org.apache.lucene.search.suggest.jaspell;
+
+/**
+ * Copyright (c) 2005 Bruno Martins
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the organization nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.List;
+import java.util.Vector;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Implementation of a Ternary Search Trie, a data structure for storing
+ * <code>String</code> objects that combines the compact size of a binary search
+ * tree with the speed of a digital search trie, and is therefore ideal for
+ * practical use in sorting and searching data.</p>
+ * <p>
+ *
+ * This data structure is faster than hashing for many typical search problems,
+ * and supports a broader range of useful problems and operations. Ternary
+ * searches are faster than hashing and more powerful, too.
+ * </p>
+ * <p>
+ *
+ * The theory of ternary search trees was described at a symposium in 1997 (see
+ * "Fast Algorithms for Sorting and Searching Strings," by J.L. Bentley and R.
+ * Sedgewick, Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete
+ * Algorithms, January 1997). Algorithms in C, Third Edition, by Robert
+ * Sedgewick (Addison-Wesley, 1998) provides yet another view of ternary search
+ * trees.
+ *
+ * @author Bruno Martins
+ *
+ */
+public class JaspellTernarySearchTrie {
+
+ /**
+ * An inner class of Ternary Search Trie that represents a node in the trie.
+ */
+ protected final class TSTNode {
+
+ /** Index values for accessing relatives array. */
+ protected final static int PARENT = 0, LOKID = 1, EQKID = 2, HIKID = 3;
+
+ /** The key to the node. */
+ protected Object data;
+
+ /** The relative nodes. */
+ protected TSTNode[] relatives = new TSTNode[4];
+
+ /** The char used in the split. */
+ protected char splitchar;
+
+ /**
+ * Constructor method.
+ *
+ *@param splitchar
+ * The char used in the split.
+ *@param parent
+ * The parent node.
+ */
+ protected TSTNode(char splitchar, TSTNode parent) {
+ this.splitchar = splitchar;
+ relatives[PARENT] = parent;
+ }
+ }
+
+ /**
+ * Compares characters by alfabetical order.
+ *
+ *@param cCompare2
+ * The first char in the comparison.
+ *@param cRef
+ * The second char in the comparison.
+ *@return A negative number, 0 or a positive number if the second char is
+ * less, equal or greater.
+ */
+ private static int compareCharsAlphabetically(char cCompare2, char cRef) {
+ return Character.toLowerCase(cCompare2) - Character.toLowerCase(cRef);
+ }
+
+ /* what follows is the original Jaspell code.
+ private static int compareCharsAlphabetically(int cCompare2, int cRef) {
+ int cCompare = 0;
+ if (cCompare2 >= 65) {
+ if (cCompare2 < 89) {
+ cCompare = (2 * cCompare2) - 65;
+ } else if (cCompare2 < 97) {
+ cCompare = cCompare2 + 24;
+ } else if (cCompare2 < 121) {
+ cCompare = (2 * cCompare2) - 128;
+ } else cCompare = cCompare2;
+ } else cCompare = cCompare2;
+ if (cRef < 65) {
+ return cCompare - cRef;
+ }
+ if (cRef < 89) {
+ return cCompare - ((2 * cRef) - 65);
+ }
+ if (cRef < 97) {
+ return cCompare - (cRef + 24);
+ }
+ if (cRef < 121) {
+ return cCompare - ((2 * cRef) - 128);
+ }
+ return cCompare - cRef;
+ }
+ */
+
+ /**
+ * The default number of values returned by the <code>matchAlmost</code>
+ * method.
+ */
+ private int defaultNumReturnValues = -1;
+
+ /**
+ * the number of differences allowed in a call to the
+ * <code>matchAlmostKey</code> method.
+ */
+ private int matchAlmostDiff;
+
+ /** The base node in the trie. */
+ private TSTNode rootNode;
+
+ /**
+ * Constructs an empty Ternary Search Trie.
+ */
+ public JaspellTernarySearchTrie() {
+ }
+
+ // for loading
+ void setRoot(TSTNode newRoot) {
+ rootNode = newRoot;
+ }
+
+ // for saving
+ TSTNode getRoot() {
+ return rootNode;
+ }
+
+ /**
+ * Constructs a Ternary Search Trie and loads data from a <code>File</code>
+ * into the Trie. The file is a normal text document, where each line is of
+ * the form word TAB float.
+ *
+ *@param file
+ * The <code>File</code> with the data to load into the Trie.
+ *@exception IOException
+ * A problem occured while reading the data.
+ */
+ public JaspellTernarySearchTrie(File file) throws IOException {
+ this(file, false);
+ }
+
+ /**
+ * Constructs a Ternary Search Trie and loads data from a <code>File</code>
+ * into the Trie. The file is a normal text document, where each line is of
+ * the form "word TAB float".
+ *
+ *@param file
+ * The <code>File</code> with the data to load into the Trie.
+ *@param compression
+ * If true, the file is compressed with the GZIP algorithm, and if
+ * false, the file is a normal text document.
+ *@exception IOException
+ * A problem occured while reading the data.
+ */
+ public JaspellTernarySearchTrie(File file, boolean compression)
+ throws IOException {
+ this();
+ BufferedReader in;
+ if (compression)
+ in = new BufferedReader(new InputStreamReader(new GZIPInputStream(
+ new FileInputStream(file))));
+ else in = new BufferedReader(new InputStreamReader((new FileInputStream(
+ file))));
+ String word;
+ int pos;
+ Float occur, one = new Float(1);
+ int numWords = 0;
+ while ((word = in.readLine()) != null) {
+ numWords++;
+ pos = word.indexOf("\t");
+ occur = one;
+ if (pos != -1) {
+ occur = Float.parseFloat(word.substring(pos + 1).trim());
+ word = word.substring(0, pos);
+ }
+ String key = word.toLowerCase();
+ if (rootNode == null) {
+ rootNode = new TSTNode(key.charAt(0), null);
+ }
+ TSTNode node = null;
+ if (key.length() > 0 && rootNode != null) {
+ TSTNode currentNode = rootNode;
+ int charIndex = 0;
+ while (true) {
+ if (currentNode == null) break;
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ node = currentNode;
+ break;
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ Float occur2 = null;
+ if (node != null) occur2 = ((Float) (node.data));
+ if (occur2 != null) {
+ occur += occur2.floatValue();
+ }
+ currentNode = getOrCreateNode(word.trim().toLowerCase());
+ currentNode.data = occur;
+ }
+ }
+ in.close();
+ }
+
+ /**
+ * Deletes the node passed in as an argument. If this node has non-null data,
+ * then both the node and the data will be deleted. It also deletes any other
+ * nodes in the trie that are no longer needed after the deletion of the node.
+ *
+ *@param nodeToDelete
+ * The node to delete.
+ */
+ private void deleteNode(TSTNode nodeToDelete) {
+ if (nodeToDelete == null) {
+ return;
+ }
+ nodeToDelete.data = null;
+ while (nodeToDelete != null) {
+ nodeToDelete = deleteNodeRecursion(nodeToDelete);
+ // deleteNodeRecursion(nodeToDelete);
+ }
+ }
+
+ /**
+ * Recursively visits each node to be deleted.
+ *
+ * To delete a node, first set its data to null, then pass it into this
+ * method, then pass the node returned by this method into this method (make
+ * sure you don't delete the data of any of the nodes returned from this
+ * method!) and continue in this fashion until the node returned by this
+ * method is <code>null</code>.
+ *
+ * The TSTNode instance returned by this method will be next node to be
+ * operated on by <code>deleteNodeRecursion</code> (This emulates recursive
+ * method call while avoiding the JVM overhead normally associated with a
+ * recursive method.)
+ *
+ *@param currentNode
+ * The node to delete.
+ *@return The next node to be called in deleteNodeRecursion.
+ */
+ private TSTNode deleteNodeRecursion(TSTNode currentNode) {
+ if (currentNode == null) {
+ return null;
+ }
+ if (currentNode.relatives[TSTNode.EQKID] != null
+ || currentNode.data != null) {
+ return null;
+ }
+ // can't delete this node if it has a non-null eq kid or data
+ TSTNode currentParent = currentNode.relatives[TSTNode.PARENT];
+ boolean lokidNull = currentNode.relatives[TSTNode.LOKID] == null;
+ boolean hikidNull = currentNode.relatives[TSTNode.HIKID] == null;
+ int childType;
+ if (currentParent.relatives[TSTNode.LOKID] == currentNode) {
+ childType = TSTNode.LOKID;
+ } else if (currentParent.relatives[TSTNode.EQKID] == currentNode) {
+ childType = TSTNode.EQKID;
+ } else if (currentParent.relatives[TSTNode.HIKID] == currentNode) {
+ childType = TSTNode.HIKID;
+ } else {
+ rootNode = null;
+ return null;
+ }
+ if (lokidNull && hikidNull) {
+ currentParent.relatives[childType] = null;
+ return currentParent;
+ }
+ if (lokidNull) {
+ currentParent.relatives[childType] = currentNode.relatives[TSTNode.HIKID];
+ currentNode.relatives[TSTNode.HIKID].relatives[TSTNode.PARENT] = currentParent;
+ return currentParent;
+ }
+ if (hikidNull) {
+ currentParent.relatives[childType] = currentNode.relatives[TSTNode.LOKID];
+ currentNode.relatives[TSTNode.LOKID].relatives[TSTNode.PARENT] = currentParent;
+ return currentParent;
+ }
+ int deltaHi = currentNode.relatives[TSTNode.HIKID].splitchar
+ - currentNode.splitchar;
+ int deltaLo = currentNode.splitchar
+ - currentNode.relatives[TSTNode.LOKID].splitchar;
+ int movingKid;
+ TSTNode targetNode;
+ if (deltaHi == deltaLo) {
+ if (Math.random() < 0.5) {
+ deltaHi++;
+ } else {
+ deltaLo++;
+ }
+ }
+ if (deltaHi > deltaLo) {
+ movingKid = TSTNode.HIKID;
+ targetNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ movingKid = TSTNode.LOKID;
+ targetNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ while (targetNode.relatives[movingKid] != null) {
+ targetNode = targetNode.relatives[movingKid];
+ }
+ targetNode.relatives[movingKid] = currentNode.relatives[movingKid];
+ currentParent.relatives[childType] = targetNode;
+ targetNode.relatives[TSTNode.PARENT] = currentParent;
+ if (!lokidNull) {
+ currentNode.relatives[TSTNode.LOKID] = null;
+ }
+ if (!hikidNull) {
+ currentNode.relatives[TSTNode.HIKID] = null;
+ }
+ return currentParent;
+ }
+
+ /**
+ * Retrieve the object indexed by a key.
+ *
+ *@param key
+ * A <code>String</code> index.
+ *@return The object retrieved from the Ternary Search Trie.
+ */
+ public Object get(String key) {
+ TSTNode node = getNode(key.trim().toLowerCase());
+ if (node == null) {
+ return null;
+ }
+ return node.data;
+ }
+
+ /**
+ * Retrieve the <code>Float</code> indexed by key, increment it by one unit
+ * and store the new <code>Float</code>.
+ *
+ *@param key
+ * A <code>String</code> index.
+ *@return The <code>Float</code> retrieved from the Ternary Search Trie.
+ */
+ public Float getAndIncrement(String key) {
+ String key2 = key.trim().toLowerCase();
+ TSTNode node = getNode(key2);
+ if (node == null) {
+ return null;
+ }
+ Float aux = (Float) (node.data);
+ if (aux == null) {
+ aux = new Float(1);
+ } else {
+ aux = new Float(aux.intValue() + 1);
+ }
+ put(key2, aux);
+ return aux;
+ }
+
+ /**
+ * Returns the key that indexes the node argument.
+ *
+ *@param node
+ * The node whose index is to be calculated.
+ *@return The <code>String</code> that indexes the node argument.
+ */
+ protected String getKey(TSTNode node) {
+ StringBuffer getKeyBuffer = new StringBuffer();
+ getKeyBuffer.setLength(0);
+ getKeyBuffer.append("" + node.splitchar);
+ TSTNode currentNode;
+ TSTNode lastNode;
+ currentNode = node.relatives[TSTNode.PARENT];
+ lastNode = node;
+ while (currentNode != null) {
+ if (currentNode.relatives[TSTNode.EQKID] == lastNode) {
+ getKeyBuffer.append("" + currentNode.splitchar);
+ }
+ lastNode = currentNode;
+ currentNode = currentNode.relatives[TSTNode.PARENT];
+ }
+ getKeyBuffer.reverse();
+ return getKeyBuffer.toString();
+ }
+
+ /**
+ * Returns the node indexed by key, or <code>null</code> if that node doesn't
+ * exist. Search begins at root node.
+ *
+ *@param key
+ * A <code>String</code> that indexes the node that is returned.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named <code>TernarySearchTrie.TSTNode</code>.
+ */
+ public TSTNode getNode(String key) {
+ return getNode(key, rootNode);
+ }
+
+ /**
+ * Returns the node indexed by key, or <code>null</code> if that node doesn't
+ * exist. The search begins at root node.
+ *
+ *@param key2
+ * A <code>String</code> that indexes the node that is returned.
+ *@param startNode
+ * The top node defining the subtrie to be searched.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named <code>TernarySearchTrie.TSTNode</code>.
+ */
+ protected TSTNode getNode(String key2, TSTNode startNode) {
+ String key = key2.trim().toLowerCase();
+ if (key == null || startNode == null || key.length() == 0) {
+ return null;
+ }
+ TSTNode currentNode = startNode;
+ int charIndex = 0;
+ while (true) {
+ if (currentNode == null) {
+ return null;
+ }
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ return currentNode;
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ }
+
+ /**
+ * Returns the node indexed by key, creating that node if it doesn't exist,
+ * and creating any required intermediate nodes if they don't exist.
+ *
+ *@param key
+ * A <code>String</code> that indexes the node that is returned.
+ *@return The node object indexed by key. This object is an instance of an
+ * inner class named <code>TernarySearchTrie.TSTNode</code>.
+ *@exception NullPointerException
+ * If the key is <code>null</code>.
+ *@exception IllegalArgumentException
+ * If the key is an empty <code>String</code>.
+ */
+ protected TSTNode getOrCreateNode(String key) throws NullPointerException,
+ IllegalArgumentException {
+ if (key == null) {
+ throw new NullPointerException(
+ "attempt to get or create node with null key");
+ }
+ if (key.length() == 0) {
+ throw new IllegalArgumentException(
+ "attempt to get or create node with key of zero length");
+ }
+ if (rootNode == null) {
+ rootNode = new TSTNode(key.charAt(0), null);
+ }
+ TSTNode currentNode = rootNode;
+ int charIndex = 0;
+ while (true) {
+ int charComp = compareCharsAlphabetically(key.charAt(charIndex),
+ currentNode.splitchar);
+ if (charComp == 0) {
+ charIndex++;
+ if (charIndex == key.length()) {
+ return currentNode;
+ }
+ if (currentNode.relatives[TSTNode.EQKID] == null) {
+ currentNode.relatives[TSTNode.EQKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.EQKID];
+ } else if (charComp < 0) {
+ if (currentNode.relatives[TSTNode.LOKID] == null) {
+ currentNode.relatives[TSTNode.LOKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.LOKID];
+ } else {
+ if (currentNode.relatives[TSTNode.HIKID] == null) {
+ currentNode.relatives[TSTNode.HIKID] = new TSTNode(key
+ .charAt(charIndex), currentNode);
+ }
+ currentNode = currentNode.relatives[TSTNode.HIKID];
+ }
+ }
+ }
+
+ /**
+ * Returns a <code>List</code> of keys that almost match the argument key.
+ * Keys returned will have exactly diff characters that do not match the
+ * target key, where diff is equal to the last value passed in as an argument
+ * to the <code>setMatchAlmostDiff</code> method.
+ * <p>
+ * If the <code>matchAlmost</code> method is called before the
+ * <code>setMatchAlmostDiff</code> method has been called for the first time,
+ * then diff = 0.
+ *
+ *@param key
+ * The target key.
+ *@return A <code>List</code> with the results.
+ */
+ public List<String> matchAlmost(String key) {
+ return matchAlmost(key, defaultNumReturnValues);
+ }
+
+ /**
+ * Returns a <code>List</code> of keys that almost match the argument key.
+ * Keys returned will have exactly diff characters that do not match the
+ * target key, where diff is equal to the last value passed in as an argument
+ * to the <code>setMatchAlmostDiff</code> method.
+ * <p>
+ * If the <code>matchAlmost</code> method is called before the
+ * <code>setMatchAlmostDiff</code> method has been called for the first time,
+ * then diff = 0.
+ *
+ *@param key
+ * The target key.
+ *@param numReturnValues
+ * The maximum number of values returned by this method.
+ *@return A <code>List</code> with the results
+ */
+ public List<String> matchAlmost(String key, int numReturnValues) {
+ return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
+ ((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
+ }
+
+ /**
+ * Recursivelly vists the nodes in order to find the ones that almost match a
+ * given key.
+ *
+ *@param currentNode
+ * The current node.
+ *@param charIndex
+ * The current char.
+ *@param d
+ * The number of differences so far.
+ *@param matchAlmostNumReturnValues
+ * The maximum number of values in the result <code>List</code>.
+ *@param matchAlmostResult2
+ * The results so far.
+ *@param upTo
+ * If true all keys having up to and including matchAlmostDiff
+ * mismatched letters will be included in the result (including a key
+ * that is exactly the same as the target string) otherwise keys will
+ * be included in the result only if they have exactly
+ * matchAlmostDiff number of mismatched letters.
+ *@param matchAlmostKey
+ * The key being searched.
+ *@return A <code>List</code> with the results.
+ */
+ private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
+ int d, String matchAlmostKey, int matchAlmostNumReturnValues,
+ List<String> matchAlmostResult2, boolean upTo) {
+ if ((currentNode == null)
+ || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
+ || (d < 0) || (charIndex >= matchAlmostKey.length())) {
+ return matchAlmostResult2;
+ }
+ int charComp = compareCharsAlphabetically(matchAlmostKey.charAt(charIndex),
+ currentNode.splitchar);
+ List<String> matchAlmostResult = matchAlmostResult2;
+ if ((d > 0) || (charComp < 0)) {
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.LOKID], charIndex, d,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult,
+ upTo);
+ }
+ int nextD = (charComp == 0) ? d : d - 1;
+ boolean cond = (upTo) ? (nextD >= 0) : (nextD == 0);
+ if ((matchAlmostKey.length() == charIndex + 1) && cond
+ && (currentNode.data != null)) {
+ matchAlmostResult.add(getKey(currentNode));
+ }
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.EQKID], charIndex + 1, nextD,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo);
+ if ((d > 0) || (charComp > 0)) {
+ matchAlmostResult = matchAlmostRecursion(
+ currentNode.relatives[TSTNode.HIKID], charIndex, d,
+ matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult,
+ upTo);
+ }
+ return matchAlmostResult;
+ }
+
+ /**
+ * Returns an alphabetical <code>List</code> of all keys in the trie that
+ * begin with a given prefix. Only keys for nodes having non-null data are
+ * included in the <code>List</code>.
+ *
+ *@param prefix
+ * Each key returned from this method will begin with the characters
+ * in prefix.
+ *@return A <code>List</code> with the results.
+ */
+ public List<String> matchPrefix(String prefix) {
+ return matchPrefix(prefix, defaultNumReturnValues);
+ }
+
+ /**
+ * Returns an alphabetical <code>List</code> of all keys in the trie that
+ * begin with a given prefix. Only keys for nodes having non-null data are
+ * included in the <code>List</code>.
+ *
+ *@param prefix
+ * Each key returned from this method will begin with the characters
+ * in prefix.
+ *@param numReturnValues
+ * The maximum number of values returned from this method.
+ *@return A <code>List</code> with the results
+ */
+ public List<String> matchPrefix(String prefix, int numReturnValues) {
+ Vector<String> sortKeysResult = new Vector<String>();
+ TSTNode startNode = getNode(prefix);
+ if (startNode == null) {
+ return sortKeysResult;
+ }
+ if (startNode.data != null) {
+ sortKeysResult.addElement(getKey(startNode));
+ }
+ return sortKeysRecursion(startNode.relatives[TSTNode.EQKID],
+ ((numReturnValues < 0) ? -1 : numReturnValues), sortKeysResult);
+ }
+
+ /**
+ * Returns the number of nodes in the trie that have non-null data.
+ *
+ *@return The number of nodes in the trie that have non-null data.
+ */
+ public int numDataNodes() {
+ return numDataNodes(rootNode);
+ }
+
+ /**
+ * Returns the number of nodes in the subtrie below and including the starting
+ * node. The method counts only nodes that have non-null data.
+ *
+ *@param startingNode
+ * The top node of the subtrie. the node that defines the subtrie.
+ *@return The total number of nodes in the subtrie.
+ */
+ protected int numDataNodes(TSTNode startingNode) {
+ return recursiveNodeCalculator(startingNode, true, 0);
+ }
+
+ /**
+ * Returns the total number of nodes in the trie. The method counts nodes
+ * whether or not they have data.
+ *
+ *@return The total number of nodes in the trie.
+ */
+ public int numNodes() {
+ return numNodes(rootNode);
+ }
+
+ /**
+ * Returns the total number of nodes in the subtrie below and including the
+ * starting Node. The method counts nodes whether or not they have data.
+ *
+ *@param startingNode
+ * The top node of the subtrie. The node that defines the subtrie.
+ *@return The total number of nodes in the subtrie.
+ */
+ protected int numNodes(TSTNode startingNode) {
+ return recursiveNodeCalculator(startingNode, false, 0);
+ }
+
+ /**
+ * Stores a value in the trie. The value may be retrieved using the key.
+ *
+ *@param key
+ * A <code>String</code> that indexes the object to be stored.
+ *@param value
+ * The object to be stored in the Trie.
+ */
+ public void put(String key, Object value) {
+ getOrCreateNode(key.trim().toLowerCase()).data = value;
+ }
+
+ /**
+ * Recursivelly visists each node to calculate the number of nodes.
+ *
+ *@param currentNode
+ * The current node.
+ *@param checkData
+ * If true we check the data to be different of <code>null</code>.
+ *@param numNodes2
+ * The number of nodes so far.
+ *@return The number of nodes accounted.
+ */
+ private int recursiveNodeCalculator(TSTNode currentNode, boolean checkData,
+ int numNodes2) {
+ if (currentNode == null) {
+ return numNodes2;
+ }
+ int numNodes = recursiveNodeCalculator(
+ currentNode.relatives[TSTNode.LOKID], checkData, numNodes2);
+ numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.EQKID],
+ checkData, numNodes);
+ numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.HIKID],
+ checkData, numNodes);
+ if (checkData) {
+ if (currentNode.data != null) {
+ numNodes++;
+ }
+ } else {
+ numNodes++;
+ }
+ return numNodes;
+ }
+
+ /**
+ * Removes the value indexed by key. Also removes all nodes that are rendered
+ * unnecessary by the removal of this data.
+ *
+ *@param key
+ * A <code>string</code> that indexes the object to be removed from
+ * the Trie.
+ */
+ public void remove(String key) {
+ deleteNode(getNode(key.trim().toLowerCase()));
+ }
+
+ /**
+ * Sets the number of characters by which words can differ from target word
+ * when calling the <code>matchAlmost</code> method.
+ * <p>
+ * Arguments less than 0 will set the char difference to 0, and arguments
+ * greater than 3 will set the char difference to 3.
+ *
+ *@param diff
+ * The number of characters by which words can differ from target
+ * word.
+ */
+ public void setMatchAlmostDiff(int diff) {
+ if (diff < 0) {
+ matchAlmostDiff = 0;
+ } else if (diff > 3) {
+ matchAlmostDiff = 3;
+ } else {
+ matchAlmostDiff = diff;
+ }
+ }
+
+ /**
+ * Sets the default maximum number of values returned from the
+ * <code>matchPrefix</code> and <code>matchAlmost</code> methods.
+ * <p>
+ * The value should be set this to -1 to get an unlimited number of return
+ * values. note that the methods mentioned above provide overloaded versions
+ * that allow you to specify the maximum number of return values, in which
+ * case this value is temporarily overridden.
+ *
+ **@param num
+ * The number of values that will be returned when calling the
+ * methods above.
+ */
+ public void setNumReturnValues(int num) {
+ defaultNumReturnValues = (num < 0) ? -1 : num;
+ }
+
+ /**
+ * Returns keys sorted in alphabetical order. This includes the start Node and
+ * all nodes connected to the start Node.
+ * <p>
+ * The number of keys returned is limited to numReturnValues. To get a list
+ * that isn't limited in size, set numReturnValues to -1.
+ *
+ *@param startNode
+ * The top node defining the subtrie to be searched.
+ *@param numReturnValues
+ * The maximum number of values returned from this method.
+ *@return A <code>List</code> with the results.
+ */
+ protected List<String> sortKeys(TSTNode startNode, int numReturnValues) {
+ return sortKeysRecursion(startNode, ((numReturnValues < 0) ? -1
+ : numReturnValues), new Vector<String>());
+ }
+
+ /**
+ * Returns keys sorted in alphabetical order. This includes the current Node
+ * and all nodes connected to the current Node.
+ * <p>
+ * Sorted keys will be appended to the end of the resulting <code>List</code>.
+ * The result may be empty when this method is invoked, but may not be
+ * <code>null</code>.
+ *
+ *@param currentNode
+ * The current node.
+ *@param sortKeysNumReturnValues
+ * The maximum number of values in the result.
+ *@param sortKeysResult2
+ * The results so far.
+ *@return A <code>List</code> with the results.
+ */
+ private List<String> sortKeysRecursion(TSTNode currentNode,
+ int sortKeysNumReturnValues, List<String> sortKeysResult2) {
+ if (currentNode == null) {
+ return sortKeysResult2;
+ }
+ List<String> sortKeysResult = sortKeysRecursion(
+ currentNode.relatives[TSTNode.LOKID], sortKeysNumReturnValues,
+ sortKeysResult2);
+ if (sortKeysNumReturnValues != -1
+ && sortKeysResult.size() >= sortKeysNumReturnValues) {
+ return sortKeysResult;
+ }
+ if (currentNode.data != null) {
+ sortKeysResult.add(getKey(currentNode));
+ }
+ sortKeysResult = sortKeysRecursion(currentNode.relatives[TSTNode.EQKID],
+ sortKeysNumReturnValues, sortKeysResult);
+ return sortKeysRecursion(currentNode.relatives[TSTNode.HIKID],
+ sortKeysNumReturnValues, sortKeysResult);
+ }
+
+}