X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java diff --git a/lucene-java-3.5.0/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene-java-3.5.0/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java new file mode 100644 index 0000000..b33b187 --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -0,0 +1,172 @@ +package org.apache.lucene.search.suggest.jaspell; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.search.spell.SortedIterator; +import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; + +public class JaspellLookup extends Lookup { + JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); + private boolean usePrefix = true; + private int editDistance = 2; + + @Override + public void build(TermFreqIterator tfit) throws IOException { + if (tfit instanceof SortedIterator) { + // make sure it's unsorted + tfit = new UnsortedTermFreqIteratorWrapper(tfit); + } + trie = new JaspellTernarySearchTrie(); + trie.setMatchAlmostDiff(editDistance); + while (tfit.hasNext()) { + String key = tfit.next(); + float freq = tfit.freq(); + if (key.length() == 0) { + continue; + } + trie.put(key, new Float(freq)); + } + } + + @Override + public boolean add(String key, Object value) { + trie.put(key, value); + // XXX + return false; + } + + @Override + public Object get(String key) { + return trie.get(key); + } + + @Override + public List lookup(String key, boolean onlyMorePopular, int num) { + List res = new ArrayList(); + List list; + int count = onlyMorePopular ? num * 2 : num; + if (usePrefix) { + list = trie.matchPrefix(key, count); + } else { + list = trie.matchAlmost(key, count); + } + if (list == null || list.size() == 0) { + return res; + + } + int maxCnt = Math.min(num, list.size()); + if (onlyMorePopular) { + LookupPriorityQueue queue = new LookupPriorityQueue(num); + for (String s : list) { + float freq = (Float)trie.get(s); + queue.insertWithOverflow(new LookupResult(s, freq)); + } + for (LookupResult lr : queue.getResults()) { + res.add(lr); + } + } else { + for (int i = 0; i < maxCnt; i++) { + String s = list.get(i); + float freq = (Float)trie.get(s); + res.add(new LookupResult(s, freq)); + } + } + return res; + } + + public static final String FILENAME = "jaspell.dat"; + private static final byte LO_KID = 0x01; + private static final byte EQ_KID = 0x02; + private static final byte HI_KID = 0x04; + private static final byte HAS_VALUE = 0x08; + + + @Override + public boolean load(File storeDir) throws IOException { + File data = new File(storeDir, FILENAME); + if (!data.exists() || !data.canRead()) { + return false; + } + DataInputStream in = new DataInputStream(new FileInputStream(data)); + TSTNode root = trie.new TSTNode('\0', null); + try { + readRecursively(in, root); + trie.setRoot(root); + } finally { + in.close(); + } + return true; + } + + private void readRecursively(DataInputStream in, TSTNode node) throws IOException { + node.splitchar = in.readChar(); + byte mask = in.readByte(); + if ((mask & HAS_VALUE) != 0) { + node.data = new Float(in.readFloat()); + } + if ((mask & LO_KID) != 0) { + TSTNode kid = trie.new TSTNode('\0', node); + node.relatives[TSTNode.LOKID] = kid; + readRecursively(in, kid); + } + if ((mask & EQ_KID) != 0) { + TSTNode kid = trie.new TSTNode('\0', node); + node.relatives[TSTNode.EQKID] = kid; + readRecursively(in, kid); + } + if ((mask & HI_KID) != 0) { + TSTNode kid = trie.new TSTNode('\0', node); + node.relatives[TSTNode.HIKID] = kid; + readRecursively(in, kid); + } + } + + @Override + public boolean store(File storeDir) throws IOException { + if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) { + return false; + } + TSTNode root = trie.getRoot(); + if (root == null) { // empty tree + return false; + } + File data = new File(storeDir, FILENAME); + DataOutputStream out = new DataOutputStream(new FileOutputStream(data)); + try { + writeRecursively(out, root); + out.flush(); + } finally { + out.close(); + } + return true; + } + + private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException { + if (node == null) { + return; + } + out.writeChar(node.splitchar); + byte mask = 0; + if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID; + if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID; + if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID; + if (node.data != null) mask |= HAS_VALUE; + out.writeByte(mask); + if (node.data != null) { + out.writeFloat((Float)node.data); + } + writeRecursively(out, node.relatives[TSTNode.LOKID]); + writeRecursively(out, node.relatives[TSTNode.EQKID]); + writeRecursively(out, node.relatives[TSTNode.HIKID]); + } +}