1 package org.apache.lucene.search.suggest.jaspell;
3 import java.io.DataInputStream;
4 import java.io.DataOutputStream;
6 import java.io.FileInputStream;
7 import java.io.FileOutputStream;
8 import java.io.IOException;
9 import java.util.ArrayList;
10 import java.util.List;
12 import org.apache.lucene.search.spell.SortedIterator;
13 import org.apache.lucene.search.spell.TermFreqIterator;
14 import org.apache.lucene.search.suggest.Lookup;
15 import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
16 import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
18 public class JaspellLookup extends Lookup {
19 JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
20 private boolean usePrefix = true;
21 private int editDistance = 2;
24 public void build(TermFreqIterator tfit) throws IOException {
25 if (tfit instanceof SortedIterator) {
26 // make sure it's unsorted
27 tfit = new UnsortedTermFreqIteratorWrapper(tfit);
29 trie = new JaspellTernarySearchTrie();
30 trie.setMatchAlmostDiff(editDistance);
31 while (tfit.hasNext()) {
32 String key = tfit.next();
33 float freq = tfit.freq();
34 if (key.length() == 0) {
37 trie.put(key, new Float(freq));
42 public boolean add(String key, Object value) {
49 public Object get(String key) {
54 public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
55 List<LookupResult> res = new ArrayList<LookupResult>();
57 int count = onlyMorePopular ? num * 2 : num;
59 list = trie.matchPrefix(key, count);
61 list = trie.matchAlmost(key, count);
63 if (list == null || list.size() == 0) {
67 int maxCnt = Math.min(num, list.size());
68 if (onlyMorePopular) {
69 LookupPriorityQueue queue = new LookupPriorityQueue(num);
70 for (String s : list) {
71 float freq = (Float)trie.get(s);
72 queue.insertWithOverflow(new LookupResult(s, freq));
74 for (LookupResult lr : queue.getResults()) {
78 for (int i = 0; i < maxCnt; i++) {
79 String s = list.get(i);
80 float freq = (Float)trie.get(s);
81 res.add(new LookupResult(s, freq));
87 public static final String FILENAME = "jaspell.dat";
88 private static final byte LO_KID = 0x01;
89 private static final byte EQ_KID = 0x02;
90 private static final byte HI_KID = 0x04;
91 private static final byte HAS_VALUE = 0x08;
95 public boolean load(File storeDir) throws IOException {
96 File data = new File(storeDir, FILENAME);
97 if (!data.exists() || !data.canRead()) {
100 DataInputStream in = new DataInputStream(new FileInputStream(data));
101 TSTNode root = trie.new TSTNode('\0', null);
103 readRecursively(in, root);
111 private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
112 node.splitchar = in.readChar();
113 byte mask = in.readByte();
114 if ((mask & HAS_VALUE) != 0) {
115 node.data = new Float(in.readFloat());
117 if ((mask & LO_KID) != 0) {
118 TSTNode kid = trie.new TSTNode('\0', node);
119 node.relatives[TSTNode.LOKID] = kid;
120 readRecursively(in, kid);
122 if ((mask & EQ_KID) != 0) {
123 TSTNode kid = trie.new TSTNode('\0', node);
124 node.relatives[TSTNode.EQKID] = kid;
125 readRecursively(in, kid);
127 if ((mask & HI_KID) != 0) {
128 TSTNode kid = trie.new TSTNode('\0', node);
129 node.relatives[TSTNode.HIKID] = kid;
130 readRecursively(in, kid);
135 public boolean store(File storeDir) throws IOException {
136 if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
139 TSTNode root = trie.getRoot();
140 if (root == null) { // empty tree
143 File data = new File(storeDir, FILENAME);
144 DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
146 writeRecursively(out, root);
154 private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
158 out.writeChar(node.splitchar);
160 if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
161 if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
162 if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
163 if (node.data != null) mask |= HAS_VALUE;
165 if (node.data != null) {
166 out.writeFloat((Float)node.data);
168 writeRecursively(out, node.relatives[TSTNode.LOKID]);
169 writeRecursively(out, node.relatives[TSTNode.EQKID]);
170 writeRecursively(out, node.relatives[TSTNode.HIKID]);