1 package org.egothor.stemmer;
4 Egothor Software License version 1.00
5 Copyright (C) 1997-2004 Leo Galambos.
6 Copyright (C) 2002-2004 "Egothor developers"
7 on behalf of the Egothor Project.
10 This software is copyrighted by the "Egothor developers". If this
11 license applies to a single file or document, the "Egothor developers"
12 are the people or entities mentioned as copyright holders in that file
13 or document. If this license applies to the Egothor project as a
14 whole, the copyright holders are the people or entities mentioned in
15 the file CREDITS. This file can be found in the same location as this
16 license in the distribution.
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are
21 1. Redistributions of source code must retain the above copyright
22 notice, the list of contributors, this list of conditions, and the
24 2. Redistributions in binary form must reproduce the above copyright
25 notice, the list of contributors, this list of conditions, and the
26 disclaimer that follows these conditions in the documentation
27 and/or other materials provided with the distribution.
28 3. The name "Egothor" must not be used to endorse or promote products
29 derived from this software without prior written permission. For
30 written permission, please contact Leo.G@seznam.cz
31 4. Products derived from this software may not be called "Egothor",
32 nor may "Egothor" appear in their name, without prior written
33 permission from Leo.G@seznam.cz.
35 In addition, we request that you include in the end-user documentation
36 provided with the redistribution and/or in the software itself an
37 acknowledgement equivalent to the following:
38 "This product includes software developed by the Egothor Project.
39 http://egothor.sf.net/"
41 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
43 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
45 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
46 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
47 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
48 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
49 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
50 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
51 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 This software consists of voluntary contributions made by many
54 individuals on behalf of the Egothor Project and was originally
55 created by Leo Galambos (Leo.G@seznam.cz).
58 import java.io.BufferedInputStream;
59 import java.io.BufferedReader;
60 import java.io.DataInputStream;
62 import java.io.FileInputStream;
63 import java.io.FileReader;
64 import java.io.IOException;
65 import java.io.LineNumberReader;
67 import java.util.StringTokenizer;
69 import org.apache.lucene.util.LuceneTestCase;
71 public class TestCompile extends LuceneTestCase {
73 public void testCompile() throws Exception {
74 URI uri = getClass().getResource("testRules.txt").toURI();
75 String path = uri.getPath();
76 Compile.main(new String[] {"test", path});
77 String compiled = path + ".out";
78 Trie trie = loadTrie(compiled);
79 assertTrie(trie, path, true, true);
80 assertTrie(trie, path, false, true);
81 new File(compiled).delete();
84 public void testCompileBackwards() throws Exception {
85 URI uri = getClass().getResource("testRules.txt").toURI();
86 String path = uri.getPath();
87 Compile.main(new String[] {"-test", path});
88 String compiled = path + ".out";
89 Trie trie = loadTrie(compiled);
90 assertTrie(trie, path, true, true);
91 assertTrie(trie, path, false, true);
92 new File(compiled).delete();
95 public void testCompileMulti() throws Exception {
96 URI uri = getClass().getResource("testRules.txt").toURI();
97 String path = uri.getPath();
98 Compile.main(new String[] {"Mtest", path});
99 String compiled = path + ".out";
100 Trie trie = loadTrie(compiled);
101 assertTrie(trie, path, true, true);
102 assertTrie(trie, path, false, true);
103 new File(compiled).delete();
106 static Trie loadTrie(String path) throws IOException {
108 DataInputStream is = new DataInputStream(new BufferedInputStream(
109 new FileInputStream(path)));
110 String method = is.readUTF().toUpperCase();
111 if (method.indexOf('M') < 0) {
114 trie = new MultiTrie(is);
120 private static void assertTrie(Trie trie, String file, boolean usefull,
121 boolean storeorig) throws Exception {
122 LineNumberReader in = new LineNumberReader(new BufferedReader(
123 new FileReader(file)));
125 for (String line = in.readLine(); line != null; line = in.readLine()) {
127 line = line.toLowerCase();
128 StringTokenizer st = new StringTokenizer(line);
129 String stem = st.nextToken();
131 CharSequence cmd = (usefull) ? trie.getFully(stem) : trie
132 .getLastOnPath(stem);
133 StringBuilder stm = new StringBuilder(stem);
134 Diff.apply(stm, cmd);
135 assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
137 while (st.hasMoreTokens()) {
138 String token = st.nextToken();
139 if (token.equals(stem)) {
142 CharSequence cmd = (usefull) ? trie.getFully(token) : trie
143 .getLastOnPath(token);
144 StringBuilder stm = new StringBuilder(token);
145 Diff.apply(stm, cmd);
146 assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
148 } catch (java.util.NoSuchElementException x) {
149 // no base token (stem) on a line