X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java b/lucene-java-3.4.0/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java deleted file mode 100644 index 7b522e5..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - Egothor Software License version 1.00 - Copyright (C) 1997-2004 Leo Galambos. - Copyright (C) 2002-2004 "Egothor developers" - on behalf of the Egothor Project. - All rights reserved. - - This software is copyrighted by the "Egothor developers". If this - license applies to a single file or document, the "Egothor developers" - are the people or entities mentioned as copyright holders in that file - or document. If this license applies to the Egothor project as a - whole, the copyright holders are the people or entities mentioned in - the file CREDITS. This file can be found in the same location as this - license in the distribution. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - 1. Redistributions of source code must retain the above copyright - notice, the list of contributors, this list of conditions, and the - following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, the list of contributors, this list of conditions, and the - disclaimer that follows these conditions in the documentation - and/or other materials provided with the distribution. - 3. The name "Egothor" must not be used to endorse or promote products - derived from this software without prior written permission. For - written permission, please contact Leo.G@seznam.cz - 4. Products derived from this software may not be called "Egothor", - nor may "Egothor" appear in their name, without prior written - permission from Leo.G@seznam.cz. - - In addition, we request that you include in the end-user documentation - provided with the redistribution and/or in the software itself an - acknowledgement equivalent to the following: - "This product includes software developed by the Egothor Project. - http://egothor.sf.net/" - - THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - This software consists of voluntary contributions made by many - individuals on behalf of the Egothor Project and was originally - created by Leo Galambos (Leo.G@seznam.cz). - */ -package org.egothor.stemmer; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.DataOutputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.util.StringTokenizer; - -/** - * The Compile class is used to compile a stemmer table. - */ -public class Compile { - - static boolean backward; - static boolean multi; - static Trie trie; - - /** - * Entry point to the Compile application. - *

- * This program takes any number of arguments: the first is the name of the - * desired stemming algorithm to use (a list is available in the package - * description) , all of the rest should be the path or paths to a file or - * files containing a stemmer table to compile. - * - * @param args the command line arguments - */ - public static void main(java.lang.String[] args) { - if (args.length < 1) { - return; - } - - args[0].toUpperCase(); - - backward = args[0].charAt(0) == '-'; - int qq = (backward) ? 1 : 0; - boolean storeorig = false; - - if (args[0].charAt(qq) == '0') { - storeorig = true; - qq++; - } - - multi = args[0].charAt(qq) == 'M'; - if (multi) { - qq++; - } - - String charset = System.getProperty("egothor.stemmer.charset", "UTF-8"); - - char optimizer[] = new char[args[0].length() - qq]; - for (int i = 0; i < optimizer.length; i++) { - optimizer[i] = args[0].charAt(qq + i); - } - - for (int i = 1; i < args.length; i++) { - LineNumberReader in; - // System.out.println("[" + args[i] + "]"); - Diff diff = new Diff(); - try { - int stems = 0; - int words = 0; - - allocTrie(); - - System.out.println(args[i]); - in = new LineNumberReader(new BufferedReader(new InputStreamReader( - new FileInputStream(args[i]), charset))); - for (String line = in.readLine(); line != null; line = in.readLine()) { - try { - line = line.toLowerCase(); - StringTokenizer st = new StringTokenizer(line); - String stem = st.nextToken(); - if (storeorig) { - trie.add(stem, "-a"); - words++; - } - while (st.hasMoreTokens()) { - String token = st.nextToken(); - if (token.equals(stem) == false) { - trie.add(token, diff.exec(token, stem)); - words++; - } - } - } catch (java.util.NoSuchElementException x) { - // no base token (stem) on a line - } - } - - Optimizer o = new Optimizer(); - Optimizer2 o2 = new Optimizer2(); - Lift l = new Lift(true); - Lift e = new Lift(false); - Gener g = new Gener(); - - for (int j = 0; j < optimizer.length; j++) { - String prefix; - switch (optimizer[j]) { - case 'G': - trie = trie.reduce(g); - prefix = "G: "; - break; - case 'L': - trie = trie.reduce(l); - prefix = "L: "; - break; - case 'E': - trie = trie.reduce(e); - prefix = "E: "; - break; - case '2': - trie = trie.reduce(o2); - prefix = "2: "; - break; - case '1': - trie = trie.reduce(o); - prefix = "1: "; - break; - default: - continue; - } - trie.printInfo(prefix + " "); - } - - DataOutputStream os = new DataOutputStream(new BufferedOutputStream( - new FileOutputStream(args[i] + ".out"))); - os.writeUTF(args[0]); - trie.store(os); - os.close(); - - } catch (FileNotFoundException x) { - x.printStackTrace(); - } catch (IOException x) { - x.printStackTrace(); - } - } - } - - static void allocTrie() { - if (multi) { - trie = new MultiTrie2(!backward); - } else { - trie = new Trie(!backward); - } - } -}