/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
17 package org.apache.lucene.analysis.stempel;
19 import java.io.BufferedInputStream;
20 import java.io.DataInputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.util.Locale;
25 import org.egothor.stemmer.Diff;
26 import org.egothor.stemmer.Trie;
/**
 * Stemmer class is a convenient facade for other stemmer-related classes. The
 * core stemming algorithm and its implementation are taken verbatim from the
 * Egothor project (<a href="http://www.egothor.org">www.egothor.org</a>).
 * <p>
 * Even though the stemmer tables supplied in the distribution package are built
 * for the Polish language, there is nothing language-specific here.
 */
39 public class StempelStemmer {
40 private Trie stemmer = null;
41 private StringBuilder buffer = new StringBuilder();
/**
 * Builds a stemmer from a serialized stemmer table. The table is decoded by
 * {@link #load(InputStream)} and the result is handed to
 * {@link #StempelStemmer(Trie)}.
 *
 * @param stemmerTable stream holding the stemmer table.
 * @throws IOException if {@link #load(InputStream)} fails to read the table.
 */
public StempelStemmer(InputStream stemmerTable) throws IOException {
  this(load(stemmerTable));
}
/**
 * Builds a stemmer on top of a stemmer table that is already in memory.
 *
 * @param stemmer pre-loaded stemmer table; stored as-is, without copying.
 */
public StempelStemmer(Trie stemmer) {
  this.stemmer = stemmer;
}
62 * Load a stemmer table from an inputstream.
64 public static Trie load(InputStream stemmerTable) throws IOException {
65 DataInputStream in = null;
67 in = new DataInputStream(new BufferedInputStream(stemmerTable));
68 String method = in.readUTF().toUpperCase(Locale.ENGLISH);
69 if (method.indexOf('M') < 0) {
70 return new org.egothor.stemmer.Trie(in);
72 return new org.egothor.stemmer.MultiTrie2(in);
82 * @param word input word to be stemmed.
83 * @return stemmed word, or null if the stem could not be generated.
85 public StringBuilder stem(CharSequence word) {
86 CharSequence cmd = stemmer.getLastOnPath(word);
94 Diff.apply(buffer, cmd);
96 if (buffer.length() > 0)