1 package org.apache.lucene.analysis.de;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
23 import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
24 import org.apache.lucene.analysis.TokenFilter;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
27 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
30 * A {@link TokenFilter} that stems German words.
32 * It supports a table of words that should
33 * not be stemmed at all. The stemmer used can be changed at runtime after the
34 * filter object is created (as long as it is a {@link GermanStemmer}).
37 * To prevent terms from being stemmed use an instance of
38 * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
39 * the {@link KeywordAttribute} before this {@link TokenStream}.
41 * @see KeywordMarkerFilter
43 public final class GermanStemFilter extends TokenFilter
46 * The stemmer applied to each token of the input stream.
48 private GermanStemmer stemmer = new GermanStemmer();
49 private Set<?> exclusionSet = null;
51 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
52 private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
55 * Creates a {@link GermanStemFilter} instance
56 * @param in the source {@link TokenStream}
58 public GermanStemFilter( TokenStream in )
64 * Builds a GermanStemFilter that uses an exclusion table.
65 * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
68 public GermanStemFilter( TokenStream in, Set<?> exclusionSet )
71 this.exclusionSet = exclusionSet;
75 * @return Returns true for next token in the stream, or false at EOS
78 public boolean incrementToken() throws IOException {
79 if (input.incrementToken()) {
80 String term = termAtt.toString();
81 // Check the exclusion table.
82 if (!keywordAttr.isKeyword() && (exclusionSet == null || !exclusionSet.contains(term))) {
83 String s = stemmer.stem(term);
84 // If not stemmed, don't waste the time adjusting the token.
85 if ((s != null) && !s.equals(term))
86 termAtt.setEmpty().append(s);
95 * Set an alternative/custom {@link GermanStemmer} for this filter.
97 public void setStemmer( GermanStemmer stemmer )
99 if ( stemmer != null ) {
100 this.stemmer = stemmer;
106 * Set an alternative exclusion list for this filter.
107 * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
110 public void setExclusionSet( Set<?> exclusionSet )
112 this.exclusionSet = exclusionSet;