lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java

   1 package org.apache.lucene.analysis.nl;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.HashMap;
  22 import java.util.HashSet;
  23 import java.util.Map;
  24 import java.util.Set;
  25
  26 import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
  27 import org.apache.lucene.analysis.TokenFilter;
  28 import org.apache.lucene.analysis.TokenStream;
  29 import org.apache.lucene.analysis.snowball.SnowballFilter;
  30 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
  31 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  32
  33 /**
  34  * A {@link TokenFilter} that stems Dutch words.
  35  * <p>
  36  * It supports a table of words that should
  37  * not be stemmed at all. The stemmer used can be changed at runtime after the
  38  * filter object is created (as long as it is a {@link DutchStemmer}).
  39  * </p>
  40  * <p>
  41  * To prevent terms from being stemmed use an instance of
  42  * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  43  * the {@link KeywordAttribute} before this {@link TokenStream}.
  44  * </p>
  45  * @see KeywordMarkerFilter
  46  * @deprecated Use {@link SnowballFilter} with
  47  * {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
  48  * same functionality. This filter will be removed in Lucene 5.0
  49  */
  50 @Deprecated
  51 public final class DutchStemFilter extends TokenFilter {
  52   /**
  53    * The actual token in the input stream.
  54    */
  55   private DutchStemmer stemmer = new DutchStemmer();
  56   private Set<?> exclusions = null;
  57
  58   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  59   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
  60
  61   public DutchStemFilter(TokenStream _in) {
  62     super(_in);
  63   }
  64
  65   /**
  66    * Builds a DutchStemFilter that uses an exclusion table.
  67    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
  68    */
  69   @Deprecated
  70   public DutchStemFilter(TokenStream _in, Set<?> exclusiontable) {
  71     this(_in);
  72     exclusions = exclusiontable;
  73   }
  74
  75   /**
  76    * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
  77    */
  78   public DutchStemFilter(TokenStream _in,  Map<?,?> stemdictionary) {
  79     this(_in);
  80     stemmer.setStemDictionary(stemdictionary);
  81   }
  82
  83   /**
  84    * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
  85    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
  86    */
  87   @Deprecated
  88   public DutchStemFilter(TokenStream _in, Set<?> exclusiontable, Map<?,?> stemdictionary) {
  89     this(_in, exclusiontable);
  90     stemmer.setStemDictionary(stemdictionary);
  91   }
  92
  93   /**
  94    * Returns the next token in the stream, or null at EOS
  95    */
  96   @Override
  97   public boolean incrementToken() throws IOException {
  98     if (input.incrementToken()) {
  99       final String term = termAtt.toString();
 100
 101       // Check the exclusion table.
 102       if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
 103         final String s = stemmer.stem(term);
 104         // If not stemmed, don't waste the time adjusting the token.
 105         if ((s != null) && !s.equals(term))
 106           termAtt.setEmpty().append(s);
 107       }
 108       return true;
 109     } else {
 110       return false;
 111     }
 112   }
 113
 114   /**
 115    * Set a alternative/custom {@link DutchStemmer} for this filter.
 116    */
 117   public void setStemmer(DutchStemmer stemmer) {
 118     if (stemmer != null) {
 119       this.stemmer = stemmer;
 120     }
 121   }
 122
 123   /**
 124    * Set an alternative exclusion list for this filter.
 125    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
 126    */
 127   @Deprecated
 128   public void setExclusionTable(HashSet<?> exclusiontable) {
 129     exclusions = exclusiontable;
 130   }
 131
 132   /**
 133    * Set dictionary for stemming, this dictionary overrules the algorithm,
 134    * so you can correct for a particular unwanted word-stem pair.
 135    */
 136   public void setStemDictionary(HashMap<?,?> dict) {
 137     if (stemmer != null)
 138       stemmer.setStemDictionary(dict);
 139   }
 140 }