lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java

   1 package org.apache.lucene.analysis.br;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.Set;
  22
  23 import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
  24 import org.apache.lucene.analysis.TokenFilter;
  25 import org.apache.lucene.analysis.TokenStream;
  26 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
  27 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  28
  29 /**
  30  * A {@link TokenFilter} that applies {@link BrazilianStemmer}.
  31  * <p>
  32  * To prevent terms from being stemmed use an instance of
  33  * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  34  * the {@link KeywordAttribute} before this {@link TokenStream}.
  35  * </p>
  36  * @see KeywordMarkerFilter
  37  *
  38  */
  39 public final class BrazilianStemFilter extends TokenFilter {
  40
  41   /**
  42    * {@link BrazilianStemmer} in use by this filter.
  43    */
  44   private BrazilianStemmer stemmer = new BrazilianStemmer();
  45   private Set<?> exclusions = null;
  46   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  47   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
  48
  49   /**
  50    * Creates a new BrazilianStemFilter
  51    *
  52    * @param in the source {@link TokenStream}
  53    */
  54   public BrazilianStemFilter(TokenStream in) {
  55     super(in);
  56   }
  57
  58   /**
  59    * Creates a new BrazilianStemFilter
  60    *
  61    * @param in the source {@link TokenStream}
  62    * @param exclusiontable a set of terms that should be prevented from being stemmed.
  63    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
  64    */
  65   @Deprecated
  66   public BrazilianStemFilter(TokenStream in, Set<?> exclusiontable) {
  67     this(in);
  68     this.exclusions = exclusiontable;
  69   }
  70
  71   @Override
  72   public boolean incrementToken() throws IOException {
  73     if (input.incrementToken()) {
  74       final String term = termAtt.toString();
  75       // Check the exclusion table.
  76       if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
  77         final String s = stemmer.stem(term);
  78         // If not stemmed, don't waste the time adjusting the token.
  79         if ((s != null) && !s.equals(term))
  80           termAtt.setEmpty().append(s);
  81       }
  82       return true;
  83     } else {
  84       return false;
  85     }
  86   }
  87 }
  88
  89