lucene-java-3.5.0/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java

   1 package org.apache.lucene.analysis.fr;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.analysis.KeywordMarkerFilter;// for javadoc
  21 import org.apache.lucene.analysis.TokenFilter;
  22 import org.apache.lucene.analysis.TokenStream;
  23 import org.apache.lucene.analysis.snowball.SnowballFilter;
  24 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
  25 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  26
  27 import java.io.IOException;
  28 import java.util.Map;
  29 import java.util.Set;
  30
  31 /**
  32  * A {@link TokenFilter} that stems french words.
  33  * <p>
  34  * The used stemmer can be changed at runtime after the
  35  * filter object is created (as long as it is a {@link FrenchStemmer}).
  36  * </p>
  37  * <p>
  38  * To prevent terms from being stemmed use an instance of
  39  * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  40  * the {@link KeywordAttribute} before this {@link TokenStream}.
  41  * </p>
  42  * @see KeywordMarkerFilter
  43  * @deprecated Use {@link SnowballFilter} with
  44  * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
  45  * same functionality. This filter will be removed in Lucene 5.0
  46  */
  47 @Deprecated
  48 public final class FrenchStemFilter extends TokenFilter {
  49
  50         /**
  51          * The actual token in the input stream.
  52          */
  53         private FrenchStemmer stemmer = new FrenchStemmer();
  54         private Set<?> exclusions = null;
  55
  56         private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  57   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
  58
  59         public FrenchStemFilter( TokenStream in ) {
  60     super(in);
  61         }
  62
  63   /**
  64    *
  65    * @param in the {@link TokenStream} to filter
  66    * @param exclusiontable a set of terms not to be stemmed
  67    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
  68    */
  69         @Deprecated
  70         public FrenchStemFilter( TokenStream in, Set<?> exclusiontable ) {
  71                 this( in );
  72                 exclusions = exclusiontable;
  73         }
  74
  75         /**
  76          * @return  Returns true for the next token in the stream, or false at EOS
  77          */
  78         @Override
  79         public boolean incrementToken() throws IOException {
  80           if (input.incrementToken()) {
  81             String term = termAtt.toString();
  82
  83             // Check the exclusion table
  84             if ( !keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains( term )) ) {
  85               String s = stemmer.stem( term );
  86               // If not stemmed, don't waste the time  adjusting the token.
  87               if ((s != null) && !s.equals( term ) )
  88                 termAtt.setEmpty().append(s);
  89             }
  90             return true;
  91           } else {
  92             return false;
  93           }
  94         }
  95         /**
  96          * Set a alternative/custom {@link FrenchStemmer} for this filter.
  97          */
  98         public void setStemmer( FrenchStemmer stemmer ) {
  99                 if ( stemmer != null ) {
 100                         this.stemmer = stemmer;
 101                 }
 102         }
 103         /**
 104          * Set an alternative exclusion list for this filter.
 105    * @deprecated use {@link KeywordAttribute} with {@link KeywordMarkerFilter} instead.
 106          */
 107         @Deprecated
 108         public void setExclusionTable( Map<?,?> exclusiontable ) {
 109                 exclusions = exclusiontable.keySet();
 110         }
 111 }
 112
 113