lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java

   1 package org.apache.lucene.analysis.el;
   2
   3 /**
   4  * Copyright 2005 The Apache Software Foundation
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import org.apache.lucene.analysis.Analyzer;
  20 import org.apache.lucene.analysis.StopFilter;
  21 import org.apache.lucene.analysis.StopwordAnalyzerBase;
  22 import org.apache.lucene.analysis.TokenStream;
  23 import org.apache.lucene.analysis.Tokenizer;
  24 import org.apache.lucene.analysis.standard.StandardFilter;
  25 import org.apache.lucene.analysis.standard.StandardTokenizer;
  26 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
  27 import org.apache.lucene.util.Version;
  28
  29 import java.io.IOException;
  30 import java.io.Reader;
  31 import java.util.Map;
  32 import java.util.Set;
  33
  34 /**
  35  * {@link Analyzer} for the Greek language.
  36  * <p>
  37  * Supports an external list of stopwords (words
  38  * that will not be indexed at all).
  39  * A default set of stopwords is used unless an alternative list is specified.
  40  * </p>
  41  *
  42  * <a name="version"/>
  43  * <p>You must specify the required {@link Version}
  44  * compatibility when creating GreekAnalyzer:
  45  * <ul>
  46  *   <li> As of 3.1, StandardFilter and GreekStemmer are used by default.
  47  *   <li> As of 2.9, StopFilter preserves position
  48  *        increments
  49  * </ul>
  50  *
  51  * <p><b>NOTE</b>: This class uses the same {@link Version}
  52  * dependent settings as {@link StandardAnalyzer}.</p>
  53  */
  54 public final class GreekAnalyzer extends StopwordAnalyzerBase {
  55   /** File containing default Greek stopwords. */
  56   public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  57
  58   /**
  59    * Returns a set of default Greek-stopwords
  60    * @return a set of default Greek-stopwords
  61    */
  62   public static final Set<?> getDefaultStopSet(){
  63     return DefaultSetHolder.DEFAULT_SET;
  64   }
  65
  66   private static class DefaultSetHolder {
  67     private static final Set<?> DEFAULT_SET;
  68
  69     static {
  70       try {
  71         DEFAULT_SET = loadStopwordSet(false, GreekAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
  72       } catch (IOException ex) {
  73         // default set should always be present as it is part of the
  74         // distribution (JAR)
  75         throw new RuntimeException("Unable to load default stopword set");
  76       }
  77     }
  78   }
  79
  80   /**
  81    * Builds an analyzer with the default stop words.
  82    * @param matchVersion Lucene compatibility version,
  83    *   See <a href="#version">above</a>
  84    */
  85   public GreekAnalyzer(Version matchVersion) {
  86     this(matchVersion, DefaultSetHolder.DEFAULT_SET);
  87   }
  88
  89   /**
  90    * Builds an analyzer with the given stop words.
  91    * <p>
  92    * <b>NOTE:</b> The stopwords set should be pre-processed with the logic of
  93    * {@link GreekLowerCaseFilter} for best results.
  94    *
  95    * @param matchVersion Lucene compatibility version,
  96    *   See <a href="#version">above</a>
  97    * @param stopwords a stopword set
  98    */
  99   public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
 100     super(matchVersion, stopwords);
 101   }
 102
 103   /**
 104    * Builds an analyzer with the given stop words.
 105    * @param stopwords Array of stopwords to use.
 106    * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
 107    */
 108   @Deprecated
 109   public GreekAnalyzer(Version matchVersion, String... stopwords) {
 110     this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
 111   }
 112
 113   /**
 114    * Builds an analyzer with the given stop words.
 115    * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
 116    */
 117   @Deprecated
 118   public GreekAnalyzer(Version matchVersion, Map<?,?> stopwords) {
 119     this(matchVersion, stopwords.keySet());
 120   }
 121
 122   /**
 123    * Creates
 124    * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 125    * used to tokenize all the text in the provided {@link Reader}.
 126    *
 127    * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 128    *         built from a {@link StandardTokenizer} filtered with
 129    *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
 130    *         {@link StopFilter}, and {@link GreekStemFilter}
 131    */
 132   @Override
 133   protected TokenStreamComponents createComponents(String fieldName,
 134       Reader reader) {
 135     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
 136     TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
 137     if (matchVersion.onOrAfter(Version.LUCENE_31))
 138       result = new StandardFilter(matchVersion, result);
 139     result = new StopFilter(matchVersion, result, stopwords);
 140     if (matchVersion.onOrAfter(Version.LUCENE_31))
 141       result = new GreekStemFilter(result);
 142     return new TokenStreamComponents(source, result);
 143   }
 144 }