lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java

   1 package org.apache.lucene.analysis.cz;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.InputStream;
  22
  23 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  24 import org.apache.lucene.analysis.Analyzer;
  25 import org.apache.lucene.analysis.CharArraySet;
  26 import org.apache.lucene.util.Version;
  27
  28 /**
  29  * Test the CzechAnalyzer
  30  *
  31  * Before Lucene 3.1, CzechAnalyzer was a StandardAnalyzer with a custom
  32  * stopword list. As of 3.1 it also includes a stemmer.
  33  *
  34  */
  35 public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
  36
  37   /**
  38    * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
  39    */
  40   @Deprecated
  41   public void testStopWordLegacy() throws Exception {
  42     assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_30), "Pokud mluvime o volnem",
  43         new String[] { "mluvime", "volnem" });
  44   }
  45
  46   public void testStopWord() throws Exception {
  47     assertAnalyzesTo(new CzechAnalyzer(TEST_VERSION_CURRENT), "Pokud mluvime o volnem",
  48         new String[] { "mluvim", "voln" });
  49   }
  50
  51   /**
  52    * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
  53    */
  54   @Deprecated
  55   public void testReusableTokenStreamLegacy() throws Exception {
  56     Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_30);
  57     assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
  58     assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
  59   }
  60
  61   public void testReusableTokenStream() throws Exception {
  62     Analyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
  63     assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
  64     assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
  65   }
  66
  67   /**
  68    * An input stream that always throws IOException for testing.
  69    * @deprecated Remove this class when the loadStopWords method is removed.
  70    */
  71   @Deprecated
  72   private class UnreliableInputStream extends InputStream {
  73     @Override
  74     public int read() throws IOException {
  75       throw new IOException();
  76     }
  77   }
  78
  79   /**
  80    * The loadStopWords method does not throw IOException on error,
  81    * instead previously it set the stoptable to null (versus empty)
  82    * this would cause a NPE when it is time to create the StopFilter.
  83    * @deprecated Remove this test when the loadStopWords method is removed.
  84    */
  85   @Deprecated
  86   public void testInvalidStopWordFile() throws Exception {
  87     CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
  88     cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
  89     assertAnalyzesTo(cz, "Pokud mluvime o volnem",
  90         new String[] { "pokud", "mluvime", "o", "volnem" });
  91   }
  92
  93   /**
  94    * Test that changes to the stop table via loadStopWords are applied immediately
  95    * when using reusable token streams.
  96    * @deprecated Remove this test when the loadStopWords method is removed.
  97    */
  98   @Deprecated
  99   public void testStopWordFileReuse() throws Exception {
 100     CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
 101     assertAnalyzesToReuse(cz, "Česká Republika",
 102       new String[] { "česká", "republika" });
 103
 104     InputStream stopwords = getClass().getResourceAsStream("customStopWordFile.txt");
 105     cz.loadStopWords(stopwords, "UTF-8");
 106
 107     assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
 108   }
 109
 110   public void testWithStemExclusionSet() throws IOException{
 111     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
 112     set.add("hole");
 113     CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
 114     assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
 115   }
 116
 117   /** blast some random strings through the analyzer */
 118   public void testRandomStrings() throws Exception {
 119     checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
 120   }
 121 }