lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java

   1 package org.apache.lucene.analysis.it;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.HashSet;
  22 import java.util.Set;
  23
  24 import org.apache.lucene.analysis.Analyzer;
  25 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  26 import org.apache.lucene.util.Version;
  27
  28 public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
  29   /** This test fails with NPE when the
  30    * stopwords file is missing in classpath */
  31   public void testResourcesAvailable() {
  32     new ItalianAnalyzer(TEST_VERSION_CURRENT);
  33   }
  34
  35   /** test stopwords and stemming */
  36   public void testBasics() throws IOException {
  37     Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
  38     // stemming
  39     checkOneTermReuse(a, "abbandonata", "abbandon");
  40     checkOneTermReuse(a, "abbandonati", "abbandon");
  41     // stopword
  42     assertAnalyzesTo(a, "dallo", new String[] {});
  43   }
  44
  45   /** test use of exclusion set */
  46   public void testExclude() throws IOException {
  47     Set<String> exclusionSet = new HashSet<String>();
  48     exclusionSet.add("abbandonata");
  49     Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT,
  50         ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
  51     checkOneTermReuse(a, "abbandonata", "abbandonata");
  52     checkOneTermReuse(a, "abbandonati", "abbandon");
  53   }
  54
  55   /** blast some random strings through the analyzer */
  56   public void testRandomStrings() throws Exception {
  57     checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
  58   }
  59
  60   /** test that the elisionfilter is working */
  61   public void testContractions() throws IOException {
  62     Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
  63     assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
  64     assertAnalyzesTo(a, "l'Italiano", new String[] { "ital" });
  65   }
  66
  67   /** test that we don't enable this before 3.2*/
  68   public void testContractionsBackwards() throws IOException {
  69     Analyzer a = new ItalianAnalyzer(Version.LUCENE_31);
  70     assertAnalyzesTo(a, "dell'Italia", new String[] { "dell'ital" });
  71     assertAnalyzesTo(a, "l'Italiano", new String[] { "l'ital" });
  72   }
  73 }