lucene-java-3.5.0/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java

   1 package org.apache.lucene.analysis.icu;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.Reader;
  22
  23 import org.apache.lucene.analysis.Analyzer;
  24 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  25 import org.apache.lucene.analysis.TokenStream;
  26 import org.apache.lucene.analysis.WhitespaceTokenizer;
  27
  28 import com.ibm.icu.text.Normalizer2;
  29
  30 /**
  31  * Tests the ICUNormalizer2Filter
  32  */
  33 public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
  34   Analyzer a = new Analyzer() {
  35     @Override
  36     public TokenStream tokenStream(String fieldName, Reader reader) {
  37       return new ICUNormalizer2Filter(
  38           new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
  39     }
  40   };
  41
  42   public void testDefaults() throws IOException {
  43     // case folding
  44     assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
  45
  46     // case folding
  47     assertAnalyzesTo(a, "Ruß", new String[] { "russ" });
  48
  49     // case folding
  50     assertAnalyzesTo(a, "ΜΆΪΟΣ", new String[] { "μάϊοσ" });
  51     assertAnalyzesTo(a, "Μάϊος", new String[] { "μάϊοσ" });
  52
  53     // supplementary case folding
  54     assertAnalyzesTo(a, "𐐖", new String[] { "𐐾" });
  55
  56     // normalization
  57     assertAnalyzesTo(a, "ﴳﴺﰧ", new String[] { "طمطمطم" });
  58
  59     // removal of default ignorables
  60     assertAnalyzesTo(a, "क्‍ष", new String[] { "क्ष" });
  61   }
  62
  63   public void testAlternate() throws IOException {
  64     Analyzer a = new Analyzer() {
  65       @Override
  66       public TokenStream tokenStream(String fieldName, Reader reader) {
  67         return new ICUNormalizer2Filter(
  68             new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
  69             /* specify nfc with decompose to get nfd */
  70             Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE));
  71       }
  72     };
  73
  74     // decompose EAcute into E + combining Acute
  75     assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
  76   }
  77
  78   /** blast some random strings through the analyzer */
  79   public void testRandomStrings() throws Exception {
  80     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
  81   }
  82 }