lucene-java-3.4.0/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java

   1 package org.apache.lucene.collation;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20
  21 import com.ibm.icu.text.Collator;
  22
  23 import org.apache.lucene.analysis.CollationTestBase;
  24 import org.apache.lucene.analysis.TokenStream;
  25 import org.apache.lucene.analysis.Analyzer;
  26 import org.apache.lucene.analysis.KeywordTokenizer;
  27
  28 import java.io.Reader;
  29 import java.util.Locale;
  30
  31
  32 public class TestICUCollationKeyFilter extends CollationTestBase {
  33
  34   private Collator collator = Collator.getInstance(new Locale("fa"));
  35   private Analyzer analyzer = new TestAnalyzer(collator);
  36
  37   private String firstRangeBeginning = encodeCollationKey
  38     (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
  39   private String firstRangeEnd = encodeCollationKey
  40     (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
  41   private String secondRangeBeginning = encodeCollationKey
  42     (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
  43   private String secondRangeEnd = encodeCollationKey
  44     (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
  45
  46
  47   public final class TestAnalyzer extends Analyzer {
  48     private Collator _collator;
  49
  50     TestAnalyzer(Collator collator) {
  51       _collator = collator;
  52     }
  53
  54     @Override
  55     public TokenStream tokenStream(String fieldName, Reader reader) {
  56       TokenStream result = new KeywordTokenizer(reader);
  57       result = new ICUCollationKeyFilter(result, _collator);
  58       return result;
  59     }
  60   }
  61
  62   public void testFarsiRangeFilterCollating() throws Exception {
  63     testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd,
  64                                   secondRangeBeginning, secondRangeEnd);
  65   }
  66
  67   public void testFarsiRangeQueryCollating() throws Exception {
  68     testFarsiRangeQueryCollating(analyzer, firstRangeBeginning, firstRangeEnd,
  69                                  secondRangeBeginning, secondRangeEnd);
  70   }
  71
  72   public void testFarsiTermRangeQuery() throws Exception {
  73     testFarsiTermRangeQuery
  74       (analyzer, firstRangeBeginning, firstRangeEnd,
  75        secondRangeBeginning, secondRangeEnd);
  76   }
  77
  78   // Test using various international locales with accented characters (which
  79   // sort differently depending on locale)
  80   //
  81   // Copied (and slightly modified) from
  82   // org.apache.lucene.search.TestSort.testInternationalSort()
  83   //
  84   public void testCollationKeySort() throws Exception {
  85     Analyzer usAnalyzer = new TestAnalyzer(Collator.getInstance(Locale.US));
  86     Analyzer franceAnalyzer
  87       = new TestAnalyzer(Collator.getInstance(Locale.FRANCE));
  88     Analyzer swedenAnalyzer
  89       = new TestAnalyzer(Collator.getInstance(new Locale("sv", "se")));
  90     Analyzer denmarkAnalyzer
  91       = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
  92
  93     // The ICU Collator and java.text.Collator implementations differ in their
  94     // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
  95     testCollationKeySort
  96     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
  97      "BFJHD", "ECAGI", "BJDFH", "BJDHF");
  98   }
  99 }