1 package org.apache.lucene.collation;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 import org.apache.lucene.analysis.CollationTestBase;
22 import org.apache.lucene.analysis.TokenStream;
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.KeywordTokenizer;
26 import java.text.Collator;
27 import java.util.Locale;
28 import java.io.Reader;
// Tests CollationKeyFilter using java.text.Collator instances. The test methods
// below call same-named helpers that take arguments; those helpers are not
// declared in this listing (presumably inherited from CollationTestBase).
31 public class TestCollationKeyFilter extends CollationTestBase {
32 // The sort order of Ø versus U depends on the version of the rules being used
33 // for the inherited root locale: Ø's order isn't specified in Locale.US since
34 // it's not used in English.
// True when the collator for new Locale("") orders "Ø" before "U"; read in
// testCollationKeySort to choose between the expected strings "BFJHD" and "BFJDH".
35 boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
37 // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
38 // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
39 // characters properly.
40 private Collator collator = Collator.getInstance(new Locale("ar"));
// Analyzer built on the collator above; its token stream is passed through
// CollationKeyFilter (see TestAnalyzer.tokenStream).
41 private Analyzer analyzer = new TestAnalyzer(collator);
// Range endpoints handed to the Farsi tests. Each one is the collator's
// collation key for the matching *Original string, converted to a byte array
// and passed through encodeCollationKey to obtain a String.
// NOTE(review): encodeCollationKey and the *Original strings are not declared
// in this listing - presumably inherited from CollationTestBase; confirm.
43 private String firstRangeBeginning = encodeCollationKey
44 (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
45 private String firstRangeEnd = encodeCollationKey
46 (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
47 private String secondRangeBeginning = encodeCollationKey
48 (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
49 private String secondRangeEnd = encodeCollationKey
50 (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
// Analyzer used by these tests: its token stream is a KeywordTokenizer over
// the reader, wrapped in a CollationKeyFilter that uses _collator.
53 public final class TestAnalyzer extends Analyzer {
54 private Collator _collator;
// NOTE(review): the constructor body is missing from this listing; presumably
// it stores the argument in _collator - confirm against the full file.
56 TestAnalyzer(Collator collator) {
// Builds the tokenizer/filter chain for the given reader. fieldName is not
// used by the lines visible here.
61 public TokenStream tokenStream(String fieldName, Reader reader) {
62 TokenStream result = new KeywordTokenizer(reader);
63 result = new CollationKeyFilter(result, _collator);
// Runs the range-filter collation check with the analyzer built on the
// Locale("ar") collator and the four encoded range endpoints. The five-argument
// testFarsiRangeFilterCollating is not declared in this listing (presumably
// inherited from CollationTestBase).
68 public void testFarsiRangeFilterCollating() throws Exception {
69 testFarsiRangeFilterCollating
70 (analyzer, firstRangeBeginning, firstRangeEnd,
71 secondRangeBeginning, secondRangeEnd);
// Runs the range-query collation check with the analyzer built on the
// Locale("ar") collator and the four encoded range endpoints. The five-argument
// testFarsiRangeQueryCollating is not declared in this listing (presumably
// inherited from CollationTestBase).
74 public void testFarsiRangeQueryCollating() throws Exception {
75 testFarsiRangeQueryCollating
76 (analyzer, firstRangeBeginning, firstRangeEnd,
77 secondRangeBeginning, secondRangeEnd);
// Runs the term-range-query collation check with the analyzer built on the
// Locale("ar") collator and the four encoded range endpoints. The five-argument
// testFarsiTermRangeQuery is not declared in this listing (presumably
// inherited from CollationTestBase).
80 public void testFarsiTermRangeQuery() throws Exception {
81 testFarsiTermRangeQuery
82 (analyzer, firstRangeBeginning, firstRangeEnd,
83 secondRangeBeginning, secondRangeEnd);
// Builds one TestAnalyzer per locale (Locale.US, Locale.FRANCE,
// Locale("sv", "se"), Locale("da", "dk")) and passes them, together with one
// expected-order string per analyzer, to a sort check.
86 public void testCollationKeySort() throws Exception {
87 Analyzer usAnalyzer = new TestAnalyzer(Collator.getInstance(Locale.US));
88 Analyzer franceAnalyzer
89 = new TestAnalyzer(Collator.getInstance(Locale.FRANCE));
90 Analyzer swedenAnalyzer
91 = new TestAnalyzer(Collator.getInstance(new Locale("sv", "se")));
92 Analyzer denmarkAnalyzer
93 = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
95 // The ICU Collator and Sun java.text.Collator implementations differ in their
96 // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
// When oStrokeFirst is true the first expected string is "BFJHD" instead.
// NOTE(review): the line that names the method receiving this argument list is
// missing from the listing - presumably an eight-argument testCollationKeySort
// inherited from CollationTestBase; confirm against the full file.
98 (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
99 oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");