1 package org.apache.lucene.analysis.tr;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.StringReader;
22 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
23 import org.apache.lucene.analysis.MockTokenizer;
24 import org.apache.lucene.analysis.TokenStream;
27 * Test the Turkish lowercase filter.
29 public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
34 public void testTurkishLowerCaseFilter() throws Exception {
35 TokenStream stream = new MockTokenizer(new StringReader(
36 "\u0130STANBUL \u0130ZM\u0130R ISPARTA"), MockTokenizer.WHITESPACE, false);
37 TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
38 assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
43 * Test decomposed forms: the dotted capital I written as U+0049 followed by U+0307 (combining dot above)
45 public void testDecomposed() throws Exception {
46 TokenStream stream = new MockTokenizer(new StringReader(
47 "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"), MockTokenizer.WHITESPACE, false);
48 TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
49 assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
54 * Test decomposed forms with additional accents
55 * In this example, U+0049 + U+0316 + U+0307 is canonically equivalent
56 * to U+0130 + U+0316, and is lowercased the same way.
58 public void testDecomposed2() throws Exception {
59 TokenStream stream = new MockTokenizer(new StringReader(
60 "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"), MockTokenizer.WHITESPACE, false);
61 TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
62 assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
63 "\u0131\u0316sparta",});