X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java deleted file mode 100644 index 8948729..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.apache.lucene.analysis.hi; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.StringReader; - -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.Tokenizer; - -/** - * Test HindiNormalizer - */ -public class TestHindiNormalizer extends BaseTokenStreamTestCase { - /** - * Test some basic normalization, with an example from the paper. - */ - public void testBasics() throws IOException { - check("अँगरेज़ी", "अंगरेजि"); - check("अँगरेजी", "अंगरेजि"); - check("अँग्रेज़ी", "अंगरेजि"); - check("अँग्रेजी", "अंगरेजि"); - check("अंगरेज़ी", "अंगरेजि"); - check("अंगरेजी", "अंगरेजि"); - check("अंग्रेज़ी", "अंगरेजि"); - check("अंग्रेजी", "अंगरेजि"); - } - - public void testDecompositions() throws IOException { - // removing nukta dot - check("क़िताब", "किताब"); - check("फ़र्ज़", "फरज"); - check("क़र्ज़", "करज"); - // some other composed nukta forms - check("ऱऴख़ग़ड़ढ़य़", "रळखगडढय"); - // removal of format (ZWJ/ZWNJ) - check("शार्‍मा", "शारमा"); - check("शार्‌मा", "शारमा"); - // removal of chandra - check("ॅॆॉॊऍऎऑऒ\u0972", "ेेोोएएओओअ"); - // vowel shortening - check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो"); - } - private void check(String input, String output) throws IOException { - Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false); - TokenFilter tf = new HindiNormalizationFilter(tokenizer); - assertTokenStreamContents(tf, new String[] { output }); - } -}