X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java deleted file mode 100644 index e36c497..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java +++ /dev/null @@ -1,279 +0,0 @@ -package org.apache.lucene.analysis.fr; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.util.Version; - -/** - * Test case for FrenchAnalyzer. - * - * @version $version$ - */ - -public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { - - public void testAnalyzer() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); - - assertAnalyzesTo(fa, "", new String[] { - }); - - assertAnalyzesTo( - fa, - "chien chat cheval", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo( - fa, - "chien CHAT CHEVAL", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo( - fa, - " chien ,? + = - CHAT /: > CHEVAL", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo(fa, "chien++", new String[] { "chien" }); - - assertAnalyzesTo( - fa, - "mot \"entreguillemet\"", - new String[] { "mot", "entreguillemet" }); - - // let's do some french specific tests now - - /* 1. couldn't resist - I would expect this to stay one term as in French the minus - sign is often used for composing words */ - assertAnalyzesTo( - fa, - "Jean-François", - new String[] { "jean", "françois" }); - - // 2. stopwords - assertAnalyzesTo( - fa, - "le la chien les aux chat du des à cheval", - new String[] { "chien", "chat", "cheval" }); - - // some nouns and adjectives - assertAnalyzesTo( - fa, - "lances chismes habitable chiste éléments captifs", - new String[] { - "lanc", - "chism", - "habit", - "chist", - "élément", - "captif" }); - - // some verbs - assertAnalyzesTo( - fa, - "finissions souffrirent rugissante", - new String[] { "fin", "souffr", "rug" }); - - // some everything else - // aujourd'hui stays one term which is OK - assertAnalyzesTo( - fa, - "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ", - new String[] { - "c3po", - "aujourd'hui", - "oeuf", - "ïâöûàä", - "anticonstitutionnel", - "jav" }); - - // some more everything else - // here 1940-1945 stays as one term, 1940:1945 not ? - assertAnalyzesTo( - fa, - "33Bis 1940-1945 1940:1945 (---i+++)*", - new String[] { "33bis", "1940", "1945", "1940", "1945", "i" }); - - } - - /** - * @deprecated remove this test for Lucene 4.0 - */ - @Deprecated - public void testAnalyzer30() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30); - - assertAnalyzesTo(fa, "", new String[] { - }); - - assertAnalyzesTo( - fa, - "chien chat cheval", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo( - fa, - "chien CHAT CHEVAL", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo( - fa, - " chien ,? + = - CHAT /: > CHEVAL", - new String[] { "chien", "chat", "cheval" }); - - assertAnalyzesTo(fa, "chien++", new String[] { "chien" }); - - assertAnalyzesTo( - fa, - "mot \"entreguillemet\"", - new String[] { "mot", "entreguillemet" }); - - // let's do some french specific tests now - - /* 1. couldn't resist - I would expect this to stay one term as in French the minus - sign is often used for composing words */ - assertAnalyzesTo( - fa, - "Jean-François", - new String[] { "jean", "françois" }); - - // 2. stopwords - assertAnalyzesTo( - fa, - "le la chien les aux chat du des à cheval", - new String[] { "chien", "chat", "cheval" }); - - // some nouns and adjectives - assertAnalyzesTo( - fa, - "lances chismes habitable chiste éléments captifs", - new String[] { - "lanc", - "chism", - "habit", - "chist", - "élément", - "captif" }); - - // some verbs - assertAnalyzesTo( - fa, - "finissions souffrirent rugissante", - new String[] { "fin", "souffr", "rug" }); - - // some everything else - // aujourd'hui stays one term which is OK - assertAnalyzesTo( - fa, - "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ", - new String[] { - "c3po", - "aujourd'hui", - "oeuf", - "ïâöûàä", - "anticonstitutionnel", - "jav" }); - - // some more everything else - // here 1940-1945 stays as one term, 1940:1945 not ? - assertAnalyzesTo( - fa, - "33Bis 1940-1945 1940:1945 (---i+++)*", - new String[] { "33bis", "1940-1945", "1940", "1945", "i" }); - - } - - public void testReusableTokenStream() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); - // stopwords - assertAnalyzesToReuse( - fa, - "le la chien les aux chat du des à cheval", - new String[] { "chien", "chat", "cheval" }); - - // some nouns and adjectives - assertAnalyzesToReuse( - fa, - "lances chismes habitable chiste éléments captifs", - new String[] { - "lanc", - "chism", - "habit", - "chist", - "élément", - "captif" }); - } - - /* - * Test that changes to the exclusion table are applied immediately - * when using reusable token streams. - */ - public void testExclusionTableReuse() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" }); - fa.setStemExclusionTable(new String[] { "habitable" }); - assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" }); - } - - public void testExclusionTableViaCtor() throws Exception { - CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); - set.add("habitable"); - FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, - CharArraySet.EMPTY_SET, set); - assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable", - "chist" }); - - fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); - assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable", - "chist" }); - } - - public void testElision() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" }); - } - - /** - * Prior to 3.1, this analyzer had no lowercase filter. - * stopwords were case sensitive. Preserve this for back compat. - * @deprecated Remove this test in Lucene 4.0 - */ - @Deprecated - public void testBuggyStopwordsCasing() throws IOException { - FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30); - assertAnalyzesTo(a, "Votre", new String[] { "votr" }); - } - - /** - * Test that stopwords are not case sensitive - */ - public void testStopwordsCasing() throws IOException { - FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31); - assertAnalyzesTo(a, "Votre", new String[] { }); - } - - /** blast some random strings through the analyzer */ - public void testRandomStrings() throws Exception { - checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); - } -}