+++ /dev/null
-package org.apache.lucene.analysis.fr;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.util.Version;
-
-/**
- * Test case for FrenchAnalyzer.
- *
- * @version $version$
- */
-
-public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
-
- public void testAnalyzer() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
-
- assertAnalyzesTo(fa, "", new String[] {
- });
-
- assertAnalyzesTo(
- fa,
- "chien chat cheval",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(
- fa,
- "chien CHAT CHEVAL",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(
- fa,
- " chien ,? + = - CHAT /: > CHEVAL",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
-
- assertAnalyzesTo(
- fa,
- "mot \"entreguillemet\"",
- new String[] { "mot", "entreguillemet" });
-
- // let's do some french specific tests now
-
- /* 1. couldn't resist
- I would expect this to stay one term as in French the minus
- sign is often used for composing words */
- assertAnalyzesTo(
- fa,
- "Jean-François",
- new String[] { "jean", "françois" });
-
- // 2. stopwords
- assertAnalyzesTo(
- fa,
- "le la chien les aux chat du des à cheval",
- new String[] { "chien", "chat", "cheval" });
-
- // some nouns and adjectives
- assertAnalyzesTo(
- fa,
- "lances chismes habitable chiste éléments captifs",
- new String[] {
- "lanc",
- "chism",
- "habit",
- "chist",
- "élément",
- "captif" });
-
- // some verbs
- assertAnalyzesTo(
- fa,
- "finissions souffrirent rugissante",
- new String[] { "fin", "souffr", "rug" });
-
- // some everything else
- // aujourd'hui stays one term which is OK
- assertAnalyzesTo(
- fa,
- "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
- new String[] {
- "c3po",
- "aujourd'hui",
- "oeuf",
- "ïâöûàä",
- "anticonstitutionnel",
- "jav" });
-
- // some more everything else
- // here 1940-1945 stays as one term, 1940:1945 not ?
- assertAnalyzesTo(
- fa,
- "33Bis 1940-1945 1940:1945 (---i+++)*",
- new String[] { "33bis", "1940", "1945", "1940", "1945", "i" });
-
- }
-
- /**
- * @deprecated remove this test for Lucene 4.0
- */
- @Deprecated
- public void testAnalyzer30() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);
-
- assertAnalyzesTo(fa, "", new String[] {
- });
-
- assertAnalyzesTo(
- fa,
- "chien chat cheval",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(
- fa,
- "chien CHAT CHEVAL",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(
- fa,
- " chien ,? + = - CHAT /: > CHEVAL",
- new String[] { "chien", "chat", "cheval" });
-
- assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
-
- assertAnalyzesTo(
- fa,
- "mot \"entreguillemet\"",
- new String[] { "mot", "entreguillemet" });
-
- // let's do some french specific tests now
-
- /* 1. couldn't resist
- I would expect this to stay one term as in French the minus
- sign is often used for composing words */
- assertAnalyzesTo(
- fa,
- "Jean-François",
- new String[] { "jean", "françois" });
-
- // 2. stopwords
- assertAnalyzesTo(
- fa,
- "le la chien les aux chat du des à cheval",
- new String[] { "chien", "chat", "cheval" });
-
- // some nouns and adjectives
- assertAnalyzesTo(
- fa,
- "lances chismes habitable chiste éléments captifs",
- new String[] {
- "lanc",
- "chism",
- "habit",
- "chist",
- "élément",
- "captif" });
-
- // some verbs
- assertAnalyzesTo(
- fa,
- "finissions souffrirent rugissante",
- new String[] { "fin", "souffr", "rug" });
-
- // some everything else
- // aujourd'hui stays one term which is OK
- assertAnalyzesTo(
- fa,
- "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
- new String[] {
- "c3po",
- "aujourd'hui",
- "oeuf",
- "ïâöûàä",
- "anticonstitutionnel",
- "jav" });
-
- // some more everything else
- // here 1940-1945 stays as one term, 1940:1945 not ?
- assertAnalyzesTo(
- fa,
- "33Bis 1940-1945 1940:1945 (---i+++)*",
- new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
-
- }
-
- public void testReusableTokenStream() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
- // stopwords
- assertAnalyzesToReuse(
- fa,
- "le la chien les aux chat du des à cheval",
- new String[] { "chien", "chat", "cheval" });
-
- // some nouns and adjectives
- assertAnalyzesToReuse(
- fa,
- "lances chismes habitable chiste éléments captifs",
- new String[] {
- "lanc",
- "chism",
- "habit",
- "chist",
- "élément",
- "captif" });
- }
-
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
- assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
- fa.setStemExclusionTable(new String[] { "habitable" });
- assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
- }
-
- public void testExclusionTableViaCtor() throws Exception {
- CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
- set.add("habitable");
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT,
- CharArraySet.EMPTY_SET, set);
- assertAnalyzesToReuse(fa, "habitable chiste", new String[] { "habitable",
- "chist" });
-
- fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
- assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
- "chist" });
- }
-
- public void testElision() throws Exception {
- FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
- assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
- }
-
- /**
- * Prior to 3.1, this analyzer had no lowercase filter.
- * stopwords were case sensitive. Preserve this for back compat.
- * @deprecated Remove this test in Lucene 4.0
- */
- @Deprecated
- public void testBuggyStopwordsCasing() throws IOException {
- FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
- assertAnalyzesTo(a, "Votre", new String[] { "votr" });
- }
-
- /**
- * Test that stopwords are not case sensitive
- */
- public void testStopwordsCasing() throws IOException {
- FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
- assertAnalyzesTo(a, "Votre", new String[] { });
- }
-
- /** blast some random strings through the analyzer */
- public void testRandomStrings() throws Exception {
- checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
- }
-}