X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java deleted file mode 100644 index e5771c3..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java +++ /dev/null @@ -1,139 +0,0 @@ -package org.apache.lucene.analysis.snowball; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.Payload; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.util.Version; - -public class TestSnowball extends BaseTokenStreamTestCase { - - public void testEnglish() throws Exception { - Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English"); - assertAnalyzesTo(a, "he abhorred accents", - new String[]{"he", "abhor", "accent"}); - } - - public void testStopwords() throws Exception { - Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English", - StandardAnalyzer.STOP_WORDS_SET); - assertAnalyzesTo(a, "the quick brown fox jumped", - new String[]{"quick", "brown", "fox", "jump"}); - } - - /** - * Test english lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure - * we lowercase I correct for non-Turkish languages in either case. - */ - public void testEnglishLowerCase() throws Exception { - Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English"); - assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" }); - assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" }); - - Analyzer b = new SnowballAnalyzer(Version.LUCENE_30, "English"); - assertAnalyzesTo(b, "cryogenic", new String[] { "cryogen" }); - assertAnalyzesTo(b, "CRYOGENIC", new String[] { "cryogen" }); - } - - /** - * Test turkish lowercasing - */ - public void testTurkish() throws Exception { - Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish"); - - assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" }); - assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" }); - } - - /** - * Test turkish lowercasing (old buggy behavior) - * @deprecated Remove this when support for 3.0 indexes is no longer required - */ - @Deprecated - public void testTurkishBWComp() throws Exception { - Analyzer a = new SnowballAnalyzer(Version.LUCENE_30, "Turkish"); - // AĞACI in turkish lowercases to ağacı, but with lowercase filter ağaci. - // this fails due to wrong casing, because the stemmer - // will only remove -ı, not -i - assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" }); - assertAnalyzesTo(a, "AĞACI", new String[] { "ağaci" }); - } - - - public void testReusableTokenStream() throws Exception { - Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English"); - assertAnalyzesToReuse(a, "he abhorred accents", - new String[]{"he", "abhor", "accent"}); - assertAnalyzesToReuse(a, "she abhorred him", - new String[]{"she", "abhor", "him"}); - } - - public void testFilterTokens() throws Exception { - SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English"); - CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); - OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class); - TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class); - PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class); - FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class); - - filter.incrementToken(); - - assertEquals("accent", termAtt.toString()); - assertEquals(2, offsetAtt.startOffset()); - assertEquals(7, offsetAtt.endOffset()); - assertEquals("wrd", typeAtt.type()); - assertEquals(3, posIncAtt.getPositionIncrement()); - assertEquals(77, flagsAtt.getFlags()); - assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload()); - } - - private final class TestTokenStream extends TokenStream { - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); - private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); - - TestTokenStream() { - super(); - } - - @Override - public boolean incrementToken() { - clearAttributes(); - termAtt.setEmpty().append("accents"); - offsetAtt.setOffset(2, 7); - typeAtt.setType("wrd"); - posIncAtt.setPositionIncrement(3); - payloadAtt.setPayload(new Payload(new byte[]{0,1,2,3})); - flagsAtt.setFlags(77); - return true; - } - } -} \ No newline at end of file