X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java deleted file mode 100644 index 1a74617..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ /dev/null @@ -1,168 +0,0 @@ -package org.apache.lucene.analysis.th; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.StringReader; - -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; -import org.apache.lucene.util.Version; - -/** - * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer - * - * @version 0.1 - */ - -public class TestThaiAnalyzer extends BaseTokenStreamTestCase { - - /* - * testcase for offsets - */ - public void testOffsets() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, - new int[] { 0, 3, 6, 9, 13, 17, 20, 23 }, - new int[] { 3, 6, 9, 13, 17, 20, 23, 25 }); - } - - public void testTokenType() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" }, - new String[] { "", "", - "", "", - "", "", - "", "", - "" }); - } - - /** - * Thai numeric tokens were typed as instead of . - * @deprecated testing backwards behavior - */ - @Deprecated - public void testBuggyTokenType30() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_30), "การที่ได้ต้องแสดงว่างานดี ๑๒๓", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" }, - new String[] { "", "", "", - "", "", "", - "", "", "" }); - } - - /** @deprecated testing backwards behavior */ - @Deprecated - public void testAnalyzer30() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); - - assertAnalyzesTo(analyzer, "", new String[] {}); - - assertAnalyzesTo( - analyzer, - "การที่ได้ต้องแสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"}); - - assertAnalyzesTo( - analyzer, - "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", - new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); - - // English stop words - assertAnalyzesTo( - analyzer, - "ประโยคว่า The quick brown fox jumped over the lazy dogs", - new String[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" }); - } - - /* - * Test that position increments are adjusted correctly for stopwords. - */ - public void testPositionIncrements() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT); - - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้อง the แสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, - new int[] { 0, 3, 6, 9, 18, 22, 25, 28 }, - new int[] { 3, 6, 9, 13, 22, 25, 28, 30 }, - new int[] { 1, 1, 1, 1, 2, 1, 1, 1 }); - - // case that a stopword is adjacent to thai text, with no whitespace - assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องthe แสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" }, - new int[] { 0, 3, 6, 9, 17, 21, 24, 27 }, - new int[] { 3, 6, 9, 13, 21, 24, 27, 29 }, - new int[] { 1, 1, 1, 1, 2, 1, 1, 1 }); - } - - public void testReusableTokenStream() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT); - assertAnalyzesToReuse(analyzer, "", new String[] {}); - - assertAnalyzesToReuse( - analyzer, - "การที่ได้ต้องแสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"}); - - assertAnalyzesToReuse( - analyzer, - "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", - new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" }); - } - - /** @deprecated, for version back compat */ - @Deprecated - public void testReusableTokenStream30() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); - assertAnalyzesToReuse(analyzer, "", new String[] {}); - - assertAnalyzesToReuse( - analyzer, - "การที่ได้ต้องแสดงว่างานดี", - new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"}); - - assertAnalyzesToReuse( - analyzer, - "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", - new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); - } - - /** blast some random strings through the analyzer */ - public void testRandomStrings() throws Exception { - checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); - } - - // LUCENE-3044 - public void testAttributeReuse() throws Exception { - assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); - ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); - // just consume - TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); - assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); - // this consumer adds flagsAtt, which this analyzer does not use. - ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); - ts.addAttribute(FlagsAttribute.class); - assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); - } -}