X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java diff --git a/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java b/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java new file mode 100644 index 0000000..d5f10ef --- /dev/null +++ b/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis; + +import java.io.StringReader; + +public class TestMappingCharFilter extends BaseTokenStreamTestCase { + + NormalizeCharMap normMap; + + @Override + public void setUp() throws Exception { + super.setUp(); + normMap = new NormalizeCharMap(); + + normMap.add( "aa", "a" ); + normMap.add( "bbb", "b" ); + normMap.add( "cccc", "cc" ); + + normMap.add( "h", "i" ); + normMap.add( "j", "jj" ); + normMap.add( "k", "kkk" ); + normMap.add( "ll", "llll" ); + + normMap.add( "empty", "" ); + } + + public void testReaderReset() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) ); + char[] buf = new char[10]; + int len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + len = cs.read(buf, 0, 10); + assertEquals( -1, len ); + + // rewind + cs.reset(); + len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + } + + public void testNothingChange() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1}); + } + + public void test1to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1}); + } + + public void test1to2() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1}); + } + + public void test1to3() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1}); + } + + public void test2to4() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2}); + } + + public void test2to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2}); + } + + public void test3to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3}); + } + + public void test4to2() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4}); + } + + public void test5to0() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[0]); + } + + // + // 1111111111222 + // 01234567890123456789012 + //(in) h i j k ll cccc bbb aa + // + // 1111111111222 + // 01234567890123456789012 + //(out) i i jj kkk llll cc b a + // + // h, 0, 1 => i, 0, 1 + // i, 2, 3 => i, 2, 3 + // j, 4, 5 => jj, 4, 5 + // k, 6, 7 => kkk, 6, 7 + // ll, 8,10 => llll, 8,10 + // cccc,11,15 => cc,11,15 + // bbb,16,19 => b,16,19 + // aa,20,22 => a,20,22 + // + public void testTokenStream() throws Exception { + CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, + new String[]{"i","i","jj","kkk","llll","cc","b","a"}, + new int[]{0,2,4,6,8,11,16,20}, + new int[]{1,3,5,7,10,15,19,22} + ); + } + + // + // + // 0123456789 + //(in) aaaa ll h + //(out-1) aa llll i + //(out-2) a llllllll i + // + // aaaa,0,4 => a,0,4 + // ll,5,7 => llllllll,5,7 + // h,8,9 => i,8,9 + public void testChained() throws Exception { + CharStream cs = new MappingCharFilter( normMap, + new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, + new String[]{"a","llllllll","i"}, + new int[]{0,5,8}, + new int[]{4,7,9} + ); + } +}