2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.analysis;
20 import java.io.StringReader;
22 public class TestMappingCharFilter extends BaseTokenStreamTestCase {
24 NormalizeCharMap normMap;
27 public void setUp() throws Exception {
29 normMap = new NormalizeCharMap();
31 normMap.add( "aa", "a" );
32 normMap.add( "bbb", "b" );
33 normMap.add( "cccc", "cc" );
35 normMap.add( "h", "i" );
36 normMap.add( "j", "jj" );
37 normMap.add( "k", "kkk" );
38 normMap.add( "ll", "llll" );
40 normMap.add( "empty", "" );
43 public void testReaderReset() throws Exception {
44 CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
45 char[] buf = new char[10];
46 int len = cs.read(buf, 0, 10);
47 assertEquals( 1, len );
48 assertEquals( 'x', buf[0]) ;
49 len = cs.read(buf, 0, 10);
50 assertEquals( -1, len );
54 len = cs.read(buf, 0, 10);
55 assertEquals( 1, len );
56 assertEquals( 'x', buf[0]) ;
59 public void testNothingChange() throws Exception {
60 CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
61 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
62 assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
65 public void test1to1() throws Exception {
66 CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
67 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
68 assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
71 public void test1to2() throws Exception {
72 CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
73 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
74 assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
77 public void test1to3() throws Exception {
78 CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
79 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
80 assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
83 public void test2to4() throws Exception {
84 CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
85 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
86 assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
89 public void test2to1() throws Exception {
90 CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
91 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
92 assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
95 public void test3to1() throws Exception {
96 CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
97 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
98 assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
101 public void test4to2() throws Exception {
102 CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
103 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
104 assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
107 public void test5to0() throws Exception {
108 CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
109 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
110 assertTokenStreamContents(ts, new String[0]);
//
//                1111111111222
//      01234567890123456789012
//(in)  h i j k ll cccc bbb aa
//
//                1111111111222
//      01234567890123456789012
//(out) i i jj kkk llll cc b a
//
//    h, 0, 1 =>    i, 0, 1
//    i, 2, 3 =>    i, 2, 3
//    j, 4, 5 =>   jj, 4, 5
//    k, 6, 7 =>  kkk, 6, 7
//   ll, 8,10 => llll, 8,10
// cccc,11,15 =>   cc,11,15
//  bbb,16,19 =>    b,16,19
//   aa,20,22 =>    a,20,22
//
131 public void testTokenStream() throws Exception {
132 CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
133 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
134 assertTokenStreamContents(ts,
135 new String[]{"i","i","jj","kkk","llll","cc","b","a"},
136 new int[]{0,2,4,6,8,11,16,20},
137 new int[]{1,3,5,7,10,15,19,22}
146 //(out-2) a llllllll i
149 // ll,5,7 => llllllll,5,7
151 public void testChained() throws Exception {
152 CharStream cs = new MappingCharFilter( normMap,
153 new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
154 TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
155 assertTokenStreamContents(ts,
156 new String[]{"a","llllllll","i"},