1 package org.apache.lucene.analysis.bg;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.io.StringReader;
23 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
24 import org.apache.lucene.analysis.CharArraySet;
25 import org.apache.lucene.analysis.KeywordMarkerFilter;
26 import org.apache.lucene.analysis.MockTokenizer;
27 import org.apache.lucene.util.Version;
30 * Test the Bulgarian Stemmer
32 public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
34 * Test showing how masculine noun forms conflate. An example noun for each
35 * common (and some rare) plural pattern is listed.
37 public void testMasculineNouns() throws IOException {
38 BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
41 assertAnalyzesTo(a, "град", new String[] {"град"});
42 assertAnalyzesTo(a, "града", new String[] {"град"});
43 assertAnalyzesTo(a, "градът", new String[] {"град"});
44 assertAnalyzesTo(a, "градове", new String[] {"град"});
45 assertAnalyzesTo(a, "градовете", new String[] {"град"});
48 assertAnalyzesTo(a, "народ", new String[] {"народ"});
49 assertAnalyzesTo(a, "народа", new String[] {"народ"});
50 assertAnalyzesTo(a, "народът", new String[] {"народ"});
51 assertAnalyzesTo(a, "народи", new String[] {"народ"});
52 assertAnalyzesTo(a, "народите", new String[] {"народ"});
53 assertAnalyzesTo(a, "народе", new String[] {"народ"});
56 assertAnalyzesTo(a, "път", new String[] {"път"});
57 assertAnalyzesTo(a, "пътя", new String[] {"път"});
58 assertAnalyzesTo(a, "пътят", new String[] {"път"});
59 assertAnalyzesTo(a, "пътища", new String[] {"път"});
60 assertAnalyzesTo(a, "пътищата", new String[] {"път"});
63 assertAnalyzesTo(a, "градец", new String[] {"градец"});
64 assertAnalyzesTo(a, "градеца", new String[] {"градец"});
65 assertAnalyzesTo(a, "градецът", new String[] {"градец"});
66 /* note the below forms conflate with each other, but not the rest */
67 assertAnalyzesTo(a, "градовце", new String[] {"градовц"});
68 assertAnalyzesTo(a, "градовцете", new String[] {"градовц"});
71 assertAnalyzesTo(a, "дядо", new String[] {"дяд"});
72 assertAnalyzesTo(a, "дядото", new String[] {"дяд"});
73 assertAnalyzesTo(a, "дядовци", new String[] {"дяд"});
74 assertAnalyzesTo(a, "дядовците", new String[] {"дяд"});
77 assertAnalyzesTo(a, "мъж", new String[] {"мъж"});
78 assertAnalyzesTo(a, "мъжа", new String[] {"мъж"});
79 assertAnalyzesTo(a, "мъже", new String[] {"мъж"});
80 assertAnalyzesTo(a, "мъжете", new String[] {"мъж"});
81 assertAnalyzesTo(a, "мъжо", new String[] {"мъж"});
82 /* word is too short, will not remove -ът */
83 assertAnalyzesTo(a, "мъжът", new String[] {"мъжът"});
86 assertAnalyzesTo(a, "крак", new String[] {"крак"});
87 assertAnalyzesTo(a, "крака", new String[] {"крак"});
88 assertAnalyzesTo(a, "кракът", new String[] {"крак"});
89 assertAnalyzesTo(a, "краката", new String[] {"крак"});
92 assertAnalyzesTo(a, "брат", new String[] {"брат"});
93 assertAnalyzesTo(a, "брата", new String[] {"брат"});
94 assertAnalyzesTo(a, "братът", new String[] {"брат"});
95 assertAnalyzesTo(a, "братя", new String[] {"брат"});
96 assertAnalyzesTo(a, "братята", new String[] {"брат"});
97 assertAnalyzesTo(a, "брате", new String[] {"брат"});
101 * Test showing how feminine noun forms conflate
103 public void testFeminineNouns() throws IOException {
104 BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
106 assertAnalyzesTo(a, "вест", new String[] {"вест"});
107 assertAnalyzesTo(a, "вестта", new String[] {"вест"});
108 assertAnalyzesTo(a, "вести", new String[] {"вест"});
109 assertAnalyzesTo(a, "вестите", new String[] {"вест"});
113 * Test showing how neuter noun forms conflate an example noun for each common
114 * plural pattern is listed
116 public void testNeuterNouns() throws IOException {
117 BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
120 assertAnalyzesTo(a, "дърво", new String[] {"дърв"});
121 assertAnalyzesTo(a, "дървото", new String[] {"дърв"});
122 assertAnalyzesTo(a, "дърва", new String[] {"дърв"});
123 assertAnalyzesTo(a, "дървета", new String[] {"дърв"});
124 assertAnalyzesTo(a, "дървата", new String[] {"дърв"});
125 assertAnalyzesTo(a, "дърветата", new String[] {"дърв"});
128 assertAnalyzesTo(a, "море", new String[] {"мор"});
129 assertAnalyzesTo(a, "морето", new String[] {"мор"});
130 assertAnalyzesTo(a, "морета", new String[] {"мор"});
131 assertAnalyzesTo(a, "моретата", new String[] {"мор"});
134 assertAnalyzesTo(a, "изключение", new String[] {"изключени"});
135 assertAnalyzesTo(a, "изключението", new String[] {"изключени"});
136 assertAnalyzesTo(a, "изключенията", new String[] {"изключени"});
137 /* note the below form in this example does not conflate with the rest */
138 assertAnalyzesTo(a, "изключения", new String[] {"изключн"});
142 * Test showing how adjectival forms conflate
144 public void testAdjectives() throws IOException {
145 BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
146 assertAnalyzesTo(a, "красив", new String[] {"красив"});
147 assertAnalyzesTo(a, "красивия", new String[] {"красив"});
148 assertAnalyzesTo(a, "красивият", new String[] {"красив"});
149 assertAnalyzesTo(a, "красива", new String[] {"красив"});
150 assertAnalyzesTo(a, "красивата", new String[] {"красив"});
151 assertAnalyzesTo(a, "красиво", new String[] {"красив"});
152 assertAnalyzesTo(a, "красивото", new String[] {"красив"});
153 assertAnalyzesTo(a, "красиви", new String[] {"красив"});
154 assertAnalyzesTo(a, "красивите", new String[] {"красив"});
158 * Test some exceptional rules, implemented as rewrites.
160 public void testExceptions() throws IOException {
161 BulgarianAnalyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT);
164 assertAnalyzesTo(a, "собственик", new String[] {"собственик"});
165 assertAnalyzesTo(a, "собственика", new String[] {"собственик"});
166 assertAnalyzesTo(a, "собственикът", new String[] {"собственик"});
167 assertAnalyzesTo(a, "собственици", new String[] {"собственик"});
168 assertAnalyzesTo(a, "собствениците", new String[] {"собственик"});
171 assertAnalyzesTo(a, "подлог", new String[] {"подлог"});
172 assertAnalyzesTo(a, "подлога", new String[] {"подлог"});
173 assertAnalyzesTo(a, "подлогът", new String[] {"подлог"});
174 assertAnalyzesTo(a, "подлози", new String[] {"подлог"});
175 assertAnalyzesTo(a, "подлозите", new String[] {"подлог"});
178 assertAnalyzesTo(a, "кожух", new String[] {"кожух"});
179 assertAnalyzesTo(a, "кожуха", new String[] {"кожух"});
180 assertAnalyzesTo(a, "кожухът", new String[] {"кожух"});
181 assertAnalyzesTo(a, "кожуси", new String[] {"кожух"});
182 assertAnalyzesTo(a, "кожусите", new String[] {"кожух"});
185 assertAnalyzesTo(a, "център", new String[] {"центр"});
186 assertAnalyzesTo(a, "центъра", new String[] {"центр"});
187 assertAnalyzesTo(a, "центърът", new String[] {"центр"});
188 assertAnalyzesTo(a, "центрове", new String[] {"центр"});
189 assertAnalyzesTo(a, "центровете", new String[] {"центр"});
192 assertAnalyzesTo(a, "промяна", new String[] {"промян"});
193 assertAnalyzesTo(a, "промяната", new String[] {"промян"});
194 assertAnalyzesTo(a, "промени", new String[] {"промян"});
195 assertAnalyzesTo(a, "промените", new String[] {"промян"});
198 assertAnalyzesTo(a, "песен", new String[] {"песн"});
199 assertAnalyzesTo(a, "песента", new String[] {"песн"});
200 assertAnalyzesTo(a, "песни", new String[] {"песн"});
201 assertAnalyzesTo(a, "песните", new String[] {"песн"});
204 // note: this is the only word i think this rule works for.
205 // most -еве pluralized nouns are monosyllabic,
206 // and the stemmer requires length > 6...
207 assertAnalyzesTo(a, "строй", new String[] {"строй"});
208 assertAnalyzesTo(a, "строеве", new String[] {"строй"});
209 assertAnalyzesTo(a, "строевете", new String[] {"строй"});
210 /* note the below forms conflate with each other, but not the rest */
211 assertAnalyzesTo(a, "строя", new String[] {"стр"});
212 assertAnalyzesTo(a, "строят", new String[] {"стр"});
215 public void testWithKeywordAttribute() throws IOException {
216 CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
218 MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
220 BulgarianStemFilter filter = new BulgarianStemFilter(
221 new KeywordMarkerFilter(tokenStream, set));
222 assertTokenStreamContents(filter, new String[] { "строй", "строеве" });