1 package org.apache.lucene.analysis.lv;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.io.Reader;
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
25 import org.apache.lucene.analysis.MockTokenizer;
26 import org.apache.lucene.analysis.Tokenizer;
27 import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
 * Basic tests for {@link LatvianStemmer}
 */
32 public class TestLatvianStemmer extends BaseTokenStreamTestCase {
33 private Analyzer a = new ReusableAnalyzerBase() {
35 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
36 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
37 return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
41 public void testNouns1() throws IOException {
43 checkOneTerm(a, "tēvs", "tēv"); // nom. sing.
44 checkOneTerm(a, "tēvi", "tēv"); // nom. pl.
45 checkOneTerm(a, "tēva", "tēv"); // gen. sing.
46 checkOneTerm(a, "tēvu", "tēv"); // gen. pl.
47 checkOneTerm(a, "tēvam", "tēv"); // dat. sing.
48 checkOneTerm(a, "tēviem", "tēv"); // dat. pl.
49 checkOneTerm(a, "tēvu", "tēv"); // acc. sing.
50 checkOneTerm(a, "tēvus", "tēv"); // acc. pl.
51 checkOneTerm(a, "tēvā", "tēv"); // loc. sing.
52 checkOneTerm(a, "tēvos", "tēv"); // loc. pl.
53 checkOneTerm(a, "tēvs", "tēv"); // voc. sing.
54 checkOneTerm(a, "tēvi", "tēv"); // voc. pl.
58 * decl II nouns with (s,t) -> š and (d,z) -> ž
59 * palatalization will generally conflate to two stems
60 * due to the ambiguity (plural and singular).
62 public void testNouns2() throws IOException {
65 // c -> č palatalization
66 checkOneTerm(a, "lācis", "lāc"); // nom. sing.
67 checkOneTerm(a, "lāči", "lāc"); // nom. pl.
68 checkOneTerm(a, "lāča", "lāc"); // gen. sing.
69 checkOneTerm(a, "lāču", "lāc"); // gen. pl.
70 checkOneTerm(a, "lācim", "lāc"); // dat. sing.
71 checkOneTerm(a, "lāčiem", "lāc"); // dat. pl.
72 checkOneTerm(a, "lāci", "lāc"); // acc. sing.
73 checkOneTerm(a, "lāčus", "lāc"); // acc. pl.
74 checkOneTerm(a, "lācī", "lāc"); // loc. sing.
75 checkOneTerm(a, "lāčos", "lāc"); // loc. pl.
76 checkOneTerm(a, "lāci", "lāc"); // voc. sing.
77 checkOneTerm(a, "lāči", "lāc"); // voc. pl.
79 // n -> ņ palatalization
80 checkOneTerm(a, "akmens", "akmen"); // nom. sing.
81 checkOneTerm(a, "akmeņi", "akmen"); // nom. pl.
82 checkOneTerm(a, "akmens", "akmen"); // gen. sing.
83 checkOneTerm(a, "akmeņu", "akmen"); // gen. pl.
84 checkOneTerm(a, "akmenim", "akmen"); // dat. sing.
85 checkOneTerm(a, "akmeņiem", "akmen"); // dat. pl.
86 checkOneTerm(a, "akmeni", "akmen"); // acc. sing.
87 checkOneTerm(a, "akmeņus", "akmen"); // acc. pl.
88 checkOneTerm(a, "akmenī", "akmen"); // loc. sing.
89 checkOneTerm(a, "akmeņos", "akmen"); // loc. pl.
90 checkOneTerm(a, "akmens", "akmen"); // voc. sing.
91 checkOneTerm(a, "akmeņi", "akmen"); // voc. pl.
94 checkOneTerm(a, "kurmis", "kurm"); // nom. sing.
95 checkOneTerm(a, "kurmji", "kurm"); // nom. pl.
96 checkOneTerm(a, "kurmja", "kurm"); // gen. sing.
97 checkOneTerm(a, "kurmju", "kurm"); // gen. pl.
98 checkOneTerm(a, "kurmim", "kurm"); // dat. sing.
99 checkOneTerm(a, "kurmjiem", "kurm"); // dat. pl.
100 checkOneTerm(a, "kurmi", "kurm"); // acc. sing.
101 checkOneTerm(a, "kurmjus", "kurm"); // acc. pl.
102 checkOneTerm(a, "kurmī", "kurm"); // loc. sing.
103 checkOneTerm(a, "kurmjos", "kurm"); // loc. pl.
104 checkOneTerm(a, "kurmi", "kurm"); // voc. sing.
105 checkOneTerm(a, "kurmji", "kurm"); // voc. pl.
108 public void testNouns3() throws IOException {
110 checkOneTerm(a, "lietus", "liet"); // nom. sing.
111 checkOneTerm(a, "lieti", "liet"); // nom. pl.
112 checkOneTerm(a, "lietus", "liet"); // gen. sing.
113 checkOneTerm(a, "lietu", "liet"); // gen. pl.
114 checkOneTerm(a, "lietum", "liet"); // dat. sing.
115 checkOneTerm(a, "lietiem", "liet"); // dat. pl.
116 checkOneTerm(a, "lietu", "liet"); // acc. sing.
117 checkOneTerm(a, "lietus", "liet"); // acc. pl.
118 checkOneTerm(a, "lietū", "liet"); // loc. sing.
119 checkOneTerm(a, "lietos", "liet"); // loc. pl.
120 checkOneTerm(a, "lietus", "liet"); // voc. sing.
121 checkOneTerm(a, "lieti", "liet"); // voc. pl.
124 public void testNouns4() throws IOException {
126 checkOneTerm(a, "lapa", "lap"); // nom. sing.
127 checkOneTerm(a, "lapas", "lap"); // nom. pl.
128 checkOneTerm(a, "lapas", "lap"); // gen. sing.
129 checkOneTerm(a, "lapu", "lap"); // gen. pl.
130 checkOneTerm(a, "lapai", "lap"); // dat. sing.
131 checkOneTerm(a, "lapām", "lap"); // dat. pl.
132 checkOneTerm(a, "lapu", "lap"); // acc. sing.
133 checkOneTerm(a, "lapas", "lap"); // acc. pl.
134 checkOneTerm(a, "lapā", "lap"); // loc. sing.
135 checkOneTerm(a, "lapās", "lap"); // loc. pl.
136 checkOneTerm(a, "lapa", "lap"); // voc. sing.
137 checkOneTerm(a, "lapas", "lap"); // voc. pl.
139 checkOneTerm(a, "puika", "puik"); // nom. sing.
140 checkOneTerm(a, "puikas", "puik"); // nom. pl.
141 checkOneTerm(a, "puikas", "puik"); // gen. sing.
142 checkOneTerm(a, "puiku", "puik"); // gen. pl.
143 checkOneTerm(a, "puikam", "puik"); // dat. sing.
144 checkOneTerm(a, "puikām", "puik"); // dat. pl.
145 checkOneTerm(a, "puiku", "puik"); // acc. sing.
146 checkOneTerm(a, "puikas", "puik"); // acc. pl.
147 checkOneTerm(a, "puikā", "puik"); // loc. sing.
148 checkOneTerm(a, "puikās", "puik"); // loc. pl.
149 checkOneTerm(a, "puika", "puik"); // voc. sing.
150 checkOneTerm(a, "puikas", "puik"); // voc. pl.
154 * Genitive plural forms with (s,t) -> š and (d,z) -> ž
155 * will not conflate due to ambiguity.
157 public void testNouns5() throws IOException {
159 // l -> ļ palatalization
160 checkOneTerm(a, "egle", "egl"); // nom. sing.
161 checkOneTerm(a, "egles", "egl"); // nom. pl.
162 checkOneTerm(a, "egles", "egl"); // gen. sing.
163 checkOneTerm(a, "egļu", "egl"); // gen. pl.
164 checkOneTerm(a, "eglei", "egl"); // dat. sing.
165 checkOneTerm(a, "eglēm", "egl"); // dat. pl.
166 checkOneTerm(a, "egli", "egl"); // acc. sing.
167 checkOneTerm(a, "egles", "egl"); // acc. pl.
168 checkOneTerm(a, "eglē", "egl"); // loc. sing.
169 checkOneTerm(a, "eglēs", "egl"); // loc. pl.
170 checkOneTerm(a, "egle", "egl"); // voc. sing.
171 checkOneTerm(a, "egles", "egl"); // voc. pl.
174 public void testNouns6() throws IOException {
178 checkOneTerm(a, "govs", "gov"); // nom. sing.
179 checkOneTerm(a, "govis", "gov"); // nom. pl.
180 checkOneTerm(a, "govs", "gov"); // gen. sing.
181 checkOneTerm(a, "govju", "gov"); // gen. pl.
182 checkOneTerm(a, "govij", "gov"); // dat. sing.
183 checkOneTerm(a, "govīm", "gov"); // dat. pl.
184 checkOneTerm(a, "govi ", "gov"); // acc. sing.
185 checkOneTerm(a, "govis", "gov"); // acc. pl.
186 checkOneTerm(a, "govi ", "gov"); // inst. sing.
187 checkOneTerm(a, "govīm", "gov"); // inst. pl.
188 checkOneTerm(a, "govī", "gov"); // loc. sing.
189 checkOneTerm(a, "govīs", "gov"); // loc. pl.
190 checkOneTerm(a, "govs", "gov"); // voc. sing.
191 checkOneTerm(a, "govis", "gov"); // voc. pl.
194 public void testAdjectives() throws IOException {
195 checkOneTerm(a, "zils", "zil"); // indef. nom. masc. sing.
196 checkOneTerm(a, "zilais", "zil"); // def. nom. masc. sing.
197 checkOneTerm(a, "zili", "zil"); // indef. nom. masc. pl.
198 checkOneTerm(a, "zilie", "zil"); // def. nom. masc. pl.
199 checkOneTerm(a, "zila", "zil"); // indef. nom. fem. sing.
200 checkOneTerm(a, "zilā", "zil"); // def. nom. fem. sing.
201 checkOneTerm(a, "zilas", "zil"); // indef. nom. fem. pl.
202 checkOneTerm(a, "zilās", "zil"); // def. nom. fem. pl.
203 checkOneTerm(a, "zila", "zil"); // indef. gen. masc. sing.
204 checkOneTerm(a, "zilā", "zil"); // def. gen. masc. sing.
205 checkOneTerm(a, "zilu", "zil"); // indef. gen. masc. pl.
206 checkOneTerm(a, "zilo", "zil"); // def. gen. masc. pl.
207 checkOneTerm(a, "zilas", "zil"); // indef. gen. fem. sing.
208 checkOneTerm(a, "zilās", "zil"); // def. gen. fem. sing.
209 checkOneTerm(a, "zilu", "zil"); // indef. gen. fem. pl.
210 checkOneTerm(a, "zilo", "zil"); // def. gen. fem. pl.
211 checkOneTerm(a, "zilam", "zil"); // indef. dat. masc. sing.
212 checkOneTerm(a, "zilajam", "zil"); // def. dat. masc. sing.
213 checkOneTerm(a, "ziliem", "zil"); // indef. dat. masc. pl.
214 checkOneTerm(a, "zilajiem", "zil"); // def. dat. masc. pl.
215 checkOneTerm(a, "zilai", "zil"); // indef. dat. fem. sing.
216 checkOneTerm(a, "zilajai", "zil"); // def. dat. fem. sing.
217 checkOneTerm(a, "zilām", "zil"); // indef. dat. fem. pl.
218 checkOneTerm(a, "zilajām", "zil"); // def. dat. fem. pl.
219 checkOneTerm(a, "zilu", "zil"); // indef. acc. masc. sing.
220 checkOneTerm(a, "zilo", "zil"); // def. acc. masc. sing.
221 checkOneTerm(a, "zilus", "zil"); // indef. acc. masc. pl.
222 checkOneTerm(a, "zilos", "zil"); // def. acc. masc. pl.
223 checkOneTerm(a, "zilu", "zil"); // indef. acc. fem. sing.
224 checkOneTerm(a, "zilo", "zil"); // def. acc. fem. sing.
225 checkOneTerm(a, "zilās", "zil"); // indef. acc. fem. pl.
226 checkOneTerm(a, "zilās", "zil"); // def. acc. fem. pl.
227 checkOneTerm(a, "zilā", "zil"); // indef. loc. masc. sing.
228 checkOneTerm(a, "zilajā", "zil"); // def. loc. masc. sing.
229 checkOneTerm(a, "zilos", "zil"); // indef. loc. masc. pl.
230 checkOneTerm(a, "zilajos", "zil"); // def. loc. masc. pl.
231 checkOneTerm(a, "zilā", "zil"); // indef. loc. fem. sing.
232 checkOneTerm(a, "zilajā", "zil"); // def. loc. fem. sing.
233 checkOneTerm(a, "zilās", "zil"); // indef. loc. fem. pl.
234 checkOneTerm(a, "zilajās", "zil"); // def. loc. fem. pl.
235 checkOneTerm(a, "zilais", "zil"); // voc. masc. sing.
236 checkOneTerm(a, "zilie", "zil"); // voc. masc. pl.
237 checkOneTerm(a, "zilā", "zil"); // voc. fem. sing.
238 checkOneTerm(a, "zilās", "zil"); // voc. fem. pl.
242 * Note: we intentionally don't handle the ambiguous
243 * (s,t) -> š and (d,z) -> ž
245 public void testPalatalization() throws IOException {
246 checkOneTerm(a, "krāsns", "krāsn"); // nom. sing.
247 checkOneTerm(a, "krāšņu", "krāsn"); // gen. pl.
248 checkOneTerm(a, "zvaigzne", "zvaigzn"); // nom. sing.
249 checkOneTerm(a, "zvaigžņu", "zvaigzn"); // gen. pl.
250 checkOneTerm(a, "kāpslis", "kāpsl"); // nom. sing.
251 checkOneTerm(a, "kāpšļu", "kāpsl"); // gen. pl.
252 checkOneTerm(a, "zizlis", "zizl"); // nom. sing.
253 checkOneTerm(a, "zižļu", "zizl"); // gen. pl.
254 checkOneTerm(a, "vilnis", "viln"); // nom. sing.
255 checkOneTerm(a, "viļņu", "viln"); // gen. pl.
256 checkOneTerm(a, "lelle", "lell"); // nom. sing.
257 checkOneTerm(a, "leļļu", "lell"); // gen. pl.
258 checkOneTerm(a, "pinne", "pinn"); // nom. sing.
259 checkOneTerm(a, "piņņu", "pinn"); // gen. pl.
260 checkOneTerm(a, "rīkste", "rīkst"); // nom. sing.
261 checkOneTerm(a, "rīkšu", "rīkst"); // gen. pl.
265 * Test some length restrictions, we require a 3+ char stem,
266 * with at least one vowel.
268 public void testLength() throws IOException {
269 checkOneTerm(a, "usa", "usa"); // length
270 checkOneTerm(a, "60ms", "60ms"); // vowel count