1 package org.apache.lucene.analysis.hi;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import static org.apache.lucene.analysis.util.StemmerUtil.*;
23 * Light Stemmer for Hindi.
25 * Implements the algorithm specified in:
26 * <i>A Lightweight Stemmer for Hindi</i>
27 * Ananthakrishnan Ramanathan and Durgesh D Rao.
28 * http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf
31 public class HindiStemmer {
32 public int stem(char buffer[], int len) {
34 if ((len > 6) && (endsWith(buffer, len, "ाएंगी")
35 || endsWith(buffer, len, "ाएंगे")
36 || endsWith(buffer, len, "ाऊंगी")
37 || endsWith(buffer, len, "ाऊंगा")
38 || endsWith(buffer, len, "ाइयाँ")
39 || endsWith(buffer, len, "ाइयों")
40 || endsWith(buffer, len, "ाइयां")
45 if ((len > 5) && (endsWith(buffer, len, "ाएगी")
46 || endsWith(buffer, len, "ाएगा")
47 || endsWith(buffer, len, "ाओगी")
48 || endsWith(buffer, len, "ाओगे")
49 || endsWith(buffer, len, "एंगी")
50 || endsWith(buffer, len, "ेंगी")
51 || endsWith(buffer, len, "एंगे")
52 || endsWith(buffer, len, "ेंगे")
53 || endsWith(buffer, len, "ूंगी")
54 || endsWith(buffer, len, "ूंगा")
55 || endsWith(buffer, len, "ातीं")
56 || endsWith(buffer, len, "नाओं")
57 || endsWith(buffer, len, "नाएं")
58 || endsWith(buffer, len, "ताओं")
59 || endsWith(buffer, len, "ताएं")
60 || endsWith(buffer, len, "ियाँ")
61 || endsWith(buffer, len, "ियों")
62 || endsWith(buffer, len, "ियां")
67 if ((len > 4) && (endsWith(buffer, len, "ाकर")
68 || endsWith(buffer, len, "ाइए")
69 || endsWith(buffer, len, "ाईं")
70 || endsWith(buffer, len, "ाया")
71 || endsWith(buffer, len, "ेगी")
72 || endsWith(buffer, len, "ेगा")
73 || endsWith(buffer, len, "ोगी")
74 || endsWith(buffer, len, "ोगे")
75 || endsWith(buffer, len, "ाने")
76 || endsWith(buffer, len, "ाना")
77 || endsWith(buffer, len, "ाते")
78 || endsWith(buffer, len, "ाती")
79 || endsWith(buffer, len, "ाता")
80 || endsWith(buffer, len, "तीं")
81 || endsWith(buffer, len, "ाओं")
82 || endsWith(buffer, len, "ाएं")
83 || endsWith(buffer, len, "ुओं")
84 || endsWith(buffer, len, "ुएं")
85 || endsWith(buffer, len, "ुआं")
90 if ((len > 3) && (endsWith(buffer, len, "कर")
91 || endsWith(buffer, len, "ाओ")
92 || endsWith(buffer, len, "िए")
93 || endsWith(buffer, len, "ाई")
94 || endsWith(buffer, len, "ाए")
95 || endsWith(buffer, len, "ने")
96 || endsWith(buffer, len, "नी")
97 || endsWith(buffer, len, "ना")
98 || endsWith(buffer, len, "ते")
99 || endsWith(buffer, len, "ीं")
100 || endsWith(buffer, len, "ती")
101 || endsWith(buffer, len, "ता")
102 || endsWith(buffer, len, "ाँ")
103 || endsWith(buffer, len, "ां")
104 || endsWith(buffer, len, "ों")
105 || endsWith(buffer, len, "ें")
110 if ((len > 2) && (endsWith(buffer, len, "ो")
111 || endsWith(buffer, len, "े")
112 || endsWith(buffer, len, "ू")
113 || endsWith(buffer, len, "ु")
114 || endsWith(buffer, len, "ी")
115 || endsWith(buffer, len, "ि")
116 || endsWith(buffer, len, "ा")