+++ /dev/null
-package org.apache.lucene.analysis.hi;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static org.apache.lucene.analysis.util.StemmerUtil.*;
-
-/**
- * Light Stemmer for Hindi.
- * <p>
- * Implements the algorithm specified in:
- * <i>A Lightweight Stemmer for Hindi</i>
- * Ananthakrishnan Ramanathan and Durgesh D Rao.
- * http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf
- * </p>
- */
-public class HindiStemmer {
- public int stem(char buffer[], int len) {
- // 5
- if ((len > 6) && (endsWith(buffer, len, "ाएंगी")
- || endsWith(buffer, len, "ाएंगे")
- || endsWith(buffer, len, "ाऊंगी")
- || endsWith(buffer, len, "ाऊंगा")
- || endsWith(buffer, len, "ाइयाँ")
- || endsWith(buffer, len, "ाइयों")
- || endsWith(buffer, len, "ाइयां")
- ))
- return len - 5;
-
- // 4
- if ((len > 5) && (endsWith(buffer, len, "ाएगी")
- || endsWith(buffer, len, "ाएगा")
- || endsWith(buffer, len, "ाओगी")
- || endsWith(buffer, len, "ाओगे")
- || endsWith(buffer, len, "एंगी")
- || endsWith(buffer, len, "ेंगी")
- || endsWith(buffer, len, "एंगे")
- || endsWith(buffer, len, "ेंगे")
- || endsWith(buffer, len, "ूंगी")
- || endsWith(buffer, len, "ूंगा")
- || endsWith(buffer, len, "ातीं")
- || endsWith(buffer, len, "नाओं")
- || endsWith(buffer, len, "नाएं")
- || endsWith(buffer, len, "ताओं")
- || endsWith(buffer, len, "ताएं")
- || endsWith(buffer, len, "ियाँ")
- || endsWith(buffer, len, "ियों")
- || endsWith(buffer, len, "ियां")
- ))
- return len - 4;
-
- // 3
- if ((len > 4) && (endsWith(buffer, len, "ाकर")
- || endsWith(buffer, len, "ाइए")
- || endsWith(buffer, len, "ाईं")
- || endsWith(buffer, len, "ाया")
- || endsWith(buffer, len, "ेगी")
- || endsWith(buffer, len, "ेगा")
- || endsWith(buffer, len, "ोगी")
- || endsWith(buffer, len, "ोगे")
- || endsWith(buffer, len, "ाने")
- || endsWith(buffer, len, "ाना")
- || endsWith(buffer, len, "ाते")
- || endsWith(buffer, len, "ाती")
- || endsWith(buffer, len, "ाता")
- || endsWith(buffer, len, "तीं")
- || endsWith(buffer, len, "ाओं")
- || endsWith(buffer, len, "ाएं")
- || endsWith(buffer, len, "ुओं")
- || endsWith(buffer, len, "ुएं")
- || endsWith(buffer, len, "ुआं")
- ))
- return len - 3;
-
- // 2
- if ((len > 3) && (endsWith(buffer, len, "कर")
- || endsWith(buffer, len, "ाओ")
- || endsWith(buffer, len, "िए")
- || endsWith(buffer, len, "ाई")
- || endsWith(buffer, len, "ाए")
- || endsWith(buffer, len, "ने")
- || endsWith(buffer, len, "नी")
- || endsWith(buffer, len, "ना")
- || endsWith(buffer, len, "ते")
- || endsWith(buffer, len, "ीं")
- || endsWith(buffer, len, "ती")
- || endsWith(buffer, len, "ता")
- || endsWith(buffer, len, "ाँ")
- || endsWith(buffer, len, "ां")
- || endsWith(buffer, len, "ों")
- || endsWith(buffer, len, "ें")
- ))
- return len - 2;
-
- // 1
- if ((len > 2) && (endsWith(buffer, len, "ो")
- || endsWith(buffer, len, "े")
- || endsWith(buffer, len, "ू")
- || endsWith(buffer, len, "ु")
- || endsWith(buffer, len, "ी")
- || endsWith(buffer, len, "ि")
- || endsWith(buffer, len, "ा")
- ))
- return len - 1;
- return len;
- }
-}