+package org.apache.lucene.analysis.nl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+/**
+ * A stemmer for Dutch words.
+ * <p>
+ * The algorithm is an implementation of
+ * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
+ * algorithm in Martin Porter's snowball project.
+ * </p>
+ * @deprecated Use {@link org.tartarus.snowball.ext.DutchStemmer} instead,
+ * which has the same functionality. This filter will be removed in Lucene 5.0
+ */
+@Deprecated
+public class DutchStemmer {
+ /**
+ * Buffer for the terms while stemming them.
+ */
+ private StringBuilder sb = new StringBuilder();
+ private boolean _removedE;
+ private Map _stemDict;
+
+ private int _R1;
+ private int _R2;
+
+ //TODO convert to internal
+ /*
+ * Stems the given term to an unique <tt>discriminator</tt>.
+ *
+ * @param term The term that should be stemmed.
+ * @return Discriminator for <tt>term</tt>
+ */
+ public String stem(String term) {
+ term = term.toLowerCase();
+ if (!isStemmable(term))
+ return term;
+ if (_stemDict != null && _stemDict.containsKey(term))
+ if (_stemDict.get(term) instanceof String)
+ return (String) _stemDict.get(term);
+ else
+ return null;
+
+ // Reset the StringBuilder.
+ sb.delete(0, sb.length());
+ sb.insert(0, term);
+ // Stemming starts here...
+ substitute(sb);
+ storeYandI(sb);
+ _R1 = getRIndex(sb, 0);
+ _R1 = Math.max(3, _R1);
+ step1(sb);
+ step2(sb);
+ _R2 = getRIndex(sb, _R1);
+ step3a(sb);
+ step3b(sb);
+ step4(sb);
+ reStoreYandI(sb);
+ return sb.toString();
+ }
+
+ private boolean enEnding(StringBuilder sb) {
+ String[] enend = new String[]{"ene", "en"};
+ for (int i = 0; i < enend.length; i++) {
+ String end = enend[i];
+ String s = sb.toString();
+ int index = s.length() - end.length();
+ if (s.endsWith(end) &&
+ index >= _R1 &&
+ isValidEnEnding(sb, index - 1)
+ ) {
+ sb.delete(index, index + end.length());
+ unDouble(sb, index);
+ return true;
+ }
+ }
+ return false;
+ }
+
+
+ private void step1(StringBuilder sb) {
+ if (_R1 >= sb.length())
+ return;
+
+ String s = sb.toString();
+ int lengthR1 = sb.length() - _R1;
+ int index;
+
+ if (s.endsWith("heden")) {
+ sb.replace(_R1, lengthR1 + _R1, sb.substring(_R1, lengthR1 + _R1).replaceAll("heden", "heid"));
+ return;
+ }
+
+ if (enEnding(sb))
+ return;
+
+ if (s.endsWith("se") &&
+ (index = s.length() - 2) >= _R1 &&
+ isValidSEnding(sb, index - 1)
+ ) {
+ sb.delete(index, index + 2);
+ return;
+ }
+ if (s.endsWith("s") &&
+ (index = s.length() - 1) >= _R1 &&
+ isValidSEnding(sb, index - 1)) {
+ sb.delete(index, index + 1);
+ }
+ }
+
+ /**
+ * Delete suffix e if in R1 and
+ * preceded by a non-vowel, and then undouble the ending
+ *
+ * @param sb String being stemmed
+ */
+ private void step2(StringBuilder sb) {
+ _removedE = false;
+ if (_R1 >= sb.length())
+ return;
+ String s = sb.toString();
+ int index = s.length() - 1;
+ if (index >= _R1 &&
+ s.endsWith("e") &&
+ !isVowel(sb.charAt(index - 1))) {
+ sb.delete(index, index + 1);
+ unDouble(sb);
+ _removedE = true;
+ }
+ }
+
+ /**
+ * Delete "heid"
+ *
+ * @param sb String being stemmed
+ */
+ private void step3a(StringBuilder sb) {
+ if (_R2 >= sb.length())
+ return;
+ String s = sb.toString();
+ int index = s.length() - 4;
+ if (s.endsWith("heid") && index >= _R2 && sb.charAt(index - 1) != 'c') {
+ sb.delete(index, index + 4); //remove heid
+ enEnding(sb);
+ }
+ }
+
+ /**
+ * <p>A d-suffix, or derivational suffix, enables a new word,
+ * often with a different grammatical category, or with a different
+ * sense, to be built from another word. Whether a d-suffix can be
+ * attached is discovered not from the rules of grammar, but by
+ * referring to a dictionary. So in English, ness can be added to
+ * certain adjectives to form corresponding nouns (littleness,
+ * kindness, foolishness ...) but not to all adjectives
+ * (not for example, to big, cruel, wise ...) d-suffixes can be
+ * used to change meaning, often in rather exotic ways.</p>
+ * Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+ *
+ * @param sb String being stemmed
+ */
+ private void step3b(StringBuilder sb) {
+ if (_R2 >= sb.length())
+ return;
+ String s = sb.toString();
+ int index = 0;
+
+ if ((s.endsWith("end") || s.endsWith("ing")) &&
+ (index = s.length() - 3) >= _R2) {
+ sb.delete(index, index + 3);
+ if (sb.charAt(index - 2) == 'i' &&
+ sb.charAt(index - 1) == 'g') {
+ if (sb.charAt(index - 3) != 'e' & index - 2 >= _R2) {
+ index -= 2;
+ sb.delete(index, index + 2);
+ }
+ } else {
+ unDouble(sb, index);
+ }
+ return;
+ }
+ if (s.endsWith("ig") &&
+ (index = s.length() - 2) >= _R2
+ ) {
+ if (sb.charAt(index - 1) != 'e')
+ sb.delete(index, index + 2);
+ return;
+ }
+ if (s.endsWith("lijk") &&
+ (index = s.length() - 4) >= _R2
+ ) {
+ sb.delete(index, index + 4);
+ step2(sb);
+ return;
+ }
+ if (s.endsWith("baar") &&
+ (index = s.length() - 4) >= _R2
+ ) {
+ sb.delete(index, index + 4);
+ return;
+ }
+ if (s.endsWith("bar") &&
+ (index = s.length() - 3) >= _R2
+ ) {
+ if (_removedE)
+ sb.delete(index, index + 3);
+ return;
+ }
+ }
+
+ /**
+ * undouble vowel
+ * If the words ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+ *
+ * @param sb String being stemmed
+ */
+ private void step4(StringBuilder sb) {
+ if (sb.length() < 4)
+ return;
+ String end = sb.substring(sb.length() - 4, sb.length());
+ char c = end.charAt(0);
+ char v1 = end.charAt(1);
+ char v2 = end.charAt(2);
+ char d = end.charAt(3);
+ if (v1 == v2 &&
+ d != 'I' &&
+ v1 != 'i' &&
+ isVowel(v1) &&
+ !isVowel(d) &&
+ !isVowel(c)) {
+ sb.delete(sb.length() - 2, sb.length() - 1);
+ }
+ }
+
+ /**
+ * Checks if a term could be stemmed.
+ *
+ * @return true if, and only if, the given term consists in letters.
+ */
+ private boolean isStemmable(String term) {
+ for (int c = 0; c < term.length(); c++) {
+ if (!Character.isLetter(term.charAt(c))) return false;
+ }
+ return true;
+ }
+
+ /**
+ * Substitute ä, ë, ï, ö, ü, á , é, í, ó, ú
+ */
+ private void substitute(StringBuilder buffer) {
+ for (int i = 0; i < buffer.length(); i++) {
+ switch (buffer.charAt(i)) {
+ case 'ä':
+ case 'á':
+ {
+ buffer.setCharAt(i, 'a');
+ break;
+ }
+ case 'ë':
+ case 'é':
+ {
+ buffer.setCharAt(i, 'e');
+ break;
+ }
+ case 'ü':
+ case 'ú':
+ {
+ buffer.setCharAt(i, 'u');
+ break;
+ }
+ case 'ï':
+ case 'i':
+ {
+ buffer.setCharAt(i, 'i');
+ break;
+ }
+ case 'ö':
+ case 'ó':
+ {
+ buffer.setCharAt(i, 'o');
+ break;
+ }
+ }
+ }
+ }
+
+ /*private boolean isValidSEnding(StringBuilder sb) {
+ return isValidSEnding(sb, sb.length() - 1);
+ }*/
+
+ private boolean isValidSEnding(StringBuilder sb, int index) {
+ char c = sb.charAt(index);
+ if (isVowel(c) || c == 'j')
+ return false;
+ return true;
+ }
+
+ /*private boolean isValidEnEnding(StringBuilder sb) {
+ return isValidEnEnding(sb, sb.length() - 1);
+ }*/
+
+ private boolean isValidEnEnding(StringBuilder sb, int index) {
+ char c = sb.charAt(index);
+ if (isVowel(c))
+ return false;
+ if (c < 3)
+ return false;
+ // ends with "gem"?
+ if (c == 'm' && sb.charAt(index - 2) == 'g' && sb.charAt(index - 1) == 'e')
+ return false;
+ return true;
+ }
+
+ private void unDouble(StringBuilder sb) {
+ unDouble(sb, sb.length());
+ }
+
+ private void unDouble(StringBuilder sb, int endIndex) {
+ String s = sb.substring(0, endIndex);
+ if (s.endsWith("kk") || s.endsWith("tt") || s.endsWith("dd") || s.endsWith("nn") || s.endsWith("mm") || s.endsWith("ff")) {
+ sb.delete(endIndex - 1, endIndex);
+ }
+ }
+
+ private int getRIndex(StringBuilder sb, int start) {
+ if (start == 0)
+ start = 1;
+ int i = start;
+ for (; i < sb.length(); i++) {
+ //first non-vowel preceded by a vowel
+ if (!isVowel(sb.charAt(i)) && isVowel(sb.charAt(i - 1))) {
+ return i + 1;
+ }
+ }
+ return i + 1;
+ }
+
+ private void storeYandI(StringBuilder sb) {
+ if (sb.charAt(0) == 'y')
+ sb.setCharAt(0, 'Y');
+
+ int last = sb.length() - 1;
+
+ for (int i = 1; i < last; i++) {
+ switch (sb.charAt(i)) {
+ case 'i':
+ {
+ if (isVowel(sb.charAt(i - 1)) &&
+ isVowel(sb.charAt(i + 1))
+ )
+ sb.setCharAt(i, 'I');
+ break;
+ }
+ case 'y':
+ {
+ if (isVowel(sb.charAt(i - 1)))
+ sb.setCharAt(i, 'Y');
+ break;
+ }
+ }
+ }
+ if (last > 0 && sb.charAt(last) == 'y' && isVowel(sb.charAt(last - 1)))
+ sb.setCharAt(last, 'Y');
+ }
+
+ private void reStoreYandI(StringBuilder sb) {
+ String tmp = sb.toString();
+ sb.delete(0, sb.length());
+ sb.insert(0, tmp.replaceAll("I", "i").replaceAll("Y", "y"));
+ }
+
+ private boolean isVowel(char c) {
+ switch (c) {
+ case 'e':
+ case 'a':
+ case 'o':
+ case 'i':
+ case 'u':
+ case 'y':
+ case 'è':
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void setStemDictionary(Map dict) {
+ _stemDict = dict;
+ }
+
+}