pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / src / java / org / apache / lucene / analysis / ISOLatin1AccentFilter.java
diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java

deleted file mode 100644 (file)

index 03378ab..0000000
--- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java
+++ /dev/null
@@ -1,260 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * A filter that replaces accented characters in the ISO Latin 1 character set 
- * (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
- * <p>
- * For instance, '&agrave;' will be replaced by 'a'.
- * <p>
- * 
- * @deprecated If you build a new index, use {@link ASCIIFoldingFilter}
- * which covers a superset of Latin 1.
- * This class is included for use with existing
- * indexes and will be removed in a future release (possibly Lucene 4.0).
- */
-@Deprecated
-public final class ISOLatin1AccentFilter extends TokenFilter {
-  public ISOLatin1AccentFilter(TokenStream input) {
-    super(input);
-  }
-
-  private char[] output = new char[256];
-  private int outputPos;
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    
-  @Override
-  public final boolean incrementToken() throws java.io.IOException {    
-    if (input.incrementToken()) {
-      final char[] buffer = termAtt.buffer();
-      final int length = termAtt.length();
-      // If no characters actually require rewriting then we
-      // just return token as-is:
-      for(int i=0;i<length;i++) {
-        final char c = buffer[i];
-        if (c >= '\u00c0' && c <= '\uFB06') {
-          removeAccents(buffer, length);
-          termAtt.copyBuffer(output, 0, outputPos);
-          break;
-        }
-      }
-      return true;
-    } else
-      return false;
-  }
-
-  /**
-   * To replace accented characters in a String by unaccented equivalents.
-   */
-  public final void removeAccents(char[] input, int length) {
-
-    // Worst-case length required:
-    final int maxSizeNeeded = 2*length;
-
-    int size = output.length;
-    while (size < maxSizeNeeded)
-      size *= 2;
-
-    if (size != output.length)
-      output = new char[size];
-
-    outputPos = 0;
-
-    int pos = 0;
-
-    for (int i=0; i<length; i++, pos++) {
-      final char c = input[pos];
-
-      // Quick test: if it's not in range then just keep
-      // current character
-      if (c < '\u00c0' || c > '\uFB06')
-        output[outputPos++] = c;
-      else {
-        switch (c) {
-        case '\u00C0' : // À
-        case '\u00C1' : // Á
-        case '\u00C2' : // Â
-        case '\u00C3' : // Ã
-        case '\u00C4' : // Ä
-        case '\u00C5' : // Å
-          output[outputPos++] = 'A';
-          break;
-        case '\u00C6' : // Æ
-          output[outputPos++] = 'A';
-          output[outputPos++] = 'E';
-          break;
-        case '\u00C7' : // Ç
-          output[outputPos++] = 'C';
-          break;
-        case '\u00C8' : // È
-        case '\u00C9' : // É
-        case '\u00CA' : // Ê
-        case '\u00CB' : // Ë
-          output[outputPos++] = 'E';
-          break;
-        case '\u00CC' : // Ì
-        case '\u00CD' : // Í
-        case '\u00CE' : // Î
-        case '\u00CF' : // Ï
-          output[outputPos++] = 'I';
-          break;
-        case '\u0132' : // Ĳ
-            output[outputPos++] = 'I';
-            output[outputPos++] = 'J';
-            break;
-        case '\u00D0' : // Ð
-          output[outputPos++] = 'D';
-          break;
-        case '\u00D1' : // Ñ
-          output[outputPos++] = 'N';
-          break;
-        case '\u00D2' : // Ò
-        case '\u00D3' : // Ó
-        case '\u00D4' : // Ô
-        case '\u00D5' : // Õ
-        case '\u00D6' : // Ö
-        case '\u00D8' : // Ø
-          output[outputPos++] = 'O';
-          break;
-        case '\u0152' : // Œ
-          output[outputPos++] = 'O';
-          output[outputPos++] = 'E';
-          break;
-        case '\u00DE' : // Þ
-          output[outputPos++] = 'T';
-          output[outputPos++] = 'H';
-          break;
-        case '\u00D9' : // Ù
-        case '\u00DA' : // Ú
-        case '\u00DB' : // Û
-        case '\u00DC' : // Ü
-          output[outputPos++] = 'U';
-          break;
-        case '\u00DD' : // Ý
-        case '\u0178' : // Ÿ
-          output[outputPos++] = 'Y';
-          break;
-        case '\u00E0' : // à
-        case '\u00E1' : // á
-        case '\u00E2' : // â
-        case '\u00E3' : // ã
-        case '\u00E4' : // ä
-        case '\u00E5' : // å
-          output[outputPos++] = 'a';
-          break;
-        case '\u00E6' : // æ
-          output[outputPos++] = 'a';
-          output[outputPos++] = 'e';
-          break;
-        case '\u00E7' : // ç
-          output[outputPos++] = 'c';
-          break;
-        case '\u00E8' : // è
-        case '\u00E9' : // é
-        case '\u00EA' : // ê
-        case '\u00EB' : // ë
-          output[outputPos++] = 'e';
-          break;
-        case '\u00EC' : // ì
-        case '\u00ED' : // í
-        case '\u00EE' : // î
-        case '\u00EF' : // ï
-          output[outputPos++] = 'i';
-          break;
-        case '\u0133' : // ĳ
-            output[outputPos++] = 'i';
-            output[outputPos++] = 'j';
-            break;
-        case '\u00F0' : // ð
-          output[outputPos++] = 'd';
-          break;
-        case '\u00F1' : // ñ
-          output[outputPos++] = 'n';
-          break;
-        case '\u00F2' : // ò
-        case '\u00F3' : // ó
-        case '\u00F4' : // ô
-        case '\u00F5' : // õ
-        case '\u00F6' : // ö
-        case '\u00F8' : // ø
-          output[outputPos++] = 'o';
-          break;
-        case '\u0153' : // œ
-          output[outputPos++] = 'o';
-          output[outputPos++] = 'e';
-          break;
-        case '\u00DF' : // ß
-          output[outputPos++] = 's';
-          output[outputPos++] = 's';
-          break;
-        case '\u00FE' : // þ
-          output[outputPos++] = 't';
-          output[outputPos++] = 'h';
-          break;
-        case '\u00F9' : // ù
-        case '\u00FA' : // ú
-        case '\u00FB' : // û
-        case '\u00FC' : // ü
-          output[outputPos++] = 'u';
-          break;
-        case '\u00FD' : // ý
-        case '\u00FF' : // ÿ
-          output[outputPos++] = 'y';
-          break;
-        case '\uFB00': // ﬀ
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'f';
-            break;
-        case '\uFB01': // ﬁ
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'i';
-            break;
-        case '\uFB02': // ﬂ
-            output[outputPos++] = 'f';
-            output[outputPos++] = 'l';
-            break;
-        // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
-//        case '\uFB03': // ﬃ
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'i';
-//            break;
-//        case '\uFB04': // ﬄ
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'f';
-//            output[outputPos++] = 'l';
-//            break;
-        case '\uFB05': // ﬅ
-            output[outputPos++] = 'f';
-            output[outputPos++] = 't';
-            break;
-        case '\uFB06': // ﬆ
-            output[outputPos++] = 's';
-            output[outputPos++] = 't';
-          break;
-        default :
-          output[outputPos++] = c;
-          break;
-        }
-      }
-    }
-  }
-}