--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.LinkedList;
+
+/**
+ * Simplistic {@link CharFilter} that applies the mappings
+ * contained in a {@link NormalizeCharMap} to the character
+ * stream, and correcting the resulting changes to the
+ * offsets.
+ */
+public class MappingCharFilter extends BaseCharFilter {
+
+ private final NormalizeCharMap normMap;
+ private LinkedList<Character> buffer;
+ private String replacement;
+ private int charPointer;
+ private int nextCharCounter;
+
+ /** Default constructor that takes a {@link CharStream}. */
+ public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
+ super(in);
+ this.normMap = normMap;
+ }
+
+ /** Easy-use constructor that takes a {@link Reader}. */
+ public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
+ super(CharReader.get(in));
+ this.normMap = normMap;
+ }
+
+ @Override
+ public int read() throws IOException {
+ while(true) {
+ if (replacement != null && charPointer < replacement.length()) {
+ return replacement.charAt(charPointer++);
+ }
+
+ int firstChar = nextChar();
+ if (firstChar == -1) return -1;
+ NormalizeCharMap nm = normMap.submap != null ?
+ normMap.submap.get(Character.valueOf((char) firstChar)) : null;
+ if (nm == null) return firstChar;
+ NormalizeCharMap result = match(nm);
+ if (result == null) return firstChar;
+ replacement = result.normStr;
+ charPointer = 0;
+ if (result.diff != 0) {
+ int prevCumulativeDiff = getLastCumulativeDiff();
+ if (result.diff < 0) {
+ for(int i = 0; i < -result.diff ; i++)
+ addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
+ } else {
+ addOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
+ }
+ }
+ }
+ }
+
+ private int nextChar() throws IOException {
+ nextCharCounter++;
+ if (buffer != null && !buffer.isEmpty()) {
+ return buffer.removeFirst().charValue();
+ }
+ return input.read();
+ }
+
+ private void pushChar(int c) {
+ nextCharCounter--;
+ if(buffer == null)
+ buffer = new LinkedList<Character>();
+ buffer.addFirst(Character.valueOf((char) c));
+ }
+
+ private void pushLastChar(int c) {
+ if (buffer == null) {
+ buffer = new LinkedList<Character>();
+ }
+ buffer.addLast(Character.valueOf((char) c));
+ }
+
+ private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
+ NormalizeCharMap result = null;
+ if (map.submap != null) {
+ int chr = nextChar();
+ if (chr != -1) {
+ NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
+ if (subMap != null) {
+ result = match(subMap);
+ }
+ if (result == null) {
+ pushChar(chr);
+ }
+ }
+ }
+ if (result == null && map.normStr != null) {
+ result = map;
+ }
+ return result;
+ }
+
+ @Override
+ public int read(char[] cbuf, int off, int len) throws IOException {
+ char[] tmp = new char[len];
+ int l = input.read(tmp, 0, len);
+ if (l != -1) {
+ for(int i = 0; i < l; i++)
+ pushLastChar(tmp[i]);
+ }
+ l = 0;
+ for(int i = off; i < off + len; i++) {
+ int c = read();
+ if (c == -1) break;
+ cbuf[i] = (char) c;
+ l++;
+ }
+ return l == 0 ? -1 : l;
+ }
+}