--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.reverse;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+
+/**
+ * Reverses the term text of each token, for example "country" =&gt; "yrtnuoc".
+ * <p>
+ * If a <code>marker</code> is supplied, each reversed token is also prefixed
+ * with that character. For example, with a marker of U+0001, "country" =&gt;
+ * U+0001 followed by "yrtnuoc". This is useful when implementing efficient
+ * leading-wildcard search.
+ * </p>
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ReverseStringFilter, or when using any of
+ * its static methods:
+ * </p>
+ * <ul>
+ * <li> As of 3.1, supplementary characters are handled correctly
+ * </ul>
+ */
+public final class ReverseStringFilter extends TokenFilter {
+
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); // term text of the current token; its buffer is reversed in place by incrementToken()
+ private final char marker; // character that ends up in front of each reversed token, or NOMARKER for none
+ private final Version matchVersion; // passed to reverse(Version, char[], int, int) to select the reversal algorithm
+ private static final char NOMARKER = '\uFFFF'; // sentinel: incrementToken() adds no marker when the marker field equals this value
+
+ /**
+ * Example marker character: U+0001 (START OF HEADING)
+ */
+ public static final char START_OF_HEADING_MARKER = '\u0001';
+
+ /**
+ * Example marker character: U+001F (INFORMATION SEPARATOR ONE)
+ */
+ public static final char INFORMATION_SEPARATOR_MARKER = '\u001F';
+
+ /**
+ * Example marker character: U+EC00 (PRIVATE USE AREA: EC00)
+ */
+ public static final char PUA_EC00_MARKER = '\uEC00';
+
+ /**
+ * Example marker character: U+200F (RIGHT-TO-LEFT MARK)
+ */
+ public static final char RTL_DIRECTION_MARKER = '\u200F';
+
+  /**
+   * Creates a filter that reverses every token of the supplied
+   * {@link TokenStream} without adding a marker character.
+   * <p>
+   * Equivalent to calling
+   * {@link #ReverseStringFilter(Version, TokenStream, char)} with
+   * {@link Version#LUCENE_30} and no marker.
+   * </p>
+   *
+   * @param in {@link TokenStream} to filter
+   * @deprecated use {@link #ReverseStringFilter(Version, TokenStream)}
+   *             instead. This constructor will be removed in Lucene 4.0
+   */
+  @Deprecated
+  public ReverseStringFilter(TokenStream in) {
+    this(Version.LUCENE_30, in, NOMARKER);
+  }
+
+ /**
+ * Create a new ReverseStringFilter that reverses and marks all tokens in the
+ * supplied {@link TokenStream}.
+ * <p>
+ * The reversed tokens will be prepended (marked) by the <code>marker</code>
+ * character.
+ * </p>
+ * <p>
+ * Equivalent to passing {@link Version#LUCENE_30} as the match version to
+ * {@link #ReverseStringFilter(Version, TokenStream, char)}.
+ * </p>
+ *
+ * @param in {@link TokenStream} to filter
+ * @param marker A character used to mark reversed tokens
+ * @deprecated use {@link #ReverseStringFilter(Version, TokenStream, char)}
+ * instead. This constructor will be removed in Lucene 4.0
+ */
+ @Deprecated
+ public ReverseStringFilter(TokenStream in, char marker) {
+ this(Version.LUCENE_30, in, marker); // fixed 3.0 compatibility for callers that do not supply a Version
+ }
+
+ /**
+ * Create a new ReverseStringFilter that reverses all tokens in the
+ * supplied {@link TokenStream}.
+ * <p>
+ * The reversed tokens will not be marked.
+ * </p>
+ * <p>
+ * Delegates to {@link #ReverseStringFilter(Version, TokenStream, char)}
+ * using the internal "no marker" value.
+ * </p>
+ *
+ * @param matchVersion See <a href="#version">above</a>
+ * @param in {@link TokenStream} to filter
+ */
+ public ReverseStringFilter(Version matchVersion, TokenStream in) {
+ this(matchVersion, in, NOMARKER); // NOMARKER tells incrementToken() not to add a marker
+ }
+
+ /**
+ * Create a new ReverseStringFilter that reverses and marks all tokens in the
+ * supplied {@link TokenStream}.
+ * <p>
+ * The reversed tokens will be prepended (marked) by the <code>marker</code>
+ * character.
+ * </p>
+ * <p>
+ * Note that U+FFFF is used internally as the "no marker" value: passing it
+ * as <code>marker</code> results in tokens that are reversed but not marked.
+ * </p>
+ *
+ * @param matchVersion See <a href="#version">above</a>
+ * @param in {@link TokenStream} to filter
+ * @param marker A character used to mark reversed tokens
+ */
+ public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
+ super(in);
+ this.matchVersion = matchVersion; // later handed to reverse(Version, char[], int, int) for every token
+ this.marker = marker;
+ }
+
+  /**
+   * Advances the wrapped stream by one token and reverses its term text
+   * in place. When a marker is configured it is appended to the term before
+   * the reversal, so that it ends up as the first character of the result.
+   *
+   * @return <code>true</code> if a token was produced, <code>false</code> once
+   *         the wrapped stream is exhausted
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    final int termLength = termAtt.length();
+    final boolean marked = marker != NOMARKER;
+    final int newLength = marked ? termLength + 1 : termLength;
+    if (marked) {
+      // make room for one extra char and put the marker after the term text
+      termAtt.resizeBuffer(newLength);
+      termAtt.buffer()[termLength] = marker;
+    }
+    reverse(matchVersion, termAtt.buffer(), 0, newLength);
+    termAtt.setLength(newLength);
+    return true;
+  }
+
+ /**
+ * Reverses the given input string
+ * <p>
+ * Equivalent to {@link #reverse(Version, String)} with
+ * {@link Version#LUCENE_30}.
+ * </p>
+ *
+ * @param input the string to reverse
+ * @return the given input string in reversed order
+ * @deprecated use {@link #reverse(Version, String)} instead. This method
+ * will be removed in Lucene 4.0
+ */
+ @Deprecated
+ public static String reverse( final String input ){
+ return reverse(Version.LUCENE_30, input); // fixed 3.0 compatibility for callers that do not supply a Version
+ }
+
+  /**
+   * Returns a new string holding the characters of <code>input</code> in
+   * reversed order. The input string itself is not modified; the reversal
+   * is done on a copy of its characters.
+   *
+   * @param matchVersion See <a href="#version">above</a>
+   * @param input the string to reverse
+   * @return the given input string in reversed order
+   */
+  public static String reverse(Version matchVersion, final String input) {
+    final char[] chars = input.toCharArray();
+    reverse(matchVersion, chars, 0, chars.length);
+    return String.valueOf(chars);
+  }
+
+  /**
+   * Reverses the whole given buffer in-place, swapping individual
+   * <code>char</code> values.
+   *
+   * @param buffer the input char array to reverse
+   * @deprecated use {@link #reverse(Version, char[])} instead. This
+   *             method will be removed in Lucene 4.0
+   */
+  @Deprecated
+  public static void reverse(final char[] buffer) {
+    reverseUnicode3(buffer, 0, buffer.length);
+  }
+
+ /**
+ * Reverses the given input buffer in-place
+ * <p>
+ * The whole array is reversed, i.e. this is the same as calling
+ * {@link #reverse(Version, char[], int, int)} with a start of 0 and a
+ * length of <code>buffer.length</code>.
+ * </p>
+ * @param matchVersion See <a href="#version">above</a>
+ * @param buffer the input char array to reverse
+ */
+ public static void reverse(Version matchVersion, final char[] buffer) {
+ reverse(matchVersion, buffer, 0, buffer.length);
+ }
+
+  /**
+   * Reverses the first <code>len</code> chars of the given buffer in-place,
+   * swapping individual <code>char</code> values. Chars at index
+   * <code>len</code> and beyond are left untouched.
+   *
+   * @param buffer the input char array to reverse
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   * @deprecated use {@link #reverse(Version, char[], int)} instead. This
+   *             method will be removed in Lucene 4.0
+   */
+  @Deprecated
+  public static void reverse(final char[] buffer, final int len) {
+    reverseUnicode3(buffer, 0, len);
+  }
+
+ /**
+ * Partially reverses the given input buffer in-place from offset 0
+ * up to the given length.
+ * <p>
+ * Same as calling {@link #reverse(Version, char[], int, int)} with a start
+ * of 0.
+ * </p>
+ * @param matchVersion See <a href="#version">above</a>
+ * @param buffer the input char array to reverse
+ * @param len the length in the buffer up to where the
+ * buffer should be reversed
+ */
+ public static void reverse(Version matchVersion, final char[] buffer,
+ final int len) {
+ reverse( matchVersion, buffer, 0, len );
+ }
+
+  /**
+   * Reverses <code>len</code> chars of the given buffer in-place, beginning at
+   * index <code>start</code>. Individual <code>char</code> values are swapped;
+   * chars outside the range are left untouched.
+   *
+   * @param buffer the input char array to reverse
+   * @param start the offset from where to reverse the buffer
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   * @deprecated use {@link #reverse(Version, char[], int, int)} instead. This
+   *             method will be removed in Lucene 4.0
+   */
+  @Deprecated
+  public static void reverse(char[] buffer, int start, int len) {
+    reverseUnicode3(buffer, start, len);
+  }
+
+  /**
+   * Char-by-char in-place reversal of <code>buffer[start .. start + len - 1]</code>.
+   * Ranges of fewer than two chars are left as they are.
+   *
+   * @deprecated Remove this when support for 3.0 indexes is no longer needed.
+   */
+  @Deprecated
+  private static void reverseUnicode3(char[] buffer, int start, int len) {
+    if (len < 2) {
+      return;
+    }
+    // walk two cursors towards each other, swapping as they go
+    int left = start;
+    int right = start + len - 1;
+    while (left < right) {
+      final char tmp = buffer[left];
+      buffer[left] = buffer[right];
+      buffer[right] = tmp;
+      left++;
+      right--;
+    }
+  }
+
+ /**
+ * Partially reverses the given input buffer in-place from the given offset
+ * up to the given length.
+ * <p>
+ * When <code>matchVersion</code> is before {@link Version#LUCENE_31}, the range
+ * is reversed one <code>char</code> at a time. Otherwise, adjacent chars for
+ * which {@link Character#isSurrogatePair(char, char)} is true are moved
+ * together, so that the high surrogate still precedes the low surrogate
+ * after the reversal.
+ * </p>
+ * <p>
+ * Ranges shorter than two chars are left untouched. <code>start</code> and
+ * <code>len</code> are not validated against the buffer size.
+ * </p>
+ * @param matchVersion See <a href="#version">above</a>
+ * @param buffer the input char array to reverse
+ * @param start the offset from where to reverse the buffer
+ * @param len the length in the buffer up to where the
+ * buffer should be reversed
+ */
+ public static void reverse(Version matchVersion, final char[] buffer,
+ final int start, final int len) {
+ if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
+ reverseUnicode3(buffer, start, len);
+ return;
+ }
+ /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
+ if (len < 2)
+ return;
+ int end = (start + len) - 1; // index of the last char in the range; moves towards the middle
+ char frontHigh = buffer[start]; // char at the front cursor, a possible high surrogate
+ char endLow = buffer[end]; // char at the end cursor, a possible low surrogate
+ boolean allowFrontSur = true, allowEndSur = true; // false for one iteration after a pair on that side was only half moved
+ final int mid = start + (len >> 1);
+ for (int i = start; i < mid; ++i, --end) {
+ final char frontLow = buffer[i + 1]; // char following the front cursor
+ final char endHigh = buffer[end - 1]; // char preceding the end cursor
+ final boolean surAtFront = allowFrontSur
+ && Character.isSurrogatePair(frontHigh, frontLow);
+ if (surAtFront && (len < 3)) {
+ // len is 2 here, so the whole range is one surrogate pair: leave it as is
+ return;
+ }
+ final boolean surAtEnd = allowEndSur
+ && Character.isSurrogatePair(endHigh, endLow);
+ allowFrontSur = allowEndSur = true; // the flags only suppress pairing for a single iteration
+ if (surAtFront == surAtEnd) {
+ if (surAtFront) {
+ // both surrogates: swap the two pairs as units, consuming two chars on each side
+ buffer[end] = frontLow;
+ buffer[--end] = frontHigh;
+ buffer[i] = endHigh;
+ buffer[++i] = endLow;
+ frontHigh = buffer[i + 1];
+ endLow = buffer[end - 1];
+ } else {
+ // neither surrogates: plain swap of one char from each side
+ buffer[end] = frontHigh;
+ buffer[i] = endLow;
+ frontHigh = frontLow;
+ endLow = endHigh;
+ }
+ } else {
+ if (surAtFront) {
+ // surrogate only at the front: move its low half now, frontHigh stays pending
+ buffer[end] = frontLow;
+ buffer[i] = endLow;
+ endLow = endHigh;
+ allowFrontSur = false;
+ } else {
+ // surrogate only at the end: move its high half now, endLow stays pending
+ buffer[end] = frontHigh;
+ buffer[i] = endHigh;
+ frontHigh = frontLow;
+ allowEndSur = false;
+ }
+ }
+ }
+ if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur)) {
+ // only if odd length: the middle slot receives the pending half of a pair split by the last iteration
+ buffer[end] = allowFrontSur ? endLow : frontHigh;
+ }
+ }
+}