+++ /dev/null
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis.reverse;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
-
-import java.io.IOException;
-
-/**
- * Reverse token string, for example "country" =&gt; "yrtnuoc".
- * <p>
- * If <code>marker</code> is supplied, then tokens will also be prepended with
- * that character. For example, with a marker of \u0001, "country" =&gt;
- * "\u0001yrtnuoc". This is useful when implementing efficient leading
- * wildcard search.
- * </p>
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ReverseStringFilter, or when using any of
- * its static methods:
- * <ul>
- * <li> As of 3.1, supplementary characters are handled correctly</li>
- * </ul>
- */
-public final class ReverseStringFilter extends TokenFilter {
-
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); // term text of the current token; rewritten in place by incrementToken()
- private final char marker; // ends up as the first char of every reversed token; NOMARKER disables marking
- private final Version matchVersion; // handed to reverse(Version, char[], int, int), which uses it to pick the reversal algorithm
- private static final char NOMARKER = '\uFFFF'; // sentinel: incrementToken() adds no marker when marker equals this value
-
- /**
- * Example marker character: U+0001 (START OF HEADING)
- */
- public static final char START_OF_HEADING_MARKER = '\u0001';
-
- /**
- * Example marker character: U+001F (INFORMATION SEPARATOR ONE)
- */
- public static final char INFORMATION_SEPARATOR_MARKER = '\u001F';
-
- /**
- * Example marker character: U+EC00 (PRIVATE USE AREA: EC00)
- */
- public static final char PUA_EC00_MARKER = '\uEC00';
-
- /**
- * Example marker character: U+200F (RIGHT-TO-LEFT MARK)
- */
- public static final char RTL_DIRECTION_MARKER = '\u200F';
-
- /**
- * Create a new ReverseStringFilter that reverses all tokens in the
- * supplied {@link TokenStream}.
- * <p>
- * The reversed tokens will not be marked.
- * </p>
- * <p>
- * This constructor ends up using <code>Version.LUCENE_30</code> as the
- * match version.
- * </p>
- *
- * @param in {@link TokenStream} to filter
- * @deprecated use {@link #ReverseStringFilter(Version, TokenStream)}
- * instead. This constructor will be removed in Lucene 4.0
- */
- @Deprecated
- public ReverseStringFilter(TokenStream in) {
- this(in, NOMARKER);
- }
-
- /**
- * Create a new ReverseStringFilter that reverses and marks all tokens in the
- * supplied {@link TokenStream}.
- * <p>
- * The reversed tokens will be prepended (marked) by the <code>marker</code>
- * character.
- * </p>
- * <p>
- * This constructor uses <code>Version.LUCENE_30</code> as the match version.
- * </p>
- *
- * @param in {@link TokenStream} to filter
- * @param marker A character used to mark reversed tokens
- * @deprecated use {@link #ReverseStringFilter(Version, TokenStream, char)}
- * instead. This constructor will be removed in Lucene 4.0
- */
- @Deprecated
- public ReverseStringFilter(TokenStream in, char marker) {
- this(Version.LUCENE_30, in, marker);
- }
-
- /**
- * Create a new ReverseStringFilter that reverses all tokens in the
- * supplied {@link TokenStream}.
- * <p>
- * The reversed tokens will not be marked.
- * </p>
- *
- * @param matchVersion See <a href="#version">above</a>
- * @param in {@link TokenStream} to filter
- */
- public ReverseStringFilter(Version matchVersion, TokenStream in) {
- this(matchVersion, in, NOMARKER);
- }
-
- /**
- * Create a new ReverseStringFilter that reverses and marks all tokens in the
- * supplied {@link TokenStream}.
- * <p>
- * The reversed tokens will be prepended (marked) by the <code>marker</code>
- * character.
- * </p>
- * <p>
- * Note that U+FFFF is the value this class uses internally to mean
- * "no marker": passing it as <code>marker</code> leaves the reversed tokens
- * unmarked.
- * </p>
- *
- * @param matchVersion See <a href="#version">above</a>
- * @param in {@link TokenStream} to filter
- * @param marker A character used to mark reversed tokens
- */
- public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
- super(in);
- this.matchVersion = matchVersion;
- this.marker = marker;
- }
-
- @Override
- public boolean incrementToken() throws IOException { // reverses the next token's term in place; returns false when input.incrementToken() does
- if (input.incrementToken()) {
- int len = termAtt.length();
- if (marker != NOMARKER) {
- len++; // one extra char for the marker
- termAtt.resizeBuffer(len);
- termAtt.buffer()[len - 1] = marker; // appended at the end so the reversal below moves it to the front
- }
- reverse( matchVersion, termAtt.buffer(), 0, len );
- termAtt.setLength(len); // publish the new length (one longer than before when a marker was added)
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Reverses the given input string.
- * <p>
- * Equivalent to <code>reverse(Version.LUCENE_30, input)</code>.
- * </p>
- *
- * @param input the string to reverse
- * @return the given input string in reversed order
- * @deprecated use {@link #reverse(Version, String)} instead. This method
- * will be removed in Lucene 4.0
- */
- @Deprecated
- public static String reverse( final String input ){
- return reverse(Version.LUCENE_30, input);
- }
-
- /**
- * Reverses the given input string.
- * <p>
- * The string is copied into a fresh char array, that array is reversed with
- * {@link #reverse(Version, char[], int, int)}, and a new String is built
- * from the result.
- * </p>
- *
- * @param matchVersion See <a href="#version">above</a>
- * @param input the string to reverse; must not be <code>null</code>
- * @return the given input string in reversed order
- */
- public static String reverse( Version matchVersion, final String input ){
- final char[] charInput = input.toCharArray();
- reverse( matchVersion, charInput, 0, charInput.length );
- return new String( charInput );
- }
-
- /**
- * Reverses the given input buffer in-place.
- * <p>
- * The whole array is reversed char by char; surrogate pairs are not
- * treated specially by this overload.
- * </p>
- * @param buffer the input char array to reverse
- * @deprecated use {@link #reverse(Version, char[])} instead. This
- * method will be removed in Lucene 4.0
- */
- @Deprecated
- public static void reverse( final char[] buffer ){
- reverse( buffer, 0, buffer.length );
- }
-
- /**
- * Reverses the given input buffer in-place, over its whole length.
- * @param matchVersion See <a href="#version">above</a>
- * @param buffer the input char array to reverse
- */
- public static void reverse(Version matchVersion, final char[] buffer) {
- reverse(matchVersion, buffer, 0, buffer.length);
- }
-
- /**
- * Partially reverses the given input buffer in-place from offset 0
- * up to the given length.
- * <p>
- * Chars are swapped one by one; surrogate pairs are not treated specially
- * by this overload.
- * </p>
- * @param buffer the input char array to reverse
- * @param len the length in the buffer up to where the
- * buffer should be reversed
- * @deprecated use {@link #reverse(Version, char[], int)} instead. This
- * method will be removed in Lucene 4.0
- */
- @Deprecated
- public static void reverse( final char[] buffer, final int len ){
- reverse( buffer, 0, len );
- }
-
- /**
- * Partially reverses the given input buffer in-place from offset 0
- * up to the given length.
- * @param matchVersion See <a href="#version">above</a>
- * @param buffer the input char array to reverse
- * @param len the length in the buffer up to where the
- * buffer should be reversed
- */
- public static void reverse(Version matchVersion, final char[] buffer,
- final int len) {
- reverse( matchVersion, buffer, 0, len );
- }
-
- /**
- * Partially reverses the given input buffer in-place from the given offset
- * up to the given length.
- * <p>
- * Chars are swapped one by one; surrogate pairs are not treated specially
- * by this overload.
- * </p>
- * @param buffer the input char array to reverse
- * @param start the offset from where to reverse the buffer
- * @param len the number of chars, counted from <code>start</code>, that
- * should be reversed
- * @deprecated use {@link #reverse(Version, char[], int, int)} instead. This
- * method will be removed in Lucene 4.0
- */
- @Deprecated
- public static void reverse(char[] buffer, int start, int len ) {
- reverseUnicode3(buffer, start, len);
- }
-
- /**
- * Reverses <code>len</code> chars of <code>buffer</code>, beginning at
- * <code>start</code>, by swapping each char in the first half of the range
- * with its mirror position in the second half. Every char is treated
- * independently, so the two halves of a surrogate pair end up in swapped
- * order. Ranges of length 0 or 1 (or a negative <code>len</code>) are left
- * untouched.
- *
- * @deprecated Remove this when support for 3.0 indexes is no longer needed.
- */
- @Deprecated
- private static void reverseUnicode3( char[] buffer, int start, int len ){
- if( len <= 1 ) return;
- int num = len>>1; // number of swaps; for an odd length the middle char stays where it is
- for( int i = start; i < ( start + num ); i++ ){
- char c = buffer[i];
- buffer[i] = buffer[start * 2 + len - i - 1]; // start*2 + len - i - 1 is the mirror of i within [start, start+len)
- buffer[start * 2 + len - i - 1] = c;
- }
- }
-
- /**
- * Partially reverses the given input buffer in-place from the given offset
- * up to the given length.
- * <p>
- * When <code>matchVersion</code> is not on or after
- * <code>Version.LUCENE_31</code>, the range is reversed char by char without
- * regard to surrogate pairs. Otherwise a surrogate-aware reversal is used:
- * a high surrogate immediately followed by a low surrogate is moved as a
- * unit, so the pair keeps its high-then-low order in the result.
- * </p>
- * <p>
- * No bounds validation is performed here; <code>start</code> and
- * <code>len</code> are used directly to index <code>buffer</code>.
- * </p>
- * @param matchVersion See <a href="#version">above</a>
- * @param buffer the input char array to reverse
- * @param start the offset from where to reverse the buffer
- * @param len the number of chars, counted from <code>start</code>, that
- * should be reversed; values below 2 leave the buffer unchanged
- */
- public static void reverse(Version matchVersion, final char[] buffer,
- final int start, final int len) {
- if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
- reverseUnicode3(buffer, start, len);
- return;
- }
- /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
- if (len < 2)
- return;
- int end = (start + len) - 1; // index of the last char in the range; moves towards the middle
- char frontHigh = buffer[start]; // char carried from the front that still has to be written near the end
- char endLow = buffer[end]; // char carried from the end that still has to be written near the front
- boolean allowFrontSur = true, allowEndSur = true; // false for one iteration when the carried char on that side is the leftover half of a pair
- final int mid = start + (len >> 1);
- for (int i = start; i < mid; ++i, --end) {
- final char frontLow = buffer[i + 1];
- final char endHigh = buffer[end - 1];
- final boolean surAtFront = allowFrontSur
- && Character.isSurrogatePair(frontHigh, frontLow);
- if (surAtFront && (len < 3)) {
- // nothing to do since surAtFront is allowed and 1 char left
- return;
- }
- final boolean surAtEnd = allowEndSur
- && Character.isSurrogatePair(endHigh, endLow);
- allowFrontSur = allowEndSur = true; // the flags only ever suppress a single pair check
- if (surAtFront == surAtEnd) {
- if (surAtFront) {
- // both surrogates
- buffer[end] = frontLow;
- buffer[--end] = frontHigh; // front pair lands at the end, still high-then-low
- buffer[i] = endHigh;
- buffer[++i] = endLow; // end pair lands at the front, still high-then-low
- frontHigh = buffer[i + 1]; // two chars were consumed per side, so reload both carried chars
- endLow = buffer[end - 1];
- } else {
- // neither surrogates
- buffer[end] = frontHigh;
- buffer[i] = endLow;
- frontHigh = frontLow;
- endLow = endHigh;
- }
- } else {
- if (surAtFront) {
- // surrogate only at the front
- buffer[end] = frontLow;
- buffer[i] = endLow;
- endLow = endHigh;
- allowFrontSur = false; // frontHigh stays carried and is written on the next step; do not pair it again
- } else {
- // surrogate only at the end
- buffer[end] = frontHigh;
- buffer[i] = endHigh;
- frontHigh = frontLow;
- allowEndSur = false; // endLow stays carried and is written on the next step; do not pair it again
- }
- }
- }
- if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur)) {
- // only if odd length
- buffer[end] = allowFrontSur ? endLow : frontHigh; // write the still-carried half of the pair into the middle slot
- }
- }
-}