+++ /dev/null
-package org.apache.lucene.util;
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * {@link CharacterUtils} provides a unified interface to Character-related
- * operations to implement backwards compatible character operations based on a
- * {@link Version} instance.
- * <p>
- * Two implementations exist: one that treats every <code>char</code> as a
- * complete code point and one that delegates to the supplementary-character
- * aware methods of {@link Character}. Use {@link #getInstance(Version)} to
- * obtain the one matching a given {@link Version}.
- * </p>
- *
- * @lucene.internal
- */
-public abstract class CharacterUtils {
-  private static final Java4CharacterUtils JAVA_4 = new Java4CharacterUtils();
-  private static final Java5CharacterUtils JAVA_5 = new Java5CharacterUtils();
-
-  /**
-   * Returns a {@link CharacterUtils} implementation according to the given
-   * {@link Version} instance. Versions on or after {@link Version#LUCENE_31}
-   * receive the supplementary-character aware implementation, all earlier
-   * versions receive the one that works on single <code>char</code> values.
-   *
-   * @param matchVersion
-   *          a version instance
-   * @return a {@link CharacterUtils} implementation according to the given
-   *         {@link Version} instance.
-   */
-  public static CharacterUtils getInstance(final Version matchVersion) {
-    return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
-  }
-
-  /**
-   * Returns the code point at the given index of the char array.
-   * Depending on the {@link Version} passed to
-   * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior
-   * of {@link Character#codePointAt(char[], int)} as it would have been
-   * available on a Java 1.4 JVM or on a later virtual machine version.
-   *
-   * @param chars
-   *          a character array
-   * @param offset
-   *          the offset to the char values in the chars array to be converted
-   *
-   * @return the Unicode code point at the given index
-   * @throws NullPointerException
-   *           - if the array is null.
-   * @throws IndexOutOfBoundsException
-   *           - if the value offset is negative or not less than the length of
-   *           the char array.
-   */
-  public abstract int codePointAt(final char[] chars, final int offset);
-
-  /**
-   * Returns the code point at the given index of the {@link CharSequence}.
-   * Depending on the {@link Version} passed to
-   * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior
-   * of {@link Character#codePointAt(CharSequence, int)} as it would have been
-   * available on a Java 1.4 JVM or on a later virtual machine version.
-   *
-   * @param seq
-   *          a character sequence
-   * @param offset
-   *          the offset to the char values in the sequence to be converted
-   *
-   * @return the Unicode code point at the given index
-   * @throws NullPointerException
-   *           - if the sequence is null.
-   * @throws IndexOutOfBoundsException
-   *           - if the value offset is negative or not less than the length of
-   *           the character sequence.
-   */
-  public abstract int codePointAt(final CharSequence seq, final int offset);
-
-  /**
-   * Returns the code point at the given index of the char array where only elements
-   * with index less than the limit are used.
-   * Depending on the {@link Version} passed to
-   * {@link CharacterUtils#getInstance(Version)} this method mimics the behavior
-   * of {@link Character#codePointAt(char[], int, int)} as it would have been
-   * available on a Java 1.4 JVM or on a later virtual machine version.
-   *
-   * @param chars
-   *          a character array
-   * @param offset
-   *          the offset to the char values in the chars array to be converted
-   * @param limit
-   *          the index after the last element that should be used to calculate
-   *          the code point.
-   *
-   * @return the Unicode code point at the given index
-   * @throws NullPointerException
-   *           - if the array is null.
-   * @throws IndexOutOfBoundsException
-   *           - if the value offset is negative or not less than the limit.
-   */
-  public abstract int codePointAt(final char[] chars, final int offset, final int limit);
-
-  /**
-   * Creates a new {@link CharacterBuffer} and allocates a <code>char[]</code>
-   * of the given bufferSize.
-   *
-   * @param bufferSize
-   *          the internal char buffer size, must be {@code >= 2}
-   * @return a new {@link CharacterBuffer} instance.
-   * @throws IllegalArgumentException
-   *           if bufferSize is smaller than 2.
-   */
-  public static CharacterBuffer newCharacterBuffer(final int bufferSize) {
-    if (bufferSize < 2) {
-      throw new IllegalArgumentException("buffersize must be >= 2");
-    }
-    return new CharacterBuffer(new char[bufferSize], 0, 0);
-  }
-
-  /**
-   * Fills the {@link CharacterBuffer} with characters read from the given
-   * reader {@link Reader}. This method tries to read as many characters into
-   * the {@link CharacterBuffer} as possible, each call to fill will start
-   * filling the buffer from offset <code>0</code> up to the length of the size
-   * of the internal character array.
-   * <p>
-   * Depending on the {@link Version} passed to
-   * {@link CharacterUtils#getInstance(Version)} this method implements
-   * supplementary character awareness when filling the given buffer. The
-   * implementation returned for {@link Version#LUCENE_31} and later holds back
-   * a high surrogate that ends up as the last element of a buffer containing
-   * more than one character and places it at the start of the buffer on the
-   * next call. In other words, high and low surrogate pairs are preserved
-   * across buffer borders.
-   * </p>
-   *
-   * @param buffer
-   *          the buffer to fill.
-   * @param reader
-   *          the reader to read characters from.
-   * @return <code>false</code> if the reader reported its end and no
-   *         characters were placed into the buffer, otherwise
-   *         <code>true</code>.
-   * @throws IOException
-   *           if the reader throws an {@link IOException}.
-   */
-  public abstract boolean fill(CharacterBuffer buffer, Reader reader) throws IOException;
-
-  /**
-   * Implementation that delegates to the supplementary-character aware
-   * methods of {@link Character}.
-   */
-  private static final class Java5CharacterUtils extends CharacterUtils {
-    Java5CharacterUtils() {
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset) {
-      return Character.codePointAt(chars, offset);
-    }
-
-    @Override
-    public int codePointAt(final CharSequence seq, final int offset) {
-      return Character.codePointAt(seq, offset);
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset, final int limit) {
-      return Character.codePointAt(chars, offset, limit);
-    }
-
-    @Override
-    public boolean fill(final CharacterBuffer buffer, final Reader reader) throws IOException {
-      final char[] charBuffer = buffer.buffer;
-      buffer.offset = 0;
-      // Put a high surrogate held back by the previous fill into slot 0. When
-      // none is pending this stores '\0', which is either overwritten by the
-      // read below or stays outside the reported length.
-      charBuffer[0] = buffer.lastTrailingHighSurrogate;
-      final int offset = buffer.lastTrailingHighSurrogate == 0 ? 0 : 1;
-      buffer.lastTrailingHighSurrogate = 0;
-      final int read = reader.read(charBuffer, offset, charBuffer.length - offset);
-      if (read == -1) {
-        // End of reader: only a pending surrogate (if any) is left to hand out.
-        buffer.length = offset;
-        return offset != 0;
-      }
-      buffer.length = read + offset;
-      // Hold back a trailing high surrogate so its low surrogate can follow it
-      // in the next fill. The length > 1 guard covers the special case where
-      // the read returned 0 and only the re-installed surrogate is present:
-      // holding it back again would produce an empty buffer.
-      if (buffer.length > 1 && Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
-        buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
-      }
-      return true;
-    }
-  }
-
-  /**
-   * Implementation that treats every <code>char</code> as a complete code
-   * point and performs no surrogate handling.
-   */
-  private static final class Java4CharacterUtils extends CharacterUtils {
-    Java4CharacterUtils() {
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset) {
-      return chars[offset];
-    }
-
-    @Override
-    public int codePointAt(final CharSequence seq, final int offset) {
-      return seq.charAt(offset);
-    }
-
-    @Override
-    public int codePointAt(final char[] chars, final int offset, final int limit) {
-      if (offset >= limit) {
-        throw new IndexOutOfBoundsException("offset must be less than limit");
-      }
-      return chars[offset];
-    }
-
-    @Override
-    public boolean fill(final CharacterBuffer buffer, final Reader reader) throws IOException {
-      buffer.offset = 0;
-      final int read = reader.read(buffer.buffer);
-      if (read == -1) {
-        // Clear the length so the buffer does not keep reporting the
-        // characters of the previous fill once the reader is exhausted.
-        buffer.length = 0;
-        return false;
-      }
-      buffer.length = read;
-      return true;
-    }
-
-  }
-
-  /**
-   * A simple IO buffer to use with
-   * {@link CharacterUtils#fill(CharacterBuffer, Reader)}.
-   */
-  public static final class CharacterBuffer {
-
-    private final char[] buffer;
-    private int offset;
-    private int length;
-    // High surrogate held back by the last fill; 0 means none is pending.
-    private char lastTrailingHighSurrogate = 0;
-
-    CharacterBuffer(char[] buffer, int offset, int length) {
-      this.buffer = buffer;
-      this.offset = offset;
-      this.length = length;
-    }
-
-    /**
-     * Returns the internal buffer. The array itself is returned, not a copy.
-     *
-     * @return the buffer
-     */
-    public char[] getBuffer() {
-      return buffer;
-    }
-
-    /**
-     * Returns the data offset in the internal buffer.
-     *
-     * @return the offset
-     */
-    public int getOffset() {
-      return offset;
-    }
-
-    /**
-     * Return the length of the data in the internal buffer starting at
-     * {@link #getOffset()}
-     *
-     * @return the length
-     */
-    public int getLength() {
-      return length;
-    }
-
-    /**
-     * Resets the CharacterBuffer. Offset, length and any held-back high
-     * surrogate are reset to their default values; the contents of the
-     * internal array are left untouched.
-     */
-    public void reset() {
-      offset = 0;
-      length = 0;
-      lastTrailingHighSurrogate = 0;
-    }
-  }
-
-}