-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Comparator;
-import java.io.UnsupportedEncodingException;
-
-/** Represents byte[], as a slice (offset + length) into an
- * existing byte[].
- *
- * @lucene.experimental */
-public final class BytesRef implements Comparable<BytesRef> {
-
- static final int HASH_PRIME = 31;
- public static final byte[] EMPTY_BYTES = new byte[0];
-
- /** The contents of the BytesRef. Should never be {@code null}. */
- public byte[] bytes;
-
- /** Offset of first valid byte. */
- public int offset;
-
- /** Length of used bytes. */
- public int length;
-
- public BytesRef() {
- bytes = EMPTY_BYTES;
- }
-
- /** This instance will directly reference bytes w/o making a copy.
- * bytes should not be null.
- */
- public BytesRef(byte[] bytes, int offset, int length) {
- assert bytes != null;
- this.bytes = bytes;
- this.offset = offset;
- this.length = length;
- }
-
- /** This instance will directly reference bytes w/o making a copy.
- * bytes should not be null */
- public BytesRef(byte[] bytes) {
- assert bytes != null;
- this.bytes = bytes;
- this.offset = 0;
- this.length = bytes.length;
- }
-
- public BytesRef(int capacity) {
- this.bytes = new byte[capacity];
- }
-
- /**
- * @param text Initialize the byte[] from the UTF8 bytes
- * for the provided Sring. This must be well-formed
- * unicode text, with no unpaired surrogates or U+FFFF.
- */
- public BytesRef(CharSequence text) {
- this();
- copy(text);
- }
-
- /**
- * @param text Initialize the byte[] from the UTF8 bytes
- * for the provided array. This must be well-formed
- * unicode text, with no unpaired surrogates or U+FFFF.
- */
- public BytesRef(char text[], int offset, int length) {
- this(length * 4);
- copy(text, offset, length);
- }
-
- public BytesRef(BytesRef other) {
- this();
- copy(other);
- }
-
- /* // maybe?
- public BytesRef(BytesRef other, boolean shallow) {
- this();
- if (shallow) {
- offset = other.offset;
- length = other.length;
- bytes = other.bytes;
- } else {
- copy(other);
- }
- }
- */
-
- /**
- * Copies the UTF8 bytes for this string.
- *
- * @param text Must be well-formed unicode text, with no
- * unpaired surrogates or invalid UTF16 code units.
- */
- public void copy(CharSequence text) {
- UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
- }
-
- /**
- * Copies the UTF8 bytes for this string.
- *
- * @param text Must be well-formed unicode text, with no
- * unpaired surrogates or invalid UTF16 code units.
- */
- public void copy(char text[], int offset, int length) {
- UnicodeUtil.UTF16toUTF8(text, offset, length, this);
- }
-
- public boolean bytesEquals(BytesRef other) {
- if (length == other.length) {
- int otherUpto = other.offset;
- final byte[] otherBytes = other.bytes;
- final int end = offset + length;
- for(int upto=offset;upto<end;upto++,otherUpto++) {
- if (bytes[upto] != otherBytes[otherUpto]) {
- return false;
- }
- }
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public Object clone() {
- return new BytesRef(this);
- }
-
- private boolean sliceEquals(BytesRef other, int pos) {
- if (pos < 0 || length - pos < other.length) {
- return false;
- }
- int i = offset + pos;
- int j = other.offset;
- final int k = other.offset + other.length;
-
- while (j < k) {
- if (bytes[i++] != other.bytes[j++]) {
- return false;
- }
- }
-
- return true;
- }
-
- public boolean startsWith(BytesRef other) {
- return sliceEquals(other, 0);
- }
-
- public boolean endsWith(BytesRef other) {
- return sliceEquals(other, length - other.length);
- }
-
- /** Calculates the hash code as required by TermsHash during indexing.
- * <p>It is defined as:
- * <pre>
- * int hash = 0;
- * for (int i = offset; i < offset + length; i++) {
- * hash = 31*hash + bytes[i];
- * }
- * </pre>
- */
- @Override
- public int hashCode() {
- int result = 0;
- final int end = offset + length;
- for(int i=offset;i<end;i++) {
- result = HASH_PRIME * result + bytes[i];
- }
- return result;
- }
-
- @Override
- public boolean equals(Object other) {
- if (other == null) {
- return false;
- }
- return this.bytesEquals((BytesRef) other);
- }
-
- /** Interprets stored bytes as UTF8 bytes, returning the
- * resulting string */
- public String utf8ToString() {
- try {
- return new String(bytes, offset, length, "UTF-8");
- } catch (UnsupportedEncodingException uee) {
- // should not happen -- UTF8 is presumably supported
- // by all JREs
- throw new RuntimeException(uee);
- }
- }
-
- /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append('[');
- final int end = offset + length;
- for(int i=offset;i<end;i++) {
- if (i > offset) {
- sb.append(' ');
- }
- sb.append(Integer.toHexString(bytes[i]&0xff));
- }
- sb.append(']');
- return sb.toString();
- }
-
- public void copy(BytesRef other) {
- if (bytes.length < other.length) {
- bytes = new byte[other.length];
- }
- System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
- length = other.length;
- offset = 0;
- }
-
- public void append(BytesRef other) {
- int newLen = length + other.length;
- if (bytes.length < newLen) {
- byte[] newBytes = new byte[newLen];
- System.arraycopy(bytes, offset, newBytes, 0, length);
- offset = 0;
- bytes = newBytes;
- }
- System.arraycopy(other.bytes, other.offset, bytes, length+offset, other.length);
- length = newLen;
- }
-
- public void grow(int newLength) {
- bytes = ArrayUtil.grow(bytes, newLength);
- }
-
- /** Unsigned byte order comparison */
- public int compareTo(BytesRef other) {
- if (this == other) return 0;
-
- final byte[] aBytes = this.bytes;
- int aUpto = this.offset;
- final byte[] bBytes = other.bytes;
- int bUpto = other.offset;
-
- final int aStop = aUpto + Math.min(this.length, other.length);
-
- while(aUpto < aStop) {
- int aByte = aBytes[aUpto++] & 0xff;
- int bByte = bBytes[bUpto++] & 0xff;
- int diff = aByte - bByte;
- if (diff != 0) return diff;
- }
-
- // One is a prefix of the other, or, they are equal:
- return this.length - other.length;
- }
-
- private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
-
- public static Comparator<BytesRef> getUTF8SortedAsUnicodeComparator() {
- return utf8SortedAsUnicodeSortOrder;
- }
-
- private static class UTF8SortedAsUnicodeComparator implements Comparator<BytesRef> {
- // Only singleton
- private UTF8SortedAsUnicodeComparator() {};
-
- public int compare(BytesRef a, BytesRef b) {
- final byte[] aBytes = a.bytes;
- int aUpto = a.offset;
- final byte[] bBytes = b.bytes;
- int bUpto = b.offset;
-
- final int aStop;
- if (a.length < b.length) {
- aStop = aUpto + a.length;
- } else {
- aStop = aUpto + b.length;
- }
-
- while(aUpto < aStop) {
- int aByte = aBytes[aUpto++] & 0xff;
- int bByte = bBytes[bUpto++] & 0xff;
-
- int diff = aByte - bByte;
- if (diff != 0) {
- return diff;
- }
- }
-
- // One is a prefix of the other, or, they are equal:
- return a.length - b.length;
- }
- }
-
- private final static Comparator<BytesRef> utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator();
-
- public static Comparator<BytesRef> getUTF8SortedAsUTF16Comparator() {
- return utf8SortedAsUTF16SortOrder;
- }
-
- private static class UTF8SortedAsUTF16Comparator implements Comparator<BytesRef> {
- // Only singleton
- private UTF8SortedAsUTF16Comparator() {};
-
- public int compare(BytesRef a, BytesRef b) {
-
- final byte[] aBytes = a.bytes;
- int aUpto = a.offset;
- final byte[] bBytes = b.bytes;
- int bUpto = b.offset;
-
- final int aStop;
- if (a.length < b.length) {
- aStop = aUpto + a.length;
- } else {
- aStop = aUpto + b.length;
- }
-
- while(aUpto < aStop) {
- int aByte = aBytes[aUpto++] & 0xff;
- int bByte = bBytes[bUpto++] & 0xff;
-
- if (aByte != bByte) {
-
- // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order
-
- // We know the terms are not equal, but, we may
- // have to carefully fixup the bytes at the
- // difference to match UTF16's sort order:
-
- // NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff,
- // we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences]
- // this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such
- // that 6-byte sequences are needed we have much bigger problems anyway.
- if (aByte >= 0xee && bByte >= 0xee) {
- if ((aByte & 0xfe) == 0xee) {
- aByte += 0xe;
- }
- if ((bByte&0xfe) == 0xee) {
- bByte += 0xe;
- }
- }
- return aByte - bByte;
- }
- }
-
- // One is a prefix of the other, or, they are equal:
- return a.length - b.length;
- }
- }
-}