X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BytesRef.java diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BytesRef.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BytesRef.java deleted file mode 100644 index fd4aa5a..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/util/BytesRef.java +++ /dev/null @@ -1,368 +0,0 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.io.UnsupportedEncodingException; - -/** Represents byte[], as a slice (offset + length) into an - * existing byte[]. - * - * @lucene.experimental */ -public final class BytesRef implements Comparable { - - static final int HASH_PRIME = 31; - public static final byte[] EMPTY_BYTES = new byte[0]; - - /** The contents of the BytesRef. Should never be {@code null}. */ - public byte[] bytes; - - /** Offset of first valid byte. */ - public int offset; - - /** Length of used bytes. */ - public int length; - - public BytesRef() { - bytes = EMPTY_BYTES; - } - - /** This instance will directly reference bytes w/o making a copy. - * bytes should not be null. - */ - public BytesRef(byte[] bytes, int offset, int length) { - assert bytes != null; - this.bytes = bytes; - this.offset = offset; - this.length = length; - } - - /** This instance will directly reference bytes w/o making a copy. - * bytes should not be null */ - public BytesRef(byte[] bytes) { - assert bytes != null; - this.bytes = bytes; - this.offset = 0; - this.length = bytes.length; - } - - public BytesRef(int capacity) { - this.bytes = new byte[capacity]; - } - - /** - * @param text Initialize the byte[] from the UTF8 bytes - * for the provided Sring. This must be well-formed - * unicode text, with no unpaired surrogates or U+FFFF. - */ - public BytesRef(CharSequence text) { - this(); - copy(text); - } - - /** - * @param text Initialize the byte[] from the UTF8 bytes - * for the provided array. This must be well-formed - * unicode text, with no unpaired surrogates or U+FFFF. - */ - public BytesRef(char text[], int offset, int length) { - this(length * 4); - copy(text, offset, length); - } - - public BytesRef(BytesRef other) { - this(); - copy(other); - } - - /* // maybe? - public BytesRef(BytesRef other, boolean shallow) { - this(); - if (shallow) { - offset = other.offset; - length = other.length; - bytes = other.bytes; - } else { - copy(other); - } - } - */ - - /** - * Copies the UTF8 bytes for this string. - * - * @param text Must be well-formed unicode text, with no - * unpaired surrogates or invalid UTF16 code units. - */ - public void copy(CharSequence text) { - UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this); - } - - /** - * Copies the UTF8 bytes for this string. - * - * @param text Must be well-formed unicode text, with no - * unpaired surrogates or invalid UTF16 code units. - */ - public void copy(char text[], int offset, int length) { - UnicodeUtil.UTF16toUTF8(text, offset, length, this); - } - - public boolean bytesEquals(BytesRef other) { - if (length == other.length) { - int otherUpto = other.offset; - final byte[] otherBytes = other.bytes; - final int end = offset + length; - for(int upto=offset;uptoIt is defined as: - *
-   *  int hash = 0;
-   *  for (int i = offset; i < offset + length; i++) {
-   *    hash = 31*hash + bytes[i];
-   *  }
-   * 
- */ - @Override - public int hashCode() { - int result = 0; - final int end = offset + length; - for(int i=offset;i offset) { - sb.append(' '); - } - sb.append(Integer.toHexString(bytes[i]&0xff)); - } - sb.append(']'); - return sb.toString(); - } - - public void copy(BytesRef other) { - if (bytes.length < other.length) { - bytes = new byte[other.length]; - } - System.arraycopy(other.bytes, other.offset, bytes, 0, other.length); - length = other.length; - offset = 0; - } - - public void append(BytesRef other) { - int newLen = length + other.length; - if (bytes.length < newLen) { - byte[] newBytes = new byte[newLen]; - System.arraycopy(bytes, offset, newBytes, 0, length); - offset = 0; - bytes = newBytes; - } - System.arraycopy(other.bytes, other.offset, bytes, length+offset, other.length); - length = newLen; - } - - public void grow(int newLength) { - bytes = ArrayUtil.grow(bytes, newLength); - } - - /** Unsigned byte order comparison */ - public int compareTo(BytesRef other) { - if (this == other) return 0; - - final byte[] aBytes = this.bytes; - int aUpto = this.offset; - final byte[] bBytes = other.bytes; - int bUpto = other.offset; - - final int aStop = aUpto + Math.min(this.length, other.length); - - while(aUpto < aStop) { - int aByte = aBytes[aUpto++] & 0xff; - int bByte = bBytes[bUpto++] & 0xff; - int diff = aByte - bByte; - if (diff != 0) return diff; - } - - // One is a prefix of the other, or, they are equal: - return this.length - other.length; - } - - private final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator(); - - public static Comparator getUTF8SortedAsUnicodeComparator() { - return utf8SortedAsUnicodeSortOrder; - } - - private static class UTF8SortedAsUnicodeComparator implements Comparator { - // Only singleton - private UTF8SortedAsUnicodeComparator() {}; - - public int compare(BytesRef a, BytesRef b) { - final byte[] aBytes = a.bytes; - int aUpto = a.offset; - final byte[] bBytes = b.bytes; - int bUpto = b.offset; - - final int aStop; - if (a.length < b.length) { - aStop = aUpto + a.length; - } else { - aStop = aUpto + b.length; - } - - while(aUpto < aStop) { - int aByte = aBytes[aUpto++] & 0xff; - int bByte = bBytes[bUpto++] & 0xff; - - int diff = aByte - bByte; - if (diff != 0) { - return diff; - } - } - - // One is a prefix of the other, or, they are equal: - return a.length - b.length; - } - } - - private final static Comparator utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator(); - - public static Comparator getUTF8SortedAsUTF16Comparator() { - return utf8SortedAsUTF16SortOrder; - } - - private static class UTF8SortedAsUTF16Comparator implements Comparator { - // Only singleton - private UTF8SortedAsUTF16Comparator() {}; - - public int compare(BytesRef a, BytesRef b) { - - final byte[] aBytes = a.bytes; - int aUpto = a.offset; - final byte[] bBytes = b.bytes; - int bUpto = b.offset; - - final int aStop; - if (a.length < b.length) { - aStop = aUpto + a.length; - } else { - aStop = aUpto + b.length; - } - - while(aUpto < aStop) { - int aByte = aBytes[aUpto++] & 0xff; - int bByte = bBytes[bUpto++] & 0xff; - - if (aByte != bByte) { - - // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order - - // We know the terms are not equal, but, we may - // have to carefully fixup the bytes at the - // difference to match UTF16's sort order: - - // NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff, - // we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences] - // this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such - // that 6-byte sequences are needed we have much bigger problems anyway. - if (aByte >= 0xee && bByte >= 0xee) { - if ((aByte & 0xfe) == 0xee) { - aByte += 0xe; - } - if ((bByte&0xfe) == 0xee) { - bByte += 0xe; - } - } - return aByte - bByte; - } - } - - // One is a prefix of the other, or, they are equal: - return a.length - b.length; - } - } -}