+++ /dev/null
-package org.apache.lucene.facet.taxonomy;
-
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Serializable;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A CategoryPath holds a sequence of string components, specifying the
- * hierarchical name of a category.
- * <P>
- * CategoryPath is designed to reduce the number of object allocations, in two
- * ways: First, it keeps the components internally in two arrays, rather than
- * keeping individual strings. Second, it allows reusing the same CategoryPath
- * object (which can be clear()ed and new components add()ed again) and of
- * add()'s parameter (which can be a reusable object, not just a string).
- *
- * @lucene.experimental
- */
-public class CategoryPath implements Serializable, Cloneable, Comparable<CategoryPath> {
-
- // A category path is a sequence of string components. It is kept
- // internally as one large character array "chars" with all the string
- // concatenated (without separators), and an array of integers "ends"
- // pointing to the/ end of each component. Both arrays may be larger
- // than actually in use. An additional integer, "ncomponents" specifies
- // how many components are actually set.
- // We use shorts instead of ints for "ends" to save a bit of space. This
- // means that our path lengths are limited to 32767 characters - which
- // should not be a problem in any realistic scenario.
- protected char[] chars;
- protected short[] ends;
- protected short ncomponents;
-
- /**
- * Return the number of components in the facet path. Note that this is
- * <I>not</I> the number of characters, but the number of components.
- */
- public short length() {
- return ncomponents;
- }
-
- /**
- * Trim the last components from the path.
- *
- * @param nTrim
- * Number of components to trim. If larger than the number of
- * components this path has, the entire path will be cleared.
- */
- public void trim(int nTrim) {
- if (nTrim >= this.ncomponents) {
- clear();
- } else if (nTrim > 0) {
- this.ncomponents -= nTrim;
- }
- }
-
- /**
- * Returns the current character capacity of the CategoryPath. The character
- * capacity is the size of the internal buffer used to hold the characters
- * of all the path's components. When a component is added and the capacity
- * is not big enough, the buffer is automatically grown, and capacityChars()
- * increases.
- */
- public int capacityChars() {
- return chars.length;
- }
-
- /**
- * Returns the current component capacity of the CategoryPath. The component
- * capacity is the maximum number of components that the internal buffer can
- * currently hold. When a component is added beyond this capacity, the
- * buffer is automatically grown, and capacityComponents() increases.
- */
- public int capacityComponents() {
- return ends.length;
- }
-
- /**
- * Construct a new empty CategoryPath object. CategoryPath objects are meant
- * to be reused, by add()ing components, and later clear()ing, and add()ing
- * components again. The CategoryPath object is created with a buffer
- * pre-allocated for a given number of characters and components, but the
- * buffer will grow as necessary (see {@link #capacityChars()} and
- * {@link #capacityComponents()}).
- */
- public CategoryPath(int capacityChars, int capacityComponents) {
- ncomponents = 0;
- chars = new char[capacityChars];
- ends = new short[capacityComponents];
- }
-
- /**
- * Create an empty CategoryPath object. Equivalent to the constructor
- * {@link #CategoryPath(int, int)} with the two initial-capacity arguments
- * set to zero.
- */
- public CategoryPath() {
- this(0, 0);
- }
-
- /**
- * Add the given component to the end of the path.
- * <P>
- * Note that when a String object is passed to this method, a reference to
- * it is not saved (rather, its content is copied), which will lead to that
- * String object being gc'ed. To reduce the number of garbage objects, you
- * can pass a mutable CharBuffer instead of an immutable String to this
- * method.
- */
- public void add(CharSequence component) {
- // Set the new end, increasing the "ends" array sizes if necessary:
- if (ncomponents >= ends.length) {
- short[] newends = new short[(ends.length + 1) * 2];
- System.arraycopy(ends, 0, newends, 0, ends.length);
- ends = newends;
- }
- short prevend = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
- int cmplen = component.length();
- ends[ncomponents] = (short) (prevend + cmplen);
-
- // Copy the new component's characters, increasing the "chars" array
- // sizes if necessary:
- if (ends[ncomponents] > chars.length) {
- char[] newchars = new char[ends[ncomponents] * 2];
- System.arraycopy(chars, 0, newchars, 0, chars.length);
- chars = newchars;
- }
- for (int i = 0; i < cmplen; i++) {
- chars[prevend++] = component.charAt(i);
- }
-
- ncomponents++;
- }
-
- /**
- * Empty the CategoryPath object, so that it has zero components. The
- * capacity of the object (see {@link #capacityChars()} and
- * {@link #capacityComponents()}) is not reduced, so that the object can be
- * reused without frequent reallocations.
- */
- public void clear() {
- ncomponents = 0;
- }
-
- /**
- * Build a string representation of the path, with its components separated
- * by the given delimiter character. The resulting string is appended to a
- * given Appendable, e.g., a StringBuilder, CharBuffer or Writer.
- * <P>
- * Note that the two cases of zero components and one component with zero
- * length produce indistinguishable results (both of them append nothing).
- * This is normally not a problem, because components should not normally
- * have zero lengths.
- * <P>
- * An IOException can be thrown if the given Appendable's append() throws
- * this exception.
- */
- public void appendTo(Appendable out, char delimiter) throws IOException {
- if (ncomponents == 0) {
- return; // just append nothing...
- }
- for (int i = 0; i < ends[0]; i++) {
- out.append(chars[i]);
- }
- for (int j = 1; j < ncomponents; j++) {
- out.append(delimiter);
- for (int i = ends[j - 1]; i < ends[j]; i++) {
- out.append(chars[i]);
- }
- }
- }
-
- /**
- * like {@link #appendTo(Appendable, char)}, but takes only a prefix of the
- * path, rather than the whole path.
- * <P>
- * If the given prefix length is negative or bigger than the path's actual
- * length, the whole path is taken.
- */
- public void appendTo(Appendable out, char delimiter, int prefixLen)
- throws IOException {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- if (prefixLen == 0) {
- return; // just append nothing...
- }
- for (int i = 0; i < ends[0]; i++) {
- out.append(chars[i]);
- }
- for (int j = 1; j < prefixLen; j++) {
- out.append(delimiter);
- for (int i = ends[j - 1]; i < ends[j]; i++) {
- out.append(chars[i]);
- }
- }
- }
-
- /**
- * like {@link #appendTo(Appendable, char)}, but takes only a part of the
- * path, rather than the whole path.
- * <P>
- * <code>start</code> specifies the first component in the subpath, and
- * <code>end</code> is one past the last component. If <code>start</code> is
- * negative, 0 is assumed, and if <code>end</code> is negative or past the
- * end of the path, the path is taken until the end. Otherwise, if
- * <code>end<=start</code>, nothing is appended. Nothing is appended also in
- * the case that the path is empty.
- */
- public void appendTo(Appendable out, char delimiter, int start, int end)
- throws IOException {
- if (start < 0) {
- start = 0;
- }
- if (end < 0 || end > ncomponents) {
- end = ncomponents;
- }
- if (end <= start) {
- return; // just append nothing...
- }
- for (int i = (start == 0 ? 0 : ends[start - 1]); i < ends[start]; i++) {
- out.append(chars[i]);
- }
- for (int j = start + 1; j < end; j++) {
- out.append(delimiter);
- for (int i = ends[j - 1]; i < ends[j]; i++) {
- out.append(chars[i]);
- }
- }
- }
-
- /**
- * Build a string representation of the path, with its components separated
- * by the given delimiter character. The resulting string is returned as a
- * new String object. To avoid this temporary object creation, consider
- * using {@link #appendTo(Appendable, char)} instead.
- * <P>
- * Note that the two cases of zero components and one component with zero
- * length produce indistinguishable results (both of them return an empty
- * string). This is normally not a problem, because components should not
- * normally have zero lengths.
- */
- public String toString(char delimiter) {
- if (ncomponents == 0) {
- return "";
- }
- StringBuilder sb = new StringBuilder(ends[ncomponents - 1]
- + (ncomponents - 1));
- try {
- this.appendTo(sb, delimiter);
- } catch (IOException e) {
- // can't happen, because StringBuilder.append() never actually
- // throws an exception!
- }
- return sb.toString();
- }
-
- /**
- * This method, an implementation of the {@link Object#toString()}
- * interface, is to allow simple printing of a CategoryPath, for debugging
- * purposes. When possible, it recommended to avoid using it it, and rather,
- * if you want to output the path with its components separated by a
- * delimiter character, specify the delimiter explicitly, with
- * {@link #toString(char)}.
- */
- @Override
- public String toString() {
- return toString('/');
- }
-
- /**
- * like {@link #toString(char)}, but takes only a prefix with a given number
- * of components, rather than the whole path.
- * <P>
- * If the given length is negative or bigger than the path's actual length,
- * the whole path is taken.
- */
- public String toString(char delimiter, int prefixLen) {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- if (prefixLen == 0) {
- return "";
- }
- StringBuilder sb = new StringBuilder(ends[prefixLen - 1]
- + (prefixLen - 1));
- try {
- this.appendTo(sb, delimiter, prefixLen);
- } catch (IOException e) {
- // can't happen, because sb.append() never actually throws an
- // exception
- }
- return sb.toString();
- }
-
- /**
- * like {@link #toString(char)}, but takes only a part of the path, rather
- * than the whole path.
- * <P>
- * <code>start</code> specifies the first component in the subpath, and
- * <code>end</code> is one past the last component. If <code>start</code> is
- * negative, 0 is assumed, and if <code>end</code> is negative or past the
- * end of the path, the path is taken until the end. Otherwise, if
- * <code>end<=start</code>, an empty string is returned. An emptry string is
- * returned also in the case that the path is empty.
- */
- public String toString(char delimiter, int start, int end) {
- if (start < 0) {
- start = 0;
- }
- if (end < 0 || end > ncomponents) {
- end = ncomponents;
- }
- if (end <= start) {
- return "";
- }
- int startchar = (start == 0) ? 0 : ends[start - 1];
- StringBuilder sb = new StringBuilder(ends[end - 1] - startchar
- + (end - start) - 1);
- try {
- this.appendTo(sb, delimiter, start, end);
- } catch (IOException e) {
- // can't happen, because sb.append() never actually throws an
- // exception
- }
- return sb.toString();
- }
-
- /**
- * Return the i'th component of the path, in a new String object. If there
- * is no i'th component, a null is returned.
- */
- public String getComponent(int i) {
- if (i < 0 || i >= ncomponents) {
- return null;
- }
- if (i == 0) {
- return new String(chars, 0, ends[0]);
- }
- return new String(chars, ends[i - 1], ends[i] - ends[i - 1]);
- }
-
- /**
- * Return the last component of the path, in a new String object. If the
- * path is empty, a null is returned.
- */
- public String lastComponent() {
- if (ncomponents == 0) {
- return null;
- }
- if (ncomponents == 1) {
- return new String(chars, 0, ends[0]);
- }
- return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1]
- - ends[ncomponents - 2]);
- }
-
- /**
- * Copies the specified number of components from this category path to the
- * specified character array, with the components separated by a given
- * delimiter character. The array must be large enough to hold the
- * components and separators - the amount of needed space can be calculated
- * with {@link #charsNeededForFullPath()}.
- * <P>
- * This method returns the number of characters written to the array.
- *
- * @param outputBuffer
- * The destination character array.
- * @param outputBufferStart
- * The first location to write in the output array.
- * @param numberOfComponentsToCopy
- * The number of path components to write to the destination
- * buffer.
- * @param separatorChar
- * The separator inserted between every pair of path components
- * in the output buffer.
- * @see #charsNeededForFullPath()
- */
- public int copyToCharArray(char[] outputBuffer, int outputBufferStart,
- int numberOfComponentsToCopy, char separatorChar) {
- if (numberOfComponentsToCopy == 0) {
- return 0;
- }
- if (numberOfComponentsToCopy < 0
- || numberOfComponentsToCopy > ncomponents) {
- numberOfComponentsToCopy = ncomponents;
- }
- int outputBufferInitialStart = outputBufferStart; // for calculating
- // chars copied.
- int sourceStart = 0;
- int sourceLength = ends[0];
- for (int component = 0; component < numberOfComponentsToCopy; component++) {
- if (component > 0) {
- sourceStart = ends[component - 1];
- sourceLength = ends[component] - sourceStart;
- outputBuffer[outputBufferStart++] = separatorChar;
- }
- System.arraycopy(chars, sourceStart, outputBuffer,
- outputBufferStart, sourceLength);
- outputBufferStart += sourceLength;
- }
- return outputBufferStart - outputBufferInitialStart;
- }
-
- /**
- * Returns the number of characters required to represent this entire
- * category path, if written using
- * {@link #copyToCharArray(char[], int, int, char)} or
- * {@link #appendTo(Appendable, char)}. This includes the number of
- * characters in all the components, plus the number of separators between
- * them (each one character in the aforementioned methods).
- */
- public int charsNeededForFullPath() {
- if (ncomponents == 0) {
- return 0;
- }
- return ends[ncomponents - 1] + ncomponents - 1;
- }
-
- /**
- * Construct a new CategoryPath object, given a single string with
- * components separated by a given delimiter character.
- * <P>
- * The initial capacity of the constructed object will be exactly what is
- * needed to hold the given path. This fact is convenient when creating a
- * temporary object that will not be reused later.
- */
- public CategoryPath(String pathString, char delimiter) {
- if (pathString.length() == 0) {
- ncomponents = 0;
- chars = new char[0];
- ends = new short[0];
- return;
- }
-
- // This constructor is often used for creating a temporary object
- // (one which will not be reused to hold multiple paths), so we want
- // to do our best to allocate exactly the needed size - not less (to
- // avoid reallocation) and not more (so as not to waste space).
- // To do this, we unfortunately need to make an additional pass on the
- // given string:
- int nparts = 1;
- for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString
- .indexOf(delimiter, i + 1)) {
- nparts++;
- }
-
- ends = new short[nparts];
- chars = new char[pathString.length() - nparts + 1];
- ncomponents = 0;
-
- add(pathString, delimiter);
- }
-
- /**
- * Add the given components to the end of the path. The components are given
- * in a single string, separated by a given delimiter character. If the
- * given string is empty, it is assumed to refer to the root (empty)
- * category, and nothing is added to the path (rather than adding a single
- * empty component).
- * <P>
- * Note that when a String object is passed to this method, a reference to
- * it is not saved (rather, its content is copied), which will lead to that
- * String object being gc'ed. To reduce the number of garbage objects, you
- * can pass a mutable CharBuffer instead of an immutable String to this
- * method.
- */
- public void add(CharSequence pathString, char delimiter) {
- int len = pathString.length();
- if (len == 0) {
- return; // assume root category meant, so add nothing.
- }
- short pos = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
- for (int i = 0; i < len; i++) {
- char c = pathString.charAt(i);
- if (c == delimiter) {
- if (ncomponents >= ends.length) {
- short[] newends = new short[(ends.length + 1) * 2];
- System.arraycopy(ends, 0, newends, 0, ends.length);
- ends = newends;
- }
- ends[ncomponents++] = pos;
- } else {
- if (pos >= chars.length) {
- char[] newchars = new char[(chars.length + 1) * 2];
- System.arraycopy(chars, 0, newchars, 0, chars.length);
- chars = newchars;
- }
- chars[pos++] = c;
- }
- }
-
- // Don't forget to count the last component!
- if (ncomponents >= ends.length) {
- short[] newends = new short[(ends.length + 1) * 2];
- System.arraycopy(ends, 0, newends, 0, ends.length);
- ends = newends;
- }
- ends[ncomponents++] = pos;
- }
-
- /**
- * Construct a new CategoryPath object, copying an existing path given as an
- * array of strings.
- * <P>
- * The new object occupies exactly the space it needs, without any spare
- * capacity. This is the expected behavior in the typical use case, where
- * this constructor is used to create a temporary object which is never
- * reused.
- */
- public CategoryPath(CharSequence... components) {
- this.ncomponents = (short) components.length;
- this.ends = new short[ncomponents];
- if (ncomponents > 0) {
- this.ends[0] = (short) components[0].length();
- for (int i = 1; i < ncomponents; i++) {
- this.ends[i] = (short) (this.ends[i - 1] + components[i]
- .length());
- }
- this.chars = new char[this.ends[ncomponents - 1]];
- CharSequence cs = components[0];
- if (cs instanceof String) {
- ((String) cs).getChars(0, cs.length(), this.chars, 0);
- } else {
- for (int j = 0, k = cs.length(); j < k; j++) {
- this.chars[j] = cs.charAt(j);
- }
- }
- for (int i = 1; i < ncomponents; i++) {
- cs = components[i];
- int offset = this.ends[i - 1];
- if (cs instanceof String) {
- ((String) cs).getChars(0, cs.length(), this.chars, offset);
- } else {
- for (int j = 0, k = cs.length(); j < k; j++) {
- this.chars[j + offset] = cs.charAt(j);
- }
- }
- }
- } else {
- this.chars = new char[0];
- }
- }
-
- /**
- * Construct a new CategoryPath object, copying the path given in an
- * existing CategoryPath object.
- * <P>
- * This copy-constructor is handy when you need to save a reference to a
- * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot
- * save a reference to the original object because its contents can be
- * changed later by the user. Copying the contents into a new object is a
- * solution.
- * <P>
- * This constructor </I>does not</I> copy the capacity (spare buffer size)
- * of the existing CategoryPath. Rather, the new object occupies exactly the
- * space it needs, without any spare. This is the expected behavior in the
- * typical use case outlined in the previous paragraph.
- */
- public CategoryPath(CategoryPath existing) {
- ncomponents = existing.ncomponents;
- if (ncomponents == 0) {
- chars = new char[0];
- ends = new short[0];
- return;
- }
-
- chars = new char[existing.ends[ncomponents - 1]];
- System.arraycopy(existing.chars, 0, chars, 0, chars.length);
- ends = new short[ncomponents];
- System.arraycopy(existing.ends, 0, ends, 0, ends.length);
- }
-
- /**
- * Construct a new CategoryPath object, copying a prefix with the given
- * number of components of the path given in an existing CategoryPath
- * object.
- * <P>
- * If the given length is negative or bigger than the given path's actual
- * length, the full path is taken.
- * <P>
- * This constructor is often convenient for creating a temporary object with
- * a path's prefix, but this practice is wasteful, and therefore
- * inadvisable. Rather, the application should be written in a way that
- * allows considering only a prefix of a given path, without needing to make
- * a copy of that path.
- */
- public CategoryPath(CategoryPath existing, int prefixLen) {
- if (prefixLen < 0 || prefixLen > existing.ncomponents) {
- ncomponents = existing.ncomponents;
- } else {
- ncomponents = (short) prefixLen;
- }
- if (ncomponents == 0) {
- chars = new char[0];
- ends = new short[0];
- return;
- }
-
- chars = new char[existing.ends[ncomponents - 1]];
- System.arraycopy(existing.chars, 0, chars, 0, chars.length);
- ends = new short[ncomponents];
- System.arraycopy(existing.ends, 0, ends, 0, ends.length);
- }
-
- @Override
- public Object clone() {
- return new CategoryPath(this);
- }
-
- /**
- * Compare the given CategoryPath to another one. For two category paths to
- * be considered equal, only the path they contain needs to be identical The
- * unused capacity of the objects is not considered in the comparison.
- */
- @Override
- public boolean equals(Object obj) {
- if (obj instanceof CategoryPath) {
- CategoryPath other = (CategoryPath) obj;
- if (other.ncomponents != this.ncomponents) {
- return false;
- }
- // Unfortunately, Arrays.equal() can only compare entire arrays,
- // and in our case we potentially have unused parts of the arrays
- // that must not be compared... I wish that some future version
- // of Java has a offset and length parameter to Arrays.equal
- // (sort of like System.arraycopy()).
- if (ncomponents == 0) {
- return true; // nothing to compare...
- }
- for (int i = 0; i < ncomponents; i++) {
- if (this.ends[i] != other.ends[i]) {
- return false;
- }
- }
- int len = ends[ncomponents - 1];
- for (int i = 0; i < len; i++) {
- if (this.chars[i] != other.chars[i]) {
- return false;
- }
- }
- return true;
- }
- return false;
- }
-
- /**
- * Test whether this object is a descendant of another CategoryPath. This is
- * true if the other CategoryPath is the prefix of this.
- */
- public boolean isDescendantOf(CategoryPath other) {
- if (this.ncomponents < other.ncomponents) {
- return false;
- }
- int j = 0;
- for (int i = 0; i < other.ncomponents; i++) {
- if (ends[i] != other.ends[i]) {
- return false;
- }
- for (; j < ends[i]; j++) {
- if (this.chars[j] != other.chars[j]) {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
- * Calculate a hashCode for this path, used when a CategoryPath serves as a
- * hash-table key. If two objects are equal(), their hashCodes need to be
- * equal, so like in equal(), hashCode does not consider unused portions of
- * the internal buffers in its calculation.
- * <P>
- * The hash function used is modeled after Java's String.hashCode() - a
- * simple multiplicative hash function with the multiplier 31. The same hash
- * function also appeared in Kernighan & Ritchie's second edition of
- * "The C Programming Language" (1988).
- */
- @Override
- public int hashCode() {
- if (ncomponents == 0) {
- return 0;
- }
- int hash = ncomponents;
- // Unfortunately, Arrays.hashCode() can only calculate a hash code
- // for an entire arrays, and in our case we potentially have unused
- // parts of the arrays that must be ignored, so must use our own loop
- // over the characters. I wish that some future version of Java will
- // add offset and length parameters to Arrays.hashCode (sort of like
- // System.arraycopy()'s parameters).
- for (int i = 0; i < ncomponents; i++) {
- hash = hash * 31 + ends[i];
- }
- int len = ends[ncomponents - 1];
- for (int i = 0; i < len; i++) {
- hash = hash * 31 + chars[i];
- }
- return hash;
- }
-
- /**
- * Like {@link #hashCode()}, but find the hash function of a prefix with the
- * given number of components, rather than of the entire path.
- */
- public int hashCode(int prefixLen) {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- if (prefixLen == 0) {
- return 0;
- }
- int hash = prefixLen;
- for (int i = 0; i < prefixLen; i++) {
- hash = hash * 31 + ends[i];
- }
- int len = ends[prefixLen - 1];
- for (int i = 0; i < len; i++) {
- hash = hash * 31 + chars[i];
- }
- return hash;
- }
-
- /**
- * Calculate a 64-bit hash function for this path. Unlike
- * {@link #hashCode()}, this method is not part of the Java standard, and is
- * only used if explicitly called by the user.
- * <P>
- * If two objects are equal(), their hash codes need to be equal, so like in
- * {@link #equals(Object)}, longHashCode does not consider unused portions
- * of the internal buffers in its calculation.
- * <P>
- * The hash function used is a simple multiplicative hash function, with the
- * multiplier 65599. While Java's standard multiplier 31 (used in
- * {@link #hashCode()}) gives a good distribution for ASCII strings, it
- * turns out that for foreign-language strings (with 16-bit characters) it
- * gives too many collisions, and a bigger multiplier produces fewer
- * collisions in this case.
- */
- public long longHashCode() {
- if (ncomponents == 0) {
- return 0;
- }
- long hash = ncomponents;
- for (int i = 0; i < ncomponents; i++) {
- hash = hash * 65599 + ends[i];
- }
- int len = ends[ncomponents - 1];
- for (int i = 0; i < len; i++) {
- hash = hash * 65599 + chars[i];
- }
- return hash;
- }
-
- /**
- * Like {@link #longHashCode()}, but find the hash function of a prefix with
- * the given number of components, rather than of the entire path.
- */
- public long longHashCode(int prefixLen) {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- if (prefixLen == 0) {
- return 0;
- }
- long hash = prefixLen;
- for (int i = 0; i < prefixLen; i++) {
- hash = hash * 65599 + ends[i];
- }
- int len = ends[prefixLen - 1];
- for (int i = 0; i < len; i++) {
- hash = hash * 65599 + chars[i];
- }
- return hash;
- }
-
- /**
- * Write out a serialized (as a character sequence) representation of the
- * path to a given Appendable (e.g., a StringBuilder, CharBuffer, Writer, or
- * something similar.
- * <P>
- * This method may throw a IOException if the given Appendable threw this
- * exception while appending.
- */
- public void serializeAppendTo(Appendable out) throws IOException {
- // Note that we use the fact that ncomponents and ends[] are shorts,
- // so we can write them as chars:
- out.append((char) ncomponents);
- if (ncomponents == 0) {
- return;
- }
- for (int i = 0; i < ncomponents; i++) {
- out.append((char) ends[i]);
- }
- int usedchars = ends[ncomponents - 1];
- for (int i = 0; i < usedchars; i++) {
- out.append(chars[i]);
- }
- }
-
- /**
- * Just like {@link #serializeAppendTo(Appendable)}, but writes only a
- * prefix of the CategoryPath.
- */
- public void serializeAppendTo(int prefixLen, Appendable out)
- throws IOException {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- // Note that we use the fact that ncomponents and ends[] are shorts,
- // so we can write them as chars:
- out.append((char) prefixLen);
- if (prefixLen == 0) {
- return;
- }
- for (int i = 0; i < prefixLen; i++) {
- out.append((char) ends[i]);
- }
- int usedchars = ends[prefixLen - 1];
- for (int i = 0; i < usedchars; i++) {
- out.append(chars[i]);
- }
- }
-
- /**
- * Set a CategoryPath from a character-sequence representation written by
- * {@link #serializeAppendTo(Appendable)}.
- * <P>
- * Reading starts at the given offset into the given character sequence, and
- * the offset right after the end of this path is returned.
- */
- public int setFromSerialized(CharSequence buffer, int offset) {
- ncomponents = (short) buffer.charAt(offset++);
- if (ncomponents == 0) {
- return offset;
- }
-
- if (ncomponents >= ends.length) {
- ends = new short[Math.max(ends.length * 2, ncomponents)];
- }
- for (int i = 0; i < ncomponents; i++) {
- ends[i] = (short) buffer.charAt(offset++);
- }
-
- int usedchars = ends[ncomponents - 1];
- if (usedchars > chars.length) {
- chars = new char[Math.max(chars.length * 2, usedchars)];
- }
- for (int i = 0; i < usedchars; i++) {
- chars[i] = buffer.charAt(offset++);
- }
-
- return offset;
- }
-
- /**
- * Check whether the current path is identical to the one serialized (with
- * {@link #serializeAppendTo(Appendable)}) in the given buffer, at the given
- * offset.
- */
- public boolean equalsToSerialized(CharSequence buffer, int offset) {
- int n = (short) buffer.charAt(offset++);
- if (ncomponents != n) {
- return false;
- }
- if (ncomponents == 0) {
- return true;
- }
- for (int i = 0; i < ncomponents; i++) {
- if (ends[i] != (short) buffer.charAt(offset++)) {
- return false;
- }
- }
- int usedchars = ends[ncomponents - 1];
- for (int i = 0; i < usedchars; i++) {
- if (chars[i] != buffer.charAt(offset++)) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * Just like {@link #equalsToSerialized(CharSequence, int)}, but compare to
- * a prefix of the CategoryPath, instead of the whole CategoryPath.
- */
- public boolean equalsToSerialized(int prefixLen, CharSequence buffer,
- int offset) {
- if (prefixLen < 0 || prefixLen > ncomponents) {
- prefixLen = ncomponents;
- }
- int n = (short) buffer.charAt(offset++);
- if (prefixLen != n) {
- return false;
- }
- if (prefixLen == 0) {
- return true;
- }
- for (int i = 0; i < prefixLen; i++) {
- if (ends[i] != (short) buffer.charAt(offset++)) {
- return false;
- }
- }
- int usedchars = ends[prefixLen - 1];
- for (int i = 0; i < usedchars; i++) {
- if (chars[i] != buffer.charAt(offset++)) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * This method calculates a hash function of a path that has been written to
- * (using {@link #serializeAppendTo(Appendable)}) a character buffer. It is
- * guaranteed that the value returned is identical to that which
- * {@link #hashCode()} would have produced for the original object before it
- * was serialized.
- */
- public static int hashCodeOfSerialized(CharSequence buffer, int offset) {
- // Note: the algorithm here must be identical to that of hashCode(),
- // in order that they produce identical results!
- int ncomponents = (short) buffer.charAt(offset++);
- if (ncomponents == 0) {
- return 0;
- }
- int hash = ncomponents;
- for (int i = 0; i < ncomponents; i++) {
- hash = hash * 31 + buffer.charAt(offset++);
- }
- int len = buffer.charAt(offset - 1);
- for (int i = 0; i < len; i++) {
- hash = hash * 31 + buffer.charAt(offset++);
- }
- return hash;
- }
-
- /**
- * Serializes the content of this CategoryPath to a byte stream, using UTF-8
- * encoding to convert characters to bytes, and treating the ends as 16-bit
- * characters.
- *
- * @param osw
- * The output byte stream.
- * @throws IOException
- * If there are encoding errors.
- */
- // TODO (Facet): consolidate all de/serialize method names to
- // serialize() and unserialize()
- public void serializeToStreamWriter(OutputStreamWriter osw)
- throws IOException {
- osw.write(this.ncomponents);
- if (this.ncomponents <= 0) {
- return;
- }
- for (int j = 0; j < this.ncomponents; j++) {
- osw.write(this.ends[j]);
- }
- osw.write(this.chars, 0, this.ends[this.ncomponents - 1]);
- }
-
- /**
- * Serializes the content of this CategoryPath to a byte stream, using UTF-8
- * encoding to convert characters to bytes, and treating the ends as 16-bit
- * characters.
- *
- * @param isr
- * The input stream.
- * @throws IOException
- * If there are encoding errors.
- */
- public void deserializeFromStreamReader(InputStreamReader isr)
- throws IOException {
- this.ncomponents = (short) isr.read();
- if (this.ncomponents <= 0) {
- return;
- }
- if (this.ends == null || this.ends.length < this.ncomponents) {
- this.ends = new short[this.ncomponents];
- }
- for (int j = 0; j < this.ncomponents; j++) {
- this.ends[j] = (short) isr.read();
- }
- if (this.chars == null
- || this.ends[this.ncomponents - 1] > chars.length) {
- this.chars = new char[this.ends[this.ncomponents - 1]];
- }
- isr.read(this.chars, 0, this.ends[this.ncomponents - 1]);
- }
-
- private void writeObject(java.io.ObjectOutputStream out)
- throws IOException {
- OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8");
- this.serializeToStreamWriter(osw);
- osw.flush();
- }
-
- private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
- InputStreamReader isr = new InputStreamReader(in, "UTF-8");
- this.deserializeFromStreamReader(isr);
- }
-
- /**
- * Compares this CategoryPath with the other CategoryPath for lexicographic
- * order.
- * Returns a negative integer, zero, or a positive integer as this
- * CategoryPath lexicographically precedes, equals to, or lexicographically follows
- * the other CategoryPath.
- */
- public int compareTo(CategoryPath other) {
- int minlength = (this.length() < other.length()) ? this.length() : other.length();
- int ch = 0;
- for (int co = 0 ; co < minlength; co++) {
- if (this.ends[co] <= other.ends[co]) {
- for ( ; ch < this.ends[co] ; ch++) {
- if (this.chars[ch] != other.chars[ch]) {
- return this.chars[ch] - other.chars[ch];
- }
- }
- if (this.ends[co] < other.ends[co]) {
- return -1;
- }
- } else /* this.ends[co] > other.ends[co] */ {
- for ( ; ch < other.ends[co] ; ch++) {
- if (this.chars[ch] != other.chars[ch]) {
- return this.chars[ch] - other.chars[ch];
- }
- }
- return +1;
- }
- }
- // one is a prefix of the other
- return this.length() - other.length();
- }
-}