package org.apache.lucene.facet.taxonomy; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Serializable; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * A CategoryPath holds a sequence of string components, specifying the * hierarchical name of a category. *

* CategoryPath is designed to reduce the number of object allocations, in two * ways: First, it keeps the components internally in two arrays, rather than * keeping individual strings. Second, it allows reusing the same CategoryPath * object (which can be clear()ed and new components add()ed again) and of * add()'s parameter (which can be a reusable object, not just a string). * * @lucene.experimental */ public class CategoryPath implements Serializable, Cloneable, Comparable { // A category path is a sequence of string components. It is kept // internally as one large character array "chars" with all the string // concatenated (without separators), and an array of integers "ends" // pointing to the/ end of each component. Both arrays may be larger // than actually in use. An additional integer, "ncomponents" specifies // how many components are actually set. // We use shorts instead of ints for "ends" to save a bit of space. This // means that our path lengths are limited to 32767 characters - which // should not be a problem in any realistic scenario. protected char[] chars; protected short[] ends; protected short ncomponents; /** * Return the number of components in the facet path. Note that this is * not the number of characters, but the number of components. */ public short length() { return ncomponents; } /** * Trim the last components from the path. * * @param nTrim * Number of components to trim. If larger than the number of * components this path has, the entire path will be cleared. */ public void trim(int nTrim) { if (nTrim >= this.ncomponents) { clear(); } else if (nTrim > 0) { this.ncomponents -= nTrim; } } /** * Returns the current character capacity of the CategoryPath. The character * capacity is the size of the internal buffer used to hold the characters * of all the path's components. When a component is added and the capacity * is not big enough, the buffer is automatically grown, and capacityChars() * increases. */ public int capacityChars() { return chars.length; } /** * Returns the current component capacity of the CategoryPath. The component * capacity is the maximum number of components that the internal buffer can * currently hold. When a component is added beyond this capacity, the * buffer is automatically grown, and capacityComponents() increases. */ public int capacityComponents() { return ends.length; } /** * Construct a new empty CategoryPath object. CategoryPath objects are meant * to be reused, by add()ing components, and later clear()ing, and add()ing * components again. The CategoryPath object is created with a buffer * pre-allocated for a given number of characters and components, but the * buffer will grow as necessary (see {@link #capacityChars()} and * {@link #capacityComponents()}). */ public CategoryPath(int capacityChars, int capacityComponents) { ncomponents = 0; chars = new char[capacityChars]; ends = new short[capacityComponents]; } /** * Create an empty CategoryPath object. Equivalent to the constructor * {@link #CategoryPath(int, int)} with the two initial-capacity arguments * set to zero. */ public CategoryPath() { this(0, 0); } /** * Add the given component to the end of the path. *

* Note that when a String object is passed to this method, a reference to * it is not saved (rather, its content is copied), which will lead to that * String object being gc'ed. To reduce the number of garbage objects, you * can pass a mutable CharBuffer instead of an immutable String to this * method. */ public void add(CharSequence component) { // Set the new end, increasing the "ends" array sizes if necessary: if (ncomponents >= ends.length) { short[] newends = new short[(ends.length + 1) * 2]; System.arraycopy(ends, 0, newends, 0, ends.length); ends = newends; } short prevend = (ncomponents == 0) ? 0 : ends[ncomponents - 1]; int cmplen = component.length(); ends[ncomponents] = (short) (prevend + cmplen); // Copy the new component's characters, increasing the "chars" array // sizes if necessary: if (ends[ncomponents] > chars.length) { char[] newchars = new char[ends[ncomponents] * 2]; System.arraycopy(chars, 0, newchars, 0, chars.length); chars = newchars; } for (int i = 0; i < cmplen; i++) { chars[prevend++] = component.charAt(i); } ncomponents++; } /** * Empty the CategoryPath object, so that it has zero components. The * capacity of the object (see {@link #capacityChars()} and * {@link #capacityComponents()}) is not reduced, so that the object can be * reused without frequent reallocations. */ public void clear() { ncomponents = 0; } /** * Build a string representation of the path, with its components separated * by the given delimiter character. The resulting string is appended to a * given Appendable, e.g., a StringBuilder, CharBuffer or Writer. *

* Note that the two cases of zero components and one component with zero * length produce indistinguishable results (both of them append nothing). * This is normally not a problem, because components should not normally * have zero lengths. *

* An IOException can be thrown if the given Appendable's append() throws * this exception. */ public void appendTo(Appendable out, char delimiter) throws IOException { if (ncomponents == 0) { return; // just append nothing... } for (int i = 0; i < ends[0]; i++) { out.append(chars[i]); } for (int j = 1; j < ncomponents; j++) { out.append(delimiter); for (int i = ends[j - 1]; i < ends[j]; i++) { out.append(chars[i]); } } } /** * like {@link #appendTo(Appendable, char)}, but takes only a prefix of the * path, rather than the whole path. *

* If the given prefix length is negative or bigger than the path's actual * length, the whole path is taken. */ public void appendTo(Appendable out, char delimiter, int prefixLen) throws IOException { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } if (prefixLen == 0) { return; // just append nothing... } for (int i = 0; i < ends[0]; i++) { out.append(chars[i]); } for (int j = 1; j < prefixLen; j++) { out.append(delimiter); for (int i = ends[j - 1]; i < ends[j]; i++) { out.append(chars[i]); } } } /** * like {@link #appendTo(Appendable, char)}, but takes only a part of the * path, rather than the whole path. *

* Note that the two cases of zero components and one component with zero * length produce indistinguishable results (both of them return an empty * string). This is normally not a problem, because components should not * normally have zero lengths. */ public String toString(char delimiter) { if (ncomponents == 0) { return ""; } StringBuilder sb = new StringBuilder(ends[ncomponents - 1] + (ncomponents - 1)); try { this.appendTo(sb, delimiter); } catch (IOException e) { // can't happen, because StringBuilder.append() never actually // throws an exception! } return sb.toString(); } /** * This method, an implementation of the {@link Object#toString()} * interface, is to allow simple printing of a CategoryPath, for debugging * purposes. When possible, it recommended to avoid using it it, and rather, * if you want to output the path with its components separated by a * delimiter character, specify the delimiter explicitly, with * {@link #toString(char)}. */ @Override public String toString() { return toString('/'); } /** * like {@link #toString(char)}, but takes only a prefix with a given number * of components, rather than the whole path. *

* If the given length is negative or bigger than the path's actual length, * the whole path is taken. */ public String toString(char delimiter, int prefixLen) { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } if (prefixLen == 0) { return ""; } StringBuilder sb = new StringBuilder(ends[prefixLen - 1] + (prefixLen - 1)); try { this.appendTo(sb, delimiter, prefixLen); } catch (IOException e) { // can't happen, because sb.append() never actually throws an // exception } return sb.toString(); } /** * like {@link #toString(char)}, but takes only a part of the path, rather * than the whole path. *

* start specifies the first component in the subpath, and * end is one past the last component. If start is * negative, 0 is assumed, and if end is negative or past the * end of the path, the path is taken until the end. Otherwise, if * end<=start, an empty string is returned. An emptry string is * returned also in the case that the path is empty. */ public String toString(char delimiter, int start, int end) { if (start < 0) { start = 0; } if (end < 0 || end > ncomponents) { end = ncomponents; } if (end <= start) { return ""; } int startchar = (start == 0) ? 0 : ends[start - 1]; StringBuilder sb = new StringBuilder(ends[end - 1] - startchar + (end - start) - 1); try { this.appendTo(sb, delimiter, start, end); } catch (IOException e) { // can't happen, because sb.append() never actually throws an // exception } return sb.toString(); } /** * Return the i'th component of the path, in a new String object. If there * is no i'th component, a null is returned. */ public String getComponent(int i) { if (i < 0 || i >= ncomponents) { return null; } if (i == 0) { return new String(chars, 0, ends[0]); } return new String(chars, ends[i - 1], ends[i] - ends[i - 1]); } /** * Return the last component of the path, in a new String object. If the * path is empty, a null is returned. */ public String lastComponent() { if (ncomponents == 0) { return null; } if (ncomponents == 1) { return new String(chars, 0, ends[0]); } return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1] - ends[ncomponents - 2]); } /** * Copies the specified number of components from this category path to the * specified character array, with the components separated by a given * delimiter character. The array must be large enough to hold the * components and separators - the amount of needed space can be calculated * with {@link #charsNeededForFullPath()}. *

* This method returns the number of characters written to the array. * * @param outputBuffer * The destination character array. * @param outputBufferStart * The first location to write in the output array. * @param numberOfComponentsToCopy * The number of path components to write to the destination * buffer. * @param separatorChar * The separator inserted between every pair of path components * in the output buffer. * @see #charsNeededForFullPath() */ public int copyToCharArray(char[] outputBuffer, int outputBufferStart, int numberOfComponentsToCopy, char separatorChar) { if (numberOfComponentsToCopy == 0) { return 0; } if (numberOfComponentsToCopy < 0 || numberOfComponentsToCopy > ncomponents) { numberOfComponentsToCopy = ncomponents; } int outputBufferInitialStart = outputBufferStart; // for calculating // chars copied. int sourceStart = 0; int sourceLength = ends[0]; for (int component = 0; component < numberOfComponentsToCopy; component++) { if (component > 0) { sourceStart = ends[component - 1]; sourceLength = ends[component] - sourceStart; outputBuffer[outputBufferStart++] = separatorChar; } System.arraycopy(chars, sourceStart, outputBuffer, outputBufferStart, sourceLength); outputBufferStart += sourceLength; } return outputBufferStart - outputBufferInitialStart; } /** * Returns the number of characters required to represent this entire * category path, if written using * {@link #copyToCharArray(char[], int, int, char)} or * {@link #appendTo(Appendable, char)}. This includes the number of * characters in all the components, plus the number of separators between * them (each one character in the aforementioned methods). */ public int charsNeededForFullPath() { if (ncomponents == 0) { return 0; } return ends[ncomponents - 1] + ncomponents - 1; } /** * Construct a new CategoryPath object, given a single string with * components separated by a given delimiter character. *

* The initial capacity of the constructed object will be exactly what is * needed to hold the given path. This fact is convenient when creating a * temporary object that will not be reused later. */ public CategoryPath(String pathString, char delimiter) { if (pathString.length() == 0) { ncomponents = 0; chars = new char[0]; ends = new short[0]; return; } // This constructor is often used for creating a temporary object // (one which will not be reused to hold multiple paths), so we want // to do our best to allocate exactly the needed size - not less (to // avoid reallocation) and not more (so as not to waste space). // To do this, we unfortunately need to make an additional pass on the // given string: int nparts = 1; for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString .indexOf(delimiter, i + 1)) { nparts++; } ends = new short[nparts]; chars = new char[pathString.length() - nparts + 1]; ncomponents = 0; add(pathString, delimiter); } /** * Add the given components to the end of the path. The components are given * in a single string, separated by a given delimiter character. If the * given string is empty, it is assumed to refer to the root (empty) * category, and nothing is added to the path (rather than adding a single * empty component). *

* Note that when a String object is passed to this method, a reference to * it is not saved (rather, its content is copied), which will lead to that * String object being gc'ed. To reduce the number of garbage objects, you * can pass a mutable CharBuffer instead of an immutable String to this * method. */ public void add(CharSequence pathString, char delimiter) { int len = pathString.length(); if (len == 0) { return; // assume root category meant, so add nothing. } short pos = (ncomponents == 0) ? 0 : ends[ncomponents - 1]; for (int i = 0; i < len; i++) { char c = pathString.charAt(i); if (c == delimiter) { if (ncomponents >= ends.length) { short[] newends = new short[(ends.length + 1) * 2]; System.arraycopy(ends, 0, newends, 0, ends.length); ends = newends; } ends[ncomponents++] = pos; } else { if (pos >= chars.length) { char[] newchars = new char[(chars.length + 1) * 2]; System.arraycopy(chars, 0, newchars, 0, chars.length); chars = newchars; } chars[pos++] = c; } } // Don't forget to count the last component! if (ncomponents >= ends.length) { short[] newends = new short[(ends.length + 1) * 2]; System.arraycopy(ends, 0, newends, 0, ends.length); ends = newends; } ends[ncomponents++] = pos; } /** * Construct a new CategoryPath object, copying an existing path given as an * array of strings. *

* The new object occupies exactly the space it needs, without any spare * capacity. This is the expected behavior in the typical use case, where * this constructor is used to create a temporary object which is never * reused. */ public CategoryPath(CharSequence... components) { this.ncomponents = (short) components.length; this.ends = new short[ncomponents]; if (ncomponents > 0) { this.ends[0] = (short) components[0].length(); for (int i = 1; i < ncomponents; i++) { this.ends[i] = (short) (this.ends[i - 1] + components[i] .length()); } this.chars = new char[this.ends[ncomponents - 1]]; CharSequence cs = components[0]; if (cs instanceof String) { ((String) cs).getChars(0, cs.length(), this.chars, 0); } else { for (int j = 0, k = cs.length(); j < k; j++) { this.chars[j] = cs.charAt(j); } } for (int i = 1; i < ncomponents; i++) { cs = components[i]; int offset = this.ends[i - 1]; if (cs instanceof String) { ((String) cs).getChars(0, cs.length(), this.chars, offset); } else { for (int j = 0, k = cs.length(); j < k; j++) { this.chars[j + offset] = cs.charAt(j); } } } } else { this.chars = new char[0]; } } /** * Construct a new CategoryPath object, copying the path given in an * existing CategoryPath object. *

* This copy-constructor is handy when you need to save a reference to a * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot * save a reference to the original object because its contents can be * changed later by the user. Copying the contents into a new object is a * solution. *

* This constructor does not copy the capacity (spare buffer size) * of the existing CategoryPath. Rather, the new object occupies exactly the * space it needs, without any spare. This is the expected behavior in the * typical use case outlined in the previous paragraph. */ public CategoryPath(CategoryPath existing) { ncomponents = existing.ncomponents; if (ncomponents == 0) { chars = new char[0]; ends = new short[0]; return; } chars = new char[existing.ends[ncomponents - 1]]; System.arraycopy(existing.chars, 0, chars, 0, chars.length); ends = new short[ncomponents]; System.arraycopy(existing.ends, 0, ends, 0, ends.length); } /** * Construct a new CategoryPath object, copying a prefix with the given * number of components of the path given in an existing CategoryPath * object. *

* If the given length is negative or bigger than the given path's actual * length, the full path is taken. *

* This constructor is often convenient for creating a temporary object with * a path's prefix, but this practice is wasteful, and therefore * inadvisable. Rather, the application should be written in a way that * allows considering only a prefix of a given path, without needing to make * a copy of that path. */ public CategoryPath(CategoryPath existing, int prefixLen) { if (prefixLen < 0 || prefixLen > existing.ncomponents) { ncomponents = existing.ncomponents; } else { ncomponents = (short) prefixLen; } if (ncomponents == 0) { chars = new char[0]; ends = new short[0]; return; } chars = new char[existing.ends[ncomponents - 1]]; System.arraycopy(existing.chars, 0, chars, 0, chars.length); ends = new short[ncomponents]; System.arraycopy(existing.ends, 0, ends, 0, ends.length); } @Override public Object clone() { return new CategoryPath(this); } /** * Compare the given CategoryPath to another one. For two category paths to * be considered equal, only the path they contain needs to be identical The * unused capacity of the objects is not considered in the comparison. */ @Override public boolean equals(Object obj) { if (obj instanceof CategoryPath) { CategoryPath other = (CategoryPath) obj; if (other.ncomponents != this.ncomponents) { return false; } // Unfortunately, Arrays.equal() can only compare entire arrays, // and in our case we potentially have unused parts of the arrays // that must not be compared... I wish that some future version // of Java has a offset and length parameter to Arrays.equal // (sort of like System.arraycopy()). if (ncomponents == 0) { return true; // nothing to compare... } for (int i = 0; i < ncomponents; i++) { if (this.ends[i] != other.ends[i]) { return false; } } int len = ends[ncomponents - 1]; for (int i = 0; i < len; i++) { if (this.chars[i] != other.chars[i]) { return false; } } return true; } return false; } /** * Test whether this object is a descendant of another CategoryPath. This is * true if the other CategoryPath is the prefix of this. */ public boolean isDescendantOf(CategoryPath other) { if (this.ncomponents < other.ncomponents) { return false; } int j = 0; for (int i = 0; i < other.ncomponents; i++) { if (ends[i] != other.ends[i]) { return false; } for (; j < ends[i]; j++) { if (this.chars[j] != other.chars[j]) { return false; } } } return true; } /** * Calculate a hashCode for this path, used when a CategoryPath serves as a * hash-table key. If two objects are equal(), their hashCodes need to be * equal, so like in equal(), hashCode does not consider unused portions of * the internal buffers in its calculation. *

* The hash function used is modeled after Java's String.hashCode() - a * simple multiplicative hash function with the multiplier 31. The same hash * function also appeared in Kernighan & Ritchie's second edition of * "The C Programming Language" (1988). */ @Override public int hashCode() { if (ncomponents == 0) { return 0; } int hash = ncomponents; // Unfortunately, Arrays.hashCode() can only calculate a hash code // for an entire arrays, and in our case we potentially have unused // parts of the arrays that must be ignored, so must use our own loop // over the characters. I wish that some future version of Java will // add offset and length parameters to Arrays.hashCode (sort of like // System.arraycopy()'s parameters). for (int i = 0; i < ncomponents; i++) { hash = hash * 31 + ends[i]; } int len = ends[ncomponents - 1]; for (int i = 0; i < len; i++) { hash = hash * 31 + chars[i]; } return hash; } /** * Like {@link #hashCode()}, but find the hash function of a prefix with the * given number of components, rather than of the entire path. */ public int hashCode(int prefixLen) { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } if (prefixLen == 0) { return 0; } int hash = prefixLen; for (int i = 0; i < prefixLen; i++) { hash = hash * 31 + ends[i]; } int len = ends[prefixLen - 1]; for (int i = 0; i < len; i++) { hash = hash * 31 + chars[i]; } return hash; } /** * Calculate a 64-bit hash function for this path. Unlike * {@link #hashCode()}, this method is not part of the Java standard, and is * only used if explicitly called by the user. *

* If two objects are equal(), their hash codes need to be equal, so like in * {@link #equals(Object)}, longHashCode does not consider unused portions * of the internal buffers in its calculation. *

* The hash function used is a simple multiplicative hash function, with the * multiplier 65599. While Java's standard multiplier 31 (used in * {@link #hashCode()}) gives a good distribution for ASCII strings, it * turns out that for foreign-language strings (with 16-bit characters) it * gives too many collisions, and a bigger multiplier produces fewer * collisions in this case. */ public long longHashCode() { if (ncomponents == 0) { return 0; } long hash = ncomponents; for (int i = 0; i < ncomponents; i++) { hash = hash * 65599 + ends[i]; } int len = ends[ncomponents - 1]; for (int i = 0; i < len; i++) { hash = hash * 65599 + chars[i]; } return hash; } /** * Like {@link #longHashCode()}, but find the hash function of a prefix with * the given number of components, rather than of the entire path. */ public long longHashCode(int prefixLen) { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } if (prefixLen == 0) { return 0; } long hash = prefixLen; for (int i = 0; i < prefixLen; i++) { hash = hash * 65599 + ends[i]; } int len = ends[prefixLen - 1]; for (int i = 0; i < len; i++) { hash = hash * 65599 + chars[i]; } return hash; } /** * Write out a serialized (as a character sequence) representation of the * path to a given Appendable (e.g., a StringBuilder, CharBuffer, Writer, or * something similar. *

* This method may throw a IOException if the given Appendable threw this * exception while appending. */ public void serializeAppendTo(Appendable out) throws IOException { // Note that we use the fact that ncomponents and ends[] are shorts, // so we can write them as chars: out.append((char) ncomponents); if (ncomponents == 0) { return; } for (int i = 0; i < ncomponents; i++) { out.append((char) ends[i]); } int usedchars = ends[ncomponents - 1]; for (int i = 0; i < usedchars; i++) { out.append(chars[i]); } } /** * Just like {@link #serializeAppendTo(Appendable)}, but writes only a * prefix of the CategoryPath. */ public void serializeAppendTo(int prefixLen, Appendable out) throws IOException { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } // Note that we use the fact that ncomponents and ends[] are shorts, // so we can write them as chars: out.append((char) prefixLen); if (prefixLen == 0) { return; } for (int i = 0; i < prefixLen; i++) { out.append((char) ends[i]); } int usedchars = ends[prefixLen - 1]; for (int i = 0; i < usedchars; i++) { out.append(chars[i]); } } /** * Set a CategoryPath from a character-sequence representation written by * {@link #serializeAppendTo(Appendable)}. *

* Reading starts at the given offset into the given character sequence, and * the offset right after the end of this path is returned. */ public int setFromSerialized(CharSequence buffer, int offset) { ncomponents = (short) buffer.charAt(offset++); if (ncomponents == 0) { return offset; } if (ncomponents >= ends.length) { ends = new short[Math.max(ends.length * 2, ncomponents)]; } for (int i = 0; i < ncomponents; i++) { ends[i] = (short) buffer.charAt(offset++); } int usedchars = ends[ncomponents - 1]; if (usedchars > chars.length) { chars = new char[Math.max(chars.length * 2, usedchars)]; } for (int i = 0; i < usedchars; i++) { chars[i] = buffer.charAt(offset++); } return offset; } /** * Check whether the current path is identical to the one serialized (with * {@link #serializeAppendTo(Appendable)}) in the given buffer, at the given * offset. */ public boolean equalsToSerialized(CharSequence buffer, int offset) { int n = (short) buffer.charAt(offset++); if (ncomponents != n) { return false; } if (ncomponents == 0) { return true; } for (int i = 0; i < ncomponents; i++) { if (ends[i] != (short) buffer.charAt(offset++)) { return false; } } int usedchars = ends[ncomponents - 1]; for (int i = 0; i < usedchars; i++) { if (chars[i] != buffer.charAt(offset++)) { return false; } } return true; } /** * Just like {@link #equalsToSerialized(CharSequence, int)}, but compare to * a prefix of the CategoryPath, instead of the whole CategoryPath. */ public boolean equalsToSerialized(int prefixLen, CharSequence buffer, int offset) { if (prefixLen < 0 || prefixLen > ncomponents) { prefixLen = ncomponents; } int n = (short) buffer.charAt(offset++); if (prefixLen != n) { return false; } if (prefixLen == 0) { return true; } for (int i = 0; i < prefixLen; i++) { if (ends[i] != (short) buffer.charAt(offset++)) { return false; } } int usedchars = ends[prefixLen - 1]; for (int i = 0; i < usedchars; i++) { if (chars[i] != buffer.charAt(offset++)) { return false; } } return true; } /** * This method calculates a hash function of a path that has been written to * (using {@link #serializeAppendTo(Appendable)}) a character buffer. It is * guaranteed that the value returned is identical to that which * {@link #hashCode()} would have produced for the original object before it * was serialized. */ public static int hashCodeOfSerialized(CharSequence buffer, int offset) { // Note: the algorithm here must be identical to that of hashCode(), // in order that they produce identical results! int ncomponents = (short) buffer.charAt(offset++); if (ncomponents == 0) { return 0; } int hash = ncomponents; for (int i = 0; i < ncomponents; i++) { hash = hash * 31 + buffer.charAt(offset++); } int len = buffer.charAt(offset - 1); for (int i = 0; i < len; i++) { hash = hash * 31 + buffer.charAt(offset++); } return hash; } /** * Serializes the content of this CategoryPath to a byte stream, using UTF-8 * encoding to convert characters to bytes, and treating the ends as 16-bit * characters. * * @param osw * The output byte stream. * @throws IOException * If there are encoding errors. */ // TODO (Facet): consolidate all de/serialize method names to // serialize() and unserialize() public void serializeToStreamWriter(OutputStreamWriter osw) throws IOException { osw.write(this.ncomponents); if (this.ncomponents <= 0) { return; } for (int j = 0; j < this.ncomponents; j++) { osw.write(this.ends[j]); } osw.write(this.chars, 0, this.ends[this.ncomponents - 1]); } /** * Serializes the content of this CategoryPath to a byte stream, using UTF-8 * encoding to convert characters to bytes, and treating the ends as 16-bit * characters. * * @param isr * The input stream. * @throws IOException * If there are encoding errors. */ public void deserializeFromStreamReader(InputStreamReader isr) throws IOException { this.ncomponents = (short) isr.read(); if (this.ncomponents <= 0) { return; } if (this.ends == null || this.ends.length < this.ncomponents) { this.ends = new short[this.ncomponents]; } for (int j = 0; j < this.ncomponents; j++) { this.ends[j] = (short) isr.read(); } if (this.chars == null || this.ends[this.ncomponents - 1] > chars.length) { this.chars = new char[this.ends[this.ncomponents - 1]]; } isr.read(this.chars, 0, this.ends[this.ncomponents - 1]); } private void writeObject(java.io.ObjectOutputStream out) throws IOException { OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8"); this.serializeToStreamWriter(osw); osw.flush(); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { InputStreamReader isr = new InputStreamReader(in, "UTF-8"); this.deserializeFromStreamReader(isr); } /** * Compares this CategoryPath with the other CategoryPath for lexicographic * order. * Returns a negative integer, zero, or a positive integer as this * CategoryPath lexicographically precedes, equals to, or lexicographically follows * the other CategoryPath. */ public int compareTo(CategoryPath other) { int minlength = (this.length() < other.length()) ? this.length() : other.length(); int ch = 0; for (int co = 0 ; co < minlength; co++) { if (this.ends[co] <= other.ends[co]) { for ( ; ch < this.ends[co] ; ch++) { if (this.chars[ch] != other.chars[ch]) { return this.chars[ch] - other.chars[ch]; } } if (this.ends[co] < other.ends[co]) { return -1; } } else /* this.ends[co] > other.ends[co] */ { for ( ; ch < other.ends[co] ; ch++) { if (this.chars[ch] != other.chars[ch]) { return this.chars[ch] - other.chars[ch]; } } return +1; } } // one is a prefix of the other return this.length() - other.length(); } }