1 package org.apache.lucene.facet.taxonomy;
3 import java.io.IOException;
4 import java.io.InputStreamReader;
5 import java.io.OutputStreamWriter;
6 import java.io.Serializable;
9 * Licensed to the Apache Software Foundation (ASF) under one or more
10 * contributor license agreements. See the NOTICE file distributed with
11 * this work for additional information regarding copyright ownership.
12 * The ASF licenses this file to You under the Apache License, Version 2.0
13 * (the "License"); you may not use this file except in compliance with
14 * the License. You may obtain a copy of the License at
16 * http://www.apache.org/licenses/LICENSE-2.0
18 * Unless required by applicable law or agreed to in writing, software
19 * distributed under the License is distributed on an "AS IS" BASIS,
20 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 * See the License for the specific language governing permissions and
22 * limitations under the License.
26 * A CategoryPath holds a sequence of string components, specifying the
27 * hierarchical name of a category.
29 * CategoryPath is designed to reduce the number of object allocations, in two
30 * ways: First, it keeps the components internally in two arrays, rather than
31 * keeping individual strings. Second, it allows reusing the same CategoryPath
32 * object (which can be clear()ed and new components add()ed again) and of
33 * add()'s parameter (which can be a reusable object, not just a string).
35 * @lucene.experimental
37 public class CategoryPath implements Serializable, Cloneable, Comparable<CategoryPath> {
39 // A category path is a sequence of string components. It is kept
40 // internally as one large character array "chars" with all the string
41 // concatenated (without separators), and an array of integers "ends"
42 // pointing to the/ end of each component. Both arrays may be larger
43 // than actually in use. An additional integer, "ncomponents" specifies
44 // how many components are actually set.
45 // We use shorts instead of ints for "ends" to save a bit of space. This
46 // means that our path lengths are limited to 32767 characters - which
47 // should not be a problem in any realistic scenario.
48 protected char[] chars;
49 protected short[] ends;
50 protected short ncomponents;
53 * Return the number of components in the facet path. Note that this is
54 * <I>not</I> the number of characters, but the number of components.
56 public short length() {
61 * Trim the last components from the path.
64 * Number of components to trim. If larger than the number of
65 * components this path has, the entire path will be cleared.
67 public void trim(int nTrim) {
68 if (nTrim >= this.ncomponents) {
70 } else if (nTrim > 0) {
71 this.ncomponents -= nTrim;
76 * Returns the current character capacity of the CategoryPath. The character
77 * capacity is the size of the internal buffer used to hold the characters
78 * of all the path's components. When a component is added and the capacity
79 * is not big enough, the buffer is automatically grown, and capacityChars()
82 public int capacityChars() {
87 * Returns the current component capacity of the CategoryPath. The component
88 * capacity is the maximum number of components that the internal buffer can
89 * currently hold. When a component is added beyond this capacity, the
90 * buffer is automatically grown, and capacityComponents() increases.
92 public int capacityComponents() {
97 * Construct a new empty CategoryPath object. CategoryPath objects are meant
98 * to be reused, by add()ing components, and later clear()ing, and add()ing
99 * components again. The CategoryPath object is created with a buffer
100 * pre-allocated for a given number of characters and components, but the
101 * buffer will grow as necessary (see {@link #capacityChars()} and
102 * {@link #capacityComponents()}).
104 public CategoryPath(int capacityChars, int capacityComponents) {
106 chars = new char[capacityChars];
107 ends = new short[capacityComponents];
111 * Create an empty CategoryPath object. Equivalent to the constructor
112 * {@link #CategoryPath(int, int)} with the two initial-capacity arguments
115 public CategoryPath() {
120 * Add the given component to the end of the path.
122 * Note that when a String object is passed to this method, a reference to
123 * it is not saved (rather, its content is copied), which will lead to that
124 * String object being gc'ed. To reduce the number of garbage objects, you
125 * can pass a mutable CharBuffer instead of an immutable String to this
128 public void add(CharSequence component) {
129 // Set the new end, increasing the "ends" array sizes if necessary:
130 if (ncomponents >= ends.length) {
131 short[] newends = new short[(ends.length + 1) * 2];
132 System.arraycopy(ends, 0, newends, 0, ends.length);
135 short prevend = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
136 int cmplen = component.length();
137 ends[ncomponents] = (short) (prevend + cmplen);
139 // Copy the new component's characters, increasing the "chars" array
140 // sizes if necessary:
141 if (ends[ncomponents] > chars.length) {
142 char[] newchars = new char[ends[ncomponents] * 2];
143 System.arraycopy(chars, 0, newchars, 0, chars.length);
146 for (int i = 0; i < cmplen; i++) {
147 chars[prevend++] = component.charAt(i);
154 * Empty the CategoryPath object, so that it has zero components. The
155 * capacity of the object (see {@link #capacityChars()} and
156 * {@link #capacityComponents()}) is not reduced, so that the object can be
157 * reused without frequent reallocations.
159 public void clear() {
164 * Build a string representation of the path, with its components separated
165 * by the given delimiter character. The resulting string is appended to a
166 * given Appendable, e.g., a StringBuilder, CharBuffer or Writer.
168 * Note that the two cases of zero components and one component with zero
169 * length produce indistinguishable results (both of them append nothing).
170 * This is normally not a problem, because components should not normally
173 * An IOException can be thrown if the given Appendable's append() throws
176 public void appendTo(Appendable out, char delimiter) throws IOException {
177 if (ncomponents == 0) {
178 return; // just append nothing...
180 for (int i = 0; i < ends[0]; i++) {
181 out.append(chars[i]);
183 for (int j = 1; j < ncomponents; j++) {
184 out.append(delimiter);
185 for (int i = ends[j - 1]; i < ends[j]; i++) {
186 out.append(chars[i]);
192 * like {@link #appendTo(Appendable, char)}, but takes only a prefix of the
193 * path, rather than the whole path.
195 * If the given prefix length is negative or bigger than the path's actual
196 * length, the whole path is taken.
198 public void appendTo(Appendable out, char delimiter, int prefixLen)
200 if (prefixLen < 0 || prefixLen > ncomponents) {
201 prefixLen = ncomponents;
203 if (prefixLen == 0) {
204 return; // just append nothing...
206 for (int i = 0; i < ends[0]; i++) {
207 out.append(chars[i]);
209 for (int j = 1; j < prefixLen; j++) {
210 out.append(delimiter);
211 for (int i = ends[j - 1]; i < ends[j]; i++) {
212 out.append(chars[i]);
218 * like {@link #appendTo(Appendable, char)}, but takes only a part of the
219 * path, rather than the whole path.
221 * <code>start</code> specifies the first component in the subpath, and
222 * <code>end</code> is one past the last component. If <code>start</code> is
223 * negative, 0 is assumed, and if <code>end</code> is negative or past the
224 * end of the path, the path is taken until the end. Otherwise, if
225 * <code>end<=start</code>, nothing is appended. Nothing is appended also in
226 * the case that the path is empty.
228 public void appendTo(Appendable out, char delimiter, int start, int end)
233 if (end < 0 || end > ncomponents) {
237 return; // just append nothing...
239 for (int i = (start == 0 ? 0 : ends[start - 1]); i < ends[start]; i++) {
240 out.append(chars[i]);
242 for (int j = start + 1; j < end; j++) {
243 out.append(delimiter);
244 for (int i = ends[j - 1]; i < ends[j]; i++) {
245 out.append(chars[i]);
251 * Build a string representation of the path, with its components separated
252 * by the given delimiter character. The resulting string is returned as a
253 * new String object. To avoid this temporary object creation, consider
254 * using {@link #appendTo(Appendable, char)} instead.
256 * Note that the two cases of zero components and one component with zero
257 * length produce indistinguishable results (both of them return an empty
258 * string). This is normally not a problem, because components should not
259 * normally have zero lengths.
261 public String toString(char delimiter) {
262 if (ncomponents == 0) {
265 StringBuilder sb = new StringBuilder(ends[ncomponents - 1]
266 + (ncomponents - 1));
268 this.appendTo(sb, delimiter);
269 } catch (IOException e) {
270 // can't happen, because StringBuilder.append() never actually
271 // throws an exception!
273 return sb.toString();
277 * This method, an implementation of the {@link Object#toString()}
278 * interface, is to allow simple printing of a CategoryPath, for debugging
279 * purposes. When possible, it recommended to avoid using it it, and rather,
280 * if you want to output the path with its components separated by a
281 * delimiter character, specify the delimiter explicitly, with
282 * {@link #toString(char)}.
285 public String toString() {
286 return toString('/');
290 * like {@link #toString(char)}, but takes only a prefix with a given number
291 * of components, rather than the whole path.
293 * If the given length is negative or bigger than the path's actual length,
294 * the whole path is taken.
296 public String toString(char delimiter, int prefixLen) {
297 if (prefixLen < 0 || prefixLen > ncomponents) {
298 prefixLen = ncomponents;
300 if (prefixLen == 0) {
303 StringBuilder sb = new StringBuilder(ends[prefixLen - 1]
306 this.appendTo(sb, delimiter, prefixLen);
307 } catch (IOException e) {
308 // can't happen, because sb.append() never actually throws an
311 return sb.toString();
315 * like {@link #toString(char)}, but takes only a part of the path, rather
316 * than the whole path.
318 * <code>start</code> specifies the first component in the subpath, and
319 * <code>end</code> is one past the last component. If <code>start</code> is
320 * negative, 0 is assumed, and if <code>end</code> is negative or past the
321 * end of the path, the path is taken until the end. Otherwise, if
322 * <code>end<=start</code>, an empty string is returned. An emptry string is
323 * returned also in the case that the path is empty.
325 public String toString(char delimiter, int start, int end) {
329 if (end < 0 || end > ncomponents) {
335 int startchar = (start == 0) ? 0 : ends[start - 1];
336 StringBuilder sb = new StringBuilder(ends[end - 1] - startchar
337 + (end - start) - 1);
339 this.appendTo(sb, delimiter, start, end);
340 } catch (IOException e) {
341 // can't happen, because sb.append() never actually throws an
344 return sb.toString();
348 * Return the i'th component of the path, in a new String object. If there
349 * is no i'th component, a null is returned.
351 public String getComponent(int i) {
352 if (i < 0 || i >= ncomponents) {
356 return new String(chars, 0, ends[0]);
358 return new String(chars, ends[i - 1], ends[i] - ends[i - 1]);
362 * Return the last component of the path, in a new String object. If the
363 * path is empty, a null is returned.
365 public String lastComponent() {
366 if (ncomponents == 0) {
369 if (ncomponents == 1) {
370 return new String(chars, 0, ends[0]);
372 return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1]
373 - ends[ncomponents - 2]);
377 * Copies the specified number of components from this category path to the
378 * specified character array, with the components separated by a given
379 * delimiter character. The array must be large enough to hold the
380 * components and separators - the amount of needed space can be calculated
381 * with {@link #charsNeededForFullPath()}.
383 * This method returns the number of characters written to the array.
385 * @param outputBuffer
386 * The destination character array.
387 * @param outputBufferStart
388 * The first location to write in the output array.
389 * @param numberOfComponentsToCopy
390 * The number of path components to write to the destination
392 * @param separatorChar
393 * The separator inserted between every pair of path components
394 * in the output buffer.
395 * @see #charsNeededForFullPath()
397 public int copyToCharArray(char[] outputBuffer, int outputBufferStart,
398 int numberOfComponentsToCopy, char separatorChar) {
399 if (numberOfComponentsToCopy == 0) {
402 if (numberOfComponentsToCopy < 0
403 || numberOfComponentsToCopy > ncomponents) {
404 numberOfComponentsToCopy = ncomponents;
406 int outputBufferInitialStart = outputBufferStart; // for calculating
409 int sourceLength = ends[0];
410 for (int component = 0; component < numberOfComponentsToCopy; component++) {
412 sourceStart = ends[component - 1];
413 sourceLength = ends[component] - sourceStart;
414 outputBuffer[outputBufferStart++] = separatorChar;
416 System.arraycopy(chars, sourceStart, outputBuffer,
417 outputBufferStart, sourceLength);
418 outputBufferStart += sourceLength;
420 return outputBufferStart - outputBufferInitialStart;
424 * Returns the number of characters required to represent this entire
425 * category path, if written using
426 * {@link #copyToCharArray(char[], int, int, char)} or
427 * {@link #appendTo(Appendable, char)}. This includes the number of
428 * characters in all the components, plus the number of separators between
429 * them (each one character in the aforementioned methods).
431 public int charsNeededForFullPath() {
432 if (ncomponents == 0) {
435 return ends[ncomponents - 1] + ncomponents - 1;
439 * Construct a new CategoryPath object, given a single string with
440 * components separated by a given delimiter character.
442 * The initial capacity of the constructed object will be exactly what is
443 * needed to hold the given path. This fact is convenient when creating a
444 * temporary object that will not be reused later.
446 public CategoryPath(String pathString, char delimiter) {
447 if (pathString.length() == 0) {
454 // This constructor is often used for creating a temporary object
455 // (one which will not be reused to hold multiple paths), so we want
456 // to do our best to allocate exactly the needed size - not less (to
457 // avoid reallocation) and not more (so as not to waste space).
458 // To do this, we unfortunately need to make an additional pass on the
461 for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString
462 .indexOf(delimiter, i + 1)) {
466 ends = new short[nparts];
467 chars = new char[pathString.length() - nparts + 1];
470 add(pathString, delimiter);
474 * Add the given components to the end of the path. The components are given
475 * in a single string, separated by a given delimiter character. If the
476 * given string is empty, it is assumed to refer to the root (empty)
477 * category, and nothing is added to the path (rather than adding a single
480 * Note that when a String object is passed to this method, a reference to
481 * it is not saved (rather, its content is copied), which will lead to that
482 * String object being gc'ed. To reduce the number of garbage objects, you
483 * can pass a mutable CharBuffer instead of an immutable String to this
486 public void add(CharSequence pathString, char delimiter) {
487 int len = pathString.length();
489 return; // assume root category meant, so add nothing.
491 short pos = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
492 for (int i = 0; i < len; i++) {
493 char c = pathString.charAt(i);
494 if (c == delimiter) {
495 if (ncomponents >= ends.length) {
496 short[] newends = new short[(ends.length + 1) * 2];
497 System.arraycopy(ends, 0, newends, 0, ends.length);
500 ends[ncomponents++] = pos;
502 if (pos >= chars.length) {
503 char[] newchars = new char[(chars.length + 1) * 2];
504 System.arraycopy(chars, 0, newchars, 0, chars.length);
511 // Don't forget to count the last component!
512 if (ncomponents >= ends.length) {
513 short[] newends = new short[(ends.length + 1) * 2];
514 System.arraycopy(ends, 0, newends, 0, ends.length);
517 ends[ncomponents++] = pos;
521 * Construct a new CategoryPath object, copying an existing path given as an
524 * The new object occupies exactly the space it needs, without any spare
525 * capacity. This is the expected behavior in the typical use case, where
526 * this constructor is used to create a temporary object which is never
529 public CategoryPath(CharSequence... components) {
530 this.ncomponents = (short) components.length;
531 this.ends = new short[ncomponents];
532 if (ncomponents > 0) {
533 this.ends[0] = (short) components[0].length();
534 for (int i = 1; i < ncomponents; i++) {
535 this.ends[i] = (short) (this.ends[i - 1] + components[i]
538 this.chars = new char[this.ends[ncomponents - 1]];
539 CharSequence cs = components[0];
540 if (cs instanceof String) {
541 ((String) cs).getChars(0, cs.length(), this.chars, 0);
543 for (int j = 0, k = cs.length(); j < k; j++) {
544 this.chars[j] = cs.charAt(j);
547 for (int i = 1; i < ncomponents; i++) {
549 int offset = this.ends[i - 1];
550 if (cs instanceof String) {
551 ((String) cs).getChars(0, cs.length(), this.chars, offset);
553 for (int j = 0, k = cs.length(); j < k; j++) {
554 this.chars[j + offset] = cs.charAt(j);
559 this.chars = new char[0];
564 * Construct a new CategoryPath object, copying the path given in an
565 * existing CategoryPath object.
567 * This copy-constructor is handy when you need to save a reference to a
568 * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot
569 * save a reference to the original object because its contents can be
570 * changed later by the user. Copying the contents into a new object is a
573 * This constructor </I>does not</I> copy the capacity (spare buffer size)
574 * of the existing CategoryPath. Rather, the new object occupies exactly the
575 * space it needs, without any spare. This is the expected behavior in the
576 * typical use case outlined in the previous paragraph.
578 public CategoryPath(CategoryPath existing) {
579 ncomponents = existing.ncomponents;
580 if (ncomponents == 0) {
586 chars = new char[existing.ends[ncomponents - 1]];
587 System.arraycopy(existing.chars, 0, chars, 0, chars.length);
588 ends = new short[ncomponents];
589 System.arraycopy(existing.ends, 0, ends, 0, ends.length);
593 * Construct a new CategoryPath object, copying a prefix with the given
594 * number of components of the path given in an existing CategoryPath
597 * If the given length is negative or bigger than the given path's actual
598 * length, the full path is taken.
600 * This constructor is often convenient for creating a temporary object with
601 * a path's prefix, but this practice is wasteful, and therefore
602 * inadvisable. Rather, the application should be written in a way that
603 * allows considering only a prefix of a given path, without needing to make
604 * a copy of that path.
606 public CategoryPath(CategoryPath existing, int prefixLen) {
607 if (prefixLen < 0 || prefixLen > existing.ncomponents) {
608 ncomponents = existing.ncomponents;
610 ncomponents = (short) prefixLen;
612 if (ncomponents == 0) {
618 chars = new char[existing.ends[ncomponents - 1]];
619 System.arraycopy(existing.chars, 0, chars, 0, chars.length);
620 ends = new short[ncomponents];
621 System.arraycopy(existing.ends, 0, ends, 0, ends.length);
625 public Object clone() {
626 return new CategoryPath(this);
630 * Compare the given CategoryPath to another one. For two category paths to
631 * be considered equal, only the path they contain needs to be identical The
632 * unused capacity of the objects is not considered in the comparison.
635 public boolean equals(Object obj) {
636 if (obj instanceof CategoryPath) {
637 CategoryPath other = (CategoryPath) obj;
638 if (other.ncomponents != this.ncomponents) {
641 // Unfortunately, Arrays.equal() can only compare entire arrays,
642 // and in our case we potentially have unused parts of the arrays
643 // that must not be compared... I wish that some future version
644 // of Java has a offset and length parameter to Arrays.equal
645 // (sort of like System.arraycopy()).
646 if (ncomponents == 0) {
647 return true; // nothing to compare...
649 for (int i = 0; i < ncomponents; i++) {
650 if (this.ends[i] != other.ends[i]) {
654 int len = ends[ncomponents - 1];
655 for (int i = 0; i < len; i++) {
656 if (this.chars[i] != other.chars[i]) {
666 * Test whether this object is a descendant of another CategoryPath. This is
667 * true if the other CategoryPath is the prefix of this.
669 public boolean isDescendantOf(CategoryPath other) {
670 if (this.ncomponents < other.ncomponents) {
674 for (int i = 0; i < other.ncomponents; i++) {
675 if (ends[i] != other.ends[i]) {
678 for (; j < ends[i]; j++) {
679 if (this.chars[j] != other.chars[j]) {
688 * Calculate a hashCode for this path, used when a CategoryPath serves as a
689 * hash-table key. If two objects are equal(), their hashCodes need to be
690 * equal, so like in equal(), hashCode does not consider unused portions of
691 * the internal buffers in its calculation.
693 * The hash function used is modeled after Java's String.hashCode() - a
694 * simple multiplicative hash function with the multiplier 31. The same hash
695 * function also appeared in Kernighan & Ritchie's second edition of
696 * "The C Programming Language" (1988).
699 public int hashCode() {
700 if (ncomponents == 0) {
703 int hash = ncomponents;
704 // Unfortunately, Arrays.hashCode() can only calculate a hash code
705 // for an entire arrays, and in our case we potentially have unused
706 // parts of the arrays that must be ignored, so must use our own loop
707 // over the characters. I wish that some future version of Java will
708 // add offset and length parameters to Arrays.hashCode (sort of like
709 // System.arraycopy()'s parameters).
710 for (int i = 0; i < ncomponents; i++) {
711 hash = hash * 31 + ends[i];
713 int len = ends[ncomponents - 1];
714 for (int i = 0; i < len; i++) {
715 hash = hash * 31 + chars[i];
721 * Like {@link #hashCode()}, but find the hash function of a prefix with the
722 * given number of components, rather than of the entire path.
724 public int hashCode(int prefixLen) {
725 if (prefixLen < 0 || prefixLen > ncomponents) {
726 prefixLen = ncomponents;
728 if (prefixLen == 0) {
731 int hash = prefixLen;
732 for (int i = 0; i < prefixLen; i++) {
733 hash = hash * 31 + ends[i];
735 int len = ends[prefixLen - 1];
736 for (int i = 0; i < len; i++) {
737 hash = hash * 31 + chars[i];
743 * Calculate a 64-bit hash function for this path. Unlike
744 * {@link #hashCode()}, this method is not part of the Java standard, and is
745 * only used if explicitly called by the user.
747 * If two objects are equal(), their hash codes need to be equal, so like in
748 * {@link #equals(Object)}, longHashCode does not consider unused portions
749 * of the internal buffers in its calculation.
751 * The hash function used is a simple multiplicative hash function, with the
752 * multiplier 65599. While Java's standard multiplier 31 (used in
753 * {@link #hashCode()}) gives a good distribution for ASCII strings, it
754 * turns out that for foreign-language strings (with 16-bit characters) it
755 * gives too many collisions, and a bigger multiplier produces fewer
756 * collisions in this case.
758 public long longHashCode() {
759 if (ncomponents == 0) {
762 long hash = ncomponents;
763 for (int i = 0; i < ncomponents; i++) {
764 hash = hash * 65599 + ends[i];
766 int len = ends[ncomponents - 1];
767 for (int i = 0; i < len; i++) {
768 hash = hash * 65599 + chars[i];
774 * Like {@link #longHashCode()}, but find the hash function of a prefix with
775 * the given number of components, rather than of the entire path.
777 public long longHashCode(int prefixLen) {
778 if (prefixLen < 0 || prefixLen > ncomponents) {
779 prefixLen = ncomponents;
781 if (prefixLen == 0) {
784 long hash = prefixLen;
785 for (int i = 0; i < prefixLen; i++) {
786 hash = hash * 65599 + ends[i];
788 int len = ends[prefixLen - 1];
789 for (int i = 0; i < len; i++) {
790 hash = hash * 65599 + chars[i];
796 * Write out a serialized (as a character sequence) representation of the
797 * path to a given Appendable (e.g., a StringBuilder, CharBuffer, Writer, or
800 * This method may throw a IOException if the given Appendable threw this
801 * exception while appending.
803 public void serializeAppendTo(Appendable out) throws IOException {
804 // Note that we use the fact that ncomponents and ends[] are shorts,
805 // so we can write them as chars:
806 out.append((char) ncomponents);
807 if (ncomponents == 0) {
810 for (int i = 0; i < ncomponents; i++) {
811 out.append((char) ends[i]);
813 int usedchars = ends[ncomponents - 1];
814 for (int i = 0; i < usedchars; i++) {
815 out.append(chars[i]);
820 * Just like {@link #serializeAppendTo(Appendable)}, but writes only a
821 * prefix of the CategoryPath.
823 public void serializeAppendTo(int prefixLen, Appendable out)
825 if (prefixLen < 0 || prefixLen > ncomponents) {
826 prefixLen = ncomponents;
828 // Note that we use the fact that ncomponents and ends[] are shorts,
829 // so we can write them as chars:
830 out.append((char) prefixLen);
831 if (prefixLen == 0) {
834 for (int i = 0; i < prefixLen; i++) {
835 out.append((char) ends[i]);
837 int usedchars = ends[prefixLen - 1];
838 for (int i = 0; i < usedchars; i++) {
839 out.append(chars[i]);
844 * Set a CategoryPath from a character-sequence representation written by
845 * {@link #serializeAppendTo(Appendable)}.
847 * Reading starts at the given offset into the given character sequence, and
848 * the offset right after the end of this path is returned.
850 public int setFromSerialized(CharSequence buffer, int offset) {
851 ncomponents = (short) buffer.charAt(offset++);
852 if (ncomponents == 0) {
856 if (ncomponents >= ends.length) {
857 ends = new short[Math.max(ends.length * 2, ncomponents)];
859 for (int i = 0; i < ncomponents; i++) {
860 ends[i] = (short) buffer.charAt(offset++);
863 int usedchars = ends[ncomponents - 1];
864 if (usedchars > chars.length) {
865 chars = new char[Math.max(chars.length * 2, usedchars)];
867 for (int i = 0; i < usedchars; i++) {
868 chars[i] = buffer.charAt(offset++);
875 * Check whether the current path is identical to the one serialized (with
876 * {@link #serializeAppendTo(Appendable)}) in the given buffer, at the given
879 public boolean equalsToSerialized(CharSequence buffer, int offset) {
880 int n = (short) buffer.charAt(offset++);
881 if (ncomponents != n) {
884 if (ncomponents == 0) {
887 for (int i = 0; i < ncomponents; i++) {
888 if (ends[i] != (short) buffer.charAt(offset++)) {
892 int usedchars = ends[ncomponents - 1];
893 for (int i = 0; i < usedchars; i++) {
894 if (chars[i] != buffer.charAt(offset++)) {
902 * Just like {@link #equalsToSerialized(CharSequence, int)}, but compare to
903 * a prefix of the CategoryPath, instead of the whole CategoryPath.
905 public boolean equalsToSerialized(int prefixLen, CharSequence buffer,
907 if (prefixLen < 0 || prefixLen > ncomponents) {
908 prefixLen = ncomponents;
910 int n = (short) buffer.charAt(offset++);
911 if (prefixLen != n) {
914 if (prefixLen == 0) {
917 for (int i = 0; i < prefixLen; i++) {
918 if (ends[i] != (short) buffer.charAt(offset++)) {
922 int usedchars = ends[prefixLen - 1];
923 for (int i = 0; i < usedchars; i++) {
924 if (chars[i] != buffer.charAt(offset++)) {
932 * This method calculates a hash function of a path that has been written to
933 * (using {@link #serializeAppendTo(Appendable)}) a character buffer. It is
934 * guaranteed that the value returned is identical to that which
935 * {@link #hashCode()} would have produced for the original object before it
938 public static int hashCodeOfSerialized(CharSequence buffer, int offset) {
939 // Note: the algorithm here must be identical to that of hashCode(),
940 // in order that they produce identical results!
941 int ncomponents = (short) buffer.charAt(offset++);
942 if (ncomponents == 0) {
945 int hash = ncomponents;
946 for (int i = 0; i < ncomponents; i++) {
947 hash = hash * 31 + buffer.charAt(offset++);
949 int len = buffer.charAt(offset - 1);
950 for (int i = 0; i < len; i++) {
951 hash = hash * 31 + buffer.charAt(offset++);
957 * Serializes the content of this CategoryPath to a byte stream, using UTF-8
958 * encoding to convert characters to bytes, and treating the ends as 16-bit
962 * The output byte stream.
963 * @throws IOException
964 * If there are encoding errors.
966 // TODO (Facet): consolidate all de/serialize method names to
967 // serialize() and unserialize()
968 public void serializeToStreamWriter(OutputStreamWriter osw)
970 osw.write(this.ncomponents);
971 if (this.ncomponents <= 0) {
974 for (int j = 0; j < this.ncomponents; j++) {
975 osw.write(this.ends[j]);
977 osw.write(this.chars, 0, this.ends[this.ncomponents - 1]);
981 * Serializes the content of this CategoryPath to a byte stream, using UTF-8
982 * encoding to convert characters to bytes, and treating the ends as 16-bit
987 * @throws IOException
988 * If there are encoding errors.
990 public void deserializeFromStreamReader(InputStreamReader isr)
992 this.ncomponents = (short) isr.read();
993 if (this.ncomponents <= 0) {
996 if (this.ends == null || this.ends.length < this.ncomponents) {
997 this.ends = new short[this.ncomponents];
999 for (int j = 0; j < this.ncomponents; j++) {
1000 this.ends[j] = (short) isr.read();
1002 if (this.chars == null
1003 || this.ends[this.ncomponents - 1] > chars.length) {
1004 this.chars = new char[this.ends[this.ncomponents - 1]];
1006 isr.read(this.chars, 0, this.ends[this.ncomponents - 1]);
1009 private void writeObject(java.io.ObjectOutputStream out)
1010 throws IOException {
1011 OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8");
1012 this.serializeToStreamWriter(osw);
1016 private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
1017 InputStreamReader isr = new InputStreamReader(in, "UTF-8");
1018 this.deserializeFromStreamReader(isr);
1022 * Compares this CategoryPath with the other CategoryPath for lexicographic
1024 * Returns a negative integer, zero, or a positive integer as this
1025 * CategoryPath lexicographically precedes, equals to, or lexicographically follows
1026 * the other CategoryPath.
1028 public int compareTo(CategoryPath other) {
1029 int minlength = (this.length() < other.length()) ? this.length() : other.length();
1031 for (int co = 0 ; co < minlength; co++) {
1032 if (this.ends[co] <= other.ends[co]) {
1033 for ( ; ch < this.ends[co] ; ch++) {
1034 if (this.chars[ch] != other.chars[ch]) {
1035 return this.chars[ch] - other.chars[ch];
1038 if (this.ends[co] < other.ends[co]) {
1041 } else /* this.ends[co] > other.ends[co] */ {
1042 for ( ; ch < other.ends[co] ; ch++) {
1043 if (this.chars[ch] != other.chars[ch]) {
1044 return this.chars[ch] - other.chars[ch];
1050 // one is a prefix of the other
1051 return this.length() - other.length();