lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java

   1 package org.apache.lucene.facet.taxonomy;
   2
   3 import java.io.IOException;
   4 import java.io.InputStreamReader;
   5 import java.io.OutputStreamWriter;
   6 import java.io.Serializable;
   7
   8 /**
   9  * Licensed to the Apache Software Foundation (ASF) under one or more
  10  * contributor license agreements.  See the NOTICE file distributed with
  11  * this work for additional information regarding copyright ownership.
  12  * The ASF licenses this file to You under the Apache License, Version 2.0
  13  * (the "License"); you may not use this file except in compliance with
  14  * the License.  You may obtain a copy of the License at
  15  *
  16  *     http://www.apache.org/licenses/LICENSE-2.0
  17  *
  18  * Unless required by applicable law or agreed to in writing, software
  19  * distributed under the License is distributed on an "AS IS" BASIS,
  20  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  21  * See the License for the specific language governing permissions and
  22  * limitations under the License.
  23  */
  24
  25 /**
  26  * A CategoryPath holds a sequence of string components, specifying the
  27  * hierarchical name of a category.
  28  * <P>
  29  * CategoryPath is designed to reduce the number of object allocations, in two
  30  * ways: First, it keeps the components internally in two arrays, rather than
  31  * keeping individual strings. Second, it allows reusing the same CategoryPath
  32  * object (which can be clear()ed and new components add()ed again) and of
  33  * add()'s parameter (which can be a reusable object, not just a string).
  34  *
  35  * @lucene.experimental
  36  */
  37 public class CategoryPath implements Serializable, Cloneable, Comparable<CategoryPath> {
  38
  39   // A category path is a sequence of string components. It is kept
  40   // internally as one large character array "chars" with all the string
  41   // concatenated (without separators), and an array of integers "ends"
  42   // pointing to the/ end of each component. Both arrays may be larger
  43   // than actually in use. An additional integer, "ncomponents" specifies
  44   // how many components are actually set.
  45   // We use shorts instead of ints for "ends" to save a bit of space. This
  46   // means that our path lengths are limited to 32767 characters - which
  47   // should not be a problem in any realistic scenario.
  48   protected char[] chars;
  49   protected short[] ends;
  50   protected short ncomponents;
  51
  52   /**
  53    * Return the number of components in the facet path. Note that this is
  54    * <I>not</I> the number of characters, but the number of components.
  55    */
  56   public short length() {
  57     return ncomponents;
  58   }
  59
  60   /**
  61    * Trim the last components from the path.
  62    *
  63    * @param nTrim
  64    *            Number of components to trim. If larger than the number of
  65    *            components this path has, the entire path will be cleared.
  66    */
  67   public void trim(int nTrim) {
  68     if (nTrim >= this.ncomponents) {
  69       clear();
  70     } else if (nTrim > 0) {
  71       this.ncomponents -= nTrim;
  72     }
  73   }
  74
  75   /**
  76    * Returns the current character capacity of the CategoryPath. The character
  77    * capacity is the size of the internal buffer used to hold the characters
  78    * of all the path's components. When a component is added and the capacity
  79    * is not big enough, the buffer is automatically grown, and capacityChars()
  80    * increases.
  81    */
  82   public int capacityChars() {
  83     return chars.length;
  84   }
  85
  86   /**
  87    * Returns the current component capacity of the CategoryPath. The component
  88    * capacity is the maximum number of components that the internal buffer can
  89    * currently hold. When a component is added beyond this capacity, the
  90    * buffer is automatically grown, and capacityComponents() increases.
  91    */
  92   public int capacityComponents() {
  93     return ends.length;
  94   }
  95
  96   /**
  97    * Construct a new empty CategoryPath object. CategoryPath objects are meant
  98    * to be reused, by add()ing components, and later clear()ing, and add()ing
  99    * components again. The CategoryPath object is created with a buffer
 100    * pre-allocated for a given number of characters and components, but the
 101    * buffer will grow as necessary (see {@link #capacityChars()} and
 102    * {@link #capacityComponents()}).
 103    */
 104   public CategoryPath(int capacityChars, int capacityComponents) {
 105     ncomponents = 0;
 106     chars = new char[capacityChars];
 107     ends = new short[capacityComponents];
 108   }
 109
 110   /**
 111    * Create an empty CategoryPath object. Equivalent to the constructor
 112    * {@link #CategoryPath(int, int)} with the two initial-capacity arguments
 113    * set to zero.
 114    */
 115   public CategoryPath() {
 116     this(0, 0);
 117   }
 118
 119   /**
 120    * Add the given component to the end of the path.
 121    * <P>
 122    * Note that when a String object is passed to this method, a reference to
 123    * it is not saved (rather, its content is copied), which will lead to that
 124    * String object being gc'ed. To reduce the number of garbage objects, you
 125    * can pass a mutable CharBuffer instead of an immutable String to this
 126    * method.
 127    */
 128   public void add(CharSequence component) {
 129     // Set the new end, increasing the "ends" array sizes if necessary:
 130     if (ncomponents >= ends.length) {
 131       short[] newends = new short[(ends.length + 1) * 2];
 132       System.arraycopy(ends, 0, newends, 0, ends.length);
 133       ends = newends;
 134     }
 135     short prevend = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
 136     int cmplen = component.length();
 137     ends[ncomponents] = (short) (prevend + cmplen);
 138
 139     // Copy the new component's characters, increasing the "chars" array
 140     // sizes if necessary:
 141     if (ends[ncomponents] > chars.length) {
 142       char[] newchars = new char[ends[ncomponents] * 2];
 143       System.arraycopy(chars, 0, newchars, 0, chars.length);
 144       chars = newchars;
 145     }
 146     for (int i = 0; i < cmplen; i++) {
 147       chars[prevend++] = component.charAt(i);
 148     }
 149
 150     ncomponents++;
 151   }
 152
 153   /**
 154    * Empty the CategoryPath object, so that it has zero components. The
 155    * capacity of the object (see {@link #capacityChars()} and
 156    * {@link #capacityComponents()}) is not reduced, so that the object can be
 157    * reused without frequent reallocations.
 158    */
 159   public void clear() {
 160     ncomponents = 0;
 161   }
 162
 163   /**
 164    * Build a string representation of the path, with its components separated
 165    * by the given delimiter character. The resulting string is appended to a
 166    * given Appendable, e.g., a StringBuilder, CharBuffer or Writer.
 167    * <P>
 168    * Note that the two cases of zero components and one component with zero
 169    * length produce indistinguishable results (both of them append nothing).
 170    * This is normally not a problem, because components should not normally
 171    * have zero lengths.
 172    * <P>
 173    * An IOException can be thrown if the given Appendable's append() throws
 174    * this exception.
 175    */
 176   public void appendTo(Appendable out, char delimiter) throws IOException {
 177     if (ncomponents == 0) {
 178       return; // just append nothing...
 179     }
 180     for (int i = 0; i < ends[0]; i++) {
 181       out.append(chars[i]);
 182     }
 183     for (int j = 1; j < ncomponents; j++) {
 184       out.append(delimiter);
 185       for (int i = ends[j - 1]; i < ends[j]; i++) {
 186         out.append(chars[i]);
 187       }
 188     }
 189   }
 190
 191   /**
 192    * like {@link #appendTo(Appendable, char)}, but takes only a prefix of the
 193    * path, rather than the whole path.
 194    * <P>
 195    * If the given prefix length is negative or bigger than the path's actual
 196    * length, the whole path is taken.
 197    */
 198   public void appendTo(Appendable out, char delimiter, int prefixLen)
 199       throws IOException {
 200     if (prefixLen < 0 || prefixLen > ncomponents) {
 201       prefixLen = ncomponents;
 202     }
 203     if (prefixLen == 0) {
 204       return; // just append nothing...
 205     }
 206     for (int i = 0; i < ends[0]; i++) {
 207       out.append(chars[i]);
 208     }
 209     for (int j = 1; j < prefixLen; j++) {
 210       out.append(delimiter);
 211       for (int i = ends[j - 1]; i < ends[j]; i++) {
 212         out.append(chars[i]);
 213       }
 214     }
 215   }
 216
 217   /**
 218    * like {@link #appendTo(Appendable, char)}, but takes only a part of the
 219    * path, rather than the whole path.
 220    * <P>
 221    * <code>start</code> specifies the first component in the subpath, and
 222    * <code>end</code> is one past the last component. If <code>start</code> is
 223    * negative, 0 is assumed, and if <code>end</code> is negative or past the
 224    * end of the path, the path is taken until the end. Otherwise, if
 225    * <code>end<=start</code>, nothing is appended. Nothing is appended also in
 226    * the case that the path is empty.
 227    */
 228   public void appendTo(Appendable out, char delimiter, int start, int end)
 229       throws IOException {
 230     if (start < 0) {
 231       start = 0;
 232     }
 233     if (end < 0 || end > ncomponents) {
 234       end = ncomponents;
 235     }
 236     if (end <= start) {
 237       return; // just append nothing...
 238     }
 239     for (int i = (start == 0 ? 0 : ends[start - 1]); i < ends[start]; i++) {
 240       out.append(chars[i]);
 241     }
 242     for (int j = start + 1; j < end; j++) {
 243       out.append(delimiter);
 244       for (int i = ends[j - 1]; i < ends[j]; i++) {
 245         out.append(chars[i]);
 246       }
 247     }
 248   }
 249
 250   /**
 251    * Build a string representation of the path, with its components separated
 252    * by the given delimiter character. The resulting string is returned as a
 253    * new String object. To avoid this temporary object creation, consider
 254    * using {@link #appendTo(Appendable, char)} instead.
 255    * <P>
 256    * Note that the two cases of zero components and one component with zero
 257    * length produce indistinguishable results (both of them return an empty
 258    * string). This is normally not a problem, because components should not
 259    * normally have zero lengths.
 260    */
 261   public String toString(char delimiter) {
 262     if (ncomponents == 0) {
 263       return "";
 264     }
 265     StringBuilder sb = new StringBuilder(ends[ncomponents - 1]
 266         + (ncomponents - 1));
 267     try {
 268       this.appendTo(sb, delimiter);
 269     } catch (IOException e) {
 270       // can't happen, because StringBuilder.append() never actually
 271       // throws an exception!
 272     }
 273     return sb.toString();
 274   }
 275
 276   /**
 277    * This method, an implementation of the {@link Object#toString()}
 278    * interface, is to allow simple printing of a CategoryPath, for debugging
 279    * purposes. When possible, it recommended to avoid using it it, and rather,
 280    * if you want to output the path with its components separated by a
 281    * delimiter character, specify the delimiter explicitly, with
 282    * {@link #toString(char)}.
 283    */
 284   @Override
 285   public String toString() {
 286     return toString('/');
 287   }
 288
 289   /**
 290    * like {@link #toString(char)}, but takes only a prefix with a given number
 291    * of components, rather than the whole path.
 292    * <P>
 293    * If the given length is negative or bigger than the path's actual length,
 294    * the whole path is taken.
 295    */
 296   public String toString(char delimiter, int prefixLen) {
 297     if (prefixLen < 0 || prefixLen > ncomponents) {
 298       prefixLen = ncomponents;
 299     }
 300     if (prefixLen == 0) {
 301       return "";
 302     }
 303     StringBuilder sb = new StringBuilder(ends[prefixLen - 1]
 304         + (prefixLen - 1));
 305     try {
 306       this.appendTo(sb, delimiter, prefixLen);
 307     } catch (IOException e) {
 308       // can't happen, because sb.append() never actually throws an
 309       // exception
 310     }
 311     return sb.toString();
 312   }
 313
 314   /**
 315    * like {@link #toString(char)}, but takes only a part of the path, rather
 316    * than the whole path.
 317    * <P>
 318    * <code>start</code> specifies the first component in the subpath, and
 319    * <code>end</code> is one past the last component. If <code>start</code> is
 320    * negative, 0 is assumed, and if <code>end</code> is negative or past the
 321    * end of the path, the path is taken until the end. Otherwise, if
 322    * <code>end<=start</code>, an empty string is returned. An emptry string is
 323    * returned also in the case that the path is empty.
 324    */
 325   public String toString(char delimiter, int start, int end) {
 326     if (start < 0) {
 327       start = 0;
 328     }
 329     if (end < 0 || end > ncomponents) {
 330       end = ncomponents;
 331     }
 332     if (end <= start) {
 333       return "";
 334     }
 335     int startchar = (start == 0) ? 0 : ends[start - 1];
 336     StringBuilder sb = new StringBuilder(ends[end - 1] - startchar
 337         + (end - start) - 1);
 338     try {
 339       this.appendTo(sb, delimiter, start, end);
 340     } catch (IOException e) {
 341       // can't happen, because sb.append() never actually throws an
 342       // exception
 343     }
 344     return sb.toString();
 345   }
 346
 347   /**
 348    * Return the i'th component of the path, in a new String object. If there
 349    * is no i'th component, a null is returned.
 350    */
 351   public String getComponent(int i) {
 352     if (i < 0 || i >= ncomponents) {
 353       return null;
 354     }
 355     if (i == 0) {
 356       return new String(chars, 0, ends[0]);
 357     }
 358     return new String(chars, ends[i - 1], ends[i] - ends[i - 1]);
 359   }
 360
 361   /**
 362    * Return the last component of the path, in a new String object. If the
 363    * path is empty, a null is returned.
 364    */
 365   public String lastComponent() {
 366     if (ncomponents == 0) {
 367       return null;
 368     }
 369     if (ncomponents == 1) {
 370       return new String(chars, 0, ends[0]);
 371     }
 372     return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1]
 373         - ends[ncomponents - 2]);
 374   }
 375
 376   /**
 377    * Copies the specified number of components from this category path to the
 378    * specified character array, with the components separated by a given
 379    * delimiter character. The array must be large enough to hold the
 380    * components and separators - the amount of needed space can be calculated
 381    * with {@link #charsNeededForFullPath()}.
 382    * <P>
 383    * This method returns the number of characters written to the array.
 384    *
 385    * @param outputBuffer
 386    *            The destination character array.
 387    * @param outputBufferStart
 388    *            The first location to write in the output array.
 389    * @param numberOfComponentsToCopy
 390    *            The number of path components to write to the destination
 391    *            buffer.
 392    * @param separatorChar
 393    *            The separator inserted between every pair of path components
 394    *            in the output buffer.
 395    * @see #charsNeededForFullPath()
 396    */
 397   public int copyToCharArray(char[] outputBuffer, int outputBufferStart,
 398       int numberOfComponentsToCopy, char separatorChar) {
 399     if (numberOfComponentsToCopy == 0) {
 400       return 0;
 401     }
 402     if (numberOfComponentsToCopy < 0
 403         || numberOfComponentsToCopy > ncomponents) {
 404       numberOfComponentsToCopy = ncomponents;
 405     }
 406     int outputBufferInitialStart = outputBufferStart; // for calculating
 407                               // chars copied.
 408     int sourceStart = 0;
 409     int sourceLength = ends[0];
 410     for (int component = 0; component < numberOfComponentsToCopy; component++) {
 411       if (component > 0) {
 412         sourceStart = ends[component - 1];
 413         sourceLength = ends[component] - sourceStart;
 414         outputBuffer[outputBufferStart++] = separatorChar;
 415       }
 416       System.arraycopy(chars, sourceStart, outputBuffer,
 417           outputBufferStart, sourceLength);
 418       outputBufferStart += sourceLength;
 419     }
 420     return outputBufferStart - outputBufferInitialStart;
 421   }
 422
 423   /**
 424    * Returns the number of characters required to represent this entire
 425    * category path, if written using
 426    * {@link #copyToCharArray(char[], int, int, char)} or
 427    * {@link #appendTo(Appendable, char)}. This includes the number of
 428    * characters in all the components, plus the number of separators between
 429    * them (each one character in the aforementioned methods).
 430    */
 431   public int charsNeededForFullPath() {
 432     if (ncomponents == 0) {
 433       return 0;
 434     }
 435     return ends[ncomponents - 1] + ncomponents - 1;
 436   }
 437
 438   /**
 439    * Construct a new CategoryPath object, given a single string with
 440    * components separated by a given delimiter character.
 441    * <P>
 442    * The initial capacity of the constructed object will be exactly what is
 443    * needed to hold the given path. This fact is convenient when creating a
 444    * temporary object that will not be reused later.
 445    */
 446   public CategoryPath(String pathString, char delimiter) {
 447     if (pathString.length() == 0) {
 448       ncomponents = 0;
 449       chars = new char[0];
 450       ends = new short[0];
 451       return;
 452     }
 453
 454     // This constructor is often used for creating a temporary object
 455     // (one which will not be reused to hold multiple paths), so we want
 456     // to do our best to allocate exactly the needed size - not less (to
 457     // avoid reallocation) and not more (so as not to waste space).
 458     // To do this, we unfortunately need to make an additional pass on the
 459     // given string:
 460     int nparts = 1;
 461     for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString
 462         .indexOf(delimiter, i + 1)) {
 463       nparts++;
 464     }
 465
 466     ends = new short[nparts];
 467     chars = new char[pathString.length() - nparts + 1];
 468     ncomponents = 0;
 469
 470     add(pathString, delimiter);
 471   }
 472
 473   /**
 474    * Add the given components to the end of the path. The components are given
 475    * in a single string, separated by a given delimiter character. If the
 476    * given string is empty, it is assumed to refer to the root (empty)
 477    * category, and nothing is added to the path (rather than adding a single
 478    * empty component).
 479    * <P>
 480    * Note that when a String object is passed to this method, a reference to
 481    * it is not saved (rather, its content is copied), which will lead to that
 482    * String object being gc'ed. To reduce the number of garbage objects, you
 483    * can pass a mutable CharBuffer instead of an immutable String to this
 484    * method.
 485    */
 486   public void add(CharSequence pathString, char delimiter) {
 487     int len = pathString.length();
 488     if (len == 0) {
 489       return; // assume root category meant, so add nothing.
 490     }
 491     short pos = (ncomponents == 0) ? 0 : ends[ncomponents - 1];
 492     for (int i = 0; i < len; i++) {
 493       char c = pathString.charAt(i);
 494       if (c == delimiter) {
 495         if (ncomponents >= ends.length) {
 496           short[] newends = new short[(ends.length + 1) * 2];
 497           System.arraycopy(ends, 0, newends, 0, ends.length);
 498           ends = newends;
 499         }
 500         ends[ncomponents++] = pos;
 501       } else {
 502         if (pos >= chars.length) {
 503           char[] newchars = new char[(chars.length + 1) * 2];
 504           System.arraycopy(chars, 0, newchars, 0, chars.length);
 505           chars = newchars;
 506         }
 507         chars[pos++] = c;
 508       }
 509     }
 510
 511     // Don't forget to count the last component!
 512     if (ncomponents >= ends.length) {
 513       short[] newends = new short[(ends.length + 1) * 2];
 514       System.arraycopy(ends, 0, newends, 0, ends.length);
 515       ends = newends;
 516     }
 517     ends[ncomponents++] = pos;
 518   }
 519
 520   /**
 521    * Construct a new CategoryPath object, copying an existing path given as an
 522    * array of strings.
 523    * <P>
 524    * The new object occupies exactly the space it needs, without any spare
 525    * capacity. This is the expected behavior in the typical use case, where
 526    * this constructor is used to create a temporary object which is never
 527    * reused.
 528    */
 529   public CategoryPath(CharSequence... components) {
 530     this.ncomponents = (short) components.length;
 531     this.ends = new short[ncomponents];
 532     if (ncomponents > 0) {
 533       this.ends[0] = (short) components[0].length();
 534       for (int i = 1; i < ncomponents; i++) {
 535         this.ends[i] = (short) (this.ends[i - 1] + components[i]
 536             .length());
 537       }
 538       this.chars = new char[this.ends[ncomponents - 1]];
 539       CharSequence cs = components[0];
 540       if (cs instanceof String) {
 541         ((String) cs).getChars(0, cs.length(), this.chars, 0);
 542       } else {
 543         for (int j = 0, k = cs.length(); j < k; j++) {
 544           this.chars[j] = cs.charAt(j);
 545         }
 546       }
 547       for (int i = 1; i < ncomponents; i++) {
 548         cs = components[i];
 549         int offset = this.ends[i - 1];
 550         if (cs instanceof String) {
 551           ((String) cs).getChars(0, cs.length(), this.chars, offset);
 552         } else {
 553           for (int j = 0, k = cs.length(); j < k; j++) {
 554             this.chars[j + offset] = cs.charAt(j);
 555           }
 556         }
 557       }
 558     } else {
 559       this.chars = new char[0];
 560     }
 561   }
 562
 563   /**
 564    * Construct a new CategoryPath object, copying the path given in an
 565    * existing CategoryPath object.
 566    * <P>
 567    * This copy-constructor is handy when you need to save a reference to a
 568    * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot
 569    * save a reference to the original object because its contents can be
 570    * changed later by the user. Copying the contents into a new object is a
 571    * solution.
 572    * <P>
 573    * This constructor </I>does not</I> copy the capacity (spare buffer size)
 574    * of the existing CategoryPath. Rather, the new object occupies exactly the
 575    * space it needs, without any spare. This is the expected behavior in the
 576    * typical use case outlined in the previous paragraph.
 577    */
 578   public CategoryPath(CategoryPath existing) {
 579     ncomponents = existing.ncomponents;
 580     if (ncomponents == 0) {
 581       chars = new char[0];
 582       ends = new short[0];
 583       return;
 584     }
 585
 586     chars = new char[existing.ends[ncomponents - 1]];
 587     System.arraycopy(existing.chars, 0, chars, 0, chars.length);
 588     ends = new short[ncomponents];
 589     System.arraycopy(existing.ends, 0, ends, 0, ends.length);
 590   }
 591
 592   /**
 593    * Construct a new CategoryPath object, copying a prefix with the given
 594    * number of components of the path given in an existing CategoryPath
 595    * object.
 596    * <P>
 597    * If the given length is negative or bigger than the given path's actual
 598    * length, the full path is taken.
 599    * <P>
 600    * This constructor is often convenient for creating a temporary object with
 601    * a path's prefix, but this practice is wasteful, and therefore
 602    * inadvisable. Rather, the application should be written in a way that
 603    * allows considering only a prefix of a given path, without needing to make
 604    * a copy of that path.
 605    */
 606   public CategoryPath(CategoryPath existing, int prefixLen) {
 607     if (prefixLen < 0 || prefixLen > existing.ncomponents) {
 608       ncomponents = existing.ncomponents;
 609     } else {
 610       ncomponents = (short) prefixLen;
 611     }
 612     if (ncomponents == 0) {
 613       chars = new char[0];
 614       ends = new short[0];
 615       return;
 616     }
 617
 618     chars = new char[existing.ends[ncomponents - 1]];
 619     System.arraycopy(existing.chars, 0, chars, 0, chars.length);
 620     ends = new short[ncomponents];
 621     System.arraycopy(existing.ends, 0, ends, 0, ends.length);
 622   }
 623
 624   @Override
 625   public Object clone() {
 626     return new CategoryPath(this);
 627   }
 628
 629   /**
 630    * Compare the given CategoryPath to another one. For two category paths to
 631    * be considered equal, only the path they contain needs to be identical The
 632    * unused capacity of the objects is not considered in the comparison.
 633    */
 634   @Override
 635   public boolean equals(Object obj) {
 636     if (obj instanceof CategoryPath) {
 637       CategoryPath other = (CategoryPath) obj;
 638       if (other.ncomponents != this.ncomponents) {
 639         return false;
 640       }
 641       // Unfortunately, Arrays.equal() can only compare entire arrays,
 642       // and in our case we potentially have unused parts of the arrays
 643       // that must not be compared... I wish that some future version
 644       // of Java has a offset and length parameter to Arrays.equal
 645       // (sort of like System.arraycopy()).
 646       if (ncomponents == 0) {
 647         return true; // nothing to compare...
 648       }
 649       for (int i = 0; i < ncomponents; i++) {
 650         if (this.ends[i] != other.ends[i]) {
 651           return false;
 652         }
 653       }
 654       int len = ends[ncomponents - 1];
 655       for (int i = 0; i < len; i++) {
 656         if (this.chars[i] != other.chars[i]) {
 657           return false;
 658         }
 659       }
 660       return true;
 661     }
 662     return false;
 663   }
 664
 665   /**
 666    * Test whether this object is a descendant of another CategoryPath. This is
 667    * true if the other CategoryPath is the prefix of this.
 668    */
 669   public boolean isDescendantOf(CategoryPath other) {
 670     if (this.ncomponents < other.ncomponents) {
 671       return false;
 672     }
 673     int j = 0;
 674     for (int i = 0; i < other.ncomponents; i++) {
 675       if (ends[i] != other.ends[i]) {
 676         return false;
 677       }
 678       for (; j < ends[i]; j++) {
 679         if (this.chars[j] != other.chars[j]) {
 680           return false;
 681         }
 682       }
 683     }
 684     return true;
 685   }
 686
 687   /**
 688    * Calculate a hashCode for this path, used when a CategoryPath serves as a
 689    * hash-table key. If two objects are equal(), their hashCodes need to be
 690    * equal, so like in equal(), hashCode does not consider unused portions of
 691    * the internal buffers in its calculation.
 692    * <P>
 693    * The hash function used is modeled after Java's String.hashCode() - a
 694    * simple multiplicative hash function with the multiplier 31. The same hash
 695    * function also appeared in Kernighan & Ritchie's second edition of
 696    * "The C Programming Language" (1988).
 697    */
 698   @Override
 699   public int hashCode() {
 700     if (ncomponents == 0) {
 701       return 0;
 702     }
 703     int hash = ncomponents;
 704     // Unfortunately, Arrays.hashCode() can only calculate a hash code
 705     // for an entire arrays, and in our case we potentially have unused
 706     // parts of the arrays that must be ignored, so must use our own loop
 707     // over the characters. I wish that some future version of Java will
 708     // add offset and length parameters to Arrays.hashCode (sort of like
 709     // System.arraycopy()'s parameters).
 710     for (int i = 0; i < ncomponents; i++) {
 711       hash = hash * 31 + ends[i];
 712     }
 713     int len = ends[ncomponents - 1];
 714     for (int i = 0; i < len; i++) {
 715       hash = hash * 31 + chars[i];
 716     }
 717     return hash;
 718   }
 719
 720   /**
 721    * Like {@link #hashCode()}, but find the hash function of a prefix with the
 722    * given number of components, rather than of the entire path.
 723    */
 724   public int hashCode(int prefixLen) {
 725     if (prefixLen < 0 || prefixLen > ncomponents) {
 726       prefixLen = ncomponents;
 727     }
 728     if (prefixLen == 0) {
 729       return 0;
 730     }
 731     int hash = prefixLen;
 732     for (int i = 0; i < prefixLen; i++) {
 733       hash = hash * 31 + ends[i];
 734     }
 735     int len = ends[prefixLen - 1];
 736     for (int i = 0; i < len; i++) {
 737       hash = hash * 31 + chars[i];
 738     }
 739     return hash;
 740   }
 741
 742   /**
 743    * Calculate a 64-bit hash function for this path. Unlike
 744    * {@link #hashCode()}, this method is not part of the Java standard, and is
 745    * only used if explicitly called by the user.
 746    * <P>
 747    * If two objects are equal(), their hash codes need to be equal, so like in
 748    * {@link #equals(Object)}, longHashCode does not consider unused portions
 749    * of the internal buffers in its calculation.
 750    * <P>
 751    * The hash function used is a simple multiplicative hash function, with the
 752    * multiplier 65599. While Java's standard multiplier 31 (used in
 753    * {@link #hashCode()}) gives a good distribution for ASCII strings, it
 754    * turns out that for foreign-language strings (with 16-bit characters) it
 755    * gives too many collisions, and a bigger multiplier produces fewer
 756    * collisions in this case.
 757    */
 758   public long longHashCode() {
 759     if (ncomponents == 0) {
 760       return 0;
 761     }
 762     long hash = ncomponents;
 763     for (int i = 0; i < ncomponents; i++) {
 764       hash = hash * 65599 + ends[i];
 765     }
 766     int len = ends[ncomponents - 1];
 767     for (int i = 0; i < len; i++) {
 768       hash = hash * 65599 + chars[i];
 769     }
 770     return hash;
 771   }
 772
 773   /**
 774    * Like {@link #longHashCode()}, but find the hash function of a prefix with
 775    * the given number of components, rather than of the entire path.
 776    */
 777   public long longHashCode(int prefixLen) {
 778     if (prefixLen < 0 || prefixLen > ncomponents) {
 779       prefixLen = ncomponents;
 780     }
 781     if (prefixLen == 0) {
 782       return 0;
 783     }
 784     long hash = prefixLen;
 785     for (int i = 0; i < prefixLen; i++) {
 786       hash = hash * 65599 + ends[i];
 787     }
 788     int len = ends[prefixLen - 1];
 789     for (int i = 0; i < len; i++) {
 790       hash = hash * 65599 + chars[i];
 791     }
 792     return hash;
 793   }
 794
 795   /**
 796    * Write out a serialized (as a character sequence) representation of the
 797    * path to a given Appendable (e.g., a StringBuilder, CharBuffer, Writer, or
 798    * something similar.
 799    * <P>
 800    * This method may throw a IOException if the given Appendable threw this
 801    * exception while appending.
 802    */
 803   public void serializeAppendTo(Appendable out) throws IOException {
 804     // Note that we use the fact that ncomponents and ends[] are shorts,
 805     // so we can write them as chars:
 806     out.append((char) ncomponents);
 807     if (ncomponents == 0) {
 808       return;
 809     }
 810     for (int i = 0; i < ncomponents; i++) {
 811       out.append((char) ends[i]);
 812     }
 813     int usedchars = ends[ncomponents - 1];
 814     for (int i = 0; i < usedchars; i++) {
 815       out.append(chars[i]);
 816     }
 817   }
 818
 819   /**
 820    * Just like {@link #serializeAppendTo(Appendable)}, but writes only a
 821    * prefix of the CategoryPath.
 822    */
 823   public void serializeAppendTo(int prefixLen, Appendable out)
 824       throws IOException {
 825     if (prefixLen < 0 || prefixLen > ncomponents) {
 826       prefixLen = ncomponents;
 827     }
 828     // Note that we use the fact that ncomponents and ends[] are shorts,
 829     // so we can write them as chars:
 830     out.append((char) prefixLen);
 831     if (prefixLen == 0) {
 832       return;
 833     }
 834     for (int i = 0; i < prefixLen; i++) {
 835       out.append((char) ends[i]);
 836     }
 837     int usedchars = ends[prefixLen - 1];
 838     for (int i = 0; i < usedchars; i++) {
 839       out.append(chars[i]);
 840     }
 841   }
 842
 843   /**
 844    * Set a CategoryPath from a character-sequence representation written by
 845    * {@link #serializeAppendTo(Appendable)}.
 846    * <P>
 847    * Reading starts at the given offset into the given character sequence, and
 848    * the offset right after the end of this path is returned.
 849    */
 850   public int setFromSerialized(CharSequence buffer, int offset) {
 851     ncomponents = (short) buffer.charAt(offset++);
 852     if (ncomponents == 0) {
 853       return offset;
 854     }
 855
 856     if (ncomponents >= ends.length) {
 857       ends = new short[Math.max(ends.length * 2, ncomponents)];
 858     }
 859     for (int i = 0; i < ncomponents; i++) {
 860       ends[i] = (short) buffer.charAt(offset++);
 861     }
 862
 863     int usedchars = ends[ncomponents - 1];
 864     if (usedchars > chars.length) {
 865       chars = new char[Math.max(chars.length * 2, usedchars)];
 866     }
 867     for (int i = 0; i < usedchars; i++) {
 868       chars[i] = buffer.charAt(offset++);
 869     }
 870
 871     return offset;
 872   }
 873
 874   /**
 875    * Check whether the current path is identical to the one serialized (with
 876    * {@link #serializeAppendTo(Appendable)}) in the given buffer, at the given
 877    * offset.
 878    */
 879   public boolean equalsToSerialized(CharSequence buffer, int offset) {
 880     int n = (short) buffer.charAt(offset++);
 881     if (ncomponents != n) {
 882       return false;
 883     }
 884     if (ncomponents == 0) {
 885       return true;
 886     }
 887     for (int i = 0; i < ncomponents; i++) {
 888       if (ends[i] != (short) buffer.charAt(offset++)) {
 889         return false;
 890       }
 891     }
 892     int usedchars = ends[ncomponents - 1];
 893     for (int i = 0; i < usedchars; i++) {
 894       if (chars[i] != buffer.charAt(offset++)) {
 895         return false;
 896       }
 897     }
 898     return true;
 899   }
 900
 901   /**
 902    * Just like {@link #equalsToSerialized(CharSequence, int)}, but compare to
 903    * a prefix of the CategoryPath, instead of the whole CategoryPath.
 904    */
 905   public boolean equalsToSerialized(int prefixLen, CharSequence buffer,
 906       int offset) {
 907     if (prefixLen < 0 || prefixLen > ncomponents) {
 908       prefixLen = ncomponents;
 909     }
 910     int n = (short) buffer.charAt(offset++);
 911     if (prefixLen != n) {
 912       return false;
 913     }
 914     if (prefixLen == 0) {
 915       return true;
 916     }
 917     for (int i = 0; i < prefixLen; i++) {
 918       if (ends[i] != (short) buffer.charAt(offset++)) {
 919         return false;
 920       }
 921     }
 922     int usedchars = ends[prefixLen - 1];
 923     for (int i = 0; i < usedchars; i++) {
 924       if (chars[i] != buffer.charAt(offset++)) {
 925         return false;
 926       }
 927     }
 928     return true;
 929   }
 930
 931   /**
 932    * This method calculates a hash function of a path that has been written to
 933    * (using {@link #serializeAppendTo(Appendable)}) a character buffer. It is
 934    * guaranteed that the value returned is identical to that which
 935    * {@link #hashCode()} would have produced for the original object before it
 936    * was serialized.
 937    */
 938   public static int hashCodeOfSerialized(CharSequence buffer, int offset) {
 939     // Note: the algorithm here must be identical to that of hashCode(),
 940     // in order that they produce identical results!
 941     int ncomponents = (short) buffer.charAt(offset++);
 942     if (ncomponents == 0) {
 943       return 0;
 944     }
 945     int hash = ncomponents;
 946     for (int i = 0; i < ncomponents; i++) {
 947       hash = hash * 31 + buffer.charAt(offset++);
 948     }
 949     int len = buffer.charAt(offset - 1);
 950     for (int i = 0; i < len; i++) {
 951       hash = hash * 31 + buffer.charAt(offset++);
 952     }
 953     return hash;
 954   }
 955
 956   /**
 957    * Serializes the content of this CategoryPath to a byte stream, using UTF-8
 958    * encoding to convert characters to bytes, and treating the ends as 16-bit
 959    * characters.
 960    *
 961    * @param osw
 962    *          The output byte stream.
 963    * @throws IOException
 964    *           If there are encoding errors.
 965    */
 966   // TODO (Facet): consolidate all de/serialize method names to
 967   // serialize() and unserialize()
 968   public void serializeToStreamWriter(OutputStreamWriter osw)
 969       throws IOException {
 970     osw.write(this.ncomponents);
 971     if (this.ncomponents <= 0) {
 972       return;
 973     }
 974     for (int j = 0; j < this.ncomponents; j++) {
 975       osw.write(this.ends[j]);
 976     }
 977     osw.write(this.chars, 0, this.ends[this.ncomponents - 1]);
 978   }
 979
 980   /**
 981    * Serializes the content of this CategoryPath to a byte stream, using UTF-8
 982    * encoding to convert characters to bytes, and treating the ends as 16-bit
 983    * characters.
 984    *
 985    * @param isr
 986    *            The input stream.
 987    * @throws IOException
 988    *             If there are encoding errors.
 989    */
 990   public void deserializeFromStreamReader(InputStreamReader isr)
 991       throws IOException {
 992     this.ncomponents = (short) isr.read();
 993     if (this.ncomponents <= 0) {
 994       return;
 995     }
 996     if (this.ends == null || this.ends.length < this.ncomponents) {
 997       this.ends = new short[this.ncomponents];
 998     }
 999     for (int j = 0; j < this.ncomponents; j++) {
1000       this.ends[j] = (short) isr.read();
1001     }
1002     if (this.chars == null
1003         || this.ends[this.ncomponents - 1] > chars.length) {
1004       this.chars = new char[this.ends[this.ncomponents - 1]];
1005     }
1006     isr.read(this.chars, 0, this.ends[this.ncomponents - 1]);
1007   }
1008
1009   private void writeObject(java.io.ObjectOutputStream out)
1010   throws IOException {
1011     OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8");
1012     this.serializeToStreamWriter(osw);
1013     osw.flush();
1014   }
1015
1016   private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
1017     InputStreamReader isr = new InputStreamReader(in, "UTF-8");
1018     this.deserializeFromStreamReader(isr);
1019   }
1020
1021   /**
1022    * Compares this CategoryPath with the other CategoryPath for lexicographic
1023    * order.
1024    * Returns a negative integer, zero, or a positive integer as this
1025    * CategoryPath lexicographically precedes, equals to, or lexicographically follows
1026    * the other CategoryPath.
1027    */
1028   public int compareTo(CategoryPath other) {
1029     int minlength = (this.length() < other.length()) ? this.length() : other.length();
1030     int ch = 0;
1031     for (int co = 0 ; co < minlength; co++) {
1032       if (this.ends[co] <= other.ends[co]) {
1033         for ( ; ch < this.ends[co] ; ch++) {
1034           if (this.chars[ch] != other.chars[ch]) {
1035             return this.chars[ch] - other.chars[ch];
1036           }
1037         }
1038         if (this.ends[co] < other.ends[co]) {
1039           return -1;
1040         }
1041       } else /* this.ends[co] > other.ends[co] */ {
1042         for ( ; ch < other.ends[co] ; ch++) {
1043           if (this.chars[ch] != other.chars[ch]) {
1044             return this.chars[ch] - other.chars[ch];
1045           }
1046         }
1047         return +1;
1048       }
1049     }
1050     // one is a prefix of the other
1051     return this.length() - other.length();
1052   }
1053 }