lucene-java-3.4.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java

   1 package org.apache.lucene.facet.taxonomy;
   2
   3 import java.io.Closeable;
   4 import java.io.IOException;
   5 import java.util.Map;
   6
   7 import org.apache.lucene.index.IndexWriter;
   8
   9 /**
  10  * Licensed to the Apache Software Foundation (ASF) under one or more
  11  * contributor license agreements.  See the NOTICE file distributed with
  12  * this work for additional information regarding copyright ownership.
  13  * The ASF licenses this file to You under the Apache License, Version 2.0
  14  * (the "License"); you may not use this file except in compliance with
  15  * the License.  You may obtain a copy of the License at
  16  *
  17  *     http://www.apache.org/licenses/LICENSE-2.0
  18  *
  19  * Unless required by applicable law or agreed to in writing, software
  20  * distributed under the License is distributed on an "AS IS" BASIS,
  21  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22  * See the License for the specific language governing permissions and
  23  * limitations under the License.
  24  */
  25
  26 /**
  27  * TaxonomyWriter is the interface which the faceted-search library uses
  28  * to dynamically build the taxonomy at indexing time.
  29  * <P>
  30  * Notes about concurrent access to the taxonomy:
  31  * <P>
  32  * An implementation must allow multiple readers and a single writer to be
  33  * active concurrently. Readers follow so-called "point in time" semantics,
  34  * i.e., a reader object will only see taxonomy entries which were available
  35  * at the time it was created. What the writer writes is only available to
  36  * (new) readers after the writer's commit() is called.
  37  * <P>
  38  * Faceted search keeps two indices - namely Lucene's main index, and this
  39  * taxonomy index. When one or more readers are active concurrently with the
  40  * writer, care must be taken to avoid an inconsistency between the state of
  41  * these two indices: When writing to the indices, the taxonomy must always
  42  * be committed to disk *before* the main index, because the main index
  43  * refers to categories listed in the taxonomy.
  44  * Such control can best be achieved by turning off the main index's
  45  * "autocommit" feature, and explicitly calling commit() for both indices
  46  * (first for the taxonomy, then for the main index).
  47  * In old versions of Lucene (2.2 or earlier), when autocommit could not be
  48  * turned off, a more complicated solution needs to be used. E.g., use
  49  * some sort of (possibly inter-process) locking to ensure that a reader
  50  * is being opened only right after both indices have been flushed (and
  51  * before anything else is written to them).
  52  *
  53  * @lucene.experimental
  54  */
  55 public interface TaxonomyWriter extends Closeable {
  56
  57   /**
  58    * addCategory() adds a category with a given path name to the taxonomy,
  59    * and returns its ordinal. If the category was already present in
  60    * the taxonomy, its existing ordinal is returned.
  61    * <P>
  62    * Before adding a category, addCategory() makes sure that all its
  63    * ancestor categories exist in the taxonomy as well. As a result, the
  64    * ordinal of a category is guaranteed to be smaller than the ordinal of
  65    * any of its descendants.
  66    */
  67   public int addCategory(CategoryPath categoryPath) throws IOException;
  68
  69   /**
  70    * Calling commit() ensures that all the categories written so far are
  71    * visible to a reader that is opened (or reopened) after that call.
  72    * When the index is closed (via close()), commit() is also implicitly done.
  73    */
  74   public void commit() throws IOException;
  75
  76   /**
  77    * Like commit(), but also stores properties with the index. These properties
  78    * are retrievable by {@link TaxonomyReader#getCommitUserData}.
  79    * See {@link IndexWriter#commit(Map)}.
  80    */
  81   public void commit(Map<String,String> commitUserData) throws IOException;
  82
  83   /**
  84    * Prepares most of the work needed for a two-phase commit.
  85    * See {@link IndexWriter#prepareCommit}.
  86    */
  87   public void prepareCommit() throws IOException;
  88
  89   /**
  90    * Like {@link #prepareCommit()}, and also prepares to store user data
  91    * with the index. See {@link IndexWriter#prepareCommit(Map)}.
  92    */
  93   public void prepareCommit(Map<String,String> commitUserData) throws IOException;
  94
  95   /**
  96    * getParent() returns the ordinal of the parent category of the category
  97    * with the given ordinal.
  98    * <P>
  99    * When a category is specified as a path name, finding the path of its
 100    * parent is as trivial as dropping the last component of the path.
 101    * getParent() is functionally equivalent to calling getPath() on the
 102    * given ordinal, dropping the last component of the path, and then calling
 103    * getOrdinal() to get an ordinal back.
 104    * <P>
 105    * If the given ordinal is the ROOT_ORDINAL, an INVALID_ORDINAL is returned.
 106    * If the given ordinal is a top-level category, the ROOT_ORDINAL is returned.
 107    * If an invalid ordinal is given (negative or beyond the last available
 108    * ordinal), an ArrayIndexOutOfBoundsException is thrown. However, it is
 109    * expected that getParent will only be called for ordinals which are
 110    * already known to be in the taxonomy.
 111    * <P>
 112    * TODO (Facet): instead of a getParent(ordinal) method, consider having a
 113    * getCategory(categorypath, prefixlen) which is similar to addCategory
 114    * except it doesn't add new categories. This method can be used to get
 115    * the ordinals of all prefixes of the given category, and it can use
 116    * exactly the same code and cache used by addCategory() so it means less code.
 117    */
 118   public int getParent(int ordinal) throws IOException;
 119
 120   /**
 121    * getSize() returns the number of categories in the taxonomy.
 122    * <P>
 123    * Because categories are numbered consecutively starting with 0, it
 124    * means the taxonomy contains ordinals 0 through getSize()-1.
 125    * <P>
 126    * Note that the number returned by getSize() is often slightly higher
 127    * than the number of categories inserted into the taxonomy; this is
 128    * because when a category is added to the taxonomy, its ancestors
 129    * are also added automatically (including the root, which always gets
 130    * ordinal 0).
 131    */
 132   public int getSize();
 133
 134 }