pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / facet / src / java / org / apache / lucene / facet / index / streaming / CountingListTokenizer.java
diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/index/streaming/CountingListTokenizer.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/index/streaming/CountingListTokenizer.java
new file mode 100644 (file)
index 0000000..84df4c2
--- /dev/null
@@ -0,0 +1,125 @@
+package org.apache.lucene.facet.index.streaming;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.lucene.analysis.TokenStream;
+
+import org.apache.lucene.facet.index.CategoryListPayloadStream;
+import org.apache.lucene.facet.index.attributes.OrdinalProperty;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.util.PartitionsUtils;
+import org.apache.lucene.util.encoding.IntEncoder;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * {@link CategoryListTokenizer} for facet counting
+ * 
+ * @lucene.experimental
+ */
+public class CountingListTokenizer extends CategoryListTokenizer {
+
+  /** A table for retrieving payload streams by category-list name. */
+  protected HashMap<String, CategoryListPayloadStream> payloadStreamsByName = 
+    new HashMap<String, CategoryListPayloadStream>();
+
+  /** An iterator over the payload streams */
+  protected Iterator<Entry<String, CategoryListPayloadStream>> payloadStreamIterator;
+
+  public CountingListTokenizer(TokenStream input,
+      FacetIndexingParams indexingParams) {
+    super(input, indexingParams);
+    this.payloadStreamsByName = new HashMap<String, CategoryListPayloadStream>();
+  }
+
+  @Override
+  protected void handleStartOfInput() throws IOException {
+    payloadStreamsByName.clear();
+    payloadStreamIterator = null;
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (this.categoryAttribute != null) {
+        OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute
+            .getProperty(OrdinalProperty.class);
+        if (ordinalProperty != null && legalCategory()) {
+          CategoryPath categoryPath = this.categoryAttribute
+              .getCategoryPath();
+          int ordinal = ordinalProperty.getOrdinal();
+          CategoryListPayloadStream payloadStream = getPayloadStream(
+              categoryPath, ordinal);
+          int partitionSize = indexingParams.getPartitionSize();
+          payloadStream.appendIntToStream(ordinal % partitionSize);
+        }
+      }
+      return true;
+    }
+    if (this.payloadStreamIterator == null) {
+      this.handleEndOfInput();
+      this.payloadStreamIterator = this.payloadStreamsByName.entrySet()
+          .iterator();
+    }
+    if (this.payloadStreamIterator.hasNext()) {
+      Entry<String, CategoryListPayloadStream> entry = this.payloadStreamIterator
+          .next();
+      String countingListName = entry.getKey();
+      int length = countingListName.length();
+      this.termAttribute.resizeBuffer(length);
+      countingListName.getChars(0, length, termAttribute.buffer(), 0);
+      this.termAttribute.setLength(length);
+      CategoryListPayloadStream payloadStream = entry.getValue();
+      payload.setData(payloadStream.convertStreamToByteArray());
+      this.payloadAttribute.setPayload(payload);
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * A method which allows extending classes to filter the categories going
+   * into the counting list.
+   * 
+   * @return By default returns {@code true}, meaning the current category is
+   *         to be part of the counting list. For categories that should be
+   *         filtered, return {@code false}.
+   */
+  protected boolean legalCategory() {
+    return true;
+  }
+
+  protected CategoryListPayloadStream getPayloadStream(
+      CategoryPath categoryPath, int ordinal) throws IOException {
+    CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath);
+    String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal); 
+    CategoryListPayloadStream fps = payloadStreamsByName.get(name);
+    if (fps == null) {
+      IntEncoder encoder = clParams.createEncoder();
+      fps = new CategoryListPayloadStream(encoder);
+      payloadStreamsByName.put(name, fps);
+    }
+    return fps;
+  }
+
+}