pylucene 3.5.0-3
diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/index/streaming/CategoryTokenizerTest.java b/lucene-java-3.5.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/index/streaming/CategoryTokenizerTest.java
new file mode 100644 (file)
index 0000000..8d52ab1
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/index/streaming/CategoryTokenizerTest.java
@@ -0,0 +1,118 @@
+package org.apache.lucene.facet.index.streaming;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.store.Directory;
+import org.junit.Test;
+
+import org.apache.lucene.facet.index.CategoryContainerTestBase;
+import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
+import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class CategoryTokenizerTest extends CategoryContainerTestBase {
+
+  /**
+   * Verifies that a {@link CategoryTokenizer} produces the correct
+   * {@link CharTermAttribute} term for each category in a {@link CategoryAttributesStream}.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testTokensDefaultParams() throws IOException {
+    Directory directory = newDirectory();
+    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
+        directory);
+    DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
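+    // The tokenizer wraps the CategoryAttributesStream and emits one token per
+    // category, with the term set to the category path joined by the facet delimiter.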
+    CategoryTokenizer tokenizer = new CategoryTokenizer(
+        new CategoryAttributesStream(categoryContainer),
+        indexingParams);
+
+    // build the set of expected category terms (path components joined by the facet delimiter)
+    Set<String> categoryTerms = new HashSet<String>();
+    for (int i = 0; i < initialCatgeories.length; i++) {
+      categoryTerms.add(initialCatgeories[i]
+          .toString(indexingParams.getFacetDelimChar()));
+    }
+
+    int nTokens;
+    for (nTokens = 0; tokenizer.incrementToken(); nTokens++) {
+      if (!categoryTerms.remove(tokenizer.termAttribute.toString())) {
+        fail("Unexpected term: " + tokenizer.termAttribute.toString());
+      }
+    }
+    assertTrue("all category terms should have been found", categoryTerms
+        .isEmpty());
+
+    // should be 3 - one token per initial category; CategoryTokenizer itself does not add parents
+    assertEquals("Wrong number of tokens", 3, nTokens);
+
+    taxonomyWriter.close();
+    directory.close();
+  }
+
+  /**
+   * Verifies that {@link CategoryTokenizer} grows the term buffer of its
+   * {@link CharTermAttribute} as needed for long category paths.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testLongCategoryPath() throws IOException {
+    Directory directory = newDirectory();
+    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
+        directory);
+
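+    // a category path long enough to force the CharTermAttribute term buffer to grow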
+    List<CategoryPath> longCategory = new ArrayList<CategoryPath>();
+    longCategory.add(new CategoryPath("one", "two", "three", "four",
+        "five", "six", "seven"));
+
+    DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
+    CategoryTokenizer tokenizer = new CategoryTokenizer(
+        new CategoryAttributesStream(new CategoryAttributesIterable(
+            longCategory)), indexingParams);
+
+    // the expected term: the full category path joined by the facet delimiter
+    String categoryTerm = longCategory.get(0).toString(
+        indexingParams.getFacetDelimChar());
+
+    assertTrue("Missing token", tokenizer.incrementToken());
+    if (!categoryTerm.equals(tokenizer.termAttribute.toString())) {
+      fail("Unexpected term: " + tokenizer.termAttribute.toString());
+    }
+
+    assertFalse("Unexpected token", tokenizer.incrementToken());
+
+    taxonomyWriter.close();
+    directory.close();
+  }
+}