lucene-java-3.4.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/index/streaming/CategoryTokenizerTest.java

   1 package org.apache.lucene.facet.index.streaming;
   2
   3 import java.io.IOException;
   4 import java.util.ArrayList;
   5 import java.util.HashSet;
   6 import java.util.List;
   7 import java.util.Set;
   8
   9 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  10 import org.apache.lucene.store.Directory;
  11 import org.junit.Test;
  12
  13 import org.apache.lucene.facet.index.CategoryContainerTestBase;
  14 import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
  15 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
  16 import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
  17 import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
  18 import org.apache.lucene.facet.taxonomy.CategoryPath;
  19 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
  20 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
  21
  22 /**
  23  * Licensed to the Apache Software Foundation (ASF) under one or more
  24  * contributor license agreements.  See the NOTICE file distributed with
  25  * this work for additional information regarding copyright ownership.
  26  * The ASF licenses this file to You under the Apache License, Version 2.0
  27  * (the "License"); you may not use this file except in compliance with
  28  * the License.  You may obtain a copy of the License at
  29  *
  30  *     http://www.apache.org/licenses/LICENSE-2.0
  31  *
  32  * Unless required by applicable law or agreed to in writing, software
  33  * distributed under the License is distributed on an "AS IS" BASIS,
  34  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  35  * See the License for the specific language governing permissions and
  36  * limitations under the License.
  37  */
  38
  39 public class CategoryTokenizerTest extends CategoryContainerTestBase {
  40
  41   /**
  42    * Verifies that a {@link CategoryTokenizer} adds the correct
  43    * {@link CharTermAttribute}s to a {@link CategoryAttributesStream}.
  44    *
  45    * @throws IOException
  46    */
  47   @Test
  48   public void testTokensDefaultParams() throws IOException {
  49     Directory directory = newDirectory();
  50     TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
  51         directory);
  52     DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
  53     CategoryTokenizer tokenizer = new CategoryTokenizer(
  54         new CategoryAttributesStream(categoryContainer),
  55         indexingParams);
  56
  57     // count the number of tokens
  58     Set<String> categoryTerms = new HashSet<String>();
  59     for (int i = 0; i < initialCatgeories.length; i++) {
  60       categoryTerms.add(initialCatgeories[i]
  61           .toString(indexingParams.getFacetDelimChar()));
  62     }
  63
  64     int nTokens;
  65     for (nTokens = 0; tokenizer.incrementToken(); nTokens++) {
  66       if (!categoryTerms.remove(tokenizer.termAttribute.toString())) {
  67         fail("Unexpected term: " + tokenizer.termAttribute.toString());
  68       }
  69     }
  70     assertTrue("all category terms should have been found", categoryTerms
  71         .isEmpty());
  72
  73     // should be 6 - all categories and parents
  74     assertEquals("Wrong number of tokens", 3, nTokens);
  75
  76     taxonomyWriter.close();
  77     directory.close();
  78   }
  79
  80   /**
  81    * Verifies that {@link CategoryTokenizer} elongates the buffer in
  82    * {@link CharTermAttribute} for long categories.
  83    *
  84    * @throws IOException
  85    */
  86   @Test
  87   public void testLongCategoryPath() throws IOException {
  88     Directory directory = newDirectory();
  89     TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
  90         directory);
  91
  92     List<CategoryPath> longCategory = new ArrayList<CategoryPath>();
  93     longCategory.add(new CategoryPath("one", "two", "three", "four",
  94         "five", "six", "seven"));
  95
  96     DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
  97     CategoryTokenizer tokenizer = new CategoryTokenizer(
  98         new CategoryAttributesStream(new CategoryAttributesIterable(
  99             longCategory)), indexingParams);
 100
 101     // count the number of tokens
 102     String categoryTerm = longCategory.get(0).toString(
 103         indexingParams.getFacetDelimChar());
 104
 105     assertTrue("Missing token", tokenizer.incrementToken());
 106     if (!categoryTerm.equals(tokenizer.termAttribute.toString())) {
 107       fail("Unexpected term: " + tokenizer.termAttribute.toString());
 108     }
 109
 110     assertFalse("Unexpected token", tokenizer.incrementToken());
 111
 112     taxonomyWriter.close();
 113     directory.close();
 114   }
 115 }