1 package org.apache.lucene.facet.index.streaming;
3 import java.io.IOException;
4 import java.util.ArrayList;
5 import java.util.HashSet;
9 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
10 import org.apache.lucene.store.Directory;
11 import org.junit.Test;
13 import org.apache.lucene.facet.index.CategoryContainerTestBase;
14 import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
15 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
16 import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
17 import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
18 import org.apache.lucene.facet.taxonomy.CategoryPath;
19 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
20 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
23 * Licensed to the Apache Software Foundation (ASF) under one or more
24 * contributor license agreements. See the NOTICE file distributed with
25 * this work for additional information regarding copyright ownership.
26 * The ASF licenses this file to You under the Apache License, Version 2.0
27 * (the "License"); you may not use this file except in compliance with
28 * the License. You may obtain a copy of the License at
30 * http://www.apache.org/licenses/LICENSE-2.0
32 * Unless required by applicable law or agreed to in writing, software
33 * distributed under the License is distributed on an "AS IS" BASIS,
34 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35 * See the License for the specific language governing permissions and
36 * limitations under the License.
39 public class CategoryTokenizerTest extends CategoryContainerTestBase {
42 * Verifies that a {@link CategoryTokenizer} adds the correct
43 * {@link CharTermAttribute}s to a {@link CategoryAttributesStream}.
48 public void testTokensDefaultParams() throws IOException {
49 Directory directory = newDirectory();
50 TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
52 DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
53 CategoryTokenizer tokenizer = new CategoryTokenizer(
54 new CategoryAttributesStream(categoryContainer),
57 // build the set of expected terms: each initial category rendered with the facet delimiter char
58 Set<String> categoryTerms = new HashSet<String>();
59 for (int i = 0; i < initialCatgeories.length; i++) {
60 categoryTerms.add(initialCatgeories[i]
61 .toString(indexingParams.getFacetDelimChar()));
65 for (nTokens = 0; tokenizer.incrementToken(); nTokens++) {
66 if (!categoryTerms.remove(tokenizer.termAttribute.toString())) {
67 fail("Unexpected term: " + tokenizer.termAttribute.toString());
70 assertTrue("all category terms should have been found", categoryTerms
73 // exactly 3 tokens are expected, per the assertion below (NOTE(review): this comment used to say 6, "all categories and parents" - presumably stale; confirm against initialCatgeories)
74 assertEquals("Wrong number of tokens", 3, nTokens);
76 taxonomyWriter.close();
81 * Verifies that {@link CategoryTokenizer} elongates the buffer in
82 * {@link CharTermAttribute} for long categories.
87 public void testLongCategoryPath() throws IOException {
88 Directory directory = newDirectory();
89 TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
92 List<CategoryPath> longCategory = new ArrayList<CategoryPath>();
93 longCategory.add(new CategoryPath("one", "two", "three", "four",
94 "five", "six", "seven"));
96 DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
97 CategoryTokenizer tokenizer = new CategoryTokenizer(
98 new CategoryAttributesStream(new CategoryAttributesIterable(
99 longCategory)), indexingParams);
101 // the single term expected from the tokenizer: the long path rendered with the facet delimiter char
102 String categoryTerm = longCategory.get(0).toString(
103 indexingParams.getFacetDelimChar());
105 assertTrue("Missing token", tokenizer.incrementToken());
106 if (!categoryTerm.equals(tokenizer.termAttribute.toString())) {
107 fail("Unexpected term: " + tokenizer.termAttribute.toString());
110 assertFalse("Unexpected token", tokenizer.incrementToken());
112 taxonomyWriter.close();