lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java

   1 package org.apache.lucene.analysis;
   2
   3 import java.io.IOException;
   4 import java.io.StringReader;
   5 import java.util.Arrays;
   6 import java.util.HashSet;
   7 import java.util.Locale;
   8 import java.util.Set;
   9
  10 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
  11 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  12 import org.junit.Test;
  13
  14 /**
  15  * Licensed to the Apache Software Foundation (ASF) under one or more
  16  * contributor license agreements.  See the NOTICE file distributed with
  17  * this work for additional information regarding copyright ownership.
  18  * The ASF licenses this file to You under the Apache License, Version 2.0
  19  * (the "License"); you may not use this file except in compliance with
  20  * the License.  You may obtain a copy of the License at
  21  *
  22  *     http://www.apache.org/licenses/LICENSE-2.0
  23  *
  24  * Unless required by applicable law or agreed to in writing, software
  25  * distributed under the License is distributed on an "AS IS" BASIS,
  26  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  27  * See the License for the specific language governing permissions and
  28  * limitations under the License.
  29  */
  30
  31 /**
  32  * Testcase for {@link KeywordMarkerFilter}
  33  */
  34 public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
  35
  36   @Test
  37   public void testIncrementToken() throws IOException {
  38     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
  39     set.add("lucenefox");
  40     String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
  41         "jumps" };
  42     assertTokenStreamContents(new LowerCaseFilterMock(
  43         new KeywordMarkerFilter(new MockTokenizer(new StringReader(
  44             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
  45     Set<String> jdkSet = new HashSet<String>();
  46     jdkSet.add("LuceneFox");
  47     assertTokenStreamContents(new LowerCaseFilterMock(
  48         new KeywordMarkerFilter(new MockTokenizer(new StringReader(
  49             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), jdkSet)), output);
  50     Set<?> set2 = set;
  51     assertTokenStreamContents(new LowerCaseFilterMock(
  52         new KeywordMarkerFilter(new MockTokenizer(new StringReader(
  53             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
  54   }
  55
  56   // LUCENE-2901
  57   public void testComposition() throws Exception {
  58     TokenStream ts = new LowerCaseFilterMock(
  59                      new KeywordMarkerFilter(
  60                      new KeywordMarkerFilter(
  61                      new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
  62                      new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))),
  63                      new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
  64
  65     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
  66   }
  67
  68   public static final class LowerCaseFilterMock extends TokenFilter {
  69
  70     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  71     private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
  72
  73     public LowerCaseFilterMock(TokenStream in) {
  74       super(in);
  75     }
  76
  77     @Override
  78     public boolean incrementToken() throws IOException {
  79       if (input.incrementToken()) {
  80         if (!keywordAttr.isKeyword()) {
  81           final String term = termAtt.toString().toLowerCase(Locale.ENGLISH);
  82           termAtt.setEmpty().append(term);
  83         }
  84         return true;
  85       }
  86       return false;
  87     }
  88
  89   }
  90 }