lucene-java-3.5.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java

   1 package org.apache.lucene.analysis.ngram;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.analysis.MockTokenizer;
  21 import org.apache.lucene.analysis.TokenStream;
  22 import org.apache.lucene.analysis.WhitespaceTokenizer;
  23 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  24
  25 import java.io.StringReader;
  26
  27 /**
  28  * Tests {@link EdgeNGramTokenFilter} for correctness.
  29  */
  30 public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
  31   private TokenStream input;
  32
  33   @Override
  34   public void setUp() throws Exception {
  35     super.setUp();
  36     input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
  37   }
  38
  39   public void testInvalidInput() throws Exception {
  40     boolean gotException = false;
  41     try {
  42       new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 0, 0);
  43     } catch (IllegalArgumentException e) {
  44       gotException = true;
  45     }
  46     assertTrue(gotException);
  47   }
  48
  49   public void testInvalidInput2() throws Exception {
  50     boolean gotException = false;
  51     try {
  52       new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 2, 1);
  53     } catch (IllegalArgumentException e) {
  54       gotException = true;
  55     }
  56     assertTrue(gotException);
  57   }
  58
  59   public void testInvalidInput3() throws Exception {
  60     boolean gotException = false;
  61     try {
  62       new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, -1, 2);
  63     } catch (IllegalArgumentException e) {
  64       gotException = true;
  65     }
  66     assertTrue(gotException);
  67   }
  68
  69   public void testFrontUnigram() throws Exception {
  70     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
  71     assertTokenStreamContents(tokenizer, new String[]{"a"}, new int[]{0}, new int[]{1});
  72   }
  73
  74   public void testBackUnigram() throws Exception {
  75     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
  76     assertTokenStreamContents(tokenizer, new String[]{"e"}, new int[]{4}, new int[]{5});
  77   }
  78
  79   public void testOversizedNgrams() throws Exception {
  80     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
  81     assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0]);
  82   }
  83
  84   public void testFrontRangeOfNgrams() throws Exception {
  85     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
  86     assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
  87   }
  88
  89   public void testBackRangeOfNgrams() throws Exception {
  90     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
  91     assertTokenStreamContents(tokenizer, new String[]{"e","de","cde"}, new int[]{4,3,2}, new int[]{5,5,5});
  92   }
  93
  94   public void testSmallTokenInStream() throws Exception {
  95     input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
  96     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
  97     assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
  98   }
  99
 100   public void testReset() throws Exception {
 101     WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
 102     EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
 103     assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
 104     tokenizer.reset(new StringReader("abcde"));
 105     assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
 106   }
 107 }