lucene-java-3.4.0/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java

   1 package org.apache.lucene.analysis;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  21 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  22 import org.apache.lucene.util.Version;
  23
  24 import java.io.StringReader;
  25 import java.io.IOException;
  26 import java.util.Iterator;
  27 import java.util.Set;
  28 import java.util.HashSet;
  29
  30 public class TestStopAnalyzer extends BaseTokenStreamTestCase {
  31
  32   private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);
  33   private Set<Object> inValidTokens = new HashSet<Object>();
  34
  35   @Override
  36   public void setUp() throws Exception {
  37     super.setUp();
  38
  39     Iterator<?> it = StopAnalyzer.ENGLISH_STOP_WORDS_SET.iterator();
  40     while(it.hasNext()) {
  41       inValidTokens.add(it.next());
  42     }
  43   }
  44
  45   public void testDefaults() throws IOException {
  46     assertTrue(stop != null);
  47     StringReader reader = new StringReader("This is a test of the english stop analyzer");
  48     TokenStream stream = stop.tokenStream("test", reader);
  49     assertTrue(stream != null);
  50     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  51
  52     while (stream.incrementToken()) {
  53       assertFalse(inValidTokens.contains(termAtt.toString()));
  54     }
  55   }
  56
  57   public void testStopList() throws IOException {
  58     Set<Object> stopWordsSet = new HashSet<Object>();
  59     stopWordsSet.add("good");
  60     stopWordsSet.add("test");
  61     stopWordsSet.add("analyzer");
  62     StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
  63     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
  64     TokenStream stream = newStop.tokenStream("test", reader);
  65     assertNotNull(stream);
  66     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  67     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
  68
  69     while (stream.incrementToken()) {
  70       String text = termAtt.toString();
  71       assertFalse(stopWordsSet.contains(text));
  72       assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
  73     }
  74   }
  75
  76   public void testStopListPositions() throws IOException {
  77     Set<Object> stopWordsSet = new HashSet<Object>();
  78     stopWordsSet.add("good");
  79     stopWordsSet.add("test");
  80     stopWordsSet.add("analyzer");
  81     StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
  82     StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
  83     int expectedIncr[] =                  { 1,   1, 1,          3, 1,  1,      1,            2,   1};
  84     TokenStream stream = newStop.tokenStream("test", reader);
  85     assertNotNull(stream);
  86     int i = 0;
  87     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  88     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
  89
  90     while (stream.incrementToken()) {
  91       String text = termAtt.toString();
  92       assertFalse(stopWordsSet.contains(text));
  93       assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
  94     }
  95   }
  96
  97 }