1 package org.apache.lucene.analysis;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
21 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
22 import org.apache.lucene.util.Version;
24 import java.io.StringReader;
25 import java.io.IOException;
26 import java.util.Iterator;
28 import java.util.HashSet;
/**
 * Tests for {@link StopAnalyzer}: checks that stop words are dropped from the
 * token stream and that position increments are reported as expected.
 */
30 public class TestStopAnalyzer extends BaseTokenStreamTestCase {
// Analyzer under test, built with the single-argument (version only) constructor.
32 private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);
// Terms that must never appear in the analyzer's output; filled in setUp()
// from StopAnalyzer.ENGLISH_STOP_WORDS_SET.
33 private Set<Object> inValidTokens = new HashSet<Object>();
/**
 * Test fixture setup: copies entries of StopAnalyzer.ENGLISH_STOP_WORDS_SET
 * into {@code inValidTokens} so the tests can check that none of them is emitted.
 */
36 public void setUp() throws Exception {
39 Iterator<?> it = StopAnalyzer.ENGLISH_STOP_WORDS_SET.iterator();
// NOTE(review): no loop header is visible around this call in the listing
// (numbering jumps 39 -> 41); as shown, only the first element would be added.
// Presumably a hasNext() loop drives it -- confirm against the full file.
41 inValidTokens.add(it.next());
/**
 * Runs the default analyzer ({@code stop}) over a fixed English sentence and
 * asserts that no emitted term is contained in {@code inValidTokens}.
 *
 * @throws IOException if the token stream fails while advancing
 */
45 public void testDefaults() throws IOException {
46 assertTrue(stop != null);
47 StringReader reader = new StringReader("This is a test of the english stop analyzer");
// "test" is the field name passed to the analyzer.
48 TokenStream stream = stop.tokenStream("test", reader);
49 assertTrue(stream != null);
// Attribute view through which the current token's text is read on each step.
50 CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
52 while (stream.incrementToken()) {
// Every surviving token must not be one of the collected stop words.
53 assertFalse(inValidTokens.contains(termAtt.toString()));
/**
 * Builds a StopAnalyzer with a custom stop set ("good", "test", "analyzer")
 * under Version.LUCENE_24 and asserts that none of those words is emitted and
 * that every emitted token reports a position increment of 1.
 *
 * @throws IOException if the token stream fails while advancing
 */
57 public void testStopList() throws IOException {
58 Set<Object> stopWordsSet = new HashSet<Object>();
59 stopWordsSet.add("good");
60 stopWordsSet.add("test");
61 stopWordsSet.add("analyzer");
// Explicitly pinned to the 2.4 compatibility version rather than TEST_VERSION_CURRENT.
62 StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
63 StringReader reader = new StringReader("This is a good test of the english stop analyzer");
64 TokenStream stream = newStop.tokenStream("test", reader);
65 assertNotNull(stream);
66 CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
// addAttribute (not getAttribute) is used here for the position increment.
67 PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
69 while (stream.incrementToken()) {
70 String text = termAtt.toString();
// No token from the custom stop set may survive.
71 assertFalse(stopWordsSet.contains(text));
72 assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
/**
 * Same custom stop set as testStopList ("good", "test", "analyzer"), but with
 * TEST_VERSION_CURRENT: asserts that stop words are dropped and that each
 * emitted token's position increment matches {@code expectedIncr} in order.
 *
 * @throws IOException if the token stream fails while advancing
 */
76 public void testStopListPositions() throws IOException {
77 Set<Object> stopWordsSet = new HashSet<Object>();
78 stopWordsSet.add("good");
79 stopWordsSet.add("test");
80 stopWordsSet.add("analyzer");
81 StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
82 StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
// One expected increment per surviving token. The values line up with the input:
// 3 where two consecutive stop words ("good test") precede the token, and
// 2 where one stop word ("analyzer") precedes it -- presumably the gap left by
// removed tokens is added to the next token's increment; confirm against StopAnalyzer.
83 int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
84 TokenStream stream = newStop.tokenStream("test", reader);
85 assertNotNull(stream);
// NOTE(review): the index variable `i` used below is not declared in the lines
// shown here (numbering jumps 85 -> 87) -- confirm its declaration and initial value.
87 CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
88 PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
90 while (stream.incrementToken()) {
91 String text = termAtt.toString();
// No token from the custom stop set may survive.
92 assertFalse(stopWordsSet.contains(text));
// Consume the next expected increment for this token.
93 assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());