X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java diff --git a/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java new file mode 100644 index 0000000..2b5865e --- /dev/null +++ b/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java @@ -0,0 +1,131 @@ +package org.apache.lucene.analysis; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.English; +import org.apache.lucene.util.Version; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; +import java.util.HashSet; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +public class TestStopFilter extends BaseTokenStreamTestCase { + + // other StopFilter functionality is already tested by TestStopAnalyzer + + public void testExactCase() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + Set stopWords = new HashSet(Arrays.asList("is", "the", "Time")); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false); + assertTokenStreamContents(stream, new String[] { "Now", "The" }); + } + + public void testIgnoreCase() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + Set stopWords = new HashSet(Arrays.asList( "is", "the", "Time" )); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true); + assertTokenStreamContents(stream, new String[] { "Now" }); + } + + public void testStopFilt() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + String[] stopWords = new String[] { "is", "the", "Time" }; + Set stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet); + assertTokenStreamContents(stream, new String[] { "Now", "The" }); + } + + /** + * Test Position increments applied by StopFilter with and without enabling this option. + */ + public void testStopPositons() throws IOException { + StringBuilder sb = new StringBuilder(); + ArrayList a = new ArrayList(); + for (int i=0; i<20; i++) { + String w = English.intToEnglish(i).trim(); + sb.append(w).append(" "); + if (i%3 != 0) a.add(w); + } + log(sb.toString()); + String stopWords[] = a.toArray(new String[0]); + for (int i=0; i stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); + // with increments + StringReader reader = new StringReader(sb.toString()); + StopFilter stpf = new StopFilter(Version.LUCENE_24, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet); + doTestStopPositons(stpf,true); + // without increments + reader = new StringReader(sb.toString()); + stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet); + doTestStopPositons(stpf,false); + // with increments, concatenating two stop filters + ArrayList a0 = new ArrayList(); + ArrayList a1 = new ArrayList(); + for (int i=0; i stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0); + Set stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1); + reader = new StringReader(sb.toString()); + StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set + stpf0.setEnablePositionIncrements(true); + StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated! + doTestStopPositons(stpf01,true); + } + + private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException { + log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled")); + stpf.setEnablePositionIncrements(enableIcrements); + CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class); + stpf.reset(); + for (int i=0; i<20; i+=3) { + assertTrue(stpf.incrementToken()); + log("Token "+i+": "+stpf); + String w = English.intToEnglish(i).trim(); + assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString()); + assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement()); + } + assertFalse(stpf.incrementToken()); + stpf.end(); + stpf.close(); + } + + // print debug info depending on VERBOSE + private static void log(String s) { + if (VERBOSE) { + System.out.println(s); + } + } +}