1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from unittest import TestCase, main
class StopAnalyzerTestCase(TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def setUp(self):
        """Create the default analyzer and remember its stop-word set."""
        self.stop = StopAnalyzer(Version.LUCENE_CURRENT)
        self.invalidTokens = StopAnalyzer.ENGLISH_STOP_WORDS_SET

    def _buildStopSet(self, words):
        """Return a new HashSet containing every string in *words*."""
        stopSet = HashSet()
        for word in words:
            stopSet.add(word)
        return stopSet

    def testDefaults(self):
        """No term emitted by the default analyzer is in its stop-word set."""
        self.assertTrue(self.stop is not None)
        reader = StringReader("This is a test of the english stop analyzer")
        stream = self.stop.tokenStream("test", reader)
        self.assertTrue(stream is not None)

        termAtt = stream.getAttribute(TermAttribute.class_)

        while stream.incrementToken():
            self.assertTrue(termAtt.term() not in self.invalidTokens)

    def testStopList(self):
        """A custom stop set is honoured; every position increment is 1."""
        stopWordsSet = self._buildStopSet(["good", "test", "analyzer"])

        newStop = StopAnalyzer(Version.LUCENE_24, stopWordsSet)
        reader = StringReader("This is a good test of the english stop analyzer")
        stream = newStop.tokenStream("test", reader)
        self.assertTrue(stream is not None)

        termAtt = stream.getAttribute(TermAttribute.class_)
        posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)

        while stream.incrementToken():
            text = termAtt.term()
            self.assertTrue(text not in stopWordsSet)
            # by default stop tokenizer does not apply increments.
            self.assertEqual(1, posIncrAtt.getPositionIncrement())

    def testStopListPositions(self):
        """A custom stop set is honoured and increments match expectedIncr."""
        stopWordsSet = self._buildStopSet(["good", "test", "analyzer"])

        newStop = StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet)
        reader = StringReader("This is a good test of the english stop analyzer with positions")
        # One entry per token the stream emits, in emission order.
        expectedIncr = [1, 1, 1, 3, 1, 1, 1, 2, 1]
        stream = newStop.tokenStream("test", reader)
        self.assertTrue(stream is not None)

        termAtt = stream.getAttribute(TermAttribute.class_)
        posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)

        # Index of the current token within expectedIncr.
        i = 0
        while stream.incrementToken():
            text = termAtt.term()
            self.assertTrue(text not in stopWordsSet)
            self.assertEqual(expectedIncr[i],
                             posIncrAtt.getPositionIncrement())
            i += 1
87 if __name__ == "__main__":
90 if '-loop' in sys.argv:
91 sys.argv.remove('-loop')