# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
from unittest import TestCase, main
from lucene import *  # TermAttribute, OffsetAttribute, StringReader, JArray, etc.


class BaseTokenStreamTestCase(TestCase):
    """
    Base class for all Lucene unit tests that use TokenStreams.
    """

    def _assertTokenStreamContents(self, ts, output,
                                   startOffsets=None, endOffsets=None,
                                   types=None, posIncrements=None):

        self.assert_(output is not None)
        self.assert_(ts.hasAttribute(TermAttribute.class_),
                     "has TermAttribute")
        termAtt = ts.getAttribute(TermAttribute.class_)

        offsetAtt = None
        if startOffsets is not None or endOffsets is not None:
            self.assert_(ts.hasAttribute(OffsetAttribute.class_),
                         "has OffsetAttribute")
            offsetAtt = ts.getAttribute(OffsetAttribute.class_)

        typeAtt = None
        if types is not None:
            self.assert_(ts.hasAttribute(TypeAttribute.class_),
                         "has TypeAttribute")
            typeAtt = ts.getAttribute(TypeAttribute.class_)

        posIncrAtt = None
        if posIncrements is not None:
            self.assert_(ts.hasAttribute(PositionIncrementAttribute.class_),
                         "has PositionIncrementAttribute")
            posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class_)

        ts.reset()
        for i in xrange(len(output)):
            # extra safety to enforce that the state is not preserved
            # and also to assign bogus values
            ts.clearAttributes()
            termAtt.setTermBuffer("bogusTerm")
            if offsetAtt is not None:
                offsetAtt.setOffset(14584724, 24683243)
            if typeAtt is not None:
                typeAtt.setType("bogusType")
            if posIncrAtt is not None:
                posIncrAtt.setPositionIncrement(45987657)

            self.assert_(ts.incrementToken(), "token %d exists" %(i))
            self.assertEqual(output[i], termAtt.term(), "term %d" %(i))
            if startOffsets is not None:
                self.assertEqual(startOffsets[i], offsetAtt.startOffset(),
                                 "startOffset %d" %(i))
            if endOffsets is not None:
                self.assertEqual(endOffsets[i], offsetAtt.endOffset(),
                                 "endOffset %d" %(i))
            if types is not None:
                self.assertEqual(types[i], typeAtt.type(), "type %d" %(i))
            if posIncrements is not None:
                self.assertEqual(posIncrements[i],
                                 posIncrAtt.getPositionIncrement(),
                                 "posIncrement %d" %(i))

        self.assert_(not ts.incrementToken(), "end of stream")
        ts.end()
        ts.close()

    def _assertAnalyzesTo(self, a, input, output,
                          startOffsets=None, endOffsets=None,
                          types=None, posIncrements=None):

        ts = a.tokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        types, posIncrements)

    def _assertAnalyzesToReuse(self, a, input, output,
                               startOffsets=None, endOffsets=None,
                               types=None, posIncrements=None):

        ts = a.reusableTokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        types, posIncrements)

    # simple utility method for testing stemmers
    def _checkOneTerm(self, a, input, expected):
        self._assertAnalyzesTo(a, input, JArray('string')(expected))

    def _checkOneTermReuse(self, a, input, expected):
        self._assertAnalyzesToReuse(a, input, JArray('string')(expected))
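

# A minimal usage sketch (illustrative only, with hypothetical class and test
# names): a concrete test module would subclass BaseTokenStreamTestCase and
# call the helpers above, e.g. against WhitespaceAnalyzer. Depending on the
# PyLucene version, initVM() may need a classpath argument.
#
#     class WhitespaceAnalyzerTestCase(BaseTokenStreamTestCase):
#
#         def testSimpleTokens(self):
#             self._assertAnalyzesTo(WhitespaceAnalyzer(), "foo bar",
#                                    ["foo", "bar"],
#                                    startOffsets=[0, 4],
#                                    endOffsets=[3, 7])
#
#     if __name__ == "__main__":
#         initVM()
#         main()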