1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from unittest import main
16 from BaseTokenStreamTestCase import BaseTokenStreamTestCase
20 class AnalyzersTestCase(BaseTokenStreamTestCase):
22 Unit tests ported from Java Lucene
# Each check below hands an analyzer `a`, an input string and the expected
# list of token strings to self._assertAnalyzesTo.
#
# First group: every expected token is lower-case and free of punctuation,
# even where the input mixes case or separates words with '.' or '<>'.
# NOTE(review): presumably `a` is a lower-casing, letter-splitting analyzer
# here -- confirm against the statement that constructs it.
28 self._assertAnalyzesTo(a, "foo bar FOO BAR",
29 [ "foo", "bar", "foo", "bar" ])
30 self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
31 [ "foo", "bar", "foo", "bar" ])
32 self._assertAnalyzesTo(a, "foo.bar.FOO.BAR",
33 [ "foo", "bar", "foo", "bar" ])
34 self._assertAnalyzesTo(a, "U.S.A.",
36 self._assertAnalyzesTo(a, "C++",
38 self._assertAnalyzesTo(a, "B2B",
40 self._assertAnalyzesTo(a, "2B",
42 self._assertAnalyzesTo(a, "\"QUOTED\" word",
# WhitespaceAnalyzer group: the expected tokens keep their original case and
# punctuation (".", "<>", surrounding quotes); "foo.bar.FOO.BAR" stays a
# single token, so only whitespace separates tokens in these cases.
48 a = WhitespaceAnalyzer()
49 self._assertAnalyzesTo(a, "foo bar FOO BAR",
50 [ "foo", "bar", "FOO", "BAR" ])
51 self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
52 [ "foo", "bar", ".", "FOO", "<>", "BAR" ])
53 self._assertAnalyzesTo(a, "foo.bar.FOO.BAR",
54 [ "foo.bar.FOO.BAR" ])
55 self._assertAnalyzesTo(a, "U.S.A.",
57 self._assertAnalyzesTo(a, "C++",
59 self._assertAnalyzesTo(a, "B2B",
61 self._assertAnalyzesTo(a, "2B",
63 self._assertAnalyzesTo(a, "\"QUOTED\" word",
64 [ "\"QUOTED\"", "word" ])
# StopAnalyzer group: the expected output is lower-cased, and the words
# "a", "such" and "THESE" from the second input do not appear in it.
68 a = StopAnalyzer(Version.LUCENE_CURRENT)
69 self._assertAnalyzesTo(a, "foo bar FOO BAR",
70 [ "foo", "bar", "foo", "bar" ])
71 self._assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
72 [ "foo", "bar", "foo", "bar" ])
# Helper for the payload test: looks up the PayloadAttribute of the token
# stream `ts`, advances the stream with incrementToken(), and asserts that
# the first byte of the current payload equals `b`.
# NOTE(review): `b` is the expected byte value; how it is initialised and
# updated should be confirmed against the full method body.
74 def _verifyPayload(self, ts):
76 payloadAtt = ts.getAttribute(PayloadAttribute.class_)
80 if not ts.incrementToken():
82 self.assertEqual(b, payloadAtt.getPayload().toByteArray()[0])
84 # Make sure old style next() calls result in a new copy of payloads
# The payload check is run twice; each run builds a fresh WhitespaceTokenizer
# over the same input string and wraps it in a new PayloadSetter.
85 def testPayloadCopy(self):
87 s = "how now brown cow"
88 ts = WhitespaceTokenizer(StringReader(s))
89 ts = PayloadSetter(ts)
90 self._verifyPayload(ts)
92 ts = WhitespaceTokenizer(StringReader(s))
93 ts = PayloadSetter(ts)
94 self._verifyPayload(ts)
97 class PayloadSetter(PythonTokenFilter):
# Token filter used by the payload test: it wraps another token stream and
# assigns the Payload object self.p to its PayloadAttribute.
#
# `input` is the wrapped token stream; it is forwarded to the base-class
# constructor.
99 def __init__(self, input):
100 super(PayloadSetter, self).__init__(input)
# payloadAtt: the PayloadAttribute registered on this filter via addAttribute.
# data: a Java byte array created with JArray('byte')(1).
# p: a Payload built from `data` with arguments (0, 1) -- presumably offset
# and length, i.e. the single byte of `data`; confirm against the Payload API.
103 self.payloadAtt = self.addAttribute(PayloadAttribute.class_)
104 self.data = JArray('byte')(1)
105 self.p = Payload(self.data, 0, 1)
# Advances the wrapped stream first (self.input.incrementToken()); the
# payload attribute is then set to self.p.
107 def incrementToken(self):
109 if not self.input.incrementToken():
112 self.payloadAtt.setPayload(self.p)
118 if __name__ == "__main__":
# When run as a script, a '-loop' flag found in sys.argv is removed from it.
# NOTE(review): presumably the flag selects a repeated-run mode -- confirm
# against the rest of this block.
121 if '-loop' in sys.argv:
122 sys.argv.remove('-loop')