PyLucene 3.4.0-1 import
[pylucene.git] / samples / LuceneInAction / lia / analysis / synonym / SynonymAnalyzerTest.py
1 # ====================================================================
2 #   Licensed under the Apache License, Version 2.0 (the "License");
3 #   you may not use this file except in compliance with the License.
4 #   You may obtain a copy of the License at
5 #
6 #       http://www.apache.org/licenses/LICENSE-2.0
7 #
8 #   Unless required by applicable law or agreed to in writing, software
9 #   distributed under the License is distributed on an "AS IS" BASIS,
10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 #   See the License for the specific language governing permissions and
12 #   limitations under the License.
13 # ====================================================================
14
15 from unittest import TestCase
16
17 from lucene import \
18      StandardAnalyzer, RAMDirectory, IndexWriter, Term, Document, Field, \
19      IndexSearcher, TermQuery, PhraseQuery, QueryParser, StringReader, \
20      TermAttribute, PositionIncrementAttribute, Version
21
22 from lia.analysis.AnalyzerUtils import AnalyzerUtils
23 from lia.analysis.synonym.SynonymAnalyzer import SynonymAnalyzer
24 from lia.analysis.synonym.MockSynonymEngine import MockSynonymEngine
25
26
class SynonymAnalyzerTest(TestCase):
    """Exercise SynonymAnalyzer (backed by MockSynonymEngine) end to end.

    Each test runs against a fresh in-memory index holding a single
    document whose "content" field is analyzed with the synonym analyzer.
    """

    # Shared by every test and by main(); built once at class-definition time.
    synonymAnalyzer = SynonymAnalyzer(MockSynonymEngine())

    def setUp(self):
        """Index one sentence with the synonym analyzer and open a searcher."""

        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, self.synonymAnalyzer, True,
                             IndexWriter.MaxFieldLength.UNLIMITED)

        doc = Document()
        doc.add(Field("content",
                      "The quick brown fox jumps over the lazy dogs",
                      Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.close()

        self.searcher = IndexSearcher(self.directory, True)

    def tearDown(self):
        """Release the searcher and the in-memory directory from setUp."""

        self.searcher.close()
        # The directory is created per test in setUp; close it as well so
        # nothing is left open between tests.
        self.directory.close()

    def testJumps(self):
        """Analyzing "jumps" yields the word followed by its two synonyms.

        The original token is expected with a position increment of 1 and
        each synonym with an increment of 0.
        """

        stream = self.synonymAnalyzer.tokenStream("contents",
                                                  StringReader("jumps"))
        term = stream.addAttribute(TermAttribute.class_)
        posIncr = stream.addAttribute(PositionIncrementAttribute.class_)

        # Collect everything first so that a missing or surplus token shows
        # up as one readable assertion failure rather than an IndexError.
        actual = []
        while stream.incrementToken():
            actual.append((term.term(), posIncr.getPositionIncrement()))

        expected = [("jumps", 1), ("hops", 0), ("leaps", 0)]
        self.assertEqual(expected, actual)

    def testSearchByAPI(self):
        """Programmatic queries can match on a synonym.

        "hops" does not appear in the indexed sentence, so a hit on it can
        only come from terms added during analysis.
        """

        tq = TermQuery(Term("content", "hops"))
        topDocs = self.searcher.search(tq, 50)
        self.assertEqual(1, topDocs.totalHits)

        # Phrase "fox hops": expected to match the sentence "... fox jumps ...".
        pq = PhraseQuery()
        pq.add(Term("content", "fox"))
        pq.add(Term("content", "hops"))
        topDocs = self.searcher.search(pq, 50)
        self.assertEqual(1, topDocs.totalHits)

    def testWithQueryParser(self):
        """The phrase "fox jumps" matches whichever analyzer parses it."""

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            self.synonymAnalyzer).parse('"fox jumps"')
        topDocs = self.searcher.search(query, 50)
        # in Lucene 1.9, position increments are no longer ignored
        self.assertEqual(1, topDocs.totalHits, "!!!! what?!")

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            StandardAnalyzer(Version.LUCENE_CURRENT)).parse('"fox jumps"')
        topDocs = self.searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "*whew*")

    @classmethod
    def main(cls):
        """Print how '"fox jumps"' is parsed and tokenized by the analyzer."""

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            cls.synonymAnalyzer).parse('"fox jumps"')
        # Single-argument print(...) emits the same text whether print is a
        # statement or a function; "%s %s" reproduces the separator space.
        print("%s %s" % ("\"fox jumps\" parses to ",
                         query.toString("content")))

        print("From AnalyzerUtils.tokensFromAnalysis: ")
        AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"")
        print('')