# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================
from unittest import TestCase

from lucene import \
    StandardAnalyzer, RAMDirectory, IndexWriter, Term, Document, Field, \
    IndexSearcher, TermQuery, PhraseQuery, QueryParser, StringReader, \
    TermAttribute, PositionIncrementAttribute, Version
from lia.analysis.AnalyzerUtils import AnalyzerUtils
from lia.analysis.synonym.SynonymAnalyzer import SynonymAnalyzer
from lia.analysis.synonym.MockSynonymEngine import MockSynonymEngine
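
# SynonymAnalyzer wraps MockSynonymEngine which, per the assertions in
# testJumps below, injects "hops" and "leaps" as synonyms of "jumps" at the
# same token position (position increment 0).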

class SynonymAnalyzerTest(TestCase):

    synonymAnalyzer = SynonymAnalyzer(MockSynonymEngine())
    def setUp(self):

        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, self.synonymAnalyzer, True,
                             IndexWriter.MaxFieldLength.UNLIMITED)

        doc = Document()
        doc.add(Field("content",
                      "The quick brown fox jumps over the lazy dogs",
                      Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.close()

        self.searcher = IndexSearcher(self.directory, True)
    def testJumps(self):

        stream = self.synonymAnalyzer.tokenStream("contents",
                                                  StringReader("jumps"))
        term = stream.addAttribute(TermAttribute.class_)
        posIncr = stream.addAttribute(PositionIncrementAttribute.class_)

        i = 0
        expected = ["jumps", "hops", "leaps"]
        while stream.incrementToken():
            self.assertEqual(expected[i], term.term())
            # the original token gets increment 1, injected synonyms get 0
            expectedPos = 1 if i == 0 else 0
            self.assertEqual(expectedPos, posIncr.getPositionIncrement())
            i += 1

        self.assertEqual(3, i)
    def testSearchByAPI(self):

        tq = TermQuery(Term("content", "hops"))
        topDocs = self.searcher.search(tq, 50)
        self.assertEqual(1, topDocs.totalHits)

        pq = PhraseQuery()
        pq.add(Term("content", "fox"))
        pq.add(Term("content", "hops"))
        topDocs = self.searcher.search(pq, 50)
        self.assertEqual(1, topDocs.totalHits)
    def testWithQueryParser(self):

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            self.synonymAnalyzer).parse('"fox jumps"')
        topDocs = self.searcher.search(query, 50)
        # in Lucene 1.9, position increments are no longer ignored
        self.assertEqual(1, topDocs.totalHits, "!!!! what?!")

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            StandardAnalyzer(Version.LUCENE_CURRENT)).parse('"fox jumps"')
        topDocs = self.searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "*whew*")
    def main(cls):

        query = QueryParser(Version.LUCENE_CURRENT, "content",
                            cls.synonymAnalyzer).parse('"fox jumps"')
        print "\"fox jumps\" parses to ", query.toString("content")

        print "From AnalyzerUtils.tokensFromAnalysis: "
        AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"")

    main = classmethod(main)
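

# Driver sketch (not part of the original listing, module name hypothetical):
# because the class-level SynonymAnalyzer is constructed at import time, the
# JVM must be started with lucene.initVM() before this module is imported,
# e.g. from a separate script:
#
#     import lucene, unittest
#     lucene.initVM()
#     import SynonymAnalyzerTest as t
#     t.SynonymAnalyzerTest.main()
#     unittest.main(module=t)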