samples/LuceneInAction/lia/analysis/synonym/SynonymAnalyzerTest.py

   1 # ====================================================================
   2 #   Licensed under the Apache License, Version 2.0 (the "License");
   3 #   you may not use this file except in compliance with the License.
   4 #   You may obtain a copy of the License at
   5 #
   6 #       http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 #   Unless required by applicable law or agreed to in writing, software
   9 #   distributed under the License is distributed on an "AS IS" BASIS,
  10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 #   See the License for the specific language governing permissions and
  12 #   limitations under the License.
  13 # ====================================================================
  14
  15 from unittest import TestCase
  16
  17 from lucene import \
  18      StandardAnalyzer, RAMDirectory, IndexWriter, Term, Document, Field, \
  19      IndexSearcher, TermQuery, PhraseQuery, QueryParser, StringReader, \
  20      TermAttribute, PositionIncrementAttribute, Version
  21
  22 from lia.analysis.AnalyzerUtils import AnalyzerUtils
  23 from lia.analysis.synonym.SynonymAnalyzer import SynonymAnalyzer
  24 from lia.analysis.synonym.MockSynonymEngine import MockSynonymEngine
  25
  26
  27 class SynonymAnalyzerTest(TestCase):
  28
  29     synonymAnalyzer = SynonymAnalyzer(MockSynonymEngine())
  30
  31     def setUp(self):
  32
  33         self.directory = RAMDirectory()
  34         writer = IndexWriter(self.directory, self.synonymAnalyzer, True,
  35                              IndexWriter.MaxFieldLength.UNLIMITED)
  36
  37         doc = Document()
  38         doc.add(Field("content",
  39                       "The quick brown fox jumps over the lazy dogs",
  40                       Field.Store.YES, Field.Index.ANALYZED))
  41         writer.addDocument(doc)
  42         writer.close()
  43
  44         self.searcher = IndexSearcher(self.directory, True)
  45
  46     def tearDown(self):
  47
  48         self.searcher.close()
  49
  50     def testJumps(self):
  51
  52         stream = self.synonymAnalyzer.tokenStream("contents",
  53                                                   StringReader("jumps"))
  54         term = stream.addAttribute(TermAttribute.class_)
  55         posIncr = stream.addAttribute(PositionIncrementAttribute.class_)
  56
  57         i = 0
  58         expected = ["jumps", "hops", "leaps"]
  59         while stream.incrementToken():
  60             self.assertEqual(expected[i], term.term())
  61             if i == 0:
  62                 expectedPos = 1
  63             else:
  64                 expectedPos = 0
  65
  66             self.assertEqual(expectedPos, posIncr.getPositionIncrement())
  67             i += 1
  68
  69         self.assertEqual(3, i)
  70
  71     def testSearchByAPI(self):
  72
  73         tq = TermQuery(Term("content", "hops"))
  74         topDocs = self.searcher.search(tq, 50)
  75         self.assertEqual(1, topDocs.totalHits)
  76
  77         pq = PhraseQuery()
  78         pq.add(Term("content", "fox"))
  79         pq.add(Term("content", "hops"))
  80         topDocs = self.searcher.search(pq, 50)
  81         self.assertEquals(1, topDocs.totalHits)
  82
  83     def testWithQueryParser(self):
  84
  85         query = QueryParser(Version.LUCENE_CURRENT, "content",
  86                             self.synonymAnalyzer).parse('"fox jumps"')
  87         topDocs = self.searcher.search(query, 50)
  88         # in Lucene 1.9, position increments are no longer ignored
  89         self.assertEqual(1, topDocs.totalHits, "!!!! what?!")
  90
  91         query = QueryParser(Version.LUCENE_CURRENT, "content",
  92                             StandardAnalyzer(Version.LUCENE_CURRENT)).parse('"fox jumps"')
  93         topDocs = self.searcher.search(query, 50)
  94         self.assertEqual(1, topDocs.totalHits, "*whew*")
  95
  96     def main(cls):
  97
  98         query = QueryParser(Version.LUCENE_CURRENT, "content",
  99                             cls.synonymAnalyzer).parse('"fox jumps"')
 100         print "\"fox jumps\" parses to ", query.toString("content")
 101
 102         print "From AnalyzerUtils.tokensFromAnalysis: "
 103         AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"")
 104         print ''
 105
 106     main = classmethod(main)