samples/LuceneInAction/lia/searching/QueryParserTest.py

   1 # ====================================================================
   2 #   Licensed under the Apache License, Version 2.0 (the "License");
   3 #   you may not use this file except in compliance with the License.
   4 #   You may obtain a copy of the License at
   5 #
   6 #       http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 #   Unless required by applicable law or agreed to in writing, software
   9 #   distributed under the License is distributed on an "AS IS" BASIS,
  10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 #   See the License for the specific language governing permissions and
  12 #   limitations under the License.
  13 # ====================================================================
  14
  15 from lia.common.LiaTestCase import LiaTestCase
  16
  17 from lucene import \
  18      WhitespaceAnalyzer, StandardAnalyzer, Term, QueryParser, Locale, \
  19      BooleanQuery, FuzzyQuery, IndexSearcher, TermRangeQuery, TermQuery, \
  20      BooleanClause, Version
  21
  22
  23 class QueryParserTest(LiaTestCase):
  24
  25     def setUp(self):
  26
  27         super(QueryParserTest, self).setUp()
  28         self.analyzer = WhitespaceAnalyzer()
  29         self.searcher = IndexSearcher(self.directory, True)
  30
  31     def testToString(self):
  32
  33         query = BooleanQuery()
  34         query.add(FuzzyQuery(Term("field", "kountry")),
  35                   BooleanClause.Occur.MUST)
  36         query.add(TermQuery(Term("title", "western")),
  37                   BooleanClause.Occur.SHOULD)
  38
  39         self.assertEqual("+kountry~0.5 title:western",
  40                          query.toString("field"), "both kinds")
  41
  42     def testPrefixQuery(self):
  43
  44         parser = QueryParser(Version.LUCENE_CURRENT, "category",
  45                              StandardAnalyzer(Version.LUCENE_CURRENT))
  46         parser.setLowercaseExpandedTerms(False)
  47
  48         print parser.parse("/Computers/technology*").toString("category")
  49
  50     def testGrouping(self):
  51
  52         query = QueryParser(Version.LUCENE_CURRENT, "subject",
  53                             self.analyzer).parse("(agile OR extreme) AND methodology")
  54         scoreDocs = self.searcher.search(query, 50).scoreDocs
  55
  56         self.assertHitsIncludeTitle(self.searcher, scoreDocs,
  57                                     "Extreme Programming Explained")
  58         self.assertHitsIncludeTitle(self.searcher, scoreDocs,
  59                                     "The Pragmatic Programmer")
  60
  61     def testTermRangeQuery(self):
  62
  63         query = QueryParser(Version.LUCENE_CURRENT, "subject",
  64                             self.analyzer).parse("title2:[K TO N]")
  65         self.assert_(TermRangeQuery.instance_(query))
  66
  67         scoreDocs = self.searcher.search(query, 10).scoreDocs
  68         self.assertHitsIncludeTitle(self.searcher, scoreDocs, "Mindstorms")
  69
  70         query = QueryParser(Version.LUCENE_CURRENT, "subject",
  71                             self.analyzer).parse("title2:{K TO Mindstorms}")
  72         scoreDocs = self.searcher.search(query, 10).scoreDocs
  73         self.assertHitsIncludeTitle(self.searcher, scoreDocs, "Mindstorms",
  74                                     True)
  75
  76     def testDateRangeQuery(self):
  77
  78         # locale diff between jre and gcj 1/1/04 -> 01/01/04
  79         # expression = "modified:[1/1/04 TO 12/31/04]"
  80
  81         expression = "modified:[01/01/04 TO 12/31/04]"
  82         parser = QueryParser(Version.LUCENE_CURRENT, "subject", self.analyzer)
  83         parser.setLocale(Locale.US)
  84         query = parser.parse(expression)
  85         print expression, "parsed to", query
  86
  87         topDocs = self.searcher.search(query, 50)
  88         self.assert_(topDocs.totalHits > 0)
  89
  90     def testSlop(self):
  91
  92         q = QueryParser(Version.LUCENE_CURRENT, "field",
  93                         self.analyzer).parse('"exact phrase"')
  94         self.assertEqual("\"exact phrase\"", q.toString("field"),
  95                          "zero slop")
  96
  97         qp = QueryParser(Version.LUCENE_CURRENT, "field", self.analyzer)
  98         qp.setPhraseSlop(5)
  99         q = qp.parse('"sloppy phrase"')
 100         self.assertEqual("\"sloppy phrase\"~5", q.toString("field"),
 101                          "sloppy, implicitly")
 102
 103     def testPhraseQuery(self):
 104
 105         analyzer = StandardAnalyzer(Version.LUCENE_24)
 106         q = QueryParser(Version.LUCENE_24, "field",
 107                         analyzer).parse('"This is Some Phrase*"')
 108         self.assertEqual("\"some phrase\"", q.toString("field"), "analyzed")
 109
 110         q = QueryParser(Version.LUCENE_CURRENT, "field",
 111                         self.analyzer).parse('"term"')
 112         self.assert_(TermQuery.instance_(q), "reduced to TermQuery")
 113
 114     def testLowercasing(self):
 115
 116         q = QueryParser(Version.LUCENE_CURRENT, "field",
 117                         self.analyzer).parse("PrefixQuery*")
 118         self.assertEqual("prefixquery*", q.toString("field"), "lowercased")
 119
 120         qp = QueryParser(Version.LUCENE_CURRENT, "field", self.analyzer)
 121         qp.setLowercaseExpandedTerms(False)
 122         q = qp.parse("PrefixQuery*")
 123         self.assertEqual("PrefixQuery*", q.toString("field"), "not lowercased")
 124
 125     def testWildcard(self):
 126
 127         try:
 128             QueryParser(Version.LUCENE_CURRENT, "field",
 129                         self.analyzer).parse("*xyz")
 130             self.fail("Leading wildcard character should not be allowed")
 131         except:
 132             self.assert_(True)
 133
 134     def testBoost(self):
 135
 136          q = QueryParser(Version.LUCENE_CURRENT, "field",
 137                          self.analyzer).parse("term^2")
 138          self.assertEqual("term^2.0", q.toString("field"))
 139
 140     def testParseException(self):
 141
 142         try:
 143             QueryParser(Version.LUCENE_CURRENT, "contents",
 144                         self.analyzer).parse("^&#")
 145         except:
 146             # expression is invalid, as expected
 147             self.assert_(True)
 148         else:
 149             self.fail("ParseException expected, but not thrown")