# ====================================================================
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
# ====================================================================

from lia.common.LiaTestCase import LiaTestCase

from lucene import \
     WhitespaceAnalyzer, StandardAnalyzer, Term, QueryParser, Locale, \
     BooleanQuery, FuzzyQuery, IndexSearcher, TermRangeQuery, TermQuery, \
     BooleanClause, Version


class QueryParserTest(LiaTestCase):

    def setUp(self):

        super(QueryParserTest, self).setUp()
        self.analyzer = WhitespaceAnalyzer()
        self.searcher = IndexSearcher(self.directory, True)

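    # toString() renders a query back in query-parser syntax; the argument
    # names the default field, so "field" terms print unqualified while
    # "title" terms keep their prefix.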
    def testToString(self):

        query = BooleanQuery()
        query.add(FuzzyQuery(Term("field", "kountry")),
                  BooleanClause.Occur.MUST)
        query.add(TermQuery(Term("title", "western")),
                  BooleanClause.Occur.SHOULD)

        self.assertEqual("+kountry~0.5 title:western",
                         query.toString("field"), "both kinds")

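    # Prefix and wildcard terms are not run through the analyzer; by default
    # the parser lowercases them, and setLowercaseExpandedTerms(False) keeps
    # the typed case in the resulting PrefixQuery.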
    def testPrefixQuery(self):

        parser = QueryParser(Version.LUCENE_CURRENT, "category",
                             StandardAnalyzer(Version.LUCENE_CURRENT))
        parser.setLowercaseExpandedTerms(False)

        print parser.parse("/Computers/technology*").toString("category")

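    # Parentheses group clauses so the OR is evaluated before the AND.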
    def testGrouping(self):

        query = QueryParser(Version.LUCENE_CURRENT, "subject",
                            self.analyzer).parse("(agile OR extreme) AND methodology")
        scoreDocs = self.searcher.search(query, 50).scoreDocs

        self.assertHitsIncludeTitle(self.searcher, scoreDocs,
                                    "Extreme Programming Explained")
        self.assertHitsIncludeTitle(self.searcher, scoreDocs,
                                    "The Pragmatic Programmer")

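    # [K TO N] parses to an inclusive TermRangeQuery; the {K TO Mindstorms}
    # form is exclusive at both ends, so "Mindstorms" itself falls outside
    # the range.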
    def testTermRangeQuery(self):

        query = QueryParser(Version.LUCENE_CURRENT, "subject",
                            self.analyzer).parse("title2:[K TO N]")
        self.assert_(TermRangeQuery.instance_(query))

        scoreDocs = self.searcher.search(query, 10).scoreDocs
        self.assertHitsIncludeTitle(self.searcher, scoreDocs, "Mindstorms")

        query = QueryParser(Version.LUCENE_CURRENT, "subject",
                            self.analyzer).parse("title2:{K TO Mindstorms}")
        scoreDocs = self.searcher.search(query, 10).scoreDocs
        self.assertHitsIncludeTitle(self.searcher, scoreDocs, "Mindstorms",
                                    True)

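    # Range endpoints on the "modified" field are interpreted as dates,
    # parsed according to the locale supplied via setLocale().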
    def testDateRangeQuery(self):

        # locale diff between jre and gcj 1/1/04 -> 01/01/04
        # expression = "modified:[1/1/04 TO 12/31/04]"

        expression = "modified:[01/01/04 TO 12/31/04]"
        parser = QueryParser(Version.LUCENE_CURRENT, "subject", self.analyzer)
        parser.setLocale(Locale.US)
        query = parser.parse(expression)
        print expression, "parsed to", query

        topDocs = self.searcher.search(query, 50)
        self.assert_(topDocs.totalHits > 0)

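    # Phrase queries match exactly unless a slop is given; setPhraseSlop()
    # supplies a default slop for phrases without an explicit ~n.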
    def testSlop(self):

        q = QueryParser(Version.LUCENE_CURRENT, "field",
                        self.analyzer).parse('"exact phrase"')
        self.assertEqual("\"exact phrase\"", q.toString("field"),
                         "zero slop")

        qp = QueryParser(Version.LUCENE_CURRENT, "field", self.analyzer)
        qp.setPhraseSlop(5)
        q = qp.parse('"sloppy phrase"')
        self.assertEqual("\"sloppy phrase\"~5", q.toString("field"),
                         "sloppy, implicitly")

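    # The analyzer shapes the parsed phrase: StandardAnalyzer lowercases
    # and drops stop words, and a phrase that analyzes to a single term
    # collapses to a plain TermQuery.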
    def testPhraseQuery(self):

        analyzer = StandardAnalyzer(Version.LUCENE_24)
        q = QueryParser(Version.LUCENE_24, "field",
                        analyzer).parse('"This is Some Phrase*"')
        self.assertEqual("\"some phrase\"", q.toString("field"), "analyzed")

        q = QueryParser(Version.LUCENE_CURRENT, "field",
                        self.analyzer).parse('"term"')
        self.assert_(TermQuery.instance_(q), "reduced to TermQuery")

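    # Expanded terms such as prefix queries are lowercased by default;
    # setLowercaseExpandedTerms(False) preserves the original case.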
    def testLowercasing(self):

        q = QueryParser(Version.LUCENE_CURRENT, "field",
                        self.analyzer).parse("PrefixQuery*")
        self.assertEqual("prefixquery*", q.toString("field"), "lowercased")

        qp = QueryParser(Version.LUCENE_CURRENT, "field", self.analyzer)
        qp.setLowercaseExpandedTerms(False)
        q = qp.parse("PrefixQuery*")
        self.assertEqual("PrefixQuery*", q.toString("field"), "not lowercased")

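    # Leading wildcards are rejected by default because they would require
    # scanning every indexed term.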
    def testWildcard(self):

        try:
            QueryParser(Version.LUCENE_CURRENT, "field",
                        self.analyzer).parse("*xyz")
            self.fail("Leading wildcard character should not be allowed")
        except:
            self.assert_(True)

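    # The ^ operator boosts a clause; the boost is stored as a float, so
    # term^2 prints back as term^2.0.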
    def testBoost(self):

        q = QueryParser(Version.LUCENE_CURRENT, "field",
                        self.analyzer).parse("term^2")
        self.assertEqual("term^2.0", q.toString("field"))

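    # Syntactically invalid input makes parse() raise an exception instead
    # of returning a query.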
    def testParseException(self):

        try:
            QueryParser(Version.LUCENE_CURRENT, "contents",
                        self.analyzer).parse("^&#")
        except:
            # expression is invalid, as expected
            self.assert_(True)
        else:
            self.fail("ParseException expected, but not thrown")