1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from unittest import TestCase, main
class PhraseQueryTestCase(TestCase):
    """
    Unit tests ported from Java Lucene

    Exercises PhraseQuery matching and slop handling against small
    in-memory indexes.  The Lucene names used below (RAMDirectory,
    IndexWriter, Document, Field, Term, ...) are expected to be supplied
    by the module's imports, which live outside this class.
    """

    def setUp(self):
        """
        Index the single document "one two three four five" and prepare an
        empty PhraseQuery for the individual test to fill in.
        """

        self.directory = RAMDirectory()
        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                             IndexWriter.MaxFieldLength.LIMITED)

        doc = Document()
        doc.add(Field("field", "one two three four five",
                      Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)

        # closing commits the document and releases the write lock so the
        # searcher opened next actually sees it
        writer.close()

        self.searcher = IndexSearcher(self.directory, True)
        self.query = PhraseQuery()

    def tearDown(self):
        """
        Release the searcher and the directory created by setUp.
        """

        self.searcher.close()
        self.directory.close()

    def testNotCloseEnough(self):
        """
        "five" would have to move three positions to follow "one"; a slop
        of 2 is one short of that.
        """

        self.query.setSlop(2)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "five"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(0, topDocs.totalHits)

    def testBarelyCloseEnough(self):
        """
        A slop of 3 is exactly the number of moves "one five" needs.
        """

        self.query.setSlop(3)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "five"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits)

    def testExact(self):
        """
        Ensures slop of 0 works for exact matches, but not reversed
        """

        # slop is zero by default
        self.query.add(Term("field", "four"))
        self.query.add(Term("field", "five"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits, "exact match")

        self.query = PhraseQuery()
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(0, topDocs.totalHits, "reverse not exact")

    def testSlop1(self):
        """
        A slop of 1 matches terms in order but not a reversed pair.
        """

        # Ensures slop of 1 works with terms in order.
        self.query.setSlop(1)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "two"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits, "in order")

        # Ensures slop of 1 does not work for phrases out of order
        # (reversing a pair needs a slop of at least 2).
        self.query = PhraseQuery()
        self.query.setSlop(1)
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(0, topDocs.totalHits, "reversed, slop not 2 or more")

    def testOrderDoesntMatter(self):
        """
        As long as slop is at least 2, terms can be reversed
        """

        self.query.setSlop(2) # must be at least two for reverse order match
        self.query.add(Term("field", "two"))
        self.query.add(Term("field", "one"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits, "just sloppy enough")

        self.query = PhraseQuery()
        self.query.setSlop(2)
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "one"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(0, topDocs.totalHits, "not sloppy enough")

    def testMulipleTerms(self):
        """
        slop is the total number of positional moves allowed
        to line up a phrase
        """

        self.query.setSlop(2)
        self.query.add(Term("field", "one"))
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "five"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits, "two total moves")

        self.query = PhraseQuery()
        self.query.setSlop(5) # it takes six moves to match this phrase
        self.query.add(Term("field", "five"))
        self.query.add(Term("field", "three"))
        self.query.add(Term("field", "one"))
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(0, topDocs.totalHits, "slop of 5 not close enough")

        # same query object, only the slop is raised
        self.query.setSlop(6)
        topDocs = self.searcher.search(self.query, 50)
        self.assertEqual(1, topDocs.totalHits, "slop of 6 just right")

    def testPhraseQueryWithStopAnalyzer(self):
        """
        Phrase queries against text indexed with a StopAnalyzer.
        """

        directory = RAMDirectory()
        stopAnalyzer = StopAnalyzer(Version.LUCENE_24)
        writer = IndexWriter(directory, stopAnalyzer, True,
                             IndexWriter.MaxFieldLength.LIMITED)
        doc = Document()
        doc.add(Field("field", "the stop words are here",
                      Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.close()

        searcher = IndexSearcher(directory, True)

        # valid exact phrase query
        query = PhraseQuery()
        query.add(Term("field","stop"))
        query.add(Term("field","words"))
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits)

        # currently StopAnalyzer does not leave "holes", so this matches.
        query = PhraseQuery()
        query.add(Term("field", "words"))
        query.add(Term("field", "here"))
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits)

        searcher.close()
        directory.close()

    def testPhraseQueryInConjunctionScorer(self):
        """
        A PhraseQuery combined with a TermQuery as MUST clauses of a
        BooleanQuery, in both clause orders.
        """

        directory = RAMDirectory()
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True,
                             IndexWriter.MaxFieldLength.LIMITED)

        # first document: only the "source" field
        doc = Document()
        doc.add(Field("source", "marketing info",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        # second document: "contents" and "source"
        doc = Document()
        doc.add(Field("contents", "foobar",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        doc.add(Field("source", "marketing info",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        writer.close()

        searcher = IndexSearcher(directory, True)

        phraseQuery = PhraseQuery()
        phraseQuery.add(Term("source", "marketing"))
        phraseQuery.add(Term("source", "info"))
        topDocs = searcher.search(phraseQuery, 50)
        self.assertEqual(2, topDocs.totalHits)

        termQuery = TermQuery(Term("contents","foobar"))
        booleanQuery = BooleanQuery()
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        topDocs = searcher.search(booleanQuery, 50)
        self.assertEqual(1, topDocs.totalHits)

        searcher.close()

        # rebuild the index from scratch (create=True) with three documents
        writer = IndexWriter(directory, WhitespaceAnalyzer(), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        doc = Document()
        doc.add(Field("contents", "map entry woo",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("contents", "woo map entry",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        doc = Document()
        doc.add(Field("contents", "map foobarword entry woo",
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.YES))
        writer.addDocument(doc)

        writer.close()

        searcher = IndexSearcher(directory, True)

        termQuery = TermQuery(Term("contents", "woo"))
        phraseQuery = PhraseQuery()
        phraseQuery.add(Term("contents", "map"))
        phraseQuery.add(Term("contents", "entry"))

        topDocs = searcher.search(termQuery, 50)
        self.assertEqual(3, topDocs.totalHits)
        topDocs = searcher.search(phraseQuery, 50)
        self.assertEqual(2, topDocs.totalHits)

        booleanQuery = BooleanQuery()
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        topDocs = searcher.search(booleanQuery, 50)
        self.assertEqual(2, topDocs.totalHits)

        # same conjunction with the clauses added in the opposite order
        booleanQuery = BooleanQuery()
        booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
        booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
        topDocs = searcher.search(booleanQuery, 50)
        self.assertEqual(2, topDocs.totalHits)

        searcher.close()
        directory.close()
262 if __name__ == "__main__":
265 if '-loop' in sys.argv:
266 sys.argv.remove('-loop')