# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, os, shutil
import lucene

from unittest import TestCase, main
# the classes below live in the flat, JCC-generated `lucene` module
# (PyLucene 3.x style imports)
from lucene import \
    Document, Field, StringReader, Term, TermQuery, BooleanClause, \
    StandardAnalyzer, IndexWriter, IndexReader, IndexSearcher, \
    QueryParser, MultiFieldQueryParser, Version, \
    SimpleFSDirectory, MMapDirectory, File
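
# Test_PyLuceneBase is a mixin holding the actual indexing and search tests;
# concrete subclasses only decide which Directory implementation backs the
# index by providing openStore()/closeStore().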

class Test_PyLuceneBase(object):

    def getAnalyzer(self):
        return StandardAnalyzer(Version.LUCENE_CURRENT)

    def openStore(self):
        raise NotImplementedError

    def closeStore(self, store, *args):
        pass

    def getWriter(self, store, analyzer, create=False):
        writer = IndexWriter(store, analyzer, create,
                             IndexWriter.MaxFieldLength.LIMITED)
        #writer.setUseCompoundFile(False)
        return writer

    def getReader(self, store, analyzer):
        pass

    def test_indexDocument(self):

        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.ANALYZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.NOT_ANALYZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.ANALYZED))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)
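
    # the next two tests index the same fields plus a "content" field whose
    # value is supplied through a Reader (StringReader) instead of a string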

    def test_indexDocumentWithText(self):

        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.ANALYZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.NOT_ANALYZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.ANALYZED))

            body_text = "hello world" * 20
            body_reader = StringReader(body_text)
            doc.add(Field("content", body_reader))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

    def test_indexDocumentWithUnicodeText(self):

        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()
            writer = self.getWriter(store, analyzer, True)

            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.ANALYZED))
            doc.add(Field("docid", str(1),
                          Field.Store.NO, Field.Index.NOT_ANALYZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.ANALYZED))

            # using a unicode body causes problems, which seems very odd
            # since the python type is the same regardless
            body_text = u"hello world" * 20
            body_reader = StringReader(body_text)
            doc.add(Field("content", body_reader))

            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)
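
    # the search tests below first call test_indexDocument() to (re)build the
    # index, then open an IndexSearcher against the same store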

    def test_searchDocuments(self):

        self.test_indexDocument()

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store, True)
            query = QueryParser(Version.LUCENE_CURRENT, "title",
                                self.getAnalyzer()).parse("value")
            topDocs = searcher.search(query, 50)
            self.assertEqual(topDocs.totalHits, 1)
        finally:
            self.closeStore(store, searcher)

    def test_searchDocumentsWithMultiField(self):
        """
        Tests searching with MultiFieldQueryParser
        """

        self.test_indexDocument()

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store, True)
            SHOULD = BooleanClause.Occur.SHOULD
            query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
                                                "value", ["title", "docid"],
                                                [SHOULD, SHOULD],
                                                self.getAnalyzer())
            topDocs = searcher.search(query, 50)
            self.assertEqual(1, topDocs.totalHits)
        finally:
            self.closeStore(store, searcher)

    def test_removeDocument(self):

        self.test_indexDocument()

        store = self.openStore()
        searcher = None
        reader = None
        try:
            searcher = IndexSearcher(store, True)
            query = TermQuery(Term("docid", str(1)))
            topDocs = searcher.search(query, 50)
            self.assertEqual(topDocs.totalHits, 1)
            # be careful with ids, they are ephemeral
            docid = topDocs.scoreDocs[0].doc

            reader = IndexReader.open(store, False)
            reader.deleteDocument(docid)
        finally:
            self.closeStore(store, searcher, reader)

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store, True)
            query = TermQuery(Term("docid", str(1)))
            topDocs = searcher.search(query, 50)
            self.assertEqual(topDocs.totalHits, 0)
        finally:
            self.closeStore(store, searcher)
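
    # deleteDocuments(Term) removes every document containing the term, so no
    # internal doc id lookup is needed here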

    def test_removeDocuments(self):

        self.test_indexDocument()

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store, False)
            reader.deleteDocuments(Term('docid', str(1)))
        finally:
            self.closeStore(store, reader)

        store = self.openStore()
        searcher = None
        try:
            searcher = IndexSearcher(store, True)
            query = QueryParser(Version.LUCENE_CURRENT, "title",
                                self.getAnalyzer()).parse("value")
            topDocs = searcher.search(query, 50)
            self.assertEqual(topDocs.totalHits, 0)
        finally:
            self.closeStore(store, searcher)
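
    # terms are ordered by field name and then by text, so enumerating from
    # Term("docid", '') yields all docid terms before any other field's terms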

    def test_FieldEnumeration(self):

        self.test_indexDocument()

        store = self.openStore()
        writer = None
        try:
            analyzer = self.getAnalyzer()

            writer = self.getWriter(store, analyzer, False)
            doc = Document()
            doc.add(Field("title", "value of testing",
                          Field.Store.YES, Field.Index.ANALYZED))
            doc.add(Field("docid", str(2),
                          Field.Store.NO, Field.Index.NOT_ANALYZED))
            doc.add(Field("owner", "unittester",
                          Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.ANALYZED))

            writer.addDocument(doc)

            doc = Document()
            doc.add(Field("owner", "unittester",
                          Field.Store.NO, Field.Index.NOT_ANALYZED))
            doc.add(Field("search_name", "wisdom",
                          Field.Store.YES, Field.Index.NO))
            doc.add(Field("meta_words", "rabbits are beautiful",
                          Field.Store.NO, Field.Index.ANALYZED))
            writer.addDocument(doc)
        finally:
            self.closeStore(store, writer)

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store, True)
            term_enum = reader.terms(Term("docid", ''))
            docids = []
            while term_enum.term().field() == 'docid':
                docids.append(term_enum.term().text())
                term_enum.next()
            self.assertEqual(len(docids), 2)
        finally:
            self.closeStore(store, reader)
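
    # getFieldNames() reports the field names present in the index, filtered
    # by a FieldOption such as ALL or INDEXED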

    def test_getFieldNames(self):

        self.test_indexDocument()

        store = self.openStore()
        reader = None
        try:
            reader = IndexReader.open(store, True)

            fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL)
            for fieldName in fieldNames:
                self.assertTrue(fieldName in ['owner', 'search_name',
                                              'meta_words', 'docid', 'title'])

            fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED)
            for fieldName in fieldNames:
                self.assertTrue(fieldName in ['owner', 'meta_words',
                                              'docid', 'title'])

            fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
            for fieldName in fieldNames:
                self.assertTrue(fieldName in ['owner', 'meta_words',
                                              'docid', 'title'])
        finally:
            self.closeStore(store, reader)
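
# concrete test cases: each class binds the shared tests above to a specific
# Lucene Directory implementation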

class Test_PyLuceneWithFSStore(TestCase, Test_PyLuceneBase):

    STORE_DIR = "testrepo"

    def setUp(self):
        if not os.path.exists(self.STORE_DIR):
            os.mkdir(self.STORE_DIR)

    def tearDown(self):
        if os.path.exists(self.STORE_DIR):
            shutil.rmtree(self.STORE_DIR)

    def openStore(self):
        return SimpleFSDirectory(File(self.STORE_DIR))

    def closeStore(self, store, *args):
        # close any writer/reader/searcher passed in, then the store itself
        for arg in args:
            if arg is not None:
                arg.close()
        store.close()

class Test_PyLuceneWithMMapStore(Test_PyLuceneWithFSStore):

    def openStore(self):
        return MMapDirectory(File(self.STORE_DIR))

if __name__ == "__main__":
    lucene.initVM()
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        # with -loop, keep re-running the test suite until interrupted
        while True:
            try:
                main()
            except:
                pass
    else:
        main()