PyLucene 3.4.0-1 import
[pylucene.git] / test / test_PyLucene.py
1 # ====================================================================
2 #   Licensed under the Apache License, Version 2.0 (the "License");
3 #   you may not use this file except in compliance with the License.
4 #   You may obtain a copy of the License at
5 #
6 #       http://www.apache.org/licenses/LICENSE-2.0
7 #
8 #   Unless required by applicable law or agreed to in writing, software
9 #   distributed under the License is distributed on an "AS IS" BASIS,
10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 #   See the License for the specific language governing permissions and
12 #   limitations under the License.
13 # ====================================================================
14
15 import os, shutil
16
17 from unittest import TestCase, main
18 from lucene import *
19
20
21 class Test_PyLuceneBase(object):
22
23     def getAnalyzer(self):
24         return StandardAnalyzer(Version.LUCENE_CURRENT)
25
26     def openStore(self):
27         raise NotImplemented
28
29     def closeStore(self, store, *args):
30         pass
31
32     def getWriter(self, store, analyzer, create=False):
33         writer = IndexWriter(store, analyzer, create,
34                              IndexWriter.MaxFieldLength.LIMITED)
35         #writer.setUseCompoundFile(False)
36         return writer
37
38     def getReader(self, store, analyzer):
39         pass
40
41     def test_indexDocument(self):
42
43         store = self.openStore()
44         writer = None
45         try:
46             analyzer = self.getAnalyzer()
47             writer = self.getWriter(store, analyzer, True)
48
49             doc = Document()
50             doc.add(Field("title", "value of testing",
51                           Field.Store.YES, Field.Index.ANALYZED))
52             doc.add(Field("docid", str(1),
53                           Field.Store.NO, Field.Index.NOT_ANALYZED))
54             doc.add(Field("owner", "unittester",
55                           Field.Store.YES, Field.Index.NOT_ANALYZED))
56             doc.add(Field("search_name", "wisdom",
57                           Field.Store.YES, Field.Index.NO))
58             doc.add(Field("meta_words", "rabbits are beautiful",
59                           Field.Store.NO, Field.Index.ANALYZED))
60         
61             writer.addDocument(doc)
62         finally:
63             self.closeStore(store, writer)
64
65     def test_indexDocumentWithText(self):
66
67         store = self.openStore()
68         writer = None
69         try:
70             analyzer = self.getAnalyzer()
71             writer = self.getWriter(store, analyzer, True)
72         
73             doc = Document()
74             doc.add(Field("title", "value of testing",
75                           Field.Store.YES, Field.Index.ANALYZED))
76             doc.add(Field("docid", str(1),
77                           Field.Store.NO, Field.Index.NOT_ANALYZED))
78             doc.add(Field("owner", "unittester",
79                           Field.Store.YES, Field.Index.NOT_ANALYZED))
80             doc.add(Field("search_name", "wisdom",
81                           Field.Store.YES, Field.Index.NO))
82             doc.add(Field("meta_words", "rabbits are beautiful",
83                           Field.Store.NO, Field.Index.ANALYZED))
84
85             body_text = "hello world" * 20
86             body_reader = StringReader(body_text)
87             doc.add(Field("content", body_reader))
88
89             writer.addDocument(doc)
90         finally:
91             self.closeStore(store, writer)
92
93     def test_indexDocumentWithUnicodeText(self):
94
95         store = self.openStore()
96         writer = None
97         try:
98             analyzer = self.getAnalyzer()
99             writer = self.getWriter(store, analyzer, True)
100         
101             doc = Document()
102             doc.add(Field("title", "value of testing",
103                           Field.Store.YES, Field.Index.ANALYZED))
104             doc.add(Field("docid", str(1),
105                           Field.Store.NO, Field.Index.NOT_ANALYZED))
106             doc.add(Field("owner", "unittester",
107                           Field.Store.YES, Field.Index.NOT_ANALYZED))
108             doc.add(Field("search_name", "wisdom",
109                           Field.Store.YES, Field.Index.NO))
110             doc.add(Field("meta_words", "rabbits are beautiful",
111                           Field.Store.NO, Field.Index.ANALYZED))
112
113             # using a unicode body cause problems, which seems very odd
114             # since the python type is the same regardless affter doing
115             # the encode
116             body_text = u"hello world"*20
117             body_reader = StringReader(body_text)
118             doc.add(Field("content", body_reader))
119
120             writer.addDocument(doc)
121         finally:
122             self.closeStore(store, writer)
123
124     def test_searchDocuments(self):
125
126         self.test_indexDocument()
127
128         store = self.openStore()
129         searcher = None
130         try:
131             searcher = IndexSearcher(store, True)
132             query = QueryParser(Version.LUCENE_CURRENT, "title",
133                                 self.getAnalyzer()).parse("value")
134             topDocs = searcher.search(query, 50)
135             self.assertEqual(topDocs.totalHits, 1)
136         finally:
137             self.closeStore(store, searcher)
138
139     def test_searchDocumentsWithMultiField(self):
140         """
141         Tests searching with MultiFieldQueryParser
142         """
143
144         self.test_indexDocument()
145         store = self.openStore()
146         searcher = None
147         try:
148             searcher = IndexSearcher(store, True)
149             SHOULD = BooleanClause.Occur.SHOULD
150             query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
151                                                 "value", ["title", "docid"],
152                                                 [SHOULD, SHOULD],
153                                                 self.getAnalyzer())
154             topDocs = searcher.search(query, 50)
155             self.assertEquals(1, topDocs.totalHits)
156         finally:
157             self.closeStore(store, searcher)
158         
159     def test_removeDocument(self):
160
161         self.test_indexDocument()
162
163         store = self.openStore()
164         searcher = None
165         reader = None
166
167         try:
168             searcher = IndexSearcher(store, True)
169             query = TermQuery(Term("docid", str(1)))
170             topDocs = searcher.search(query, 50)
171             self.assertEqual(topDocs.totalHits, 1)
172             # be careful with ids they are ephemeral
173             docid = topDocs.scoreDocs[0].doc
174         
175             reader = IndexReader.open(store, False)
176             reader.deleteDocument(docid)
177         finally:
178             self.closeStore(store, searcher, reader)
179
180         store = self.openStore()
181         searcher = None
182         try:
183             searcher = IndexSearcher(store, True)
184             query = TermQuery(Term("docid", str(1)))
185             topDocs = searcher.search(query, 50)
186             self.assertEqual(topDocs.totalHits, 0)
187         finally:
188             self.closeStore(store, searcher)
189         
190     def test_removeDocuments(self):
191
192         self.test_indexDocument()
193
194         store = self.openStore()
195         reader = None
196         try:
197             reader = IndexReader.open(store, False)
198             reader.deleteDocuments(Term('docid', str(1)))
199         finally:
200             self.closeStore(store, reader)
201         
202         store = self.openStore()
203         searcher = None
204         try:
205             searcher = IndexSearcher(store, True)
206             query = QueryParser(Version.LUCENE_CURRENT, "title",
207                                 self.getAnalyzer()).parse("value")
208             topDocs = searcher.search(query, 50)
209             self.assertEqual(topDocs.totalHits, 0)
210         finally:
211             self.closeStore(store, searcher)
212         
213     def test_FieldEnumeration(self):
214
215         self.test_indexDocument()
216
217         store = self.openStore()
218         writer = None
219         try:
220             analyzer = self.getAnalyzer()
221         
222             writer = self.getWriter(store, analyzer, False)
223             doc = Document()
224             doc.add(Field("title", "value of testing",
225                           Field.Store.YES, Field.Index.ANALYZED))
226             doc.add(Field("docid", str(2),
227                           Field.Store.NO, Field.Index.NOT_ANALYZED))
228             doc.add(Field("owner", "unittester",
229                           Field.Store.YES, Field.Index.NOT_ANALYZED))
230             doc.add(Field("search_name", "wisdom",
231                           Field.Store.YES, Field.Index.NO))
232             doc.add(Field("meta_words", "rabbits are beautiful",
233                           Field.Store.NO, Field.Index.ANALYZED))
234                                    
235             writer.addDocument(doc)
236         
237             doc = Document()
238             doc.add(Field("owner", "unittester",
239                           Field.Store.NO, Field.Index.NOT_ANALYZED))
240             doc.add(Field("search_name", "wisdom",
241                           Field.Store.YES, Field.Index.NO))
242             doc.add(Field("meta_words", "rabbits are beautiful",
243                           Field.Store.NO, Field.Index.ANALYZED))
244             writer.addDocument(doc)        
245         finally:
246             self.closeStore(store, writer)
247         
248         store = self.openStore()
249         reader = None
250         try:
251             reader = IndexReader.open(store, True)
252             term_enum = reader.terms(Term("docid", ''))
253             docids = []
254
255             while term_enum.term().field() == 'docid':
256                 docids.append(term_enum.term().text())
257                 term_enum.next()
258             self.assertEqual(len(docids), 2)
259         finally:
260             self.closeStore(store, reader)
261
262     def test_getFieldNames(self):
263
264         self.test_indexDocument()
265
266         store = self.openStore()
267         reader = None
268         try:
269             reader = IndexReader.open(store, True)
270             fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL)
271             for fieldName in fieldNames:
272                 self.assert_(fieldName in ['owner', 'search_name', 'meta_words',
273                                            'docid', 'title'])
274         
275             fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED)
276             for fieldName in fieldNames:
277                 self.assert_(fieldName in ['owner', 'meta_words',
278                                            'docid', 'title'])
279
280             fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
281             for fieldName in fieldNames:
282                 self.assert_(fieldName in ['owner', 'meta_words',
283                                            'docid', 'title'])
284         finally:
285             store = self.closeStore(store, reader)
286
287         
class Test_PyLuceneWithFSStore(TestCase, Test_PyLuceneBase):
    """Run the shared PyLucene tests against a SimpleFSDirectory.

    The index lives in STORE_DIR, which is created before each test if it
    does not exist and removed again afterwards.
    """

    # Directory holding the on-disk index, relative to the working directory.
    STORE_DIR = "testrepo"

    def setUp(self):
        """Create the index directory if it does not exist yet."""
        if not os.path.exists(self.STORE_DIR):
            os.mkdir(self.STORE_DIR)

    def tearDown(self):
        """Remove the index directory and everything in it."""
        if os.path.exists(self.STORE_DIR):
            shutil.rmtree(self.STORE_DIR)

    def openStore(self):
        """Return a SimpleFSDirectory opened on STORE_DIR."""
        return SimpleFSDirectory(File(self.STORE_DIR))

    def closeStore(self, store, *args):
        """Close every non-None resource in args, then the store itself.

        The store is closed in a finally clause so that it is released even
        when closing one of the writers/readers/searchers raises; that
        exception still propagates to the caller.
        """
        try:
            for arg in args:
                if arg is not None:
                    arg.close()
        finally:
            store.close()
313
314
class Test_PyLuceneWithMMapStore(Test_PyLuceneWithFSStore):
    """Same tests as the FS store variant, run against an MMapDirectory."""

    def openStore(self):
        """Return an MMapDirectory opened on STORE_DIR."""
        indexPath = File(self.STORE_DIR)
        return MMapDirectory(indexPath)
320
321
322
if __name__ == "__main__":
    import sys
    import lucene

    # initVM() is called once, before any test runs.
    lucene.initVM()
    if '-loop' in sys.argv:
        # '-loop' is stripped so unittest does not see it as an argument,
        # then the whole suite is rerun indefinitely.
        sys.argv.remove('-loop')
        while True:
            try:
                main()
            except (SystemExit, Exception):
                # unittest's main() finishes by raising SystemExit, so it
                # has to be swallowed for the loop to continue.  Unlike a
                # bare "except:", this lets KeyboardInterrupt through so
                # that Ctrl-C can stop the loop.
                pass
    else:
        main()