Configured build for Ubuntu and added Stempel Polish analyzer
[pylucene.git] / samples / LuceneInAction / lia / indexing / BaseIndexingTestCase.py
1 # ====================================================================
2 #   Licensed under the Apache License, Version 2.0 (the "License");
3 #   you may not use this file except in compliance with the License.
4 #   You may obtain a copy of the License at
5 #
6 #       http://www.apache.org/licenses/LICENSE-2.0
7 #
8 #   Unless required by applicable law or agreed to in writing, software
9 #   distributed under the License is distributed on an "AS IS" BASIS,
10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 #   See the License for the specific language governing permissions and
12 #   limitations under the License.
13 # ====================================================================
14
15 import os
16
17 from unittest import TestCase
18
19 from lucene import \
20     SimpleFSDirectory, System, File, \
21     Document, Field, SimpleAnalyzer, IndexWriter, IndexReader
22
23
24 class BaseIndexingTestCase(TestCase):
25
26     keywords = ["1", "2"]
27     unindexed = ["Netherlands", "Italy"]
28     unstored = ["Amsterdam has lots of bridges",
29                 "Venice has lots of canals"]
30     text = ["Amsterdam", "Venice"]
31
32     def setUp(self):
33
34         indexDir = os.path.join(System.getProperty('java.io.tmpdir', 'tmp'),
35                                 'index-dir')
36         self.rmdir(indexDir)
37         self.dir = SimpleFSDirectory(File(indexDir))
38         self.addDocuments(self.dir)
39
40     def rmdir(self, dir):
41
42         for dir, dirnames, filenames in os.walk(dir):
43             for filename in filenames:
44                 os.remove(os.path.join(dir, filename))
45             for dirname in dirnames:
46                 os.rmdir(os.path.join(dir, dirname))
47
48     def addDocuments(self, dir):
49
50         writer = IndexWriter(dir, self.getAnalyzer(), True,
51                              IndexWriter.MaxFieldLength.UNLIMITED)
52         writer.setUseCompoundFile(self.isCompound())
53
54         for i in xrange(len(self.keywords)):
55             doc = Document()
56             doc.add(Field("id", self.keywords[i],
57                           Field.Store.YES, Field.Index.NOT_ANALYZED))
58             doc.add(Field("country", self.unindexed[i],
59                           Field.Store.YES, Field.Index.NO))
60             doc.add(Field("contents", self.unstored[i],
61                           Field.Store.NO, Field.Index.ANALYZED))
62             doc.add(Field("city", self.text[i],
63                           Field.Store.YES, Field.Index.ANALYZED))
64             writer.addDocument(doc)
65
66         writer.optimize()
67         writer.close()
68
69     def getAnalyzer(self):
70
71         return SimpleAnalyzer()
72
73     def isCompound(self):
74
75         return True
76
77     def testIndexWriter(self):
78
79         writer = IndexWriter(self.dir, self.getAnalyzer(), False,
80                              IndexWriter.MaxFieldLength.UNLIMITED)
81         self.assertEqual(len(self.keywords), writer.numDocs())
82         writer.close()
83
84     def testIndexReader(self):
85
86         reader = IndexReader.open(self.dir, True)
87         self.assertEqual(len(self.keywords), reader.maxDoc())
88         self.assertEqual(len(self.keywords), reader.numDocs())
89         reader.close()