samples/LuceneInAction/lia/indexing/BaseIndexingTestCase.py

   1 # ====================================================================
   2 #   Licensed under the Apache License, Version 2.0 (the "License");
   3 #   you may not use this file except in compliance with the License.
   4 #   You may obtain a copy of the License at
   5 #
   6 #       http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 #   Unless required by applicable law or agreed to in writing, software
   9 #   distributed under the License is distributed on an "AS IS" BASIS,
  10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 #   See the License for the specific language governing permissions and
  12 #   limitations under the License.
  13 # ====================================================================
  14
  15 import os
  16
  17 from unittest import TestCase
  18
  19 from lucene import \
  20     SimpleFSDirectory, System, File, \
  21     Document, Field, SimpleAnalyzer, IndexWriter, IndexReader
  22
  23
  24 class BaseIndexingTestCase(TestCase):
  25
  26     keywords = ["1", "2"]
  27     unindexed = ["Netherlands", "Italy"]
  28     unstored = ["Amsterdam has lots of bridges",
  29                 "Venice has lots of canals"]
  30     text = ["Amsterdam", "Venice"]
  31
  32     def setUp(self):
  33
  34         indexDir = os.path.join(System.getProperty('java.io.tmpdir', 'tmp'),
  35                                 'index-dir')
  36         self.rmdir(indexDir)
  37         self.dir = SimpleFSDirectory(File(indexDir))
  38         self.addDocuments(self.dir)
  39
  40     def rmdir(self, dir):
  41
  42         for dir, dirnames, filenames in os.walk(dir):
  43             for filename in filenames:
  44                 os.remove(os.path.join(dir, filename))
  45             for dirname in dirnames:
  46                 os.rmdir(os.path.join(dir, dirname))
  47
  48     def addDocuments(self, dir):
  49
  50         writer = IndexWriter(dir, self.getAnalyzer(), True,
  51                              IndexWriter.MaxFieldLength.UNLIMITED)
  52         writer.setUseCompoundFile(self.isCompound())
  53
  54         for i in xrange(len(self.keywords)):
  55             doc = Document()
  56             doc.add(Field("id", self.keywords[i],
  57                           Field.Store.YES, Field.Index.NOT_ANALYZED))
  58             doc.add(Field("country", self.unindexed[i],
  59                           Field.Store.YES, Field.Index.NO))
  60             doc.add(Field("contents", self.unstored[i],
  61                           Field.Store.NO, Field.Index.ANALYZED))
  62             doc.add(Field("city", self.text[i],
  63                           Field.Store.YES, Field.Index.ANALYZED))
  64             writer.addDocument(doc)
  65
  66         writer.optimize()
  67         writer.close()
  68
  69     def getAnalyzer(self):
  70
  71         return SimpleAnalyzer()
  72
  73     def isCompound(self):
  74
  75         return True
  76
  77     def testIndexWriter(self):
  78
  79         writer = IndexWriter(self.dir, self.getAnalyzer(), False,
  80                              IndexWriter.MaxFieldLength.UNLIMITED)
  81         self.assertEqual(len(self.keywords), writer.numDocs())
  82         writer.close()
  83
  84     def testIndexReader(self):
  85
  86         reader = IndexReader.open(self.dir, True)
  87         self.assertEqual(len(self.keywords), reader.maxDoc())
  88         self.assertEqual(len(self.keywords), reader.numDocs())
  89         reader.close()