1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
import os

from datetime import timedelta
from time import time
from unittest import TestCase

from lucene import \
    IndexWriter, SimpleAnalyzer, Document, Field, System, File, \
    SimpleFSDirectory, RAMDirectory
26 class FSversusRAMDirectoryTest(TestCase):
28 def __init__(self, *args):
30 super(FSversusRAMDirectoryTest, self).__init__(*args)
31 self.docs = self.loadDocuments(3000, 5)
35 fsIndexDir = os.path.join(System.getProperty("java.io.tmpdir", "tmp"),
37 self.rmdir(fsIndexDir)
38 self.ramDir = RAMDirectory()
39 self.fsDir = SimpleFSDirectory(File(fsIndexDir))
43 for dir, dirnames, filenames in os.walk(dir):
44 for filename in filenames:
45 os.remove(os.path.join(dir, filename))
46 for dirname in dirnames:
47 os.rmdir(os.path.join(dir, dirname))
51 ramTiming = self.timeIndexWriter(self.ramDir)
52 fsTiming = self.timeIndexWriter(self.fsDir)
54 #self.assert_(fsTiming > ramTiming)
56 print "RAMDirectory Time:", ramTiming
57 print "FSDirectory Time :", fsTiming
59 def timeIndexWriter(self, dir):
62 self.addDocuments(dir)
64 return timedelta(seconds=time() - start)
66 def addDocuments(self, dir):
68 writer = IndexWriter(dir, SimpleAnalyzer(), True,
69 IndexWriter.MaxFieldLength.UNLIMITED)
72 # change to adjust performance of indexing with FSDirectory
73 # writer.mergeFactor = writer.mergeFactor
74 # writer.maxMergeDocs = writer.maxMergeDocs
75 # writer.minMergeDocs = writer.minMergeDocs
78 for word in self.docs:
80 doc.add(Field("keyword", word,
81 Field.Store.YES, Field.Index.NOT_ANALYZED))
82 doc.add(Field("unindexed", word,
83 Field.Store.YES, Field.Index.NO))
84 doc.add(Field("unstored", word,
85 Field.Store.NO, Field.Index.ANALYZED))
86 doc.add(Field("text", word,
87 Field.Store.YES, Field.Index.ANALYZED))
88 writer.addDocument(doc)
93 def loadDocuments(self, numDocs, wordsPerDoc):
95 return ["Bibamus " * wordsPerDoc] * numDocs