X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/18e520abab50a7e37eff7b09b9754ed32899811b..d595ad44e359460abafebfcb4146140a169312c5:/apps/search/index.py?ds=inline diff --git a/apps/search/index.py b/apps/search/index.py index 33f2aa1aa..a6d5cf6eb 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -9,11 +9,13 @@ from lucene import SimpleFSDirectory, IndexWriter, File, Field, \ HashSet, BooleanClause, Term, CharTermAttribute, \ PhraseQuery, StringReader # KeywordAnalyzer +import sys import os import errno from librarian import dcparser from librarian.parser import WLDocument import catalogue.models +from multiprocessing.pool import ThreadPool import atexit @@ -80,6 +82,7 @@ class Index(IndexStore): def index_book(self, book, overwrite=True): if overwrite: self.remove_book(book) + doc = self.extract_metadata(book) parts = self.extract_content(book) @@ -247,24 +250,38 @@ class ReusableIndex(Index): if you cannot rely on atexit, use ReusableIndex.close_reusable() yourself. """ index = None - def open(self, analyzer=None): + pool = None + pool_jobs = None + + def open(self, analyzer=None, threads=4): if ReusableIndex.index is not None: self.index = ReusableIndex.index else: - Index.open(self,analyzer) + ReusableIndex.pool = ThreadPool(threads) + ReusableIndex.pool_jobs = [] + Index.open(self, analyzer) ReusableIndex.index = self.index atexit.register(ReusableIndex.close_reusable) + def index_book(self, *args, **kw): + job = ReusableIndex.pool.apply_async(Index.index_book, (self,)+ args, kw) + ReusableIndex.pool_jobs.append(job) + @staticmethod def close_reusable(): if ReusableIndex.index is not None: + for job in ReusableIndex.pool_jobs: + job.wait() + ReusableIndex.pool.close() + ReusableIndex.index.optimize() ReusableIndex.index.close() ReusableIndex.index = None - + def close(self): pass + class Search(IndexStore): def __init__(self, default_field="content"): IndexStore.__init__(self)