- self.index.addDocument(doc)
- elif start is not None:
- for frag in fragments.values():
- frag['content'].append(start.text)
- elif end is not None:
- for frag in fragments.values():
- frag['content'].append(end.tail)
- finally:
- snippets.close()
-
-
- def __enter__(self):
- self.open()
- return self
-
- def __exit__(self, type, value, tb):
- self.close()
-
-
def log_exception_wrapper(f):
    """Wrap ``f`` so that any exception it raises is reported before propagating.

    Intended for callables handed to a worker pool, where an exception would
    otherwise stay hidden until the job result is collected.

    Args:
        f: the callable to wrap.

    Returns:
        A callable that forwards all positional and keyword arguments to
        ``f`` and returns its result.  If ``f`` raises an ``Exception``, the
        message and traceback are printed and the exception is re-raised.
    """
    def _wrap(*a, **kw):
        try:
            # Keyword arguments must be forwarded: the pool passes them on
            # when the job was submitted with a kwds dict.
            return f(*a, **kw)
        except Exception as e:
            print("Error in indexing thread: %s" % e)
            traceback.print_exc()
            # Bare raise keeps the original traceback intact.
            raise
    return _wrap
-
-
class ReusableIndex(Index):
    """
    Works like Index, but does not close/optimize the Lucene index
    until program exit (uses an atexit hook).
    This is useful for the importbooks command.

    The opened index, the worker pool and the list of pending jobs are kept
    in class attributes, so they are shared by every instance.

    If you cannot rely on atexit, call ReusableIndex.close_reusable() yourself.
    """
    # Shared state; all three are None until the first open().
    index = None
    pool = None
    pool_jobs = None

    def open(self, analyzer=None, threads=4):
        """Attach to the shared index, opening it first if necessary.

        On the first call this creates the worker pool (``threads`` workers,
        each attached to the JVM by its initializer), opens the index via
        ``Index.open`` and registers ``close_reusable`` with atexit.  Later
        calls only reuse the already opened index; ``analyzer`` and
        ``threads`` are ignored then.
        """
        if ReusableIndex.index is not None:
            self.index = ReusableIndex.index
            return

        print("opening index")
        ReusableIndex.pool = ThreadPool(
            threads, initializer=lambda: JVM.attachCurrentThread())
        ReusableIndex.pool_jobs = []
        Index.open(self, analyzer)
        ReusableIndex.index = self.index
        atexit.register(ReusableIndex.close_reusable)

    def index_book(self, *args, **kw):
        """Queue ``Index.index_book`` on the worker pool instead of running it inline.

        The job is recorded in ``pool_jobs`` so close_reusable() can wait
        for it.  Nothing is returned.
        """
        job = ReusableIndex.pool.apply_async(
            log_exception_wrapper(Index.index_book), (self,) + args, kw)
        ReusableIndex.pool_jobs.append(job)

    @staticmethod
    def close_reusable():
        """Wait for queued jobs, then optimize and close the shared index.

        Does nothing when no shared index is open.  If a job failed, its
        exception propagates from here and the index is not optimized, but
        the pool is still shut down and the index is still closed, so the
        index is not left open.
        """
        if ReusableIndex.index is None:
            return

        try:
            print("wait for indexing to finish")
            for job in ReusableIndex.pool_jobs:
                job.get()
                sys.stdout.write('.')
                sys.stdout.flush()
            print("done.")
            ReusableIndex.index.optimize()
        finally:
            try:
                # join() waits for the workers to finish, so no thread is
                # still using the index when it is closed below.
                ReusableIndex.pool.close()
                ReusableIndex.pool.join()
            finally:
                ReusableIndex.index.close()
                ReusableIndex.index = None
                ReusableIndex.pool = None
                ReusableIndex.pool_jobs = None

    def close(self):
        """Do nothing: the shared index is closed by close_reusable()."""
        pass
-
-
-class Search(IndexStore):
- def __init__(self, default_field="content"):
- IndexStore.__init__(self)
- self.analyzer = WLAnalyzer() #PolishAnalyzer(Version.LUCENE_34)
- ## self.analyzer = WLAnalyzer()
- self.searcher = IndexSearcher(self.store, True)
- self.parser = QueryParser(Version.LUCENE_34, default_field,
- self.analyzer)
-
- self.parent_filter = TermsFilter()
- self.parent_filter.addTerm(Term("is_book", "true"))