Fixed a multithreading bug with attaching to the JVM
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Tue, 8 Nov 2011 17:25:15 +0000 (18:25 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Tue, 15 Nov 2011 13:24:22 +0000 (14:24 +0100)
apps/search/__init__.py
apps/search/index.py

index bc07a74..a3f8e9e 100644 (file)
@@ -1,3 +1,3 @@
 import lucene
-JVM = lucene.initVM(lucene.CLASSPATH)
-from index import Index, Search, ReusableIndex, MultiSearch, SearchResult
+
+from index import Index, Search, ReusableIndex, MultiSearch, SearchResult, JVM
index 1c79ea2..5ebae2c 100644 (file)
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from django.conf import settings
 from lucene import SimpleFSDirectory, IndexWriter, File, Field, \
     NumericField, Version, Document, JavaError, IndexSearcher, \
@@ -8,8 +9,10 @@ from lucene import SimpleFSDirectory, IndexWriter, File, Field, \
     BlockJoinQuery, BlockJoinCollector, TermsFilter, \
     HashSet, BooleanClause, Term, CharTermAttribute, \
     PhraseQuery, StringReader, TermQuery, BlockJoinQuery, \
-    Sort, Integer
+    Sort, Integer, \
+    initVM, CLASSPATH
     # KeywordAnalyzer
+JVM = initVM(CLASSPATH)
 import sys
 import os
 import errno
@@ -85,7 +88,6 @@ class Index(IndexStore):
         if overwrite:
             self.remove_book(book)
 
-
         doc = self.extract_metadata(book)
         parts = self.extract_content(book)
         block = ArrayList().of_(Document)
@@ -262,14 +264,14 @@ class ReusableIndex(Index):
             self.index = ReusableIndex.index
         else:
             print("opening index")
-            ReusableIndex.pool = ThreadPool(threads)
+            ReusableIndex.pool = ThreadPool(threads, initializer=lambda: JVM.attachCurrentThread() )
             ReusableIndex.pool_jobs = []
             Index.open(self, analyzer)
             ReusableIndex.index = self.index
             atexit.register(ReusableIndex.close_reusable)
 
     def index_book(self, *args, **kw):
-        job = ReusableIndex.pool.apply_async(Index.index_book, (self,)+ args, kw)
+        job = ReusableIndex.pool.apply_async(Index.index_book, (self,) + args, kw)
         ReusableIndex.pool_jobs.append(job)
 
     @staticmethod
@@ -277,7 +279,9 @@ class ReusableIndex(Index):
         if ReusableIndex.index is not None:
             print("closing index")
             for job in ReusableIndex.pool_jobs:
-                job.wait()
+                job.get()
+                sys.stdout.write('.')
+                sys.stdout.flush()
             ReusableIndex.pool.close()
 
             ReusableIndex.index.optimize()