Remove book from index on deletion.
[wolnelektury.git] / apps / search / index.py
index 97145d3..e0f9de9 100644 (file)
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from django.conf import settings
 # -*- coding: utf-8 -*-
 
 from django.conf import settings
-from lucene import SimpleFSDirectory, IndexWriter, CheckIndex, \
+from lucene import SimpleFSDirectory, IndexWriter, IndexWriterConfig, CheckIndex, \
     File, Field, Integer, \
     NumericField, Version, Document, JavaError, IndexSearcher, \
     QueryParser, PerFieldAnalyzerWrapper, \
     File, Field, Integer, \
     NumericField, Version, Document, JavaError, IndexSearcher, \
     QueryParser, PerFieldAnalyzerWrapper, \
@@ -169,11 +169,13 @@ class BaseIndex(IndexStore):
             analyzer = WLAnalyzer()
         self.analyzer = analyzer
 
             analyzer = WLAnalyzer()
         self.analyzer = analyzer
 
-    def open(self, analyzer=None):
+    def open(self, analyzer=None, timeout=None):
         if self.index:
             raise Exception("Index is already opened")
         if self.index:
             raise Exception("Index is already opened")
-        self.index = IndexWriter(self.store, self.analyzer,\
-                                 IndexWriter.MaxFieldLength.LIMITED)
+        conf = IndexWriterConfig(Version.LUCENE_34, analyzer)
+        if timeout:
+            conf.setWriteLockTimeout(long(timeout))
+        self.index = IndexWriter(self.store, conf)
         return self.index
 
     def optimize(self):
         return self.index
 
     def optimize(self):
@@ -553,12 +555,12 @@ class ReusableIndex(Index):
     """
     index = None
 
     """
     index = None
 
-    def open(self, analyzer=None, threads=4):
+    def open(self, analyzer=None, **kw):
         if ReusableIndex.index:
             self.index = ReusableIndex.index
         else:
             print("opening index")
         if ReusableIndex.index:
             self.index = ReusableIndex.index
         else:
             print("opening index")
-            Index.open(self, analyzer)
+            Index.open(self, analyzer, **kw)
             ReusableIndex.index = self.index
             atexit.register(ReusableIndex.close_reusable)
 
             ReusableIndex.index = self.index
             atexit.register(ReusableIndex.close_reusable)
 
@@ -732,7 +734,7 @@ class SearchResult(object):
 
         for f in frags:
             try:
 
         for f in frags:
             try:
-                frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT])
+                frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT], book__id=self.book_id)
             except catalogue.models.Fragment.DoesNotExist:
                 # stale index
                 continue
             except catalogue.models.Fragment.DoesNotExist:
                 # stale index
                 continue
@@ -1212,17 +1214,25 @@ class Search(IndexStore):
         if position is None or length is None:
             return None
         # locate content.
         if position is None or length is None:
             return None
         # locate content.
-        snippets = Snippets(stored.get('book_id')).open()
+        book_id = int(stored.get('book_id'))
+        snippets = Snippets(book_id).open()
         try:
         try:
-            text = snippets.get((int(position),
-                                 int(length)))
-        finally:
-            snippets.close()
+            try:
+                text = snippets.get((int(position),
+                                     int(length)))
+            finally:
+                snippets.close()
 
 
-        tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
-        #  highlighter.getBestTextFragments(tokenStream, text, False, 10)
-        snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
+            tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
+            #  highlighter.getBestTextFragments(tokenStream, text, False, 10)
+            snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
 
 
+        except Exception, e:
+            e2 = e
+            if hasattr(e, 'getJavaException'):
+                e2 = unicode(e.getJavaException())
+            raise Exception("Problem fetching snippets for book %d, @%d len=%d" % (book_id, int(position), int(length)),
+                e2)
         return snip
 
     @staticmethod
         return snip
 
     @staticmethod
@@ -1302,7 +1312,7 @@ class Search(IndexStore):
 
         return only_term
 
 
         return only_term
 
-    def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True):
+    def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True, fuzzy=False):
         """
         Return auto-complete hints for tags
         using prefix search.
         """
         Return auto-complete hints for tags
         using prefix search.
@@ -1314,14 +1324,14 @@ class Search(IndexStore):
             if prefix:
                 q = self.make_prefix_phrase(toks, field)
             else:
             if prefix:
                 q = self.make_prefix_phrase(toks, field)
             else:
-                q = self.make_term_query(toks, field)
+                q = self.make_term_query(toks, field, fuzzy=fuzzy)
             top.add(BooleanClause(q, BooleanClause.Occur.SHOULD))
 
         no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True)
 
         return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter)
 
             top.add(BooleanClause(q, BooleanClause.Occur.SHOULD))
 
         no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True)
 
         return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter)
 
-    def hint_books(self, string, max_results=50, prefix=True):
+    def hint_books(self, string, max_results=50, prefix=True, fuzzy=False):
         """
         Returns auto-complete hints for book titles
         Because we do not index 'pseudo' title-tags.
         """
         Returns auto-complete hints for book titles
         Because we do not index 'pseudo' title-tags.
@@ -1332,7 +1342,7 @@ class Search(IndexStore):
         if prefix:
             q = self.make_prefix_phrase(toks, 'title')
         else:
         if prefix:
             q = self.make_prefix_phrase(toks, 'title')
         else:
-            q = self.make_term_query(toks, 'title')
+            q = self.make_term_query(toks, 'title', fuzzy=fuzzy)
 
         return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results)
 
 
         return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results)