Search engine now should be db change change-proof
[wolnelektury.git] / apps / catalogue / models.py
index 9f26ae5..ea09c04 100644 (file)
@@ -16,7 +16,7 @@ from django.utils.datastructures import SortedDict
 from django.utils.safestring import mark_safe
 from django.utils.translation import get_language
 from django.core.urlresolvers import reverse
-from django.db.models.signals import post_save, m2m_changed, pre_delete
+from django.db.models.signals import post_save, m2m_changed, pre_delete, post_delete
 import jsonfield
 
 from django.conf import settings
@@ -24,7 +24,7 @@ from django.conf import settings
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 from catalogue.fields import JSONField, OverwritingFileField
-from catalogue.utils import create_zip, split_tags
+from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue.tasks import touch_tag, index_book
 from shutil import copy
 from glob import glob
@@ -216,13 +216,16 @@ def book_upload_path(ext=None, maxlen=100):
     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
 
 
+def customizations_hash(customizations):
+    customizations.sort()
+    return hash(tuple(customizations))
+
+
 def get_customized_pdf_path(book, customizations):
     """
     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
     """
-    customizations.sort()
-    h = hash(tuple(customizations))
-
+    h = customizations_hash(customizations)
     pdf_name = '%s-custom-%s' % (book.slug, h)
     pdf_file = get_dynamic_path(None, pdf_name, ext='pdf')
 
@@ -565,7 +568,6 @@ class Book(models.Model):
 
 
     def build_html(self):
-        from markupstring import MarkupString
         from django.core.files.base import ContentFile
         from slughifi import slughifi
         from librarian import html
@@ -609,10 +611,9 @@ class Book(models.Model):
                     continue
 
                 text = fragment.to_string()
-                short_text = ''
-                markup = MarkupString(text)
-                if (len(markup) > 240):
-                    short_text = unicode(markup[:160])
+                short_text = truncate_html_words(text, 15)
+                if text == short_text:
+                    short_text = ''
                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                     text=text, short_text=short_text)
 
@@ -645,7 +646,7 @@ class Book(models.Model):
         result = create_zip.delay(paths, "%s_%s" % (self.slug, format_))
         return result.wait()
 
-    def search_index(self, book_info=None, reuse_index=False):
+    def search_index(self, book_info=None, reuse_index=False, index_tags=True):
         if reuse_index:
             idx = search.ReusableIndex()
         else:
@@ -654,7 +655,8 @@ class Book(models.Model):
         idx.open()
         try:
             idx.index_book(self, book_info)
-            idx.index_tags()
+            if index_tags:
+                idx.index_tags()
         finally:
             idx.close()
 
@@ -677,7 +679,7 @@ class Book(models.Model):
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
-            search_index=True):
+            search_index=True, search_index_tags=True, search_index_reuse=False):
         import re
         from sortify import sortify
 
@@ -749,7 +751,7 @@ class Book(models.Model):
             book.build_mobi()
 
         if not settings.NO_SEARCH_INDEX and search_index:
-            book.search_index()
+            book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
             #index_book.delay(book.id, book_info)
 
         book_descendants = list(book.children.all())
@@ -765,7 +767,7 @@ class Book(models.Model):
             book_descendants += list(child_book.children.all())
 
         for tag in descendants_tags:
-            touch_tag.delay(tag)
+            touch_tag(tag)
 
         book.save()
 
@@ -782,14 +784,26 @@ class Book(models.Model):
             return self._related_info
         else:
             rel = {'tags': {}, 'media': {}}
+
             tags = self.tags.filter(category__in=(
                     'author', 'kind', 'genre', 'epoch'))
             tags = split_tags(tags)
             for category in tags:
                 rel['tags'][category] = [
-                        (t.name, t.get_absolute_url()) for t in tags[category]]
+                        (t.name, t.slug) for t in tags[category]]
+
             for media_format in BookMedia.formats:
                 rel['media'][media_format] = self.has_media(media_format)
+
+            book = self
+            parents = []
+            while book.parent:
+                parents.append((book.parent.title, book.parent.slug))
+                book = book.parent
+            parents = parents[::-1]
+            if parents:
+                rel['parents'] = parents
+
             if self.pk:
                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
             return rel
@@ -991,6 +1005,10 @@ class Fragment(models.Model):
         for lang, langname in settings.LANGUAGES:
             permanent_cache.delete(cache_key % (self.id, lang))
 
+    def get_short_text(self):
+        """Returns short version of the fragment."""
+        return self.short_text if self.short_text else self.text
+
     def short_html(self):
         if self.id:
             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
@@ -1037,7 +1055,7 @@ def _tags_updated_handler(sender, affected_tags, **kwargs):
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
     for tag in affected_tags:
-        touch_tag.delay(tag)
+        touch_tag(tag)
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \
@@ -1058,8 +1076,22 @@ def _pre_delete_handler(sender, instance, **kwargs):
         instance.book.save()
 pre_delete.connect(_pre_delete_handler)
 
+
 def _post_save_handler(sender, instance, **kwargs):
     """ refresh all the short_html stuff on BookMedia update """
     if sender == BookMedia:
         instance.book.save()
 post_save.connect(_post_save_handler)
+
+
+@django.dispatch.receiver(post_delete, sender=Book)
+def _remove_book_from_index_handler(sender, instance, **kwargs):
+    """ remove the book from search index, when it is deleted."""
+    search.JVM.attachCurrentThread()
+    idx = search.Index()
+    idx.open(timeout=10000)  # 10 seconds timeout.
+    try:
+        idx.remove_book(instance)
+        idx.index_tags()
+    finally:
+        idx.close()