some speedups for batch indexing
[wolnelektury.git] / apps / catalogue / models.py
index 9f26ae5..7105cfd 100644 (file)
@@ -24,7 +24,7 @@ from django.conf import settings
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 from catalogue.fields import JSONField, OverwritingFileField
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 from catalogue.fields import JSONField, OverwritingFileField
-from catalogue.utils import create_zip, split_tags
+from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue.tasks import touch_tag, index_book
 from shutil import copy
 from glob import glob
 from catalogue.tasks import touch_tag, index_book
 from shutil import copy
 from glob import glob
@@ -565,7 +565,6 @@ class Book(models.Model):
 
 
     def build_html(self):
 
 
     def build_html(self):
-        from markupstring import MarkupString
         from django.core.files.base import ContentFile
         from slughifi import slughifi
         from librarian import html
         from django.core.files.base import ContentFile
         from slughifi import slughifi
         from librarian import html
@@ -609,10 +608,9 @@ class Book(models.Model):
                     continue
 
                 text = fragment.to_string()
                     continue
 
                 text = fragment.to_string()
-                short_text = ''
-                markup = MarkupString(text)
-                if (len(markup) > 240):
-                    short_text = unicode(markup[:160])
+                short_text = truncate_html_words(text, 15)
+                if text == short_text:
+                    short_text = ''
                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                     text=text, short_text=short_text)
 
                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                     text=text, short_text=short_text)
 
@@ -645,7 +643,7 @@ class Book(models.Model):
         result = create_zip.delay(paths, "%s_%s" % (self.slug, format_))
         return result.wait()
 
         result = create_zip.delay(paths, "%s_%s" % (self.slug, format_))
         return result.wait()
 
-    def search_index(self, book_info=None, reuse_index=False):
+    def search_index(self, book_info=None, reuse_index=False, index_tags=True):
         if reuse_index:
             idx = search.ReusableIndex()
         else:
         if reuse_index:
             idx = search.ReusableIndex()
         else:
@@ -654,7 +652,8 @@ class Book(models.Model):
         idx.open()
         try:
             idx.index_book(self, book_info)
         idx.open()
         try:
             idx.index_book(self, book_info)
-            idx.index_tags()
+            if index_tags:
+                idx.index_tags()
         finally:
             idx.close()
 
         finally:
             idx.close()
 
@@ -677,7 +676,7 @@ class Book(models.Model):
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
-            search_index=True):
+            search_index=True, search_index_tags=True, search_index_reuse=False):
         import re
         from sortify import sortify
 
         import re
         from sortify import sortify
 
@@ -749,7 +748,7 @@ class Book(models.Model):
             book.build_mobi()
 
         if not settings.NO_SEARCH_INDEX and search_index:
             book.build_mobi()
 
         if not settings.NO_SEARCH_INDEX and search_index:
-            book.search_index()
+            book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
             #index_book.delay(book.id, book_info)
 
         book_descendants = list(book.children.all())
             #index_book.delay(book.id, book_info)
 
         book_descendants = list(book.children.all())
@@ -765,7 +764,7 @@ class Book(models.Model):
             book_descendants += list(child_book.children.all())
 
         for tag in descendants_tags:
             book_descendants += list(child_book.children.all())
 
         for tag in descendants_tags:
-            touch_tag.delay(tag)
+            touch_tag(tag)
 
         book.save()
 
 
         book.save()
 
@@ -782,14 +781,26 @@ class Book(models.Model):
             return self._related_info
         else:
             rel = {'tags': {}, 'media': {}}
             return self._related_info
         else:
             rel = {'tags': {}, 'media': {}}
+
             tags = self.tags.filter(category__in=(
                     'author', 'kind', 'genre', 'epoch'))
             tags = split_tags(tags)
             for category in tags:
                 rel['tags'][category] = [
             tags = self.tags.filter(category__in=(
                     'author', 'kind', 'genre', 'epoch'))
             tags = split_tags(tags)
             for category in tags:
                 rel['tags'][category] = [
-                        (t.name, t.get_absolute_url()) for t in tags[category]]
+                        (t.name, t.slug) for t in tags[category]]
+
             for media_format in BookMedia.formats:
                 rel['media'][media_format] = self.has_media(media_format)
             for media_format in BookMedia.formats:
                 rel['media'][media_format] = self.has_media(media_format)
+
+            book = self
+            parents = []
+            while book.parent:
+                parents.append((book.parent.title, book.parent.slug))
+                book = book.parent
+            parents = parents[::-1]
+            if parents:
+                rel['parents'] = parents
+
             if self.pk:
                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
             return rel
             if self.pk:
                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
             return rel
@@ -991,6 +1002,10 @@ class Fragment(models.Model):
         for lang, langname in settings.LANGUAGES:
             permanent_cache.delete(cache_key % (self.id, lang))
 
         for lang, langname in settings.LANGUAGES:
             permanent_cache.delete(cache_key % (self.id, lang))
 
+    def get_short_text(self):
+        """Returns the fragment's short text, or its full text if no short text is set."""
+        return self.short_text if self.short_text else self.text
+
     def short_html(self):
         if self.id:
             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
     def short_html(self):
         if self.id:
             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
@@ -1037,7 +1052,7 @@ def _tags_updated_handler(sender, affected_tags, **kwargs):
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
     for tag in affected_tags:
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
     for tag in affected_tags:
-        touch_tag.delay(tag)
+        touch_tag(tag)
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \