fixes #2133
[wolnelektury.git] / apps / catalogue / models.py
index ba1a5d2..3704b16 100644 (file)
@@ -22,12 +22,11 @@ from django.conf import settings
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
-from catalogue.fields import JSONField, OverwritingFileField
+from catalogue.fields import OverwritingFileField
 from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue import tasks
 import re
 
 from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue import tasks
 import re
 
-import search
 
 # Those are hard-coded here so that makemessages sees them.
 TAG_CATEGORIES = (
 
 # Those are hard-coded here so that makemessages sees them.
 TAG_CATEGORIES = (
@@ -54,6 +53,10 @@ class TagSubcategoryManager(models.Manager):
 
 
 class Tag(TagBase):
 
 
 class Tag(TagBase):
+    """A tag attachable to books and fragments (and possibly anything).
+    
+    Used to represent searchable metadata (authors, epochs, genres, kinds),
+    fragment themes (motifs) and some book hierarchy related kludges."""
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
@@ -115,8 +118,8 @@ class Tag(TagBase):
             objects = Book.tagged.with_all((self,)).order_by()
             if self.category != 'set':
                 # eliminate descendants
             objects = Book.tagged.with_all((self,)).order_by()
             if self.category != 'set':
                 # eliminate descendants
-                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
-                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
+                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()])
+                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
                 if descendants_keys:
                     objects = objects.exclude(pk__in=descendants_keys)
         return objects.count()
                 if descendants_keys:
                     objects = objects.exclude(pk__in=descendants_keys)
         return objects.count()
@@ -213,6 +216,7 @@ def book_upload_path(ext=None, maxlen=100):
 
 
 class BookMedia(models.Model):
 
 
 class BookMedia(models.Model):
+    """Represents media attached to a book."""
     FileFormat = namedtuple("FileFormat", "name ext")
     formats = SortedDict([
         ('mp3', FileFormat(name='MP3', ext='mp3')),
     FileFormat = namedtuple("FileFormat", "name ext")
     formats = SortedDict([
         ('mp3', FileFormat(name='MP3', ext='mp3')),
@@ -226,7 +230,7 @@ class BookMedia(models.Model):
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
-    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
+    extra_info  = jsonfield.JSONField(_('extra information'), default='{}', editable=False)
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
@@ -258,9 +262,9 @@ class BookMedia(models.Model):
             remove_zip("%s_%s" % (old.book.slug, old.type))
         remove_zip("%s_%s" % (self.book.slug, self.type))
 
             remove_zip("%s_%s" % (old.book.slug, old.type))
         remove_zip("%s_%s" % (self.book.slug, self.type))
 
-        extra_info = self.get_extra_info_value()
+        extra_info = self.extra_info
         extra_info.update(self.read_meta())
         extra_info.update(self.read_meta())
-        self.set_extra_info_value(extra_info)
+        self.extra_info = extra_info
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
@@ -323,6 +327,7 @@ class BookMedia(models.Model):
 
 
 class Book(models.Model):
 
 
 class Book(models.Model):
+    """Represents a book imported from WL-XML."""
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
@@ -334,14 +339,14 @@ class Book(models.Model):
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
-    extra_info    = JSONField(_('extra information'), default='{}')
+    extra_info    = jsonfield.JSONField(_('extra information'), default='{}')
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
     cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
                 null=True, blank=True)
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
     cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
                 null=True, blank=True)
-    ebook_formats = ['pdf', 'epub', 'mobi', 'txt']
+    ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt']
     formats = ebook_formats + ['html', 'xml']
 
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
     formats = ebook_formats + ['html', 'xml']
 
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
@@ -428,7 +433,7 @@ class Book(models.Model):
 
         type(self).objects.filter(pk=self.pk).update(_related_info=None)
         # Fragment.short_html relies on book's tags, so reset it here too
 
         type(self).objects.filter(pk=self.pk).update(_related_info=None)
         # Fragment.short_html relies on book's tags, so reset it here too
-        for fragm in self.fragments.all():
+        for fragm in self.fragments.all().iterator():
             fragm.reset_short_html()
 
     def has_description(self):
             fragm.reset_short_html()
 
     def has_description(self):
@@ -477,6 +482,7 @@ class Book(models.Model):
     def build_html(self):
         from django.core.files.base import ContentFile
         from slughifi import slughifi
     def build_html(self):
         from django.core.files.base import ContentFile
         from slughifi import slughifi
+        from sortify import sortify
         from librarian import html
 
         meta_tags = list(self.tags.filter(
         from librarian import html
 
         meta_tags = list(self.tags.filter(
@@ -511,7 +517,7 @@ class Book(models.Model):
                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                     if created:
                         tag.name = theme_name
                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                     if created:
                         tag.name = theme_name
-                        tag.sort_key = theme_name.lower()
+                        tag.sort_key = sortify(theme_name.lower())
                         tag.save()
                     themes.append(tag)
                 if not themes:
                         tag.save()
                     themes.append(tag)
                 if not themes:
@@ -533,26 +539,33 @@ class Book(models.Model):
 
     # Thin wrappers for builder tasks
     def build_pdf(self, *args, **kwargs):
 
     # Thin wrappers for builder tasks
     def build_pdf(self, *args, **kwargs):
+        """(Re)builds PDF."""
         return tasks.build_pdf.delay(self.pk, *args, **kwargs)
     def build_epub(self, *args, **kwargs):
         return tasks.build_pdf.delay(self.pk, *args, **kwargs)
     def build_epub(self, *args, **kwargs):
+        """(Re)builds EPUB."""
         return tasks.build_epub.delay(self.pk, *args, **kwargs)
     def build_mobi(self, *args, **kwargs):
         return tasks.build_epub.delay(self.pk, *args, **kwargs)
     def build_mobi(self, *args, **kwargs):
+        """(Re)builds MOBI."""
         return tasks.build_mobi.delay(self.pk, *args, **kwargs)
         return tasks.build_mobi.delay(self.pk, *args, **kwargs)
+    def build_fb2(self, *args, **kwargs):
+        """(Re)build FB2"""
+        return tasks.build_fb2.delay(self.pk, *args, **kwargs)
     def build_txt(self, *args, **kwargs):
     def build_txt(self, *args, **kwargs):
+        """(Re)builds TXT."""
         return tasks.build_txt.delay(self.pk, *args, **kwargs)
 
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
         return tasks.build_txt.delay(self.pk, *args, **kwargs)
 
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
-                b.get_extra_info_value()['author'],
+                b.extra_info['author'],
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
-                    for b in books]
+                    for b in books.iterator()]
         return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 
         return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 
@@ -562,6 +575,7 @@ class Book(models.Model):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
+        import search
         if reuse_index:
             idx = search.ReusableIndex()
         else:
         if reuse_index:
             idx = search.ReusableIndex()
         else:
@@ -593,7 +607,7 @@ class Book(models.Model):
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
-            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
+            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True,
             search_index=True, search_index_tags=True, search_index_reuse=False):
 
         # check for parts before we do anything
             search_index=True, search_index_tags=True, search_index_reuse=False):
 
         # check for parts before we do anything
@@ -628,7 +642,7 @@ class Book(models.Model):
             book.common_slug = book_info.variant_of.slug
         else:
             book.common_slug = book.slug
             book.common_slug = book_info.variant_of.slug
         else:
             book.common_slug = book.slug
-        book.set_extra_info_value(book_info.to_dict())
+        book.extra_info = book_info.to_dict()
         book.save()
 
         meta_tags = Tag.tags_from_info(book_info)
         book.save()
 
         meta_tags = Tag.tags_from_info(book_info)
@@ -663,6 +677,9 @@ class Book(models.Model):
         if not settings.NO_BUILD_MOBI and build_mobi:
             book.build_mobi()
 
         if not settings.NO_BUILD_MOBI and build_mobi:
             book.build_mobi()
 
+        if not settings.NO_BUILD_FB2 and build_fb2:
+            book.build_fb2()
+
         if not settings.NO_SEARCH_INDEX and search_index:
             book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
             #index_book.delay(book.id, book_info)
         if not settings.NO_SEARCH_INDEX and search_index:
             book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
             #index_book.delay(book.id, book_info)
@@ -675,7 +692,7 @@ class Book(models.Model):
             descendants_tags.update(child_book.tags)
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
             descendants_tags.update(child_book.tags)
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
-            for fragment in child_book.fragments.all():
+            for fragment in child_book.fragments.all().iterator():
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
@@ -723,7 +740,7 @@ class Book(models.Model):
 
     def related_themes(self):
         theme_counter = self.theme_counter
 
     def related_themes(self):
         theme_counter = self.theme_counter
-        book_themes = Tag.objects.filter(pk__in=theme_counter.keys())
+        book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys()))
         for tag in book_themes:
             tag.count = theme_counter[tag.pk]
         return book_themes
         for tag in book_themes:
             tag.count = theme_counter[tag.pk]
         return book_themes
@@ -747,10 +764,10 @@ class Book(models.Model):
 
         if tags is None:
             tags = {}
 
         if tags is None:
             tags = {}
-            for child in self.children.all().order_by():
+            for child in self.children.all().order_by().iterator():
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
-            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
+            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator():
                 tags[tag.pk] = 1
 
             if self.id:
                 tags[tag.pk] = 1
 
             if self.id:
@@ -776,8 +793,8 @@ class Book(models.Model):
 
         if tags is None:
             tags = {}
 
         if tags is None:
             tags = {}
-            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
-                for tag in fragment.tags.filter(category='theme').order_by():
+            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator():
+                for tag in fragment.tags.filter(category='theme').order_by().iterator():
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
@@ -803,7 +820,7 @@ class Book(models.Model):
 
     @classmethod
     def tagged_top_level(cls, tags):
 
     @classmethod
     def tagged_top_level(cls, tags):
-        """ Returns top-level books tagged with `tags'.
+        """ Returns top-level books tagged with `tags`.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
@@ -812,8 +829,9 @@ class Book(models.Model):
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
-        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book',
+            slug__in=[book.book_tag_slug() for book in objects.iterator()])
+        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()]
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
@@ -832,19 +850,20 @@ class Book(models.Model):
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
-            book_ids = set((book.pk for book in books))
-            for book in books:
+            
+            book_ids = set(b['pk'] for b in books.values("pk").iterator())
+            for book in books.iterator():
                 parent = book.parent_id
                 if parent not in book_ids:
                     parent = None
                 books_by_parent.setdefault(parent, []).append(book)
         else:
                 parent = book.parent_id
                 if parent not in book_ids:
                     parent = None
                 books_by_parent.setdefault(parent, []).append(book)
         else:
-            for book in books:
+            for book in books.iterator():
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
         books_by_author = SortedDict()
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
         books_by_author = SortedDict()
-        for tag in Tag.objects.filter(category='author'):
+        for tag in Tag.objects.filter(category='author').iterator():
             books_by_author[tag] = []
 
         for book in books_by_parent.get(None,()):
             books_by_author[tag] = []
 
         for book in books_by_parent.get(None,()):
@@ -866,7 +885,7 @@ class Book(models.Model):
         "LP": (3, u"liceum"),
     }
     def audiences_pl(self):
         "LP": (3, u"liceum"),
     }
     def audiences_pl(self):
-        audiences = self.get_extra_info_value().get('audiences', [])
+        audiences = self.extra_info.get('audiences', [])
         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
         return [a[1] for a in audiences]
 
         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
         return [a[1] for a in audiences]
 
@@ -883,7 +902,8 @@ class Book(models.Model):
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
-    has.short_description = t.upper()
+    has.short_description = ftype.upper()
+    has.__doc__ = None
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
@@ -900,6 +920,7 @@ for t in Book.formats:
 
 
 class Fragment(models.Model):
 
 
 class Fragment(models.Model):
+    """Represents a themed fragment of a book."""
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
@@ -1004,14 +1025,16 @@ def _post_save_handler(sender, instance, **kwargs):
 post_save.connect(_post_save_handler)
 
 
 post_save.connect(_post_save_handler)
 
 
-@django.dispatch.receiver(post_delete, sender=Book)
-def _remove_book_from_index_handler(sender, instance, **kwargs):
-    """ remove the book from search index, when it is deleted."""
-    search.JVM.attachCurrentThread()
-    idx = search.Index()
-    idx.open(timeout=10000)  # 10 seconds timeout.
-    try:
-        idx.remove_book(instance)
-        idx.index_tags()
-    finally:
-        idx.close()
+if not settings.NO_SEARCH_INDEX:
+    @django.dispatch.receiver(post_delete, sender=Book)
+    def _remove_book_from_index_handler(sender, instance, **kwargs):
+        """ remove the book from search index, when it is deleted."""
+        import search
+        search.JVM.attachCurrentThread()
+        idx = search.Index()
+        idx.open(timeout=10000)  # 10 seconds timeout.
+        try:
+            idx.remove_book(instance)
+            idx.index_tags()
+        finally:
+            idx.close()