Merge branch 'master' into production
[wolnelektury.git] / apps / catalogue / models.py
index 53caa95..3704b16 100644 (file)
@@ -22,12 +22,11 @@ from django.conf import settings
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
-from catalogue.fields import JSONField, OverwritingFileField
+from catalogue.fields import OverwritingFileField
 from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue import tasks
 import re
 
 from catalogue.utils import create_zip, split_tags, truncate_html_words
 from catalogue import tasks
 import re
 
-import search
 
 # Those are hard-coded here so that makemessages sees them.
 TAG_CATEGORIES = (
 
 # Those are hard-coded here so that makemessages sees them.
 TAG_CATEGORIES = (
@@ -54,6 +53,10 @@ class TagSubcategoryManager(models.Manager):
 
 
 class Tag(TagBase):
 
 
 class Tag(TagBase):
+    """A tag attachable to books and fragments (and possibly anything).
+    
+    Used to represent searchable metadata (authors, epochs, genres, kinds),
+    fragment themes (motifs) and some book-hierarchy-related kludges."""
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
@@ -115,8 +118,8 @@ class Tag(TagBase):
             objects = Book.tagged.with_all((self,)).order_by()
             if self.category != 'set':
                 # eliminate descendants
             objects = Book.tagged.with_all((self,)).order_by()
             if self.category != 'set':
                 # eliminate descendants
-                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
-                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
+                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()])
+                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
                 if descendants_keys:
                     objects = objects.exclude(pk__in=descendants_keys)
         return objects.count()
                 if descendants_keys:
                     objects = objects.exclude(pk__in=descendants_keys)
         return objects.count()
@@ -213,6 +216,7 @@ def book_upload_path(ext=None, maxlen=100):
 
 
 class BookMedia(models.Model):
 
 
 class BookMedia(models.Model):
+    """Represents media attached to a book."""
     FileFormat = namedtuple("FileFormat", "name ext")
     formats = SortedDict([
         ('mp3', FileFormat(name='MP3', ext='mp3')),
     FileFormat = namedtuple("FileFormat", "name ext")
     formats = SortedDict([
         ('mp3', FileFormat(name='MP3', ext='mp3')),
@@ -226,7 +230,7 @@ class BookMedia(models.Model):
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
-    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
+    extra_info  = jsonfield.JSONField(_('extra information'), default='{}', editable=False)
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
@@ -244,7 +248,7 @@ class BookMedia(models.Model):
 
         try:
             old = BookMedia.objects.get(pk=self.pk)
 
         try:
             old = BookMedia.objects.get(pk=self.pk)
-        except BookMedia.DoesNotExist, e:
+        except BookMedia.DoesNotExist:
             old = None
         else:
             # if name changed, change the file name, too
             old = None
         else:
             # if name changed, change the file name, too
@@ -258,9 +262,9 @@ class BookMedia(models.Model):
             remove_zip("%s_%s" % (old.book.slug, old.type))
         remove_zip("%s_%s" % (self.book.slug, self.type))
 
             remove_zip("%s_%s" % (old.book.slug, old.type))
         remove_zip("%s_%s" % (self.book.slug, self.type))
 
-        extra_info = self.get_extra_info_value()
+        extra_info = self.extra_info
         extra_info.update(self.read_meta())
         extra_info.update(self.read_meta())
-        self.set_extra_info_value(extra_info)
+        self.extra_info = extra_info
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
@@ -323,6 +327,7 @@ class BookMedia(models.Model):
 
 
 class Book(models.Model):
 
 
 class Book(models.Model):
+    """Represents a book imported from WL-XML."""
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
@@ -334,14 +339,14 @@ class Book(models.Model):
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
-    extra_info    = JSONField(_('extra information'), default='{}')
+    extra_info    = jsonfield.JSONField(_('extra information'), default='{}')
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
     cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
                 null=True, blank=True)
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
     cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
                 null=True, blank=True)
-    ebook_formats = ['pdf', 'epub', 'mobi', 'txt']
+    ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt']
     formats = ebook_formats + ['html', 'xml']
 
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
     formats = ebook_formats + ['html', 'xml']
 
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
@@ -398,18 +403,18 @@ class Book(models.Model):
             book_tag.save()
         return book_tag
 
             book_tag.save()
         return book_tag
 
-    def has_media(self, type):
-        if type in Book.formats:
-            return bool(getattr(self, "%s_file" % type))
+    def has_media(self, type_):
+        if type_ in Book.formats:
+            return bool(getattr(self, "%s_file" % type_))
         else:
         else:
-            return self.media.filter(type=type).exists()
+            return self.media.filter(type=type_).exists()
 
 
-    def get_media(self, type):
-        if self.has_media(type):
-            if type in Book.formats:
-                return getattr(self, "%s_file" % type)
+    def get_media(self, type_):
+        if self.has_media(type_):
+            if type_ in Book.formats:
+                return getattr(self, "%s_file" % type_)
             else:                                             
             else:                                             
-                return self.media.filter(type=type)
+                return self.media.filter(type=type_)
         else:
             return None
 
         else:
             return None
 
@@ -428,7 +433,7 @@ class Book(models.Model):
 
         type(self).objects.filter(pk=self.pk).update(_related_info=None)
         # Fragment.short_html relies on book's tags, so reset it here too
 
         type(self).objects.filter(pk=self.pk).update(_related_info=None)
         # Fragment.short_html relies on book's tags, so reset it here too
-        for fragm in self.fragments.all():
+        for fragm in self.fragments.all().iterator():
             fragm.reset_short_html()
 
     def has_description(self):
             fragm.reset_short_html()
 
     def has_description(self):
@@ -477,6 +482,7 @@ class Book(models.Model):
     def build_html(self):
         from django.core.files.base import ContentFile
         from slughifi import slughifi
     def build_html(self):
         from django.core.files.base import ContentFile
         from slughifi import slughifi
+        from sortify import sortify
         from librarian import html
 
         meta_tags = list(self.tags.filter(
         from librarian import html
 
         meta_tags = list(self.tags.filter(
@@ -511,7 +517,7 @@ class Book(models.Model):
                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                     if created:
                         tag.name = theme_name
                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                     if created:
                         tag.name = theme_name
-                        tag.sort_key = theme_name.lower()
+                        tag.sort_key = sortify(theme_name.lower())
                         tag.save()
                     themes.append(tag)
                 if not themes:
                         tag.save()
                     themes.append(tag)
                 if not themes:
@@ -531,18 +537,35 @@ class Book(models.Model):
             return True
         return False
 
             return True
         return False
 
+    # Thin wrappers for builder tasks
+    def build_pdf(self, *args, **kwargs):
+        """(Re)builds PDF."""
+        return tasks.build_pdf.delay(self.pk, *args, **kwargs)
+    def build_epub(self, *args, **kwargs):
+        """(Re)builds EPUB."""
+        return tasks.build_epub.delay(self.pk, *args, **kwargs)
+    def build_mobi(self, *args, **kwargs):
+        """(Re)builds MOBI."""
+        return tasks.build_mobi.delay(self.pk, *args, **kwargs)
+    def build_fb2(self, *args, **kwargs):
+        """(Re)builds FB2."""
+        return tasks.build_fb2.delay(self.pk, *args, **kwargs)
+    def build_txt(self, *args, **kwargs):
+        """(Re)builds TXT."""
+        return tasks.build_txt.delay(self.pk, *args, **kwargs)
+
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
-                b.get_extra_info_value()['author'],
+                b.extra_info['author'],
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
-                    for b in books]
+                    for b in books.iterator()]
         return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 
         return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 
@@ -552,6 +575,7 @@ class Book(models.Model):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
+        import search
         if reuse_index:
             idx = search.ReusableIndex()
         else:
         if reuse_index:
             idx = search.ReusableIndex()
         else:
@@ -583,7 +607,7 @@ class Book(models.Model):
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
-            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
+            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True,
             search_index=True, search_index_tags=True, search_index_reuse=False):
 
         # check for parts before we do anything
             search_index=True, search_index_tags=True, search_index_reuse=False):
 
         # check for parts before we do anything
@@ -592,7 +616,7 @@ class Book(models.Model):
             for part_url in book_info.parts:
                 try:
                     children.append(Book.objects.get(slug=part_url.slug))
             for part_url in book_info.parts:
                 try:
                     children.append(Book.objects.get(slug=part_url.slug))
-                except Book.DoesNotExist, e:
+                except Book.DoesNotExist:
                     raise Book.DoesNotExist(_('Book "%s" does not exist.') %
                             part_url.slug)
 
                     raise Book.DoesNotExist(_('Book "%s" does not exist.') %
                             part_url.slug)
 
@@ -618,7 +642,7 @@ class Book(models.Model):
             book.common_slug = book_info.variant_of.slug
         else:
             book.common_slug = book.slug
             book.common_slug = book_info.variant_of.slug
         else:
             book.common_slug = book.slug
-        book.set_extra_info_value(book_info.to_dict())
+        book.extra_info = book_info.to_dict()
         book.save()
 
         meta_tags = Tag.tags_from_info(book_info)
         book.save()
 
         meta_tags = Tag.tags_from_info(book_info)
@@ -640,18 +664,21 @@ class Book(models.Model):
 
         if book.build_html():
             if not settings.NO_BUILD_TXT and build_txt:
 
         if book.build_html():
             if not settings.NO_BUILD_TXT and build_txt:
-                tasks.build_txt.delay(book.pk)
+                book.build_txt()
 
         book.build_cover(book_info)
 
         if not settings.NO_BUILD_EPUB and build_epub:
 
         book.build_cover(book_info)
 
         if not settings.NO_BUILD_EPUB and build_epub:
-            tasks.build_epub.delay(book.pk)
+            book.build_epub()
 
         if not settings.NO_BUILD_PDF and build_pdf:
 
         if not settings.NO_BUILD_PDF and build_pdf:
-            tasks.build_pdf.delay(book.pk)
+            book.build_pdf()
 
         if not settings.NO_BUILD_MOBI and build_mobi:
 
         if not settings.NO_BUILD_MOBI and build_mobi:
-            tasks.build_mobi.delay(book.pk)
+            book.build_mobi()
+
+        if not settings.NO_BUILD_FB2 and build_fb2:
+            book.build_fb2()
 
         if not settings.NO_SEARCH_INDEX and search_index:
             book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
 
         if not settings.NO_SEARCH_INDEX and search_index:
             book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
@@ -665,7 +692,7 @@ class Book(models.Model):
             descendants_tags.update(child_book.tags)
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
             descendants_tags.update(child_book.tags)
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
-            for fragment in child_book.fragments.all():
+            for fragment in child_book.fragments.all().iterator():
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
@@ -711,6 +738,13 @@ class Book(models.Model):
                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
             return rel
 
                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
             return rel
 
+    def related_themes(self):
+        theme_counter = self.theme_counter
+        book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys()))
+        for tag in book_themes:
+            tag.count = theme_counter[tag.pk]
+        return book_themes
+
     def reset_tag_counter(self):
         if self.id is None:
             return
     def reset_tag_counter(self):
         if self.id is None:
             return
@@ -730,10 +764,10 @@ class Book(models.Model):
 
         if tags is None:
             tags = {}
 
         if tags is None:
             tags = {}
-            for child in self.children.all().order_by():
+            for child in self.children.all().order_by().iterator():
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
-            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
+            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator():
                 tags[tag.pk] = 1
 
             if self.id:
                 tags[tag.pk] = 1
 
             if self.id:
@@ -759,8 +793,8 @@ class Book(models.Model):
 
         if tags is None:
             tags = {}
 
         if tags is None:
             tags = {}
-            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
-                for tag in fragment.tags.filter(category='theme').order_by():
+            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator():
+                for tag in fragment.tags.filter(category='theme').order_by().iterator():
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
@@ -786,7 +820,7 @@ class Book(models.Model):
 
     @classmethod
     def tagged_top_level(cls, tags):
 
     @classmethod
     def tagged_top_level(cls, tags):
-        """ Returns top-level books tagged with `tags'.
+        """ Returns top-level books tagged with `tags`.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
@@ -795,8 +829,9 @@ class Book(models.Model):
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
-        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book',
+            slug__in=[book.book_tag_slug() for book in objects.iterator()])
+        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()]
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
@@ -815,19 +850,20 @@ class Book(models.Model):
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
-            book_ids = set((book.pk for book in books))
-            for book in books:
+            
+            book_ids = set(b['pk'] for b in books.values("pk").iterator())
+            for book in books.iterator():
                 parent = book.parent_id
                 if parent not in book_ids:
                     parent = None
                 books_by_parent.setdefault(parent, []).append(book)
         else:
                 parent = book.parent_id
                 if parent not in book_ids:
                     parent = None
                 books_by_parent.setdefault(parent, []).append(book)
         else:
-            for book in books:
+            for book in books.iterator():
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
         books_by_author = SortedDict()
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
         books_by_author = SortedDict()
-        for tag in Tag.objects.filter(category='author'):
+        for tag in Tag.objects.filter(category='author').iterator():
             books_by_author[tag] = []
 
         for book in books_by_parent.get(None,()):
             books_by_author[tag] = []
 
         for book in books_by_parent.get(None,()):
@@ -849,7 +885,7 @@ class Book(models.Model):
         "LP": (3, u"liceum"),
     }
     def audiences_pl(self):
         "LP": (3, u"liceum"),
     }
     def audiences_pl(self):
-        audiences = self.get_extra_info_value().get('audiences', [])
+        audiences = self.extra_info.get('audiences', [])
         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
         return [a[1] for a in audiences]
 
         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
         return [a[1] for a in audiences]
 
@@ -866,7 +902,8 @@ class Book(models.Model):
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
-    has.short_description = t.upper()
+    has.short_description = ftype.upper()
+    has.__doc__ = None
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
@@ -883,6 +920,7 @@ for t in Book.formats:
 
 
 class Fragment(models.Model):
 
 
 class Fragment(models.Model):
+    """Represents a themed fragment of a book."""
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
@@ -987,12 +1025,16 @@ def _post_save_handler(sender, instance, **kwargs):
 post_save.connect(_post_save_handler)
 
 
 post_save.connect(_post_save_handler)
 
 
-@django.dispatch.receiver(post_delete, sender=Book)
-def _remove_book_from_index_handler(sender, instance, **kwargs):
-    """ remove the book from search index, when it is deleted."""
-    idx = search.Index()
-    idx.open(timeout=10000)  # 10 seconds timeout.
-    try:
-        idx.remove_book(instance)
-    finally:
-        idx.close()
+if not settings.NO_SEARCH_INDEX:
+    @django.dispatch.receiver(post_delete, sender=Book)
+    def _remove_book_from_index_handler(sender, instance, **kwargs):
+        """Remove the book from the search index when it is deleted."""
+        import search
+        search.JVM.attachCurrentThread()
+        idx = search.Index()
+        idx.open(timeout=10000)  # 10 seconds timeout.
+        try:
+            idx.remove_book(instance)
+            idx.index_tags()
+        finally:
+            idx.close()