Dead links reporting.
[wolnelektury.git] / apps / catalogue / models / book.py
index 02051fb..bfc9ec4 100644 (file)
@@ -2,31 +2,47 @@
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+from collections import OrderedDict
+from random import randint
 import re
 import re
-from django.conf import settings as settings
-from django.core.cache import get_cache
-from django.db import models
+from django.conf import settings
+from django.db import connection, models, transaction
 from django.db.models import permalink
 import django.dispatch
 from django.db.models import permalink
 import django.dispatch
-from django.utils.datastructures import SortedDict
+from django.contrib.contenttypes.fields import GenericRelation
+from django.core.urlresolvers import reverse
 from django.utils.translation import ugettext_lazy as _
 import jsonfield
 from django.utils.translation import ugettext_lazy as _
 import jsonfield
+from fnpdjango.storage import BofhFileSystemStorage
+from ssify import flush_ssi_includes
+from newtagging import managers
 from catalogue import constants
 from catalogue.fields import EbookField
 from catalogue.models import Tag, Fragment, BookMedia
 from catalogue import constants
 from catalogue.fields import EbookField
 from catalogue.models import Tag, Fragment, BookMedia
-from catalogue.utils import create_zip, split_tags, book_upload_path
+from catalogue.utils import create_zip
 from catalogue import app_settings
 from catalogue import tasks
 from catalogue import app_settings
 from catalogue import tasks
-from newtagging import managers
 
 
+bofh_storage = BofhFileSystemStorage()
+
+
+def _cover_upload_to(i, n):
+    """Return the storage path of the cover image for ``i``.
+
+    The path is built only from ``i.slug``; the second argument is unused.
+    """
+    slug = i.slug
+    return 'book/cover/%s.jpg' % slug
 
 
-permanent_cache = get_cache('permanent')
+def _cover_thumb_upload_to(i, n):
+    """Return the storage path of the cover thumbnail for ``i``.
+
+    The path is built only from ``i.slug``; the second argument is unused.
+    """
+    slug = i.slug
+    return 'book/cover_thumb/%s.jpg' % slug
+
+def _ebook_upload_to(upload_path):
+    """Build an upload-path callable from the ``upload_path`` template.
+
+    The returned function %-formats ``upload_path`` with the ``slug`` of its
+    first argument and ignores its second argument.
+    """
+    def _upload_to(i, n):
+        slug = i.slug
+        return upload_path % slug
+    return _upload_to
 
 
 class Book(models.Model):
     """Represents a book imported from WL-XML."""
 
 
 class Book(models.Model):
     """Represents a book imported from WL-XML."""
-    title         = models.CharField(_('title'), max_length=120)
+    title         = models.CharField(_('title'), max_length=32767)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
+    sort_key_author = models.CharField(_('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
             unique=True)
     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True,
             unique=True)
     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
@@ -42,22 +58,32 @@ class Book(models.Model):
     # files generated during publication
 
     cover = EbookField('cover', _('cover'),
     # files generated during publication
 
     cover = EbookField('cover', _('cover'),
-                upload_to=book_upload_path('jpg'), null=True, blank=True)
+            null=True, blank=True,
+            upload_to=_cover_upload_to,
+            storage=bofh_storage, max_length=255)
+    # Cleaner version of cover for thumbs
+    cover_thumb = EbookField('cover_thumb', _('cover thumbnail'),
+            null=True, blank=True,
+            upload_to=_cover_thumb_upload_to,
+            max_length=255)
     ebook_formats = constants.EBOOK_FORMATS
     formats = ebook_formats + ['html', 'xml']
 
     parent = models.ForeignKey('self', blank=True, null=True,
         related_name='children')
     ebook_formats = constants.EBOOK_FORMATS
     formats = ebook_formats + ['html', 'xml']
 
     parent = models.ForeignKey('self', blank=True, null=True,
         related_name='children')
-
-    _related_info = jsonfield.JSONField(blank=True, null=True, editable=False)
+    ancestor = models.ManyToManyField('self', blank=True, null=True,
+        editable=False, related_name='descendant', symmetrical=False)
 
     objects  = models.Manager()
     tagged   = managers.ModelTaggedItemManager(Tag)
     tags     = managers.TagDescriptor(Tag)
 
     objects  = models.Manager()
     tagged   = managers.ModelTaggedItemManager(Tag)
     tags     = managers.TagDescriptor(Tag)
+    tag_relations = GenericRelation(Tag.intermediary_table_model)
 
     html_built = django.dispatch.Signal()
     published = django.dispatch.Signal()
 
 
     html_built = django.dispatch.Signal()
     published = django.dispatch.Signal()
 
+    short_html_url_name = 'catalogue_book_short'
+
     class AlreadyExists(Exception):
         pass
 
     class AlreadyExists(Exception):
         pass
 
@@ -70,15 +96,19 @@ class Book(models.Model):
     def __unicode__(self):
         return self.title
 
     def __unicode__(self):
         return self.title
 
-    def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
+    def save(self, force_insert=False, force_update=False, **kwargs):
         from sortify import sortify
 
         from sortify import sortify
 
-        self.sort_key = sortify(self.title)
+        self.sort_key = sortify(self.title)[:120]
+        self.title = unicode(self.title) # ???
 
 
-        ret = super(Book, self).save(force_insert, force_update, **kwargs)
+        try:
+            author = self.tags.filter(category='author')[0].sort_key
+        except IndexError:
+            author = u''
+        self.sort_key_author = author
 
 
-        if reset_short_html:
-            self.reset_short_html()
+        ret = super(Book, self).save(force_insert, force_update, **kwargs)
 
         return ret
 
 
         return ret
 
@@ -95,17 +125,11 @@ class Book(models.Model):
     def name(self):
         return self.title
 
     def name(self):
         return self.title
 
-    def book_tag_slug(self):
-        return ('l-' + self.slug)[:120]
+    def language_code(self):
+        """Return ``self.language`` mapped through ``constants.LANGUAGES_3TO2``.
+
+        Falls back to ``self.language`` itself when the mapping has no entry.
+        """
+        language = self.language
+        return constants.LANGUAGES_3TO2.get(language, language)
 
 
-    def book_tag(self):
-        slug = self.book_tag_slug()
-        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
-        if created:
-            book_tag.name = self.title[:50]
-            book_tag.sort_key = self.title.lower()
-            book_tag.save()
-        return book_tag
+    def language_name(self):
+        """Return the ``settings.LANGUAGES`` name for ``language_code()``.
+
+        Returns an empty string when the code is not listed there.
+        """
+        names_by_code = dict(settings.LANGUAGES)
+        return names_by_code.get(self.language_code(), "")
 
     def has_media(self, type_):
         if type_ in Book.formats:
 
     def has_media(self, type_):
         if type_ in Book.formats:
@@ -117,7 +141,7 @@ class Book(models.Model):
         if self.has_media(type_):
             if type_ in Book.formats:
                 return getattr(self, "%s_file" % type_)
         if self.has_media(type_):
             if type_ in Book.formats:
                 return getattr(self, "%s_file" % type_)
-            else:                                             
+            else:
                 return self.media.filter(type=type_)
         else:
             return None
                 return self.media.filter(type=type_)
         else:
             return None
@@ -129,16 +153,7 @@ class Book(models.Model):
     def get_ogg(self):
         return self.get_media("ogg")
     def get_daisy(self):
     def get_ogg(self):
         return self.get_media("ogg")
     def get_daisy(self):
-        return self.get_media("daisy")                       
-
-    def reset_short_html(self):
-        if self.id is None:
-            return
-
-        type(self).objects.filter(pk=self.pk).update(_related_info=None)
-        # Fragment.short_html relies on book's tags, so reset it here too
-        for fragm in self.fragments.all().iterator():
-            fragm.reset_short_html()
+        return self.get_media("daisy")
 
     def has_description(self):
         return len(self.description) > 0
 
     def has_description(self):
         return len(self.description) > 0
@@ -195,9 +210,9 @@ class Book(models.Model):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
         return create_zip(paths, "%s_%s" % (self.slug, format_))
 
     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
-        import search
         if index is None:
         if index is None:
-            index = search.Index()
+            from search.index import Index
+            index = Index()
         try:
             index.index_book(self, book_info)
             if index_tags:
         try:
             index.index_book(self, book_info)
             if index_tags:
@@ -292,21 +307,21 @@ class Book(models.Model):
             child.parent = None
             child.parent_number = 0
             child.save()
             child.parent = None
             child.parent_number = 0
             child.save()
-            tasks.fix_tree_tags.delay(child)
             if old_cover:
                 notify_cover_changed.append(child)
 
             if old_cover:
                 notify_cover_changed.append(child)
 
-        # delete old fragments when overwriting
-        book.fragments.all().delete()
-        # Build HTML, fix the tree tags, build cover.
-        has_own_text = bool(book.html_file.build())
-        tasks.fix_tree_tags.delay(book)
+        cls.repopulate_ancestors()
+
+        # No saves beyond this point.
+
+        # Build cover.
         if 'cover' not in dont_build:
             book.cover.build_delay()
         if 'cover' not in dont_build:
             book.cover.build_delay()
-        
-        # No saves behind this point.
+            book.cover_thumb.build_delay()
 
 
-        if has_own_text:
+        # Build HTML and ebooks.
+        book.html_file.build_delay()
+        if not children:
             for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                 if format_ not in dont_build:
                     getattr(book, '%s_file' % format_).build_delay()
             for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                 if format_ not in dont_build:
                     getattr(book, '%s_file' % format_).build_delay()
@@ -320,46 +335,59 @@ class Book(models.Model):
         for child in notify_cover_changed:
             child.parent_cover_changed()
 
         for child in notify_cover_changed:
             child.parent_cover_changed()
 
-        cls.published.send(sender=book)
+        cls.published.send(sender=cls, instance=book)
         return book
 
         return book
 
-    def fix_tree_tags(self):
-        """Fixes the l-tags on the book's subtree.
-
-        Makes sure that:
-        * the book has its parents book-tags,
-        * its fragments have the book's and its parents book-tags,
-        * runs those for every child book too,
-        * touches all relevant tags,
-        * resets tag and theme counter on the book and its ancestry.
-        """
-        def fix_subtree(book, parent_tags):
-            affected_tags = set(book.tags)
-            book.tags = list(book.tags.exclude(category='book')) + parent_tags
-            sub_parent_tags = parent_tags + [book.book_tag()]
-            for frag in book.fragments.all():
-                affected_tags.update(frag.tags)
-                frag.tags = list(frag.tags.exclude(category='book')
-                                    ) + sub_parent_tags
-            for child in book.children.all():
-                affected_tags.update(fix_subtree(child, sub_parent_tags))
-            return affected_tags
-
-        parent_tags = []
-        parent = self.parent
-        while parent is not None:
-            parent_tags.append(parent.book_tag())
-            parent = parent.parent
-
-        affected_tags = fix_subtree(self, parent_tags)
-        for tag in affected_tags:
-            tasks.touch_tag(tag)
-
-        book = self
-        while book is not None:
-            book.reset_tag_counter()
-            book.reset_theme_counter()
-            book = book.parent
+    @classmethod
+    def repopulate_ancestors(cls):
+        """Rebuild the ancestry cache from the books' ``parent`` links.
+
+        Empties the ``catalogue_book_ancestor`` table and refills it with one
+        row per (book, ancestor) pair, all inside a single transaction.
+        On PostgreSQL this is done with one recursive query; on any other
+        backend the parent chain of every non-root book is walked via the ORM.
+        """
+        # TODO: table names
+        with transaction.atomic():
+            cursor = connection.cursor()
+            # Django reports 'postgresql' (not 'postgres') as the vendor of
+            # its PostgreSQL backends; comparing against 'postgres' would
+            # never match and would always take the slow fallback below.
+            if connection.vendor == 'postgresql':
+                cursor.execute("TRUNCATE catalogue_book_ancestor")
+                # Seed with direct (book, parent) pairs, then keep climbing
+                # from each recorded ancestor to that ancestor's own parent.
+                cursor.execute("""
+                    WITH RECURSIVE ancestry AS (
+                        SELECT book.id, book.parent_id
+                        FROM catalogue_book AS book
+                        WHERE book.parent_id IS NOT NULL
+                        UNION
+                        SELECT ancestor.id, book.parent_id
+                        FROM ancestry AS ancestor, catalogue_book AS book
+                        WHERE ancestor.parent_id = book.id
+                            AND book.parent_id IS NOT NULL
+                        )
+                    INSERT INTO catalogue_book_ancestor
+                        (from_book_id, to_book_id)
+                        SELECT id, parent_id
+                        FROM ancestry
+                        ORDER BY id;
+                    """)
+            else:
+                cursor.execute("DELETE FROM catalogue_book_ancestor")
+                for b in cls.objects.exclude(parent=None):
+                    # Walk up the parent chain, recording every ancestor.
+                    parent = b.parent
+                    while parent is not None:
+                        b.ancestor.add(parent)
+                        parent = parent.parent
+
+    def flush_includes(self, languages=True):
+        """Flush the SSI includes of this book for the given languages.
+
+        ``languages`` is an iterable of language codes, ``True`` (the default)
+        for every code in ``settings.LANGUAGES``, or a false value to do
+        nothing at all.
+        """
+        if not languages:
+            return
+        if languages is True:
+            languages = [code for (code, _name) in settings.LANGUAGES]
+        templates = [
+            '/katalog/b/%d/mini.%s.html',
+            '/katalog/b/%d/mini_nolink.%s.html',
+            '/katalog/b/%d/short.%s.html',
+            '/katalog/b/%d/wide.%s.html',
+            '/api/include/book/%d.%s.json',
+            '/api/include/book/%d.%s.xml',
+            ]
+        # Every template is expanded for every language, template-major.
+        urls = []
+        for template in templates:
+            for lang in languages:
+                urls.append(template % (self.pk, lang))
+        flush_ssi_includes(urls)
 
     def cover_info(self, inherit=True):
         """Returns a dictionary to serve as fallback for BookInfo.
 
     def cover_info(self, inherit=True):
         """Returns a dictionary to serve as fallback for BookInfo.
@@ -380,120 +408,40 @@ class Book(models.Model):
             info = parent_info
         return info
 
             info = parent_info
         return info
 
+    def related_themes(self):
+        """Return the 'theme' tags used on fragments of this book's subtree.
+
+        Considers fragments whose book is this book or has this book as an
+        ancestor; tag usage is requested with ``counts=True``.
+        """
+        in_subtree = models.Q(book=self) | models.Q(book__ancestor=self)
+        fragments = Fragment.objects.filter(in_subtree)
+        usage = Tag.objects.usage_for_queryset(fragments, counts=True)
+        return usage.filter(category='theme')
+
     def parent_cover_changed(self):
         """Called when parent book's cover image is changed."""
         if not self.cover_info(inherit=False):
             if 'cover' not in app_settings.DONT_BUILD:
                 self.cover.build_delay()
     def parent_cover_changed(self):
         """Called when parent book's cover image is changed."""
         if not self.cover_info(inherit=False):
             if 'cover' not in app_settings.DONT_BUILD:
                 self.cover.build_delay()
+                self.cover_thumb.build_delay()
             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                 if format_ not in app_settings.DONT_BUILD:
                     getattr(self, '%s_file' % format_).build_delay()
             for child in self.children.all():
                 child.parent_cover_changed()
 
             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                 if format_ not in app_settings.DONT_BUILD:
                     getattr(self, '%s_file' % format_).build_delay()
             for child in self.children.all():
                 child.parent_cover_changed()
 
-    def related_info(self):
-        """Keeps info about related objects (tags, media) in cache field."""
-        if self._related_info is not None:
-            return self._related_info
-        else:
-            rel = {'tags': {}, 'media': {}}
-
-            tags = self.tags.filter(category__in=(
-                    'author', 'kind', 'genre', 'epoch'))
-            tags = split_tags(tags)
-            for category in tags:
-                rel['tags'][category] = [
-                        (t.name, t.slug) for t in tags[category]]
-
-            for media_format in BookMedia.formats:
-                rel['media'][media_format] = self.has_media(media_format)
-
-            book = self
-            parents = []
-            while book.parent:
-                parents.append((book.parent.title, book.parent.slug))
-                book = book.parent
-            parents = parents[::-1]
-            if parents:
-                rel['parents'] = parents
-
-            if self.pk:
-                type(self).objects.filter(pk=self.pk).update(_related_info=rel)
-            return rel
-
-    def related_themes(self):
-        theme_counter = self.theme_counter
-        book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys()))
-        for tag in book_themes:
-            tag.count = theme_counter[tag.pk]
-        return book_themes
-
-    def reset_tag_counter(self):
-        if self.id is None:
-            return
-
-        cache_key = "Book.tag_counter/%d" % self.id
-        permanent_cache.delete(cache_key)
-        if self.parent:
-            self.parent.reset_tag_counter()
-
-    @property
-    def tag_counter(self):
-        if self.id:
-            cache_key = "Book.tag_counter/%d" % self.id
-            tags = permanent_cache.get(cache_key)
-        else:
-            tags = None
-
-        if tags is None:
-            tags = {}
-            for child in self.children.all().order_by().iterator():
-                for tag_pk, value in child.tag_counter.iteritems():
-                    tags[tag_pk] = tags.get(tag_pk, 0) + value
-            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator():
-                tags[tag.pk] = 1
-
-            if self.id:
-                permanent_cache.set(cache_key, tags)
-        return tags
-
-    def reset_theme_counter(self):
-        if self.id is None:
-            return
-
-        cache_key = "Book.theme_counter/%d" % self.id
-        permanent_cache.delete(cache_key)
-        if self.parent:
-            self.parent.reset_theme_counter()
+    def other_versions(self):
+        """Return the other books sharing this book's ``common_slug``.
+
+        These are the other versions (i.e. in other languages) of the book;
+        the book itself is excluded.
+        """
+        same_work = type(self).objects.filter(common_slug=self.common_slug)
+        return same_work.exclude(pk=self.pk)
 
 
-    @property
-    def theme_counter(self):
-        if self.id:
-            cache_key = "Book.theme_counter/%d" % self.id
-            tags = permanent_cache.get(cache_key)
-        else:
-            tags = None
-
-        if tags is None:
-            tags = {}
-            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator():
-                for tag in fragment.tags.filter(category='theme').order_by().iterator():
-                    tags[tag.pk] = tags.get(tag.pk, 0) + 1
-
-            if self.id:
-                permanent_cache.set(cache_key, tags)
-        return tags
+    def parents(self):
+        """Return this book's ancestors, topmost first, direct parent last."""
+        chain = []
+        book = self.parent
+        while book is not None:
+            chain.append(book)
+            book = book.parent
+        # Collected bottom-up; callers get the list top-down.
+        chain.reverse()
+        return chain
 
     def pretty_title(self, html_links=False):
 
     def pretty_title(self, html_links=False):
-        book = self
-        rel_info = book.related_info()
-        names = [(name, Tag.create_url('author', slug))
-                    for name, slug in rel_info['tags']['author']]
-        if 'parents' in rel_info:
-            books = [(name, Book.create_url(slug))
-                        for name, slug in rel_info['parents']]
-            names.extend(reversed(books))
-        names.append((self.title, self.get_absolute_url()))
+        names = [(tag.name, tag.get_absolute_url())
+            for tag in self.tags.filter(category='author')]
+        books = self.parents() + [self]
+        names.extend([(b.title, b.get_absolute_url()) for b in books])
 
         if html_links:
             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 
         if html_links:
             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
@@ -509,17 +457,8 @@ class Book(models.Model):
         also tagged with those tags.
 
         """
         also tagged with those tags.
 
         """
-        # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         objects = cls.tagged.with_all(tags)
-        parents = objects.filter(html_file='').only('slug')
-        # eliminate descendants
-        l_tags = Tag.objects.filter(category='book',
-            slug__in=[book.book_tag_slug() for book in parents.iterator()])
-        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).only('pk').iterator()]
-        if descendants_keys:
-            objects = objects.exclude(pk__in=descendants_keys)
-
-        return objects
+        return objects.exclude(ancestor__in=objects)
 
     @classmethod
     def book_list(cls, filter=None):
 
     @classmethod
     def book_list(cls, filter=None):
@@ -534,7 +473,7 @@ class Book(models.Model):
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
                 'title', 'parent', 'slug')
         if filter:
             books = books.filter(filter).distinct()
-            
+
             book_ids = set(b['pk'] for b in books.values("pk").iterator())
             for book in books.iterator():
                 parent = book.parent_id
             book_ids = set(b['pk'] for b in books.values("pk").iterator())
             for book in books.iterator():
                 parent = book.parent_id
@@ -546,11 +485,11 @@ class Book(models.Model):
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
                 books_by_parent.setdefault(book.parent_id, []).append(book)
 
         orphans = []
-        books_by_author = SortedDict()
+        books_by_author = OrderedDict()
         for tag in Tag.objects.filter(category='author').iterator():
             books_by_author[tag] = []
 
         for tag in Tag.objects.filter(category='author').iterator():
             books_by_author[tag] = []
 
-        for book in books_by_parent.get(None,()):
+        for book in books_by_parent.get(None, ()):
             authors = list(book.tags.filter(category='author'))
             if authors:
                 for author in authors:
             authors = list(book.tags.filter(category='author'))
             if authors:
                 for author in authors:
@@ -574,11 +513,22 @@ class Book(models.Model):
         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
         return [a[1] for a in audiences]
 
         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
         return [a[1] for a in audiences]
 
+    def stage_note(self):
+        """Return a ``(message, url)`` note for works at an early stage.
+
+        Reads ``'stage'`` from ``self.extra_info``. When it is set and compares
+        below ``'0.4'``, returns a translated notice and the URL of the
+        related info page; otherwise returns ``(None, None)``.
+        """
+        stage = self.extra_info.get('stage')
+        # NOTE(review): this is a plain comparison against the string '0.4',
+        # so it orders lexicographically -- confirm the stage values in use
+        # sort correctly that way.
+        if not stage or not stage < '0.4':
+            return None, None
+        message = _('This work needs modernisation')
+        url = reverse('infopage', args=['wymagajace-uwspolczesnienia'])
+        return (message, url)
+
     def choose_fragment(self):
     def choose_fragment(self):
-        tag = self.book_tag()
-        fragments = Fragment.tagged.with_any([tag])
-        if fragments.exists():
-            return fragments.order_by('?')[0]
+        fragments = self.fragments.order_by()
+        fragments_count = fragments.count()
+        if not fragments_count and self.children.exists():
+            fragments = Fragment.objects.filter(book__ancestor=self).order_by()
+            fragments_count = fragments.count()
+        if fragments_count:
+            return fragments[randint(0, fragments_count - 1)]
         elif self.parent:
             return self.parent.choose_fragment()
         else:
         elif self.parent:
             return self.parent.choose_fragment()
         else:
@@ -588,6 +538,15 @@ class Book(models.Model):
 # add the file fields
 for format_ in Book.formats:
     field_name = "%s_file" % format_
 # add the file fields
 for format_ in Book.formats:
     field_name = "%s_file" % format_
+    # This weird globals() assignment makes Django migrations comfortable.
+    _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
+    _upload_to.__name__ = '_%s_upload_to' % format_
+    globals()[_upload_to.__name__] = _upload_to
+
     EbookField(format_, _("%s file" % format_.upper()),
     EbookField(format_, _("%s file" % format_.upper()),
-            upload_to=book_upload_path(format_),
-            blank=True, default='').contribute_to_class(Book, field_name)
+        upload_to=_upload_to,
+        storage=bofh_storage,
+        max_length=255,
+        blank=True,
+        default=''
+    ).contribute_to_class(Book, field_name)