fix for toc events
[wolnelektury.git] / src / catalogue / fields.py
index 38cac77..ebe5cf4 100644 (file)
@@ -1,14 +1,20 @@
-# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 #
+import io
 import os
+import pkg_resources
+import random
+import time
+from urllib.request import urlopen
+from django.apps import apps
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
 from django.utils.deconstruct import deconstructible
-from django.utils.translation import gettext_lazy as _
-from catalogue.constants import LANGUAGES_3TO2, EBOOK_FORMATS_WITH_CHILDREN, EBOOK_FORMATS_WITHOUT_CHILDREN
+from librarian.cover import make_cover
+from catalogue.constants import LANGUAGES_3TO2
 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
 from waiter.utils import clear_cache
 
@@ -29,6 +35,25 @@ class UploadToPath(object):
         return isinstance(other, type(self)) and other.path == self.path
 
 
+def get_make_cover(book):
+    """Return a make_cover() wrapper that injects the book's logo, if set."""
+    extra = book.get_extra_info_json()
+    cover_logo = extra.get('logo_mono', extra.get('logo'))
+    if cover_logo:
+        while True:  # keep retrying until the logo has been downloaded
+            try:
+                cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
+                break
+            except Exception:  # not bare: Ctrl-C / SystemExit must escape
+                time.sleep(2)
+
+    def mc(*args, **kwargs):
+        if cover_logo:
+            kwargs['cover_logo'] = cover_logo
+        return make_cover(*args, **kwargs)
+    return mc
+
+
 class EbookFieldFile(FieldFile):
     """Represents contents of an ebook file field."""
 
@@ -66,17 +91,14 @@ class EbookField(models.FileField):
     """Represents an ebook file field, attachable to a model."""
     attr_class = EbookFieldFile
     ext = None
+    for_parents = True
     librarian2_api = False
     ZIP = None
 
-    def __init__(self, verbose_name_=None, with_etag=True, **kwargs):
-        # This is just for compatibility with older migrations,
-        # where first argument was for ebook format.
-        # Can be scrapped if old migrations are updated/removed.
-        verbose_name = verbose_name_ or _("%s file") % self.ext
-        kwargs.setdefault('verbose_name', verbose_name_ )
-
+    def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
+        kwargs.setdefault('verbose_name', verbose_name)
         self.with_etag = with_etag
+        self.etag_field_name = etag_field_name
         kwargs.setdefault('max_length', 255)
         kwargs.setdefault('blank', True)
         kwargs.setdefault('default', '')
@@ -94,16 +116,15 @@ class EbookField(models.FileField):
             del kwargs['default']
         if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
             del kwargs['upload_to']
-        if not self.with_etag:
+        # with_etag creates a second field, which then deconstructs to manage
+        # its own migrations. So for migrations, etag_field_name is explicitly
+        # set to avoid double creation of the etag field.
+        if self.with_etag:
+            kwargs['etag_field_name'] = self.etag_field_name
+        else:
             kwargs['with_etag'] = self.with_etag
-        # Compatibility
-        verbose_name = kwargs.get('verbose_name')
-        if verbose_name:
-            del kwargs['verbose_name']
-            if verbose_name != _("%s file") % self.ext:
-                args = [verbose_name] + args
-        return name, path, args, kwargs
 
+        return name, path, args, kwargs
 
     @classmethod
     def get_upload_to(cls, directory):
@@ -114,8 +135,8 @@ class EbookField(models.FileField):
     def contribute_to_class(self, cls, name):
         super(EbookField, self).contribute_to_class(cls, name)
 
-        self.etag_field_name = f'{name}_etag'
-        if self.with_etag:
+        if self.with_etag and not self.etag_field_name:
+            self.etag_field_name = f'{name}_etag'
             self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
             self.etag_field.contribute_to_class(cls, f'{name}_etag')
 
@@ -129,42 +150,52 @@ class EbookField(models.FileField):
         setattr(cls, 'has_%s' % self.attname, has)
 
     def get_current_etag(self):
-        import pkg_resources
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')  # resolved through the app registry at call time
         librarian_version = pkg_resources.get_distribution("librarian").version
-        return librarian_version
-
-    def schedule_stale(self, queryset=None):
-        """Schedule building this format for all the books where etag is stale."""
+        etag = librarian_version
+        mis = MediaInsertSet.get_for_format(self.ext)
+        if mis is not None:
+            etag += '_' + mis.etag  # so the ETag changes with the insert set as well as with librarian
+        return etag
+
+    def find_stale(self, limit):
+        """Return up to ``limit`` random books where this format is stale."""
         # If there is not ETag field, bail. That's true for xml file field.
         if not self.with_etag:
-            return
+            return []
 
         etag = self.get_current_etag()
-        if queryset is None:
-            queryset = self.model.objects.all()
 
-        if self.format_name in EBOOK_FORMATS_WITHOUT_CHILDREN + ['html']:
+        queryset = self.model.objects.all()
+        if not self.for_parents:
             queryset = queryset.filter(children=None)
 
         queryset = queryset.exclude(**{
             f'{self.etag_field_name}__in': [
                 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
             ]
         })
-        for obj in queryset:
-            fieldfile = getattr(obj, self.attname)
-            priority = EBOOK_REBUILD_PRIORITY if fieldfile else EBOOK_BUILD_PRIORITY
-            fieldfile.build_delay(priority=priority)
+
+        queryset = queryset.order_by('?')[:limit]  # random order, capped at limit
+        return queryset
 
     @classmethod
-    def schedule_all_stale(cls, model):
+    def find_all_stale(cls, model, limit):
         """Schedules all stale ebooks of all formats to rebuild."""
+        found = []
         for field in model._meta.fields:
             if isinstance(field, cls):
-                field.schedule_stale()
+                for instance in field.find_stale(limit):
+                    found.append((
+                        field.name,
+                        instance
+                    ))
+        random.shuffle(found)
+        found = found[:limit]
+        return found
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         """Transforms an librarian.WLDocument into an librarian.OutputFile.
         """
         raise NotImplemented()
@@ -177,8 +208,10 @@ class EbookField(models.FileField):
         book = fieldfile.instance
         out = self.transform(
             book.wldocument2() if self.librarian2_api else book.wldocument(),
+            book,
         )
-        fieldfile.save(None, File(open(out.get_filename(), 'rb')), save=False)
+        with open(out.get_filename(), 'rb') as f:
+            fieldfile.save(None, File(f), save=False)
         self.set_file_permissions(fieldfile)
         if book.pk is not None:
             book.save(update_fields=[self.attname])
@@ -195,18 +228,22 @@ class XmlField(EbookField):
 
 class TxtField(EbookField):
     ext = 'txt'
+    for_parents = False  # find_stale() then only looks at books with children=None
+    librarian2_api = True  # build() then hands book.wldocument2() to transform()
 
     @staticmethod
-    def transform(wldoc):
-        return wldoc.as_text()
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
+        from librarian.builders.txt import TxtBuilder
+        return TxtBuilder().build(wldoc)
 
 
 class Fb2Field(EbookField):
     ext = 'fb2'
+    for_parents = False  # find_stale() then only looks at books with children=None
     ZIP = 'wolnelektury_pl_fb2'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
         return wldoc.as_fb2()
 
 
@@ -215,13 +252,17 @@ class PdfField(EbookField):
     ZIP = 'wolnelektury_pl_pdf'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return wldoc.as_pdf(
-            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True,
-            base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'])
+            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
+            cover=get_make_cover(book),  # make_cover wrapper that adds the book's logo when it has one
+            base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
+            fundraising=MediaInsertSet.get_texts_for('pdf'),  # insert texts looked up for the 'pdf' format
+        )
 
     def build(self, fieldfile):
-        BuildEbook.build(self, fieldfile)
+        super().build(fieldfile)  # regular build first; clear_cache() for the book's slug follows
         clear_cache(fieldfile.instance.slug)
 
 
@@ -231,11 +272,13 @@ class EpubField(EbookField):
     ZIP = 'wolnelektury_pl_epub'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.builders import EpubBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return EpubBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
-                fundraising=settings.EPUB_FUNDRAISING
+                fundraising=MediaInsertSet.get_texts_for('epub'),  # replaces the static settings.EPUB_FUNDRAISING
+                cover=get_make_cover(book),  # make_cover wrapper that adds the book's logo when it has one
             ).build(wldoc)
 
 
@@ -245,16 +288,20 @@ class MobiField(EbookField):
     ZIP = 'wolnelektury_pl_mobi'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.builders import MobiBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return MobiBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
-                fundraising=settings.EPUB_FUNDRAISING
+                fundraising=MediaInsertSet.get_texts_for('mobi'),  # replaces the static settings.EPUB_FUNDRAISING
+                cover=get_make_cover(book),  # make_cover wrapper that adds the book's logo when it has one
             ).build(wldoc)
 
 
 class HtmlField(EbookField):
     ext = 'html'
+    for_parents = False
+    librarian2_api = True
 
     def build(self, fieldfile):
         from django.core.files.base import ContentFile
@@ -265,7 +312,7 @@ class HtmlField(EbookField):
 
         book = fieldfile.instance
 
-        html_output = self.transform(book.wldocument(parse_dublincore=False))
+        html_output = self.transform(book.wldocument2(), book)
 
         # Delete old fragments, create from scratch if necessary.
         book.fragments.all().delete()
@@ -306,7 +353,6 @@ class HtmlField(EbookField):
                             tag.name = theme_name
                             setattr(tag, "name_%s" % lang, theme_name)
                             tag.sort_key = sortify(theme_name.lower())
-                            tag.for_books = True
                             tag.save()
                         themes.append(tag)
                     elif lang is not None:
@@ -336,33 +382,31 @@ class HtmlField(EbookField):
 
                 new_fragment.save()
                 new_fragment.tags = set(meta_tags + themes)
-                for theme in themes:
-                    if not theme.for_books:
-                        theme.for_books = True
-                        theme.save()
             book.html_built.send(sender=type(self), instance=book)
             return True
         return False
 
     @staticmethod
-    def transform(wldoc):
-        # ugly, but we can't use wldoc.book_info here
-        from librarian import DCNS
-        url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
-        if url_elem is None:
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
+        from librarian.builders.html import HtmlBuilder
+        url = wldoc.meta.url
+        if not url:  # no URL in the metadata: build with empty gallery locations
             gal_url = ''
             gal_path = ''
         else:
-            slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
-            gal_url = gallery_url(slug=slug)
-            gal_path = gallery_path(slug=slug)
-        return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
+            gal_url = gallery_url(slug=url.slug)
+            gal_path = gallery_path(slug=url.slug)
+        return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc)
 
 
 class CoverField(EbookField):
     ext = 'jpg'
     directory = 'cover'
 
+    @staticmethod
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()  # cover built at width=360
+
     def set_file_permissions(self, fieldfile):
         pass
 
@@ -371,19 +415,15 @@ class CoverCleanField(CoverField):
     directory = 'cover_clean'
 
     @staticmethod
-    def transform(wldoc):
-        if wldoc.book_info.cover_box_position == 'none':
-            from librarian.cover import WLCover
-            return WLCover(wldoc.book_info, width=240).output_file()
-        from librarian.covers.marquise import MarquiseCover
-        return MarquiseCover(wldoc.book_info, width=240).output_file()
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()  # NOTE(review): same call as CoverField.transform - confirm the override is still needed
 
 
 class CoverThumbField(CoverField):
     directory = 'cover_thumb'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
         from librarian.cover import WLCover
         return WLCover(wldoc.book_info, height=193).output_file()
 
@@ -392,7 +432,7 @@ class CoverApiThumbField(CoverField):
     directory = 'cover_api_thumb'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=500).output_file()
 
@@ -401,7 +441,7 @@ class SimpleCoverField(CoverField):
     directory = 'cover_simple'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=1000).output_file()
 
@@ -410,6 +450,6 @@ class CoverEbookpointField(CoverField):
     directory = 'cover_ebookpoint'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):  # book is unused here; kept for the common signature
         from librarian.cover import EbookpointCover
         return EbookpointCover(wldoc.book_info).output_file()