Update to new librarian api for html, txt.
[wolnelektury.git] / src / catalogue / fields.py
index 38cac77..ebe5cf4 100644 (file)
@@ -1,14 +1,20 @@
-# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 #
 #
+import io
 import os
 import os
+import pkg_resources
+import random
+import time
+from urllib.request import urlopen
+from django.apps import apps
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
 from django.utils.deconstruct import deconstructible
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
 from django.utils.deconstruct import deconstructible
-from django.utils.translation import gettext_lazy as _
-from catalogue.constants import LANGUAGES_3TO2, EBOOK_FORMATS_WITH_CHILDREN, EBOOK_FORMATS_WITHOUT_CHILDREN
+from librarian.cover import make_cover
+from catalogue.constants import LANGUAGES_3TO2
 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
 from waiter.utils import clear_cache
 
 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
 from waiter.utils import clear_cache
 
@@ -29,6 +35,25 @@ class UploadToPath(object):
         return isinstance(other, type(self)) and other.path == self.path
 
 
         return isinstance(other, type(self)) and other.path == self.path
 
 
+def get_make_cover(book):
+    """Return a ``make_cover`` wrapper preloaded with the book's cover logo.
+
+    The logo URL is read from the book's extra info: the ``logo_mono`` key
+    is used when present, otherwise ``logo``. When a URL is set, it is
+    downloaded up front into an in-memory buffer, and the returned callable
+    passes that buffer to ``make_cover`` as the ``cover_logo`` keyword.
+
+    The download is retried indefinitely with a 2-second pause between
+    attempts, so this call blocks until the logo has been fetched.
+    """
+    extra = book.get_extra_info_json()
+    cover_logo = extra.get('logo_mono', extra.get('logo'))
+    if cover_logo:
+        while True:
+            try:
+                # Close the HTTP response deterministically instead of
+                # leaving the connection to the garbage collector.
+                with urlopen(cover_logo, timeout=3) as response:
+                    cover_logo = io.BytesIO(response.read())
+            except Exception:
+                # Deliberately not a bare ``except:`` -- KeyboardInterrupt
+                # and SystemExit must be able to break this retry loop.
+                time.sleep(2)
+            else:
+                break
+
+    def mc(*args, **kwargs):
+        # Only inject the logo when the book actually defines one.
+        if cover_logo:
+            kwargs['cover_logo'] = cover_logo
+        return make_cover(*args, **kwargs)
+    return mc
+    
+
 class EbookFieldFile(FieldFile):
     """Represents contents of an ebook file field."""
 
 class EbookFieldFile(FieldFile):
     """Represents contents of an ebook file field."""
 
@@ -66,17 +91,14 @@ class EbookField(models.FileField):
     """Represents an ebook file field, attachable to a model."""
     attr_class = EbookFieldFile
     ext = None
     """Represents an ebook file field, attachable to a model."""
     attr_class = EbookFieldFile
     ext = None
+    for_parents = True
     librarian2_api = False
     ZIP = None
 
     librarian2_api = False
     ZIP = None
 
-    def __init__(self, verbose_name_=None, with_etag=True, **kwargs):
-        # This is just for compatibility with older migrations,
-        # where first argument was for ebook format.
-        # Can be scrapped if old migrations are updated/removed.
-        verbose_name = verbose_name_ or _("%s file") % self.ext
-        kwargs.setdefault('verbose_name', verbose_name_ )
-
+    def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
+        kwargs.setdefault('verbose_name', verbose_name)
         self.with_etag = with_etag
         self.with_etag = with_etag
+        self.etag_field_name = etag_field_name
         kwargs.setdefault('max_length', 255)
         kwargs.setdefault('blank', True)
         kwargs.setdefault('default', '')
         kwargs.setdefault('max_length', 255)
         kwargs.setdefault('blank', True)
         kwargs.setdefault('default', '')
@@ -94,16 +116,15 @@ class EbookField(models.FileField):
             del kwargs['default']
         if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
             del kwargs['upload_to']
             del kwargs['default']
         if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
             del kwargs['upload_to']
-        if not self.with_etag:
+        # with_etag creates a second field, which then deconstructs to manage
+        # its own migrations. So for migrations, etag_field_name is explicitly
+        # set to avoid double creation of the etag field.
+        if self.with_etag:
+            kwargs['etag_field_name'] = self.etag_field_name
+        else:
             kwargs['with_etag'] = self.with_etag
             kwargs['with_etag'] = self.with_etag
-        # Compatibility
-        verbose_name = kwargs.get('verbose_name')
-        if verbose_name:
-            del kwargs['verbose_name']
-            if verbose_name != _("%s file") % self.ext:
-                args = [verbose_name] + args
-        return name, path, args, kwargs
 
 
+        return name, path, args, kwargs
 
     @classmethod
     def get_upload_to(cls, directory):
 
     @classmethod
     def get_upload_to(cls, directory):
@@ -114,8 +135,8 @@ class EbookField(models.FileField):
     def contribute_to_class(self, cls, name):
         super(EbookField, self).contribute_to_class(cls, name)
 
     def contribute_to_class(self, cls, name):
         super(EbookField, self).contribute_to_class(cls, name)
 
-        self.etag_field_name = f'{name}_etag'
-        if self.with_etag:
+        if self.with_etag and not self.etag_field_name:
+            self.etag_field_name = f'{name}_etag'
             self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
             self.etag_field.contribute_to_class(cls, f'{name}_etag')
 
             self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
             self.etag_field.contribute_to_class(cls, f'{name}_etag')
 
@@ -129,42 +150,52 @@ class EbookField(models.FileField):
         setattr(cls, 'has_%s' % self.attname, has)
 
     def get_current_etag(self):
         setattr(cls, 'has_%s' % self.attname, has)
 
     def get_current_etag(self):
-        import pkg_resources
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         librarian_version = pkg_resources.get_distribution("librarian").version
         librarian_version = pkg_resources.get_distribution("librarian").version
-        return librarian_version
-
-    def schedule_stale(self, queryset=None):
-        """Schedule building this format for all the books where etag is stale."""
+        etag = librarian_version
+        mis = MediaInsertSet.get_for_format(self.ext)
+        if mis is not None:
+            etag += '_' + mis.etag
+        return etag
+
+    def find_stale(self, limit):
+        """Find some books where this format is stale."""
         # If there is not ETag field, bail. That's true for xml file field.
         if not self.with_etag:
         # If there is not ETag field, bail. That's true for xml file field.
         if not self.with_etag:
-            return
+            return []
 
         etag = self.get_current_etag()
 
         etag = self.get_current_etag()
-        if queryset is None:
-            queryset = self.model.objects.all()
 
 
-        if self.format_name in EBOOK_FORMATS_WITHOUT_CHILDREN + ['html']:
+        queryset = self.model.objects.all()
+        if not self.for_parents:
             queryset = queryset.filter(children=None)
 
         queryset = queryset.exclude(**{
             f'{self.etag_field_name}__in': [
                 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
             queryset = queryset.filter(children=None)
 
         queryset = queryset.exclude(**{
             f'{self.etag_field_name}__in': [
                 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
-            ]
+           ]
         })
         })
-        for obj in queryset:
-            fieldfile = getattr(obj, self.attname)
-            priority = EBOOK_REBUILD_PRIORITY if fieldfile else EBOOK_BUILD_PRIORITY
-            fieldfile.build_delay(priority=priority)
+
+        queryset = queryset.order_by('?')[:limit]
+        return queryset
 
     @classmethod
 
     @classmethod
-    def schedule_all_stale(cls, model):
+    def find_all_stale(cls, model, limit):
         """Find up to `limit` stale ebooks of any format, as shuffled (field name, instance) pairs."""
         """Find up to `limit` stale ebooks of any format, as shuffled (field name, instance) pairs."""
+        found = []
         for field in model._meta.fields:
             if isinstance(field, cls):
         for field in model._meta.fields:
             if isinstance(field, cls):
-                field.schedule_stale()
+                for instance in field.find_stale(limit):
+                    found.append((
+                        field.name,
+                        instance
+                    ))
+        random.shuffle(found)
+        found = found[:limit]
+        return found
 
     @staticmethod
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         """Transform a librarian.WLDocument into a librarian.OutputFile.
 
         Abstract hook: concrete field classes override it. `wldoc` is the
         parsed document and `book` is the model instance owning the file.
         """
         raise NotImplementedError()
         """Transform a librarian.WLDocument into a librarian.OutputFile.
 
         Abstract hook: concrete field classes override it. `wldoc` is the
         parsed document and `book` is the model instance owning the file.
         """
         raise NotImplementedError()
@@ -177,8 +208,10 @@ class EbookField(models.FileField):
         book = fieldfile.instance
         out = self.transform(
             book.wldocument2() if self.librarian2_api else book.wldocument(),
         book = fieldfile.instance
         out = self.transform(
             book.wldocument2() if self.librarian2_api else book.wldocument(),
+            book,
         )
         )
-        fieldfile.save(None, File(open(out.get_filename(), 'rb')), save=False)
+        with open(out.get_filename(), 'rb') as f:
+            fieldfile.save(None, File(f), save=False)
         self.set_file_permissions(fieldfile)
         if book.pk is not None:
             book.save(update_fields=[self.attname])
         self.set_file_permissions(fieldfile)
         if book.pk is not None:
             book.save(update_fields=[self.attname])
@@ -195,18 +228,22 @@ class XmlField(EbookField):
 
 class TxtField(EbookField):
     ext = 'txt'
 
 class TxtField(EbookField):
     ext = 'txt'
+    for_parents = False
+    librarian2_api = True
 
     @staticmethod
 
     @staticmethod
-    def transform(wldoc):
-        return wldoc.as_text()
+    def transform(wldoc, book):
+        from librarian.builders.txt import TxtBuilder
+        return TxtBuilder().build(wldoc)
 
 
 class Fb2Field(EbookField):
     ext = 'fb2'
 
 
 class Fb2Field(EbookField):
     ext = 'fb2'
+    for_parents = False
     ZIP = 'wolnelektury_pl_fb2'
 
     @staticmethod
     ZIP = 'wolnelektury_pl_fb2'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         return wldoc.as_fb2()
 
 
         return wldoc.as_fb2()
 
 
@@ -215,13 +252,17 @@ class PdfField(EbookField):
     ZIP = 'wolnelektury_pl_pdf'
 
     @staticmethod
     ZIP = 'wolnelektury_pl_pdf'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return wldoc.as_pdf(
         return wldoc.as_pdf(
-            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True,
-            base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'])
+            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
+            cover=get_make_cover(book),
+            base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
+            fundraising=MediaInsertSet.get_texts_for('pdf'),
+        )
 
     def build(self, fieldfile):
 
     def build(self, fieldfile):
-        BuildEbook.build(self, fieldfile)
+        super().build(fieldfile)
         clear_cache(fieldfile.instance.slug)
 
 
         clear_cache(fieldfile.instance.slug)
 
 
@@ -231,11 +272,13 @@ class EpubField(EbookField):
     ZIP = 'wolnelektury_pl_epub'
 
     @staticmethod
     ZIP = 'wolnelektury_pl_epub'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.builders import EpubBuilder
         from librarian.builders import EpubBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return EpubBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
         return EpubBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
-                fundraising=settings.EPUB_FUNDRAISING
+                fundraising=MediaInsertSet.get_texts_for('epub'),
+                cover=get_make_cover(book),
             ).build(wldoc)
 
 
             ).build(wldoc)
 
 
@@ -245,16 +288,20 @@ class MobiField(EbookField):
     ZIP = 'wolnelektury_pl_mobi'
 
     @staticmethod
     ZIP = 'wolnelektury_pl_mobi'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.builders import MobiBuilder
         from librarian.builders import MobiBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
         return MobiBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
         return MobiBuilder(
                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
-                fundraising=settings.EPUB_FUNDRAISING
+                fundraising=MediaInsertSet.get_texts_for('mobi'),
+                cover=get_make_cover(book),
             ).build(wldoc)
 
 
 class HtmlField(EbookField):
     ext = 'html'
             ).build(wldoc)
 
 
 class HtmlField(EbookField):
     ext = 'html'
+    for_parents = False
+    librarian2_api = True
 
     def build(self, fieldfile):
         from django.core.files.base import ContentFile
 
     def build(self, fieldfile):
         from django.core.files.base import ContentFile
@@ -265,7 +312,7 @@ class HtmlField(EbookField):
 
         book = fieldfile.instance
 
 
         book = fieldfile.instance
 
-        html_output = self.transform(book.wldocument(parse_dublincore=False))
+        html_output = self.transform(book.wldocument2(), book)
 
         # Delete old fragments, create from scratch if necessary.
         book.fragments.all().delete()
 
         # Delete old fragments, create from scratch if necessary.
         book.fragments.all().delete()
@@ -306,7 +353,6 @@ class HtmlField(EbookField):
                             tag.name = theme_name
                             setattr(tag, "name_%s" % lang, theme_name)
                             tag.sort_key = sortify(theme_name.lower())
                             tag.name = theme_name
                             setattr(tag, "name_%s" % lang, theme_name)
                             tag.sort_key = sortify(theme_name.lower())
-                            tag.for_books = True
                             tag.save()
                         themes.append(tag)
                     elif lang is not None:
                             tag.save()
                         themes.append(tag)
                     elif lang is not None:
@@ -336,33 +382,31 @@ class HtmlField(EbookField):
 
                 new_fragment.save()
                 new_fragment.tags = set(meta_tags + themes)
 
                 new_fragment.save()
                 new_fragment.tags = set(meta_tags + themes)
-                for theme in themes:
-                    if not theme.for_books:
-                        theme.for_books = True
-                        theme.save()
             book.html_built.send(sender=type(self), instance=book)
             return True
         return False
 
     @staticmethod
             book.html_built.send(sender=type(self), instance=book)
             return True
         return False
 
     @staticmethod
-    def transform(wldoc):
-        # ugly, but we can't use wldoc.book_info here
-        from librarian import DCNS
-        url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
-        if url_elem is None:
+    def transform(wldoc, book):
+        from librarian.builders.html import HtmlBuilder
+        url = wldoc.meta.url
+        if not url:
             gal_url = ''
             gal_path = ''
         else:
             gal_url = ''
             gal_path = ''
         else:
-            slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
-            gal_url = gallery_url(slug=slug)
-            gal_path = gallery_path(slug=slug)
-        return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
+            gal_url = gallery_url(slug=url.slug)
+            gal_path = gallery_path(slug=url.slug)
+        return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc)
 
 
 class CoverField(EbookField):
     ext = 'jpg'
     directory = 'cover'
 
 
 
 class CoverField(EbookField):
     ext = 'jpg'
     directory = 'cover'
 
+    @staticmethod
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()
+
     def set_file_permissions(self, fieldfile):
         pass
 
     def set_file_permissions(self, fieldfile):
         pass
 
@@ -371,19 +415,15 @@ class CoverCleanField(CoverField):
     directory = 'cover_clean'
 
     @staticmethod
     directory = 'cover_clean'
 
     @staticmethod
-    def transform(wldoc):
-        if wldoc.book_info.cover_box_position == 'none':
-            from librarian.cover import WLCover
-            return WLCover(wldoc.book_info, width=240).output_file()
-        from librarian.covers.marquise import MarquiseCover
-        return MarquiseCover(wldoc.book_info, width=240).output_file()
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()
 
 
 class CoverThumbField(CoverField):
     directory = 'cover_thumb'
 
     @staticmethod
 
 
 class CoverThumbField(CoverField):
     directory = 'cover_thumb'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.cover import WLCover
         return WLCover(wldoc.book_info, height=193).output_file()
 
         from librarian.cover import WLCover
         return WLCover(wldoc.book_info, height=193).output_file()
 
@@ -392,7 +432,7 @@ class CoverApiThumbField(CoverField):
     directory = 'cover_api_thumb'
 
     @staticmethod
     directory = 'cover_api_thumb'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=500).output_file()
 
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=500).output_file()
 
@@ -401,7 +441,7 @@ class SimpleCoverField(CoverField):
     directory = 'cover_simple'
 
     @staticmethod
     directory = 'cover_simple'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=1000).output_file()
 
         from librarian.cover import WLNoBoxCover
         return WLNoBoxCover(wldoc.book_info, height=1000).output_file()
 
@@ -410,6 +450,6 @@ class CoverEbookpointField(CoverField):
     directory = 'cover_ebookpoint'
 
     @staticmethod
     directory = 'cover_ebookpoint'
 
     @staticmethod
-    def transform(wldoc):
+    def transform(wldoc, book):
         from librarian.cover import EbookpointCover
         return EbookpointCover(wldoc.book_info).output_file()
         from librarian.cover import EbookpointCover
         return EbookpointCover(wldoc.book_info).output_file()