Librarian.
[wolnelektury.git] / src / catalogue / fields.py
index 5b49307..c4dec7e 100644 (file)
-# -*- coding: utf-8 -*-
-# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 #
 #
+import io
+import os
+import pkg_resources
+import random
+import time
+from urllib.request import urlopen
+from django.apps import apps
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
 from django.conf import settings
 from django.core.files import File
 from django.db import models
 from django.db.models.fields.files import FieldFile
-from catalogue import app_settings
+from django.utils.deconstruct import deconstructible
+from librarian.cover import make_cover
 from catalogue.constants import LANGUAGES_3TO2
 from catalogue.constants import LANGUAGES_3TO2
-from catalogue.utils import remove_zip, truncate_html_words, gallery_path, gallery_url
-from celery.task import Task, task
-from celery.utils.log import get_task_logger
+from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
 from waiter.utils import clear_cache
 
 from waiter.utils import clear_cache
 
-task_logger = get_task_logger(__name__)
-
+ETAG_SCHEDULED_SUFFIX = '-scheduled'
+EBOOK_BUILD_PRIORITY = 0
+EBOOK_REBUILD_PRIORITY = 9
+
+
+@deconstructible
+class UploadToPath(object):
+    """Callable ``upload_to`` target built from a path template.
+
+    ``path`` is a %-format template that is interpolated with
+    ``instance.slug``; the ``filename`` argument is not used.
+    """
+
+    def __init__(self, path):
+        self.path = path
+
+    def __call__(self, instance, filename):
+        return self.path % instance.slug
+
+    def __eq__(self, other):
+        return isinstance(other, type(self)) and other.path == self.path
+
+    def __hash__(self):
+        # Defining __eq__ alone sets __hash__ to None. Hash on the path only,
+        # so any two objects that compare equal also hash equal.
+        return hash(self.path)
+
+
+def get_make_cover(book):
+    """Return a ``make_cover`` wrapper that passes the book's cover logo.
+
+    The logo URL is read from the book's extra info (``logo_mono``, falling
+    back to ``logo``). When present, it is downloaded here into a BytesIO and
+    passed as the ``cover_logo`` keyword argument by the returned function.
+    The download is retried every 2 seconds until it succeeds.
+    """
+    from http.client import HTTPException
+
+    extra = book.get_extra_info_json()
+    cover_logo = extra.get('logo_mono', extra.get('logo'))
+    if cover_logo:
+        while True:
+            try:
+                cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
+            except (OSError, HTTPException):
+                # Only network/HTTP failures (URLError, timeouts, broken
+                # reads) are retried. Anything else, such as ValueError for
+                # a malformed URL or KeyboardInterrupt, propagates instead
+                # of being retried forever.
+                time.sleep(2)
+            else:
+                break
+
+    def mc(*args, **kwargs):
+        if cover_logo:
+            kwargs['cover_logo'] = cover_logo
+        return make_cover(*args, **kwargs)
+    return mc
+    
 
 class EbookFieldFile(FieldFile):
     """Represents contents of an ebook file field."""
 
     def build(self):
         """Build the ebook immediately."""
 
 class EbookFieldFile(FieldFile):
     """Represents contents of an ebook file field."""
 
     def build(self):
         """Build the ebook immediately."""
-        return self.field.builder.build(self)
+        etag = self.field.get_current_etag()
+        self.field.build(self)
+        self.update_etag(etag)
+        self.instance.clear_cache()
 
 
-    def build_delay(self):
+    def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
         """Builds the ebook in a delayed task."""
         """Builds the ebook in a delayed task."""
-        return self.field.builder.delay(self.instance, self.field.attname)
+        from .tasks import build_field
+
+        self.update_etag(
+            "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
+        )
+        return build_field.apply_async(
+            [self.instance.pk, self.field.attname],
+            priority=priority
+        )
+
+    def set_readable(self, readable):
+        """Set the file mode to 0o644 if ``readable``, otherwise to 0o600."""
+        # Uses the module-level ``os`` import; no local re-import is needed.
+        permissions = 0o644 if readable else 0o600
+        os.chmod(self.path, permissions)
+
+    def update_etag(self, etag):
+        """Store ``etag`` in the instance attribute named by the field's
+        ``etag_field_name``.
+
+        If the instance already has a primary key, the value is saved
+        immediately, updating only that one field.
+        """
+        setattr(self.instance, self.field.etag_field_name, etag)
+        if self.instance.pk:
+            self.instance.save(update_fields=[self.field.etag_field_name])
 
 
 class EbookField(models.FileField):
     """Represents an ebook file field, attachable to a model."""
     attr_class = EbookFieldFile
 
 
 class EbookField(models.FileField):
     """Represents an ebook file field, attachable to a model."""
     attr_class = EbookFieldFile
-
-    def __init__(self, format_name, *args, **kwargs):
-        super(EbookField, self).__init__(*args, **kwargs)
-        self.format_name = format_name
+    ext = None
+    for_parents = True
+    librarian2_api = False
+    ZIP = None
+
+    def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
+        kwargs.setdefault('verbose_name', verbose_name)
+        self.with_etag = with_etag
+        self.etag_field_name = etag_field_name
+        kwargs.setdefault('max_length', 255)
+        kwargs.setdefault('blank', True)
+        kwargs.setdefault('default', '')
+        kwargs.setdefault('upload_to', self.get_upload_to(self.ext))
+
+        super().__init__(**kwargs)
 
     def deconstruct(self):
 
     def deconstruct(self):
-        name, path, args, kwargs = super(EbookField, self).deconstruct()
-        args.insert(0, self.format_name)
+        name, path, args, kwargs = super().deconstruct()
+        if kwargs.get('max_length') == 255:
+            del kwargs['max_length']
+        if kwargs.get('blank') is True:
+            del kwargs['blank']
+        if kwargs.get('default') == '':
+            del kwargs['default']
+        if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
+            del kwargs['upload_to']
+        # with_etag creates a second field, which then deconstructs to manage
+        # its own migrations. So for migrations, etag_field_name is explicitly
+        # set to avoid double creation of the etag field.
+        if self.with_etag:
+            kwargs['etag_field_name'] = self.etag_field_name
+        else:
+            kwargs['with_etag'] = self.with_etag
+
         return name, path, args, kwargs
 
         return name, path, args, kwargs
 
-    @property
-    def builder(self):
-        """Finds a celery task suitable for the format of the field."""
-        return BuildEbook.for_format(self.format_name)
+    @classmethod
+    def get_upload_to(cls, directory):
+        """Return the ``UploadToPath`` for this field class.
+
+        The template is ``book/<directory>/%s.<ext>``, where the directory is
+        the class attribute ``directory`` if defined, else ``ext``.
+        """
+        # NOTE(review): the ``directory`` argument is overwritten on the next
+        # line, so whatever the caller passes is ignored.
+        directory = getattr(cls, 'directory', cls.ext)
+        upload_template = f'book/{directory}/%s.{cls.ext}'
+        return UploadToPath(upload_template)
 
     def contribute_to_class(self, cls, name):
         super(EbookField, self).contribute_to_class(cls, name)
 
 
     def contribute_to_class(self, cls, name):
         super(EbookField, self).contribute_to_class(cls, name)
 
+        if self.with_etag and not self.etag_field_name:
+            self.etag_field_name = f'{name}_etag'
+            self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
+            self.etag_field.contribute_to_class(cls, f'{name}_etag')
+
         def has(model_instance):
             return bool(getattr(model_instance, self.attname, None))
         has.__doc__ = None
         has.__name__ = str("has_%s" % self.attname)
         has.short_description = self.name
         has.boolean = True
         def has(model_instance):
             return bool(getattr(model_instance, self.attname, None))
         has.__doc__ = None
         has.__name__ = str("has_%s" % self.attname)
         has.short_description = self.name
         has.boolean = True
+
         setattr(cls, 'has_%s' % self.attname, has)
 
         setattr(cls, 'has_%s' % self.attname, has)
 
+    def get_current_etag(self):
+        """Return the ETag value that an up-to-date file would carry.
+
+        It is the installed ``librarian`` distribution version, followed by
+        ``_`` and the etag of the ``MediaInsertSet`` for this format when
+        one exists.
+        """
+        # The model is looked up through the app registry at call time.
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
+        librarian_version = pkg_resources.get_distribution("librarian").version
+        etag = librarian_version
+        mis = MediaInsertSet.get_for_format(self.ext)
+        if mis is not None:
+            etag += '_' + mis.etag
+        return etag
 
 
-class BuildEbook(Task):
-    formats = {}
+    def find_stale(self, limit):
+        """Find some books where this format is stale."""
+        # If there is no ETag field, bail out. That is the case for the XML file field.
+        if not self.with_etag:
+            return []
 
 
-    @classmethod
-    def register(cls, format_name):
-        """A decorator for registering subclasses for particular formats."""
-        def wrapper(builder):
-            cls.formats[format_name] = builder
-            return builder
-        return wrapper
+        etag = self.get_current_etag()
+
+        queryset = self.model.objects.all()
+        if not self.for_parents:
+            queryset = queryset.filter(children=None)
+
+        queryset = queryset.exclude(**{
+            f'{self.etag_field_name}__in': [
+                etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
+           ]
+        })
+
+        queryset = queryset.order_by('?')[:limit]
+        return queryset
 
     @classmethod
 
     @classmethod
-    def for_format(cls, format_name):
-        """Returns a celery task suitable for specified format."""
-        return cls.formats.get(format_name, BuildEbookTask)
+    def find_all_stale(cls, model, limit):
+        """Collect up to `limit` stale (field name, instance) pairs across all ebook fields."""
+        found = []
+        for field in model._meta.fields:
+            if isinstance(field, cls):
+                for instance in field.find_stale(limit):
+                    found.append((
+                        field.name,
+                        instance
+                    ))
+        random.shuffle(found)
+        found = found[:limit]
+        return found
 
     @staticmethod
 
     @staticmethod
-    def transform(wldoc, fieldfile):
+    def transform(wldoc, book):
         """Transforms an librarian.WLDocument into an librarian.OutputFile.
         """Transforms an librarian.WLDocument into an librarian.OutputFile.
-
-        By default, it just calls relevant wldoc.as_??? method.
-
         """
         """
-        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()
+        raise NotImplemented()
 
 
-    def run(self, obj, field_name):
-        """Just run `build` on FieldFile, can't pass it directly to Celery."""
-        task_logger.info("%s -> %s" % (obj.slug, field_name))
-        ret = self.build(getattr(obj, field_name))
-        obj.flush_includes()
-        return ret
+    def set_file_permissions(self, fieldfile):
+        """Call ``fieldfile.set_readable(False)`` when the instance's
+        ``preview`` flag is set; otherwise leave the file untouched."""
+        if fieldfile.instance.preview:
+            fieldfile.set_readable(False)
 
     def build(self, fieldfile):
         book = fieldfile.instance
 
     def build(self, fieldfile):
         book = fieldfile.instance
-        out = self.transform(book.wldocument(), fieldfile)
-        fieldfile.save(None, File(open(out.get_filename())), save=False)
+        out = self.transform(
+            book.wldocument2() if self.librarian2_api else book.wldocument(),
+            book,
+        )
+        with open(out.get_filename(), 'rb') as f:
+            fieldfile.save(None, File(f), save=False)
+        self.set_file_permissions(fieldfile)
         if book.pk is not None:
         if book.pk is not None:
-            type(book).objects.filter(pk=book.pk).update(**{
-                fieldfile.field.attname: fieldfile
-            })
-        if fieldfile.field.format_name in app_settings.FORMAT_ZIPS:
-            remove_zip(app_settings.FORMAT_ZIPS[fieldfile.field.format_name])
-# Don't decorate BuildEbook, because we want to subclass it.
-BuildEbookTask = task(BuildEbook, ignore_result=True)
+            book.save(update_fields=[self.attname])
+        if self.ZIP:
+            remove_zip(self.ZIP)
 
 
 
 
-@BuildEbook.register('txt')
-@task(ignore_result=True)
-class BuildTxt(BuildEbook):
+class XmlField(EbookField):
+    """Ebook field for files with the ``xml`` extension."""
+    ext = 'xml'
+
+    def build(self, fieldfile):
+        # No-op: overrides the base ``build`` so that nothing is generated
+        # for this field.
+        pass
+
+
+class TxtField(EbookField):
+    ext = 'txt'
+    for_parents = False
+
     @staticmethod
     @staticmethod
-    def transform(wldoc, fieldfile):
+    def transform(wldoc, book):
         return wldoc.as_text()
 
 
         return wldoc.as_text()
 
 
-@BuildEbook.register('pdf')
-@task(ignore_result=True)
-class BuildPdf(BuildEbook):
+class Fb2Field(EbookField):
+    ext = 'fb2'
+    for_parents = False
+    ZIP = 'wolnelektury_pl_fb2'
+
     @staticmethod
     @staticmethod
-    def transform(wldoc, fieldfile):
-        return wldoc.as_pdf(morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True,
-                            ilustr_path=gallery_path(wldoc.book_info.url.slug))
+    def transform(wldoc, book):
+        return wldoc.as_fb2()
+
+
+class PdfField(EbookField):
+    ext = 'pdf'
+    ZIP = 'wolnelektury_pl_pdf'
+
+    @staticmethod
+    def transform(wldoc, book):
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
+        return wldoc.as_pdf(
+            morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
+            cover=get_make_cover(book),
+            base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
+            fundraising=MediaInsertSet.get_texts_for('pdf'),
+        )
 
     def build(self, fieldfile):
 
     def build(self, fieldfile):
-        BuildEbook.build(self, fieldfile)
+        super().build(fieldfile)
         clear_cache(fieldfile.instance.slug)
 
 
         clear_cache(fieldfile.instance.slug)
 
 
-@BuildEbook.register('epub')
-@task(ignore_result=True)
-class BuildEpub(BuildEbook):
+class EpubField(EbookField):
+    ext = 'epub'
+    librarian2_api = True
+    ZIP = 'wolnelektury_pl_epub'
+
     @staticmethod
     @staticmethod
-    def transform(wldoc, fieldfile):
-        return wldoc.as_epub(cover=True, ilustr_path=gallery_path(wldoc.book_info.url.slug))
+    def transform(wldoc, book):
+        from librarian.builders import EpubBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
+        return EpubBuilder(
+                base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
+                fundraising=MediaInsertSet.get_texts_for('epub'),
+                cover=get_make_cover(book),
+            ).build(wldoc)
+
 
 
+class MobiField(EbookField):
+    ext = 'mobi'
+    librarian2_api = True
+    ZIP = 'wolnelektury_pl_mobi'
 
 
-@BuildEbook.register('mobi')
-@task(ignore_result=True)
-class BuildMobi(BuildEbook):
     @staticmethod
     @staticmethod
-    def transform(wldoc, fieldfile):
-        return wldoc.as_mobi(cover=True, ilustr_path=gallery_path(wldoc.book_info.url.slug))
+    def transform(wldoc, book):
+        from librarian.builders import MobiBuilder
+        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
+        return MobiBuilder(
+                base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
+                fundraising=MediaInsertSet.get_texts_for('mobi'),
+                cover=get_make_cover(book),
+            ).build(wldoc)
 
 
 
 
-@BuildEbook.register('html')
-@task(ignore_result=True)
-class BuildHtml(BuildEbook):
+class HtmlField(EbookField):
+    ext = 'html'
+    for_parents = False
+
     def build(self, fieldfile):
         from django.core.files.base import ContentFile
     def build(self, fieldfile):
         from django.core.files.base import ContentFile
-        from fnpdjango.utils.text.slughifi import slughifi
+        from slugify import slugify
         from sortify import sortify
         from librarian import html
         from catalogue.models import Fragment, Tag
 
         book = fieldfile.instance
 
         from sortify import sortify
         from librarian import html
         from catalogue.models import Fragment, Tag
 
         book = fieldfile.instance
 
-        html_output = self.transform(book.wldocument(), fieldfile)
+        html_output = self.transform(book.wldocument(parse_dublincore=False), book)
 
         # Delete old fragments, create from scratch if necessary.
         book.fragments.all().delete()
 
         # Delete old fragments, create from scratch if necessary.
         book.fragments.all().delete()
@@ -167,7 +323,8 @@ class BuildHtml(BuildEbook):
             if lang not in [ln[0] for ln in settings.LANGUAGES]:
                 lang = None
 
             if lang not in [ln[0] for ln in settings.LANGUAGES]:
                 lang = None
 
-            fieldfile.save(None, ContentFile(html_output.get_string()), save=False)
+            fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
+            self.set_file_permissions(fieldfile)
             type(book).objects.filter(pk=book.pk).update(**{
                 fieldfile.field.attname: fieldfile
             })
             type(book).objects.filter(pk=book.pk).update(**{
                 fieldfile.field.attname: fieldfile
             })
@@ -186,18 +343,23 @@ class BuildHtml(BuildEbook):
                     if lang == settings.LANGUAGE_CODE:
                         # Allow creating themes if book in default language.
                         tag, created = Tag.objects.get_or_create(
                     if lang == settings.LANGUAGE_CODE:
                         # Allow creating themes if book in default language.
                         tag, created = Tag.objects.get_or_create(
-                                            slug=slughifi(theme_name),
-                                            category='theme')
+                            slug=slugify(theme_name),
+                            category='theme'
+                        )
                         if created:
                             tag.name = theme_name
                             setattr(tag, "name_%s" % lang, theme_name)
                             tag.sort_key = sortify(theme_name.lower())
                         if created:
                             tag.name = theme_name
                             setattr(tag, "name_%s" % lang, theme_name)
                             tag.sort_key = sortify(theme_name.lower())
+                            tag.for_books = True
                             tag.save()
                         themes.append(tag)
                     elif lang is not None:
                         # Don't create unknown themes in non-default languages.
                         try:
                             tag.save()
                         themes.append(tag)
                     elif lang is not None:
                         # Don't create unknown themes in non-default languages.
                         try:
-                            tag = Tag.objects.get(category='theme', **{"name_%s" % lang: theme_name})
+                            tag = Tag.objects.get(
+                                category='theme',
+                                **{"name_%s" % lang: theme_name}
+                            )
                         except Tag.DoesNotExist:
                             pass
                         else:
                         except Tag.DoesNotExist:
                             pass
                         else:
@@ -209,40 +371,89 @@ class BuildHtml(BuildEbook):
                 short_text = truncate_html_words(text, 15)
                 if text == short_text:
                     short_text = ''
                 short_text = truncate_html_words(text, 15)
                 if text == short_text:
                     short_text = ''
-                new_fragment = Fragment.objects.create(anchor=fragment.id, book=book, text=text, short_text=short_text)
+                new_fragment = Fragment.objects.create(
+                    anchor=fragment.id,
+                    book=book,
+                    text=text,
+                    short_text=short_text
+                )
 
                 new_fragment.save()
                 new_fragment.tags = set(meta_tags + themes)
 
                 new_fragment.save()
                 new_fragment.tags = set(meta_tags + themes)
+                for theme in themes:
+                    if not theme.for_books:
+                        theme.for_books = True
+                        theme.save()
             book.html_built.send(sender=type(self), instance=book)
             return True
         return False
 
     @staticmethod
             book.html_built.send(sender=type(self), instance=book)
             return True
         return False
 
     @staticmethod
-    def transform(wldoc, fieldfile):
-        return wldoc.as_html(options={'gallery': "'%s'" % gallery_url(wldoc.book_info.url.slug)})
+    def transform(wldoc, book):
+        """Render ``wldoc`` as HTML, passing gallery path and URLs.
+
+        The slug is the last path segment of the text of the document's
+        ``DCNS('identifier.url')`` element. When that element is missing or
+        has no text, empty gallery path and URL are used.
+        """
+        # ugly, but we can't use wldoc.book_info here
+        from librarian import DCNS
+        url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
+        if url_elem is None or not url_elem.text:
+            gal_url = ''
+            gal_path = ''
+        else:
+            # [-1] rather than [1]: a value without any '/' yields a
+            # one-element list and must not raise IndexError.
+            slug = url_elem.text.rstrip('/').rsplit('/', 1)[-1]
+            gal_url = gallery_url(slug=slug)
+            gal_path = gallery_path(slug=slug)
+        return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
+
+
+class CoverField(EbookField):
+    ext = 'jpg'
+    directory = 'cover'
 
 
+    @staticmethod
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()
 
 
-@BuildEbook.register('cover_thumb')
-@task(ignore_result=True)
-class BuildCoverThumb(BuildEbook):
-    @classmethod
-    def transform(cls, wldoc, fieldfile):
+    def set_file_permissions(self, fieldfile):
+        pass
+
+
+class CoverCleanField(CoverField):
+    """Cover field whose ``directory`` is ``cover_clean``."""
+    directory = 'cover_clean'
+
+    # NOTE(review): this body is identical to CoverField.transform as shown
+    # in this file. Confirm whether a different cover was intended here.
+    @staticmethod
+    def transform(wldoc, book):
+        return get_make_cover(book)(wldoc.book_info, width=360).output_file()
+
+
+class CoverThumbField(CoverField):
+    directory = 'cover_thumb'
+
+    @staticmethod
+    def transform(wldoc, book):
         from librarian.cover import WLCover
         return WLCover(wldoc.book_info, height=193).output_file()
 
 
         from librarian.cover import WLCover
         return WLCover(wldoc.book_info, height=193).output_file()
 
 
-class OverwritingFieldFile(FieldFile):
-    """
-        Deletes the old file before saving the new one.
-    """
+class CoverApiThumbField(CoverField):
+    directory = 'cover_api_thumb'
+
+    @staticmethod
+    def transform(wldoc, book):
+        from librarian.cover import WLNoBoxCover
+        return WLNoBoxCover(wldoc.book_info, height=500).output_file()
+
+
+class SimpleCoverField(CoverField):
+    directory = 'cover_simple'
+
+    @staticmethod
+    def transform(wldoc, book):
+        from librarian.cover import WLNoBoxCover
+        return WLNoBoxCover(wldoc.book_info, height=1000).output_file()
 
 
-    def save(self, name, content, *args, **kwargs):
-        leave = kwargs.pop('leave', None)
-        # delete if there's a file already and there's a new one coming
-        if not leave and self and (not hasattr(content, 'path') or content.path != self.path):
-            self.delete(save=False)
-        return super(OverwritingFieldFile, self).save(name, content, *args, **kwargs)
 
 
+class CoverEbookpointField(CoverField):
+    directory = 'cover_ebookpoint'
 
 
-class OverwritingFileField(models.FileField):
-    attr_class = OverwritingFieldFile
+    @staticmethod
+    def transform(wldoc, book):
+        from librarian.cover import EbookpointCover
+        return EbookpointCover(wldoc.book_info).output_file()