X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/10b367c03d1e3a8ad6e365b02a7234bfd640e3f6..86530a9e72f32d28ef1971ac9fa705c85b1bd3b6:/src/catalogue/fields.py?ds=sidebyside diff --git a/src/catalogue/fields.py b/src/catalogue/fields.py index 92e8de48c..ebe5cf478 100644 --- a/src/catalogue/fields.py +++ b/src/catalogue/fields.py @@ -1,103 +1,204 @@ -# -*- coding: utf-8 -*- -# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # +import io +import os +import pkg_resources +import random +import time +from urllib.request import urlopen +from django.apps import apps from django.conf import settings from django.core.files import File -from django.core.files.storage import FileSystemStorage from django.db import models from django.db.models.fields.files import FieldFile -from catalogue import app_settings +from django.utils.deconstruct import deconstructible +from librarian.cover import make_cover from catalogue.constants import LANGUAGES_3TO2 -from catalogue.utils import remove_zip, truncate_html_words, gallery_path, gallery_url -from celery.task import Task, task -from celery.utils.log import get_task_logger +from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url from waiter.utils import clear_cache -task_logger = get_task_logger(__name__) - +ETAG_SCHEDULED_SUFFIX = '-scheduled' +EBOOK_BUILD_PRIORITY = 0 +EBOOK_REBUILD_PRIORITY = 9 + + +@deconstructible +class UploadToPath(object): + def __init__(self, path): + self.path = path + + def __call__(self, instance, filename): + return self.path % instance.slug + + def __eq__(self, other): + return isinstance(other, type(self)) and other.path == self.path + + +def get_make_cover(book): + extra = book.get_extra_info_json() + cover_logo = extra.get('logo_mono', extra.get('logo')) + if cover_logo: + while True: + try: + cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read()) + except: + time.sleep(2) + else: + break + + def mc(*args, **kwargs): + if cover_logo: + kwargs['cover_logo'] = cover_logo + return make_cover(*args, **kwargs) + return mc + class EbookFieldFile(FieldFile): """Represents contents of an ebook file field.""" def build(self): """Build the ebook immediately.""" - return self.field.builder.build(self) + etag = self.field.get_current_etag() + self.field.build(self) + self.update_etag(etag) + self.instance.clear_cache() - def build_delay(self): + def build_delay(self, priority=EBOOK_BUILD_PRIORITY): """Builds the ebook in a delayed task.""" - return self.field.builder.delay(self.instance, self.field.attname) + from .tasks import build_field - def get_url(self): - return self.instance.media_url(self.field.attname.split('_')[0]) + self.update_etag( + "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX]) + ) + return build_field.apply_async( + [self.instance.pk, self.field.attname], + priority=priority + ) def set_readable(self, readable): import os permissions = 0o644 if readable else 0o600 os.chmod(self.path, permissions) + def update_etag(self, etag): + setattr(self.instance, self.field.etag_field_name, etag) + if self.instance.pk: + self.instance.save(update_fields=[self.field.etag_field_name]) + class EbookField(models.FileField): """Represents an ebook file field, attachable to a model.""" attr_class = EbookFieldFile - - def __init__(self, format_name, *args, **kwargs): - super(EbookField, self).__init__(*args, **kwargs) - self.format_name = format_name + ext = None + for_parents = True + librarian2_api = False + ZIP = None + + def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs): + kwargs.setdefault('verbose_name', verbose_name) + self.with_etag = with_etag + self.etag_field_name = etag_field_name + kwargs.setdefault('max_length', 255) + kwargs.setdefault('blank', True) + kwargs.setdefault('default', '') + kwargs.setdefault('upload_to', self.get_upload_to(self.ext)) + + super().__init__(**kwargs) def deconstruct(self): - name, path, args, kwargs = super(EbookField, self).deconstruct() - args.insert(0, self.format_name) + name, path, args, kwargs = super().deconstruct() + if kwargs.get('max_length') == 255: + del kwargs['max_length'] + if kwargs.get('blank') is True: + del kwargs['blank'] + if kwargs.get('default') == '': + del kwargs['default'] + if self.get_upload_to(self.ext) == kwargs.get('upload_to'): + del kwargs['upload_to'] + # with_etag creates a second field, which then deconstructs to manage + # its own migrations. So for migrations, etag_field_name is explicitly + # set to avoid double creation of the etag field. + if self.with_etag: + kwargs['etag_field_name'] = self.etag_field_name + else: + kwargs['with_etag'] = self.with_etag + return name, path, args, kwargs - @property - def builder(self): - """Finds a celery task suitable for the format of the field.""" - return BuildEbook.for_format(self.format_name) + @classmethod + def get_upload_to(cls, directory): + directory = getattr(cls, 'directory', cls.ext) + upload_template = f'book/{directory}/%s.{cls.ext}' + return UploadToPath(upload_template) def contribute_to_class(self, cls, name): super(EbookField, self).contribute_to_class(cls, name) + if self.with_etag and not self.etag_field_name: + self.etag_field_name = f'{name}_etag' + self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True) + self.etag_field.contribute_to_class(cls, f'{name}_etag') + def has(model_instance): return bool(getattr(model_instance, self.attname, None)) has.__doc__ = None has.__name__ = str("has_%s" % self.attname) has.short_description = self.name has.boolean = True + setattr(cls, 'has_%s' % self.attname, has) + def get_current_etag(self): + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') + librarian_version = pkg_resources.get_distribution("librarian").version + etag = librarian_version + mis = MediaInsertSet.get_for_format(self.ext) + if mis is not None: + etag += '_' + mis.etag + return etag -class BuildEbook(Task): - formats = {} + def find_stale(self, limit): + """Find some books where this format is stale.""" + # If there is not ETag field, bail. That's true for xml file field. + if not self.with_etag: + return [] - @classmethod - def register(cls, format_name): - """A decorator for registering subclasses for particular formats.""" - def wrapper(builder): - cls.formats[format_name] = builder - return builder - return wrapper + etag = self.get_current_etag() + + queryset = self.model.objects.all() + if not self.for_parents: + queryset = queryset.filter(children=None) + + queryset = queryset.exclude(**{ + f'{self.etag_field_name}__in': [ + etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}' + ] + }) + + queryset = queryset.order_by('?')[:limit] + return queryset @classmethod - def for_format(cls, format_name): - """Returns a celery task suitable for specified format.""" - return cls.formats.get(format_name, BuildEbookTask) + def find_all_stale(cls, model, limit): + """Schedules all stale ebooks of all formats to rebuild.""" + found = [] + for field in model._meta.fields: + if isinstance(field, cls): + for instance in field.find_stale(limit): + found.append(( + field.name, + instance + )) + random.shuffle(found) + found = found[:limit] + return found @staticmethod - def transform(wldoc, fieldfile): + def transform(wldoc, book): """Transforms an librarian.WLDocument into an librarian.OutputFile. - - By default, it just calls relevant wldoc.as_??? method. - """ - return getattr(wldoc, "as_%s" % fieldfile.field.format_name)() - - def run(self, obj, field_name): - """Just run `build` on FieldFile, can't pass it directly to Celery.""" - task_logger.info("%s -> %s" % (obj.slug, field_name)) - ret = self.build(getattr(obj, field_name)) - obj.flush_includes() - return ret + raise NotImplemented() def set_file_permissions(self, fieldfile): if fieldfile.instance.preview: @@ -105,72 +206,113 @@ class BuildEbook(Task): def build(self, fieldfile): book = fieldfile.instance - out = self.transform(book.wldocument(), fieldfile) - fieldfile.save(None, File(open(out.get_filename())), save=False) + out = self.transform( + book.wldocument2() if self.librarian2_api else book.wldocument(), + book, + ) + with open(out.get_filename(), 'rb') as f: + fieldfile.save(None, File(f), save=False) self.set_file_permissions(fieldfile) if book.pk is not None: - books = type(book).objects.filter(pk=book.pk) - books.update(**{ - fieldfile.field.attname: fieldfile - }) - for book in books: - book.save() # just to trigger post-save - if fieldfile.field.format_name in app_settings.FORMAT_ZIPS: - remove_zip(app_settings.FORMAT_ZIPS[fieldfile.field.format_name]) -# Don't decorate BuildEbook, because we want to subclass it. -BuildEbookTask = task(BuildEbook, ignore_result=True) + book.save(update_fields=[self.attname]) + if self.ZIP: + remove_zip(self.ZIP) + +class XmlField(EbookField): + ext = 'xml' + + def build(self, fieldfile): + pass + + +class TxtField(EbookField): + ext = 'txt' + for_parents = False + librarian2_api = True -@BuildEbook.register('txt') -@task(ignore_result=True) -class BuildTxt(BuildEbook): @staticmethod - def transform(wldoc, fieldfile): - return wldoc.as_text() + def transform(wldoc, book): + from librarian.builders.txt import TxtBuilder + return TxtBuilder().build(wldoc) + +class Fb2Field(EbookField): + ext = 'fb2' + for_parents = False + ZIP = 'wolnelektury_pl_fb2' -@BuildEbook.register('pdf') -@task(ignore_result=True) -class BuildPdf(BuildEbook): @staticmethod - def transform(wldoc, fieldfile): - return wldoc.as_pdf(morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True, - ilustr_path=gallery_path(wldoc.book_info.url.slug)) + def transform(wldoc, book): + return wldoc.as_fb2() + + +class PdfField(EbookField): + ext = 'pdf' + ZIP = 'wolnelektury_pl_pdf' + + @staticmethod + def transform(wldoc, book): + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') + return wldoc.as_pdf( + morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, + cover=get_make_cover(book), + base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'], + fundraising=MediaInsertSet.get_texts_for('pdf'), + ) def build(self, fieldfile): - BuildEbook.build(self, fieldfile) + super().build(fieldfile) clear_cache(fieldfile.instance.slug) -@BuildEbook.register('epub') -@task(ignore_result=True) -class BuildEpub(BuildEbook): +class EpubField(EbookField): + ext = 'epub' + librarian2_api = True + ZIP = 'wolnelektury_pl_epub' + @staticmethod - def transform(wldoc, fieldfile): - return wldoc.as_epub(cover=True, ilustr_path=gallery_path(wldoc.book_info.url.slug)) + def transform(wldoc, book): + from librarian.builders import EpubBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') + return EpubBuilder( + base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', + fundraising=MediaInsertSet.get_texts_for('epub'), + cover=get_make_cover(book), + ).build(wldoc) -@BuildEbook.register('mobi') -@task(ignore_result=True) -class BuildMobi(BuildEbook): +class MobiField(EbookField): + ext = 'mobi' + librarian2_api = True + ZIP = 'wolnelektury_pl_mobi' + @staticmethod - def transform(wldoc, fieldfile): - return wldoc.as_mobi(cover=True, ilustr_path=gallery_path(wldoc.book_info.url.slug)) + def transform(wldoc, book): + from librarian.builders import MobiBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') + return MobiBuilder( + base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', + fundraising=MediaInsertSet.get_texts_for('mobi'), + cover=get_make_cover(book), + ).build(wldoc) + +class HtmlField(EbookField): + ext = 'html' + for_parents = False + librarian2_api = True -@BuildEbook.register('html') -@task(ignore_result=True) -class BuildHtml(BuildEbook): def build(self, fieldfile): from django.core.files.base import ContentFile - from fnpdjango.utils.text.slughifi import slughifi + from slugify import slugify from sortify import sortify from librarian import html from catalogue.models import Fragment, Tag book = fieldfile.instance - html_output = self.transform(book.wldocument(parse_dublincore=False), fieldfile) + html_output = self.transform(book.wldocument2(), book) # Delete old fragments, create from scratch if necessary. book.fragments.all().delete() @@ -184,7 +326,7 @@ class BuildHtml(BuildEbook): if lang not in [ln[0] for ln in settings.LANGUAGES]: lang = None - fieldfile.save(None, ContentFile(html_output.get_string()), save=False) + fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False) self.set_file_permissions(fieldfile) type(book).objects.filter(pk=book.pk).update(**{ fieldfile.field.attname: fieldfile @@ -204,19 +346,22 @@ class BuildHtml(BuildEbook): if lang == settings.LANGUAGE_CODE: # Allow creating themes if book in default language. tag, created = Tag.objects.get_or_create( - slug=slughifi(theme_name), - category='theme') + slug=slugify(theme_name), + category='theme' + ) if created: tag.name = theme_name setattr(tag, "name_%s" % lang, theme_name) tag.sort_key = sortify(theme_name.lower()) - tag.for_books = True tag.save() themes.append(tag) elif lang is not None: # Don't create unknown themes in non-default languages. try: - tag = Tag.objects.get(category='theme', **{"name_%s" % lang: theme_name}) + tag = Tag.objects.get( + category='theme', + **{"name_%s" % lang: theme_name} + ) except Tag.DoesNotExist: pass else: @@ -228,82 +373,83 @@ class BuildHtml(BuildEbook): short_text = truncate_html_words(text, 15) if text == short_text: short_text = '' - new_fragment = Fragment.objects.create(anchor=fragment.id, book=book, text=text, short_text=short_text) + new_fragment = Fragment.objects.create( + anchor=fragment.id, + book=book, + text=text, + short_text=short_text + ) new_fragment.save() new_fragment.tags = set(meta_tags + themes) - for theme in themes: - if not theme.for_books: - theme.for_books = True - theme.save() book.html_built.send(sender=type(self), instance=book) return True return False @staticmethod - def transform(wldoc, fieldfile): - # ugly, but we can't use wldoc.book_info here - from librarian import DCNS - url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url')) - if url_elem is None: - gallery = '' + def transform(wldoc, book): + from librarian.builders.html import HtmlBuilder + url = wldoc.meta.url + if not url: + gal_url = '' + gal_path = '' else: - gallery = gallery_url(slug=url_elem.text.rsplit('/', 1)[1]) - return wldoc.as_html(options={'gallery': "'%s'" % gallery}) + gal_url = gallery_url(slug=url.slug) + gal_path = gallery_path(slug=url.slug) + return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc) -class BuildCover(BuildEbook): +class CoverField(EbookField): + ext = 'jpg' + directory = 'cover' + + @staticmethod + def transform(wldoc, book): + return get_make_cover(book)(wldoc.book_info, width=360).output_file() + def set_file_permissions(self, fieldfile): pass -@BuildEbook.register('cover_thumb') -@task(ignore_result=True) -class BuildCoverThumb(BuildCover): - @classmethod - def transform(cls, wldoc, fieldfile): +class CoverCleanField(CoverField): + directory = 'cover_clean' + + @staticmethod + def transform(wldoc, book): + return get_make_cover(book)(wldoc.book_info, width=360).output_file() + + +class CoverThumbField(CoverField): + directory = 'cover_thumb' + + @staticmethod + def transform(wldoc, book): from librarian.cover import WLCover return WLCover(wldoc.book_info, height=193).output_file() -@BuildEbook.register('cover_api_thumb') -@task(ignore_result=True) -class BuildCoverApiThumb(BuildCover): - @classmethod - def transform(cls, wldoc, fieldfile): +class CoverApiThumbField(CoverField): + directory = 'cover_api_thumb' + + @staticmethod + def transform(wldoc, book): from librarian.cover import WLNoBoxCover return WLNoBoxCover(wldoc.book_info, height=500).output_file() -@BuildEbook.register('simple_cover') -@task(ignore_result=True) -class BuildSimpleCover(BuildCover): - @classmethod - def transform(cls, wldoc, fieldfile): +class SimpleCoverField(CoverField): + directory = 'cover_simple' + + @staticmethod + def transform(wldoc, book): from librarian.cover import WLNoBoxCover return WLNoBoxCover(wldoc.book_info, height=1000).output_file() -# not used, but needed for migrations -class OverwritingFieldFile(FieldFile): - """ - Deletes the old file before saving the new one. - """ +class CoverEbookpointField(CoverField): + directory = 'cover_ebookpoint' - def save(self, name, content, *args, **kwargs): - leave = kwargs.pop('leave', None) - # delete if there's a file already and there's a new one coming - if not leave and self and (not hasattr(content, 'path') or content.path != self.path): - self.delete(save=False) - return super(OverwritingFieldFile, self).save(name, content, *args, **kwargs) - - -class OverwritingFileField(models.FileField): - attr_class = OverwritingFieldFile - - -class OverwriteStorage(FileSystemStorage): - - def get_available_name(self, name, max_length=None): - self.delete(name) - return name + @staticmethod + def transform(wldoc, book): + from librarian.cover import EbookpointCover + return EbookpointCover(wldoc.book_info).output_file()