X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/4727eb31e50fa1571c83c4be5e228534b07574a7..86530a9e72f32d28ef1971ac9fa705c85b1bd3b6:/src/catalogue/fields.py diff --git a/src/catalogue/fields.py b/src/catalogue/fields.py index 6ae4da3c4..ebe5cf478 100644 --- a/src/catalogue/fields.py +++ b/src/catalogue/fields.py @@ -1,14 +1,20 @@ -# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # +import io import os +import pkg_resources +import random +import time +from urllib.request import urlopen +from django.apps import apps from django.conf import settings from django.core.files import File from django.db import models from django.db.models.fields.files import FieldFile from django.utils.deconstruct import deconstructible -from django.utils.translation import gettext_lazy as _ -from catalogue.constants import LANGUAGES_3TO2, EBOOK_FORMATS_WITH_CHILDREN, EBOOK_FORMATS_WITHOUT_CHILDREN +from librarian.cover import make_cover +from catalogue.constants import LANGUAGES_3TO2 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url from waiter.utils import clear_cache @@ -29,6 +35,25 @@ class UploadToPath(object): return isinstance(other, type(self)) and other.path == self.path +def get_make_cover(book): + extra = book.get_extra_info_json() + cover_logo = extra.get('logo_mono', extra.get('logo')) + if cover_logo: + while True: + try: + cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read()) + except: + time.sleep(2) + else: + break + + def mc(*args, **kwargs): + if cover_logo: + kwargs['cover_logo'] = cover_logo + return make_cover(*args, **kwargs) + return mc + + class EbookFieldFile(FieldFile): """Represents contents of an ebook file field.""" @@ -66,6 +91,7 @@ class EbookField(models.FileField): """Represents an ebook file field, attachable to a model.""" attr_class = EbookFieldFile ext = None + for_parents = True librarian2_api = False ZIP = None @@ -124,42 +150,52 @@ class EbookField(models.FileField): setattr(cls, 'has_%s' % self.attname, has) def get_current_etag(self): - import pkg_resources + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') librarian_version = pkg_resources.get_distribution("librarian").version - return librarian_version - - def schedule_stale(self, queryset=None): - """Schedule building this format for all the books where etag is stale.""" + etag = librarian_version + mis = MediaInsertSet.get_for_format(self.ext) + if mis is not None: + etag += '_' + mis.etag + return etag + + def find_stale(self, limit): + """Find some books where this format is stale.""" # If there is not ETag field, bail. That's true for xml file field. if not self.with_etag: - return + return [] etag = self.get_current_etag() - if queryset is None: - queryset = self.model.objects.all() - if self.format_name in EBOOK_FORMATS_WITHOUT_CHILDREN + ['html']: + queryset = self.model.objects.all() + if not self.for_parents: queryset = queryset.filter(children=None) queryset = queryset.exclude(**{ f'{self.etag_field_name}__in': [ etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}' - ] + ] }) - for obj in queryset: - fieldfile = getattr(obj, self.attname) - priority = EBOOK_REBUILD_PRIORITY if fieldfile else EBOOK_BUILD_PRIORITY - fieldfile.build_delay(priority=priority) + + queryset = queryset.order_by('?')[:limit] + return queryset @classmethod - def schedule_all_stale(cls, model): + def find_all_stale(cls, model, limit): """Schedules all stale ebooks of all formats to rebuild.""" + found = [] for field in model._meta.fields: if isinstance(field, cls): - field.schedule_stale() + for instance in field.find_stale(limit): + found.append(( + field.name, + instance + )) + random.shuffle(found) + found = found[:limit] + return found @staticmethod - def transform(wldoc): + def transform(wldoc, book): """Transforms an librarian.WLDocument into an librarian.OutputFile. """ raise NotImplemented() @@ -172,6 +208,7 @@ class EbookField(models.FileField): book = fieldfile.instance out = self.transform( book.wldocument2() if self.librarian2_api else book.wldocument(), + book, ) with open(out.get_filename(), 'rb') as f: fieldfile.save(None, File(f), save=False) @@ -191,18 +228,22 @@ class XmlField(EbookField): class TxtField(EbookField): ext = 'txt' + for_parents = False + librarian2_api = True @staticmethod - def transform(wldoc): - return wldoc.as_text() + def transform(wldoc, book): + from librarian.builders.txt import TxtBuilder + return TxtBuilder().build(wldoc) class Fb2Field(EbookField): ext = 'fb2' + for_parents = False ZIP = 'wolnelektury_pl_fb2' @staticmethod - def transform(wldoc): + def transform(wldoc, book): return wldoc.as_fb2() @@ -211,10 +252,14 @@ class PdfField(EbookField): ZIP = 'wolnelektury_pl_pdf' @staticmethod - def transform(wldoc): + def transform(wldoc, book): + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') return wldoc.as_pdf( - morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True, - base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc']) + morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, + cover=get_make_cover(book), + base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'], + fundraising=MediaInsertSet.get_texts_for('pdf'), + ) def build(self, fieldfile): super().build(fieldfile) @@ -227,11 +272,13 @@ class EpubField(EbookField): ZIP = 'wolnelektury_pl_epub' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.builders import EpubBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') return EpubBuilder( base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', - fundraising=settings.EPUB_FUNDRAISING + fundraising=MediaInsertSet.get_texts_for('epub'), + cover=get_make_cover(book), ).build(wldoc) @@ -241,16 +288,20 @@ class MobiField(EbookField): ZIP = 'wolnelektury_pl_mobi' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.builders import MobiBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') return MobiBuilder( base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', - fundraising=settings.EPUB_FUNDRAISING + fundraising=MediaInsertSet.get_texts_for('mobi'), + cover=get_make_cover(book), ).build(wldoc) class HtmlField(EbookField): ext = 'html' + for_parents = False + librarian2_api = True def build(self, fieldfile): from django.core.files.base import ContentFile @@ -261,7 +312,7 @@ class HtmlField(EbookField): book = fieldfile.instance - html_output = self.transform(book.wldocument(parse_dublincore=False)) + html_output = self.transform(book.wldocument2(), book) # Delete old fragments, create from scratch if necessary. book.fragments.all().delete() @@ -302,7 +353,6 @@ class HtmlField(EbookField): tag.name = theme_name setattr(tag, "name_%s" % lang, theme_name) tag.sort_key = sortify(theme_name.lower()) - tag.for_books = True tag.save() themes.append(tag) elif lang is not None: @@ -332,27 +382,21 @@ class HtmlField(EbookField): new_fragment.save() new_fragment.tags = set(meta_tags + themes) - for theme in themes: - if not theme.for_books: - theme.for_books = True - theme.save() book.html_built.send(sender=type(self), instance=book) return True return False @staticmethod - def transform(wldoc): - # ugly, but we can't use wldoc.book_info here - from librarian import DCNS - url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url')) - if url_elem is None: + def transform(wldoc, book): + from librarian.builders.html import HtmlBuilder + url = wldoc.meta.url + if not url: gal_url = '' gal_path = '' else: - slug = url_elem.text.rstrip('/').rsplit('/', 1)[1] - gal_url = gallery_url(slug=slug) - gal_path = gallery_path(slug=slug) - return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)) + gal_url = gallery_url(slug=url.slug) + gal_path = gallery_path(slug=url.slug) + return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc) class CoverField(EbookField): @@ -360,8 +404,8 @@ class CoverField(EbookField): directory = 'cover' @staticmethod - def transform(wldoc): - return wldoc.as_cover() + def transform(wldoc, book): + return get_make_cover(book)(wldoc.book_info, width=360).output_file() def set_file_permissions(self, fieldfile): pass @@ -371,16 +415,15 @@ class CoverCleanField(CoverField): directory = 'cover_clean' @staticmethod - def transform(wldoc): - from librarian.covers.marquise import MarquiseCover - return MarquiseCover(wldoc.book_info, width=360).output_file() + def transform(wldoc, book): + return get_make_cover(book)(wldoc.book_info, width=360).output_file() class CoverThumbField(CoverField): directory = 'cover_thumb' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.cover import WLCover return WLCover(wldoc.book_info, height=193).output_file() @@ -389,7 +432,7 @@ class CoverApiThumbField(CoverField): directory = 'cover_api_thumb' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.cover import WLNoBoxCover return WLNoBoxCover(wldoc.book_info, height=500).output_file() @@ -398,7 +441,7 @@ class SimpleCoverField(CoverField): directory = 'cover_simple' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.cover import WLNoBoxCover return WLNoBoxCover(wldoc.book_info, height=1000).output_file() @@ -407,6 +450,6 @@ class CoverEbookpointField(CoverField): directory = 'cover_ebookpoint' @staticmethod - def transform(wldoc): + def transform(wldoc, book): from librarian.cover import EbookpointCover return EbookpointCover(wldoc.book_info).output_file()