X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/4157358510703a54cde8f3b0f9814f2cd1c9f40a..a3b153ea6e30da275edf3e34b8d2f8ca1bb436cc:/src/catalogue/fields.py diff --git a/src/catalogue/fields.py b/src/catalogue/fields.py index 38cac775b..bf17d3581 100644 --- a/src/catalogue/fields.py +++ b/src/catalogue/fields.py @@ -2,13 +2,16 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os +import pkg_resources +import random +from django.apps import apps from django.conf import settings from django.core.files import File from django.db import models from django.db.models.fields.files import FieldFile from django.utils.deconstruct import deconstructible from django.utils.translation import gettext_lazy as _ -from catalogue.constants import LANGUAGES_3TO2, EBOOK_FORMATS_WITH_CHILDREN, EBOOK_FORMATS_WITHOUT_CHILDREN +from catalogue.constants import LANGUAGES_3TO2 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url from waiter.utils import clear_cache @@ -66,17 +69,14 @@ class EbookField(models.FileField): """Represents an ebook file field, attachable to a model.""" attr_class = EbookFieldFile ext = None + for_parents = True librarian2_api = False ZIP = None - def __init__(self, verbose_name_=None, with_etag=True, **kwargs): - # This is just for compatibility with older migrations, - # where first argument was for ebook format. - # Can be scrapped if old migrations are updated/removed. - verbose_name = verbose_name_ or _("%s file") % self.ext - kwargs.setdefault('verbose_name', verbose_name_ ) - + def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs): + kwargs.setdefault('verbose_name', verbose_name) self.with_etag = with_etag + self.etag_field_name = etag_field_name kwargs.setdefault('max_length', 255) kwargs.setdefault('blank', True) kwargs.setdefault('default', '') @@ -94,16 +94,15 @@ class EbookField(models.FileField): del kwargs['default'] if self.get_upload_to(self.ext) == kwargs.get('upload_to'): del kwargs['upload_to'] - if not self.with_etag: + # with_etag creates a second field, which then deconstructs to manage + # its own migrations. So for migrations, etag_field_name is explicitly + # set to avoid double creation of the etag field. + if self.with_etag: + kwargs['etag_field_name'] = self.etag_field_name + else: kwargs['with_etag'] = self.with_etag - # Compatibility - verbose_name = kwargs.get('verbose_name') - if verbose_name: - del kwargs['verbose_name'] - if verbose_name != _("%s file") % self.ext: - args = [verbose_name] + args - return name, path, args, kwargs + return name, path, args, kwargs @classmethod def get_upload_to(cls, directory): @@ -114,8 +113,8 @@ class EbookField(models.FileField): def contribute_to_class(self, cls, name): super(EbookField, self).contribute_to_class(cls, name) - self.etag_field_name = f'{name}_etag' - if self.with_etag: + if self.with_etag and not self.etag_field_name: + self.etag_field_name = f'{name}_etag' self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True) self.etag_field.contribute_to_class(cls, f'{name}_etag') @@ -129,39 +128,49 @@ class EbookField(models.FileField): setattr(cls, 'has_%s' % self.attname, has) def get_current_etag(self): - import pkg_resources + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') librarian_version = pkg_resources.get_distribution("librarian").version - return librarian_version - - def schedule_stale(self, queryset=None): - """Schedule building this format for all the books where etag is stale.""" + etag = librarian_version + mis = MediaInsertSet.get_for_format(self.ext) + if mis is not None: + etag += '_' + mis.etag + return etag + + def find_stale(self, limit): + """Find some books where this format is stale.""" # If there is not ETag field, bail. That's true for xml file field. if not self.with_etag: - return + return [] etag = self.get_current_etag() - if queryset is None: - queryset = self.model.objects.all() - if self.format_name in EBOOK_FORMATS_WITHOUT_CHILDREN + ['html']: + queryset = self.model.objects.all() + if not self.for_parents: queryset = queryset.filter(children=None) queryset = queryset.exclude(**{ f'{self.etag_field_name}__in': [ etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}' - ] + ] }) - for obj in queryset: - fieldfile = getattr(obj, self.attname) - priority = EBOOK_REBUILD_PRIORITY if fieldfile else EBOOK_BUILD_PRIORITY - fieldfile.build_delay(priority=priority) + + queryset = queryset.order_by('?')[:limit] + return queryset @classmethod - def schedule_all_stale(cls, model): + def find_all_stale(cls, model, limit): """Schedules all stale ebooks of all formats to rebuild.""" + found = [] for field in model._meta.fields: if isinstance(field, cls): - field.schedule_stale() + for instance in field.find_stale(limit): + found.append(( + field.name, + instance + )) + random.shuffle(found) + found = found[:limit] + return found @staticmethod def transform(wldoc): @@ -178,7 +187,8 @@ class EbookField(models.FileField): out = self.transform( book.wldocument2() if self.librarian2_api else book.wldocument(), ) - fieldfile.save(None, File(open(out.get_filename(), 'rb')), save=False) + with open(out.get_filename(), 'rb') as f: + fieldfile.save(None, File(f), save=False) self.set_file_permissions(fieldfile) if book.pk is not None: book.save(update_fields=[self.attname]) @@ -195,6 +205,7 @@ class XmlField(EbookField): class TxtField(EbookField): ext = 'txt' + for_parents = False @staticmethod def transform(wldoc): @@ -203,6 +214,7 @@ class TxtField(EbookField): class Fb2Field(EbookField): ext = 'fb2' + for_parents = False ZIP = 'wolnelektury_pl_fb2' @staticmethod @@ -221,7 +233,7 @@ class PdfField(EbookField): base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc']) def build(self, fieldfile): - BuildEbook.build(self, fieldfile) + super().build(fieldfile) clear_cache(fieldfile.instance.slug) @@ -233,9 +245,10 @@ class EpubField(EbookField): @staticmethod def transform(wldoc): from librarian.builders import EpubBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') return EpubBuilder( base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', - fundraising=settings.EPUB_FUNDRAISING + fundraising=MediaInsertSet.get_texts_for('epub') ).build(wldoc) @@ -247,14 +260,16 @@ class MobiField(EbookField): @staticmethod def transform(wldoc): from librarian.builders import MobiBuilder + MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet') return MobiBuilder( base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/', - fundraising=settings.EPUB_FUNDRAISING + fundraising=MediaInsertSet.get_texts_for('mobi') ).build(wldoc) class HtmlField(EbookField): ext = 'html' + for_parents = False def build(self, fieldfile): from django.core.files.base import ContentFile @@ -363,6 +378,10 @@ class CoverField(EbookField): ext = 'jpg' directory = 'cover' + @staticmethod + def transform(wldoc): + return wldoc.as_cover() + def set_file_permissions(self, fieldfile): pass @@ -372,11 +391,8 @@ class CoverCleanField(CoverField): @staticmethod def transform(wldoc): - if wldoc.book_info.cover_box_position == 'none': - from librarian.cover import WLCover - return WLCover(wldoc.book_info, width=240).output_file() from librarian.covers.marquise import MarquiseCover - return MarquiseCover(wldoc.book_info, width=240).output_file() + return MarquiseCover(wldoc.book_info, width=360).output_file() class CoverThumbField(CoverField):