1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
 
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 
   9 from urllib.request import urlopen
 
  10 from django.apps import apps
 
  11 from django.conf import settings
 
  12 from django.core.files import File
 
  13 from django.db import models
 
  14 from django.db.models.fields.files import FieldFile
 
  15 from django.utils.deconstruct import deconstructible
 
  16 from librarian.cover import make_cover
 
  17 from catalogue.constants import LANGUAGES_3TO2
 
  18 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
 
  19 from waiter.utils import clear_cache
 
  21 ETAG_SCHEDULED_SUFFIX = '-scheduled'
 
  22 EBOOK_BUILD_PRIORITY = 0
 
  23 EBOOK_REBUILD_PRIORITY = 9
 
  27 class UploadToPath(object):
 
  28     def __init__(self, path):
 
  31     def __call__(self, instance, filename):
 
  32         return self.path % instance.slug
 
  34     def __eq__(self, other):
 
  35         return isinstance(other, type(self)) and other.path == self.path
 
  38 def get_make_cover(book):
 
  39     extra = book.get_extra_info_json()
 
  40     cover_logo = extra.get('logo_mono', extra.get('logo'))
 
  44                 cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
 
  50     def mc(*args, **kwargs):
 
  52             kwargs['cover_logo'] = cover_logo
 
  53         return make_cover(*args, **kwargs)
 
  57 class EbookFieldFile(FieldFile):
 
  58     """Represents contents of an ebook file field."""
 
  61         """Build the ebook immediately."""
 
  62         etag = self.field.get_current_etag()
 
  63         self.field.build(self)
 
  64         self.update_etag(etag)
 
  65         self.instance.clear_cache()
 
  67     def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
 
  68         """Builds the ebook in a delayed task."""
 
  69         from .tasks import build_field
 
  72             "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
 
  74         return build_field.apply_async(
 
  75             [self.instance.pk, self.field.attname],
 
  79     def set_readable(self, readable):
 
  81         permissions = 0o644 if readable else 0o600
 
  82         os.chmod(self.path, permissions)
 
  84     def update_etag(self, etag):
 
  85         setattr(self.instance, self.field.etag_field_name, etag)
 
  87             self.instance.save(update_fields=[self.field.etag_field_name])
 
  90 class EbookField(models.FileField):
 
  91     """Represents an ebook file field, attachable to a model."""
 
  92     attr_class = EbookFieldFile
 
  95     librarian2_api = False
 
  98     def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
 
  99         kwargs.setdefault('verbose_name', verbose_name)
 
 100         self.with_etag = with_etag
 
 101         self.etag_field_name = etag_field_name
 
 102         kwargs.setdefault('max_length', 255)
 
 103         kwargs.setdefault('blank', True)
 
 104         kwargs.setdefault('default', '')
 
 105         kwargs.setdefault('upload_to', self.get_upload_to(self.ext))
 
 107         super().__init__(**kwargs)
 
 109     def deconstruct(self):
 
 110         name, path, args, kwargs = super().deconstruct()
 
 111         if kwargs.get('max_length') == 255:
 
 112             del kwargs['max_length']
 
 113         if kwargs.get('blank') is True:
 
 115         if kwargs.get('default') == '':
 
 116             del kwargs['default']
 
 117         if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
 
 118             del kwargs['upload_to']
 
 119         # with_etag creates a second field, which then deconstructs to manage
 
 120         # its own migrations. So for migrations, etag_field_name is explicitly
 
 121         # set to avoid double creation of the etag field.
 
 123             kwargs['etag_field_name'] = self.etag_field_name
 
 125             kwargs['with_etag'] = self.with_etag
 
 127         return name, path, args, kwargs
 
 130     def get_upload_to(cls, directory):
 
 131         directory = getattr(cls, 'directory', cls.ext)
 
 132         upload_template = f'book/{directory}/%s.{cls.ext}'
 
 133         return UploadToPath(upload_template)
 
 135     def contribute_to_class(self, cls, name):
 
 136         super(EbookField, self).contribute_to_class(cls, name)
 
 138         if self.with_etag and not self.etag_field_name:
 
 139             self.etag_field_name = f'{name}_etag'
 
 140             self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
 
 141             self.etag_field.contribute_to_class(cls, f'{name}_etag')
 
 143         def has(model_instance):
 
 144             return bool(getattr(model_instance, self.attname, None))
 
 146         has.__name__ = str("has_%s" % self.attname)
 
 147         has.short_description = self.name
 
 150         setattr(cls, 'has_%s' % self.attname, has)
 
 152     def get_current_etag(self):
 
 153         MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
 
 154         librarian_version = pkg_resources.get_distribution("librarian").version
 
 155         etag = librarian_version
 
 156         mis = MediaInsertSet.get_for_format(self.ext)
 
 158             etag += '_' + mis.etag
 
 161     def find_stale(self, limit):
 
 162         """Find some books where this format is stale."""
 
 163         # If there is no ETag field, bail. That's the case for the XML file field.
 
 164         if not self.with_etag:
 
 167         etag = self.get_current_etag()
 
 169         queryset = self.model.objects.all()
 
 170         if not self.for_parents:
 
 171             queryset = queryset.filter(children=None)
 
 173         queryset = queryset.exclude(**{
 
 174             f'{self.etag_field_name}__in': [
 
 175                 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
 
 179         queryset = queryset.order_by('?')[:limit]
 
 183     def find_all_stale(cls, model, limit):
 
 184         """Schedules all stale ebooks of all formats to rebuild."""
 
 186         for field in model._meta.fields:
 
 187             if isinstance(field, cls):
 
 188                 for instance in field.find_stale(limit):
 
 193         random.shuffle(found)
 
 194         found = found[:limit]
 
 198     def transform(wldoc, book):
 
 199         """Transforms an librarian.WLDocument into an librarian.OutputFile.
 
 201         raise NotImplemented()
 
 203     def set_file_permissions(self, fieldfile):
 
 204         if fieldfile.instance.preview:
 
 205             fieldfile.set_readable(False)
 
 207     def build(self, fieldfile):
 
 208         book = fieldfile.instance
 
 209         out = self.transform(
 
 210             book.wldocument2() if self.librarian2_api else book.wldocument(),
 
 213         with open(out.get_filename(), 'rb') as f:
 
 214             fieldfile.save(None, File(f), save=False)
 
 215         self.set_file_permissions(fieldfile)
 
 216         if book.pk is not None:
 
 217             book.save(update_fields=[self.attname])
 
 222 class XmlField(EbookField):
 
 225     def build(self, fieldfile):
 
 229 class TxtField(EbookField):
 
 234     def transform(wldoc, book):
 
 235         return wldoc.as_text()
 
 238 class Fb2Field(EbookField):
 
 241     ZIP = 'wolnelektury_pl_fb2'
 
 244     def transform(wldoc, book):
 
 245         return wldoc.as_fb2()
 
 248 class PdfField(EbookField):
 
 250     ZIP = 'wolnelektury_pl_pdf'
 
 253     def transform(wldoc, book):
 
 254         MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
 
 256             morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
 
 257             cover=get_make_cover(book),
 
 258             base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
 
 259             fundraising=MediaInsertSet.get_texts_for('pdf'),
 
 262     def build(self, fieldfile):
 
 263         super().build(fieldfile)
 
 264         clear_cache(fieldfile.instance.slug)
 
 267 class EpubField(EbookField):
 
 269     librarian2_api = True
 
 270     ZIP = 'wolnelektury_pl_epub'
 
 273     def transform(wldoc, book):
 
 274         from librarian.builders import EpubBuilder
 
 275         MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
 
 277                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
 
 278                 fundraising=MediaInsertSet.get_texts_for('epub'),
 
 279                 cover=get_make_cover(book),
 
 283 class MobiField(EbookField):
 
 285     librarian2_api = True
 
 286     ZIP = 'wolnelektury_pl_mobi'
 
 289     def transform(wldoc, book):
 
 290         from librarian.builders import MobiBuilder
 
 291         MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
 
 293                 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
 
 294                 fundraising=MediaInsertSet.get_texts_for('mobi'),
 
 295                 cover=get_make_cover(book),
 
 299 class HtmlField(EbookField):
 
 303     def build(self, fieldfile):
 
 304         from django.core.files.base import ContentFile
 
 305         from slugify import slugify
 
 306         from sortify import sortify
 
 307         from librarian import html
 
 308         from catalogue.models import Fragment, Tag
 
 310         book = fieldfile.instance
 
 312         html_output = self.transform(book.wldocument(parse_dublincore=False), book)
 
 314         # Delete old fragments, create from scratch if necessary.
 
 315         book.fragments.all().delete()
 
 318             meta_tags = list(book.tags.filter(
 
 319                 category__in=('author', 'epoch', 'genre', 'kind')))
 
 322             lang = LANGUAGES_3TO2.get(lang, lang)
 
 323             if lang not in [ln[0] for ln in settings.LANGUAGES]:
 
 326             fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
 
 327             self.set_file_permissions(fieldfile)
 
 328             type(book).objects.filter(pk=book.pk).update(**{
 
 329                 fieldfile.field.attname: fieldfile
 
 333             closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
 
 334             for fragment in closed_fragments.values():
 
 336                     theme_names = [s.strip() for s in fragment.themes.split(',')]
 
 337                 except AttributeError:
 
 340                 for theme_name in theme_names:
 
 343                     if lang == settings.LANGUAGE_CODE:
 
 344                         # Allow creating themes if book in default language.
 
 345                         tag, created = Tag.objects.get_or_create(
 
 346                             slug=slugify(theme_name),
 
 350                             tag.name = theme_name
 
 351                             setattr(tag, "name_%s" % lang, theme_name)
 
 352                             tag.sort_key = sortify(theme_name.lower())
 
 355                     elif lang is not None:
 
 356                         # Don't create unknown themes in non-default languages.
 
 358                             tag = Tag.objects.get(
 
 360                                 **{"name_%s" % lang: theme_name}
 
 362                         except Tag.DoesNotExist:
 
 369                 text = fragment.to_string()
 
 370                 short_text = truncate_html_words(text, 15)
 
 371                 if text == short_text:
 
 373                 new_fragment = Fragment.objects.create(
 
 377                     short_text=short_text
 
 381                 new_fragment.tags = set(meta_tags + themes)
 
 382             book.html_built.send(sender=type(self), instance=book)
 
 387     def transform(wldoc, book):
 
 388         # ugly, but we can't use wldoc.book_info here
 
 389         from librarian import DCNS
 
 390         url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
 
 395             slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
 
 396             gal_url = gallery_url(slug=slug)
 
 397             gal_path = gallery_path(slug=slug)
 
 398         return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
 
 401 class CoverField(EbookField):
 
 406     def transform(wldoc, book):
 
 407         return get_make_cover(book)(wldoc.book_info, width=360).output_file()
 
 409     def set_file_permissions(self, fieldfile):
 
 413 class CoverCleanField(CoverField):
 
 414     directory = 'cover_clean'
 
 417     def transform(wldoc, book):
 
 418         return get_make_cover(book)(wldoc.book_info, width=360).output_file()
 
 421 class CoverThumbField(CoverField):
 
 422     directory = 'cover_thumb'
 
 425     def transform(wldoc, book):
 
 426         from librarian.cover import WLCover
 
 427         return WLCover(wldoc.book_info, height=193).output_file()
 
 430 class CoverApiThumbField(CoverField):
 
 431     directory = 'cover_api_thumb'
 
 434     def transform(wldoc, book):
 
 435         from librarian.cover import WLNoBoxCover
 
 436         return WLNoBoxCover(wldoc.book_info, height=500).output_file()
 
 439 class SimpleCoverField(CoverField):
 
 440     directory = 'cover_simple'
 
 443     def transform(wldoc, book):
 
 444         from librarian.cover import WLNoBoxCover
 
 445         return WLNoBoxCover(wldoc.book_info, height=1000).output_file()
 
 448 class CoverEbookpointField(CoverField):
 
 449     directory = 'cover_ebookpoint'
 
 452     def transform(wldoc, book):
 
 453         from librarian.cover import EbookpointCover
 
 454         return EbookpointCover(wldoc.book_info).output_file()