1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
9 from urllib.request import urlopen
10 from django.apps import apps
11 from django.conf import settings
12 from django.core.files import File
13 from django.db import models
14 from django.db.models.fields.files import FieldFile
15 from django.utils.deconstruct import deconstructible
16 from librarian.cover import make_cover
17 from catalogue.constants import LANGUAGES_3TO2
18 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
19 from waiter.utils import clear_cache
21 ETAG_SCHEDULED_SUFFIX = '-scheduled'
22 EBOOK_BUILD_PRIORITY = 0
23 EBOOK_REBUILD_PRIORITY = 9
27 class UploadToPath(object):
28 def __init__(self, path):
31 def __call__(self, instance, filename):
32 return self.path % instance.slug
34 def __eq__(self, other):
35 return isinstance(other, type(self)) and other.path == self.path
38 def get_make_cover(book):
39 extra = book.get_extra_info_json()
40 cover_logo = extra.get('logo_mono', extra.get('logo'))
44 cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
50 def mc(*args, **kwargs):
52 kwargs['cover_logo'] = cover_logo
53 return make_cover(*args, **kwargs)
57 class EbookFieldFile(FieldFile):
58 """Represents contents of an ebook file field."""
61 """Build the ebook immediately."""
62 etag = self.field.get_current_etag()
63 self.field.build(self)
64 self.update_etag(etag)
65 self.instance.clear_cache()
67 def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
68 """Builds the ebook in a delayed task."""
69 from .tasks import build_field
72 "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
74 return build_field.apply_async(
75 [self.instance.pk, self.field.attname],
79 def set_readable(self, readable):
81 permissions = 0o644 if readable else 0o600
82 os.chmod(self.path, permissions)
84 def update_etag(self, etag):
85 setattr(self.instance, self.field.etag_field_name, etag)
87 self.instance.save(update_fields=[self.field.etag_field_name])
90 class EbookField(models.FileField):
91 """Represents an ebook file field, attachable to a model."""
92 attr_class = EbookFieldFile
95 librarian2_api = False
98 def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
99 kwargs.setdefault('verbose_name', verbose_name)
100 self.with_etag = with_etag
101 self.etag_field_name = etag_field_name
102 kwargs.setdefault('max_length', 255)
103 kwargs.setdefault('blank', True)
104 kwargs.setdefault('default', '')
105 kwargs.setdefault('upload_to', self.get_upload_to(self.ext))
107 super().__init__(**kwargs)
109 def deconstruct(self):
110 name, path, args, kwargs = super().deconstruct()
111 if kwargs.get('max_length') == 255:
112 del kwargs['max_length']
113 if kwargs.get('blank') is True:
115 if kwargs.get('default') == '':
116 del kwargs['default']
117 if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
118 del kwargs['upload_to']
119 # with_etag creates a second field, which then deconstructs to manage
120 # its own migrations. So for migrations, etag_field_name is explicitly
121 # set to avoid double creation of the etag field.
123 kwargs['etag_field_name'] = self.etag_field_name
125 kwargs['with_etag'] = self.with_etag
127 return name, path, args, kwargs
def get_upload_to(cls, directory):
    """Build the ``UploadToPath`` callable for this field class.

    NOTE: the ``directory`` argument is not used. The directory comes from
    the class's ``directory`` attribute, falling back to ``cls.ext`` when
    the class does not define one. The resulting template has the shape
    ``book/<directory>/%s.<ext>``.
    """
    subdir = getattr(cls, 'directory', cls.ext)
    return UploadToPath(f'book/{subdir}/%s.{cls.ext}')
135 def contribute_to_class(self, cls, name):
136 super(EbookField, self).contribute_to_class(cls, name)
138 if self.with_etag and not self.etag_field_name:
139 self.etag_field_name = f'{name}_etag'
140 self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
141 self.etag_field.contribute_to_class(cls, f'{name}_etag')
143 def has(model_instance):
144 return bool(getattr(model_instance, self.attname, None))
146 has.__name__ = str("has_%s" % self.attname)
147 has.short_description = self.name
150 setattr(cls, 'has_%s' % self.attname, has)
152 def get_current_etag(self):
153 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
154 librarian_version = pkg_resources.get_distribution("librarian").version
155 etag = librarian_version
156 mis = MediaInsertSet.get_for_format(self.ext)
158 etag += '_' + mis.etag
161 def find_stale(self, limit):
162 """Find some books where this format is stale."""
163 # If there is not ETag field, bail. That's true for xml file field.
164 if not self.with_etag:
167 etag = self.get_current_etag()
169 queryset = self.model.objects.all()
170 if not self.for_parents:
171 queryset = queryset.filter(children=None)
173 queryset = queryset.exclude(**{
174 f'{self.etag_field_name}__in': [
175 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
179 queryset = queryset.order_by('?')[:limit]
183 def find_all_stale(cls, model, limit):
184 """Schedules all stale ebooks of all formats to rebuild."""
186 for field in model._meta.fields:
187 if isinstance(field, cls):
188 for instance in field.find_stale(limit):
193 random.shuffle(found)
194 found = found[:limit]
def transform(wldoc, book):
    """Transform a librarian.WLDocument into a librarian.OutputFile.

    The base implementation is a placeholder to be overridden by the
    concrete field classes.

    Raises:
        NotImplementedError: always.
    """
    # ``NotImplemented`` is a non-callable constant meant for binary operator
    # methods; calling it raises TypeError. The exception class is what is
    # wanted here.
    raise NotImplementedError()
def set_file_permissions(self, fieldfile):
    """Mark the file as not readable when its book is a preview.

    Does nothing for books whose ``preview`` flag is falsy.
    """
    is_preview = fieldfile.instance.preview
    if not is_preview:
        return
    fieldfile.set_readable(False)
207 def build(self, fieldfile):
208 book = fieldfile.instance
209 out = self.transform(
210 book.wldocument2() if self.librarian2_api else book.wldocument(),
213 with open(out.get_filename(), 'rb') as f:
214 fieldfile.save(None, File(f), save=False)
215 self.set_file_permissions(fieldfile)
216 if book.pk is not None:
217 book.save(update_fields=[self.attname])
222 class XmlField(EbookField):
225 def build(self, fieldfile):
229 class TxtField(EbookField):
232 librarian2_api = True
def transform(wldoc, book):
    """Render ``wldoc`` with librarian's ``TxtBuilder`` and return the result.

    ``book`` is accepted to match the other fields' ``transform`` signature
    and is not used here.
    """
    from librarian.builders.txt import TxtBuilder

    builder = TxtBuilder()
    return builder.build(wldoc)
240 class Fb2Field(EbookField):
243 ZIP = 'wolnelektury_pl_fb2'
def transform(wldoc, book):
    """Return whatever ``wldoc.as_fb2()`` produces; ``book`` is not used."""
    fb2_output = wldoc.as_fb2()
    return fb2_output
250 class PdfField(EbookField):
252 ZIP = 'wolnelektury_pl_pdf'
255 def transform(wldoc, book):
256 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
258 morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
259 cover=get_make_cover(book),
260 base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
261 fundraising=MediaInsertSet.get_texts_for('pdf'),
def build(self, fieldfile):
    """Build the file via the parent class, then clear the cache for the book.

    After the inherited ``build`` finishes, ``clear_cache`` is called with
    the slug of the instance the file belongs to.
    """
    book = fieldfile.instance
    super().build(fieldfile)
    # The slug is read only after the build, as in the original ordering.
    clear_cache(book.slug)
269 class EpubField(EbookField):
271 librarian2_api = True
272 ZIP = 'wolnelektury_pl_epub'
275 def transform(wldoc, book):
276 from librarian.builders import EpubBuilder
277 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
279 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
280 fundraising=MediaInsertSet.get_texts_for('epub'),
281 cover=get_make_cover(book),
285 class MobiField(EbookField):
287 librarian2_api = True
288 ZIP = 'wolnelektury_pl_mobi'
291 def transform(wldoc, book):
292 from librarian.builders import MobiBuilder
293 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
295 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
296 fundraising=MediaInsertSet.get_texts_for('mobi'),
297 cover=get_make_cover(book),
301 class HtmlField(EbookField):
304 librarian2_api = True
306 def build(self, fieldfile):
307 from django.core.files.base import ContentFile
308 from slugify import slugify
309 from sortify import sortify
310 from librarian import html
311 from catalogue.models import Fragment, Tag
313 book = fieldfile.instance
315 html_output = self.transform(book.wldocument2(), book)
317 # Delete old fragments, create from scratch if necessary.
318 book.fragments.all().delete()
321 meta_tags = list(book.tags.filter(
322 category__in=('author', 'epoch', 'genre', 'kind')))
325 lang = LANGUAGES_3TO2.get(lang, lang)
326 if lang not in [ln[0] for ln in settings.LANGUAGES]:
329 fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
330 self.set_file_permissions(fieldfile)
331 type(book).objects.filter(pk=book.pk).update(**{
332 fieldfile.field.attname: fieldfile
336 closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
337 for fragment in closed_fragments.values():
339 theme_names = [s.strip() for s in fragment.themes.split(',')]
340 except AttributeError:
343 for theme_name in theme_names:
346 if lang == settings.LANGUAGE_CODE:
347 # Allow creating themes if book in default language.
348 tag, created = Tag.objects.get_or_create(
349 slug=slugify(theme_name),
353 tag.name = theme_name
354 setattr(tag, "name_%s" % lang, theme_name)
355 tag.sort_key = sortify(theme_name.lower())
358 elif lang is not None:
359 # Don't create unknown themes in non-default languages.
361 tag = Tag.objects.get(
363 **{"name_%s" % lang: theme_name}
365 except Tag.DoesNotExist:
372 text = fragment.to_string()
373 short_text = truncate_html_words(text, 15)
374 if text == short_text:
376 new_fragment = Fragment.objects.create(
380 short_text=short_text
384 new_fragment.tags = set(meta_tags + themes)
385 book.html_built.send(sender=type(self), instance=book)
390 def transform(wldoc, book):
391 from librarian.builders.html import HtmlBuilder
397 gal_url = gallery_url(slug=url.slug)
398 gal_path = gallery_path(slug=url.slug)
399 return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc)
402 class CoverField(EbookField):
def transform(wldoc, book):
    """Render a 360-wide cover for ``wldoc`` and return its output file.

    The cover factory comes from ``get_make_cover(book)`` and is called with
    the document's ``book_info``.
    """
    cover_factory = get_make_cover(book)
    cover = cover_factory(wldoc.book_info, width=360)
    return cover.output_file()
410 def set_file_permissions(self, fieldfile):
414 class CoverCleanField(CoverField):
415 directory = 'cover_clean'
def transform(wldoc, book):
    """Return the output file of a 360-wide cover built for ``wldoc``.

    Uses the factory returned by ``get_make_cover(book)``, fed with
    ``wldoc.book_info``.
    """
    make = get_make_cover(book)
    rendered = make(wldoc.book_info, width=360)
    return rendered.output_file()
422 class CoverThumbField(CoverField):
423 directory = 'cover_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLCover`` with height 193.

    The cover is built from ``wldoc.book_info``; ``book`` is not used.
    """
    from librarian.cover import WLCover

    thumbnail = WLCover(wldoc.book_info, height=193)
    return thumbnail.output_file()
431 class CoverApiThumbField(CoverField):
432 directory = 'cover_api_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` with height 500.

    The cover is built from ``wldoc.book_info``; ``book`` is not used.
    """
    from librarian.cover import WLNoBoxCover

    api_thumbnail = WLNoBoxCover(wldoc.book_info, height=500)
    return api_thumbnail.output_file()
440 class SimpleCoverField(CoverField):
441 directory = 'cover_simple'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` with height 1000.

    The cover is built from ``wldoc.book_info``; ``book`` is not used.
    """
    from librarian.cover import WLNoBoxCover

    simple_cover = WLNoBoxCover(wldoc.book_info, height=1000)
    return simple_cover.output_file()
449 class CoverEbookpointField(CoverField):
450 directory = 'cover_ebookpoint'
def transform(wldoc, book):
    """Return the output file of an ``EbookpointCover``.

    The cover is built from ``wldoc.book_info``; ``book`` is not used.
    """
    from librarian.cover import EbookpointCover

    ebookpoint_cover = EbookpointCover(wldoc.book_info)
    return ebookpoint_cover.output_file()