1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
9 from urllib.request import urlopen
10 from django.apps import apps
11 from django.conf import settings
12 from django.core.files import File
13 from django.db import models
14 from django.db.models.fields.files import FieldFile
15 from django.utils.deconstruct import deconstructible
16 from librarian.cover import make_cover
17 from catalogue.constants import LANGUAGES_3TO2
18 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
19 from waiter.utils import clear_cache
21 ETAG_SCHEDULED_SUFFIX = '-scheduled'
22 EBOOK_BUILD_PRIORITY = 0
23 EBOOK_REBUILD_PRIORITY = 9
27 class UploadToPath(object):
28 def __init__(self, path):
31 def __call__(self, instance, filename):
32 return self.path % instance.slug
34 def __eq__(self, other):
35 return isinstance(other, type(self)) and other.path == self.path
38 def get_make_cover(book):
39 extra = book.get_extra_info_json()
40 cover_logo = extra.get('logo_mono', extra.get('logo'))
44 cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
50 def mc(*args, **kwargs):
52 kwargs['cover_logo'] = cover_logo
53 return make_cover(*args, **kwargs)
57 class EbookFieldFile(FieldFile):
58 """Represents contents of an ebook file field."""
61 """Build the ebook immediately."""
62 etag = self.field.get_current_etag()
63 self.field.build(self)
64 self.update_etag(etag)
65 self.instance.clear_cache()
67 def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
68 """Builds the ebook in a delayed task."""
69 from .tasks import build_field
72 "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
74 return build_field.apply_async(
75 [self.instance.pk, self.field.attname],
79 def set_readable(self, readable):
81 permissions = 0o644 if readable else 0o600
82 os.chmod(self.path, permissions)
84 def update_etag(self, etag):
85 setattr(self.instance, self.field.etag_field_name, etag)
87 self.instance.save(update_fields=[self.field.etag_field_name])
90 class EbookField(models.FileField):
91 """Represents an ebook file field, attachable to a model."""
92 attr_class = EbookFieldFile
95 librarian2_api = False
98 def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
99 kwargs.setdefault('verbose_name', verbose_name)
100 self.with_etag = with_etag
101 self.etag_field_name = etag_field_name
102 kwargs.setdefault('max_length', 255)
103 kwargs.setdefault('blank', True)
104 kwargs.setdefault('default', '')
105 kwargs.setdefault('upload_to', self.get_upload_to(self.ext))
107 super().__init__(**kwargs)
109 def deconstruct(self):
110 name, path, args, kwargs = super().deconstruct()
111 if kwargs.get('max_length') == 255:
112 del kwargs['max_length']
113 if kwargs.get('blank') is True:
115 if kwargs.get('default') == '':
116 del kwargs['default']
117 if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
118 del kwargs['upload_to']
119 # with_etag creates a second field, which then deconstructs to manage
120 # its own migrations. So for migrations, etag_field_name is explicitly
121 # set to avoid double creation of the etag field.
123 kwargs['etag_field_name'] = self.etag_field_name
125 kwargs['with_etag'] = self.with_etag
127 return name, path, args, kwargs
def get_upload_to(cls, directory):
    """Build the ``UploadToPath`` callable for this field class.

    The target directory is the class attribute ``directory`` when the
    class defines one, otherwise ``cls.ext``. The resulting template is
    ``book/<directory>/%s.<ext>``, where ``%s`` is left for
    ``UploadToPath`` to fill in.

    NOTE(review): the ``directory`` argument is never read -- the value is
    always taken from the class. Confirm whether callers expect it to be
    honoured.
    """
    extension = cls.ext
    target_dir = getattr(cls, 'directory', extension)
    return UploadToPath(f'book/{target_dir}/%s.{extension}')
135 def contribute_to_class(self, cls, name):
136 super(EbookField, self).contribute_to_class(cls, name)
138 if self.with_etag and not self.etag_field_name:
139 self.etag_field_name = f'{name}_etag'
140 self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
141 self.etag_field.contribute_to_class(cls, f'{name}_etag')
143 def has(model_instance):
144 return bool(getattr(model_instance, self.attname, None))
146 has.__name__ = str("has_%s" % self.attname)
147 has.short_description = self.name
150 setattr(cls, 'has_%s' % self.attname, has)
152 def get_current_etag(self):
153 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
154 librarian_version = pkg_resources.get_distribution("librarian").version
155 etag = librarian_version
156 mis = MediaInsertSet.get_for_format(self.ext)
158 etag += '_' + mis.etag
161 def find_stale(self, limit):
162 """Find some books where this format is stale."""
163 # If there is not ETag field, bail. That's true for xml file field.
164 if not self.with_etag:
167 etag = self.get_current_etag()
169 queryset = self.model.objects.all()
170 if not self.for_parents:
171 queryset = queryset.filter(children=None)
173 queryset = queryset.exclude(**{
174 f'{self.etag_field_name}__in': [
175 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
179 queryset = queryset.order_by('?')[:limit]
183 def find_all_stale(cls, model, limit):
184 """Schedules all stale ebooks of all formats to rebuild."""
186 for field in model._meta.fields:
187 if isinstance(field, cls):
188 for instance in field.find_stale(limit):
193 random.shuffle(found)
194 found = found[:limit]
def transform(wldoc, book):
    """Transform a librarian.WLDocument into a librarian.OutputFile.

    This base implementation is a placeholder: format-specific field
    classes provide their own ``transform``, which ``build`` calls.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception, and
    # calling it raises TypeError. ``NotImplementedError`` is the intended
    # signal for a method that subclasses must override.
    raise NotImplementedError()
def set_file_permissions(self, fieldfile):
    """Mark the stored file as not readable when its instance is a preview.

    Nothing is done when ``fieldfile.instance.preview`` is falsy.
    """
    if not fieldfile.instance.preview:
        return
    fieldfile.set_readable(False)
207 def build(self, fieldfile):
208 book = fieldfile.instance
209 out = self.transform(
210 book.wldocument2() if self.librarian2_api else book.wldocument(),
213 with open(out.get_filename(), 'rb') as f:
214 fieldfile.save(None, File(f), save=False)
215 self.set_file_permissions(fieldfile)
216 if book.pk is not None:
217 book.save(update_fields=[self.attname])
222 class XmlField(EbookField):
225 def build(self, fieldfile):
229 class TxtField(EbookField):
def transform(wldoc, book):
    """Return the result of ``wldoc.as_text()``; ``book`` is not used here."""
    text_output = wldoc.as_text()
    return text_output
238 class Fb2Field(EbookField):
241 ZIP = 'wolnelektury_pl_fb2'
def transform(wldoc, book):
    """Return the result of ``wldoc.as_fb2()``; ``book`` is not used here."""
    fb2_output = wldoc.as_fb2()
    return fb2_output
248 class PdfField(EbookField):
250 ZIP = 'wolnelektury_pl_pdf'
253 def transform(wldoc, book):
254 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
256 morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
257 cover=get_make_cover(book),
258 base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'],
259 fundraising=MediaInsertSet.get_texts_for('pdf'),
def build(self, fieldfile):
    """Run the inherited build, then call ``clear_cache`` with the book's slug."""
    super().build(fieldfile)
    # The slug is read only after the inherited build has finished.
    book_slug = fieldfile.instance.slug
    clear_cache(book_slug)
267 class EpubField(EbookField):
269 librarian2_api = True
270 ZIP = 'wolnelektury_pl_epub'
273 def transform(wldoc, book):
274 from librarian.builders import EpubBuilder
275 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
277 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
278 fundraising=MediaInsertSet.get_texts_for('epub'),
279 cover=get_make_cover(book),
283 class MobiField(EbookField):
285 librarian2_api = True
286 ZIP = 'wolnelektury_pl_mobi'
289 def transform(wldoc, book):
290 from librarian.builders import MobiBuilder
291 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
293 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
294 fundraising=MediaInsertSet.get_texts_for('mobi'),
295 cover=get_make_cover(book),
299 class HtmlField(EbookField):
303 def build(self, fieldfile):
304 from django.core.files.base import ContentFile
305 from slugify import slugify
306 from sortify import sortify
307 from librarian import html
308 from catalogue.models import Fragment, Tag
310 book = fieldfile.instance
312 html_output = self.transform(book.wldocument(parse_dublincore=False), book)
314 # Delete old fragments, create from scratch if necessary.
315 book.fragments.all().delete()
318 meta_tags = list(book.tags.filter(
319 category__in=('author', 'epoch', 'genre', 'kind')))
322 lang = LANGUAGES_3TO2.get(lang, lang)
323 if lang not in [ln[0] for ln in settings.LANGUAGES]:
326 fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
327 self.set_file_permissions(fieldfile)
328 type(book).objects.filter(pk=book.pk).update(**{
329 fieldfile.field.attname: fieldfile
333 closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
334 for fragment in closed_fragments.values():
336 theme_names = [s.strip() for s in fragment.themes.split(',')]
337 except AttributeError:
340 for theme_name in theme_names:
343 if lang == settings.LANGUAGE_CODE:
344 # Allow creating themes if book in default language.
345 tag, created = Tag.objects.get_or_create(
346 slug=slugify(theme_name),
350 tag.name = theme_name
351 setattr(tag, "name_%s" % lang, theme_name)
352 tag.sort_key = sortify(theme_name.lower())
355 elif lang is not None:
356 # Don't create unknown themes in non-default languages.
358 tag = Tag.objects.get(
360 **{"name_%s" % lang: theme_name}
362 except Tag.DoesNotExist:
369 text = fragment.to_string()
370 short_text = truncate_html_words(text, 15)
371 if text == short_text:
373 new_fragment = Fragment.objects.create(
377 short_text=short_text
381 new_fragment.tags = set(meta_tags + themes)
382 book.html_built.send(sender=type(self), instance=book)
387 def transform(wldoc, book):
388 # ugly, but we can't use wldoc.book_info here
389 from librarian import DCNS
390 url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
395 slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
396 gal_url = gallery_url(slug=slug)
397 gal_path = gallery_path(slug=slug)
398 return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
401 class CoverField(EbookField):
def transform(wldoc, book):
    """Produce the cover output file for ``book``.

    The cover factory comes from ``get_make_cover(book)`` and is called
    with ``wldoc.book_info`` and ``width=360``; the resulting cover's
    ``output_file()`` is returned.
    """
    cover_factory = get_make_cover(book)
    cover = cover_factory(wldoc.book_info, width=360)
    return cover.output_file()
409 def set_file_permissions(self, fieldfile):
413 class CoverCleanField(CoverField):
414 directory = 'cover_clean'
def transform(wldoc, book):
    """Produce the "clean" cover output file for ``book``.

    Calls the factory from ``get_make_cover(book)`` with
    ``wldoc.book_info`` and ``width=360`` and returns the cover's
    ``output_file()``.
    """
    build_cover = get_make_cover(book)
    clean_cover = build_cover(wldoc.book_info, width=360)
    return clean_cover.output_file()
421 class CoverThumbField(CoverField):
422 directory = 'cover_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLCover`` built with ``height=193``.

    The cover is constructed from ``wldoc.book_info``; ``book`` is not
    used here.
    """
    from librarian.cover import WLCover
    thumb_cover = WLCover(wldoc.book_info, height=193)
    return thumb_cover.output_file()
430 class CoverApiThumbField(CoverField):
431 directory = 'cover_api_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=500``.

    The cover is constructed from ``wldoc.book_info``; ``book`` is not
    used here.
    """
    from librarian.cover import WLNoBoxCover
    api_thumb = WLNoBoxCover(wldoc.book_info, height=500)
    return api_thumb.output_file()
439 class SimpleCoverField(CoverField):
440 directory = 'cover_simple'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=1000``.

    The cover is constructed from ``wldoc.book_info``; ``book`` is not
    used here.
    """
    from librarian.cover import WLNoBoxCover
    simple_cover = WLNoBoxCover(wldoc.book_info, height=1000)
    return simple_cover.output_file()
448 class CoverEbookpointField(CoverField):
449 directory = 'cover_ebookpoint'
def transform(wldoc, book):
    """Return the output file of an ``EbookpointCover`` for the document.

    The cover is constructed from ``wldoc.book_info`` with no further
    arguments; ``book`` is not used here.
    """
    from librarian.cover import EbookpointCover
    ebookpoint_cover = EbookpointCover(wldoc.book_info)
    return ebookpoint_cover.output_file()