1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
9 from urllib.request import urlopen
10 from django.apps import apps
11 from django.conf import settings
12 from django.core.files import File
13 from django.db import models
14 from django.db.models.fields.files import FieldFile
15 from django.utils.deconstruct import deconstructible
16 from librarian.cover import make_cover
17 from catalogue.constants import LANGUAGES_3TO2
18 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
19 from waiter.utils import clear_cache
# Suffix appended to the current ETag when a build is scheduled; stale-book
# lookup treats both the plain and the suffixed ETag as not stale.
ETAG_SCHEDULED_SUFFIX = "-scheduled"

# Default task priority used by EbookFieldFile.build_delay().
EBOOK_BUILD_PRIORITY = 0

# Priority for rebuilds (not referenced in the visible part of this file).
EBOOK_REBUILD_PRIORITY = 9
27 class UploadToPath(object):
28 def __init__(self, path):
def __call__(self, instance, filename):
    """Return the upload path for ``instance``.

    The stored path template is filled in with ``instance.slug``; the
    uploaded ``filename`` is not used.
    """
    template = self.path
    return template % instance.slug
def __eq__(self, other):
    """Two instances are equal when ``other`` is of this type and the paths match."""
    if not isinstance(other, type(self)):
        return False
    return other.path == self.path
38 def get_make_cover(book):
39 extra = book.get_extra_info_json()
40 cover_logo = extra.get('logo_mono', extra.get('logo'))
44 cover_logo = io.BytesIO(urlopen(cover_logo, timeout=3).read())
50 def mc(*args, **kwargs):
52 kwargs['cover_logo'] = cover_logo
53 return make_cover(*args, **kwargs)
57 class EbookFieldFile(FieldFile):
58 """Represents contents of an ebook file field."""
61 """Build the ebook immediately."""
62 etag = self.field.get_current_etag()
63 self.field.build(self)
64 self.update_etag(etag)
65 self.instance.clear_cache()
67 def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
68 """Builds the ebook in a delayed task."""
69 from .tasks import build_field
72 "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
74 return build_field.apply_async(
75 [self.instance.pk, self.field.attname],
79 def set_readable(self, readable):
81 permissions = 0o644 if readable else 0o600
82 os.chmod(self.path, permissions)
84 def update_etag(self, etag):
85 setattr(self.instance, self.field.etag_field_name, etag)
87 self.instance.save(update_fields=[self.field.etag_field_name])
90 class EbookField(models.FileField):
91 """Represents an ebook file field, attachable to a model."""
92 attr_class = EbookFieldFile
95 librarian2_api = False
98 def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
99 kwargs.setdefault('verbose_name', verbose_name)
100 self.with_etag = with_etag
101 self.etag_field_name = etag_field_name
102 kwargs.setdefault('max_length', 255)
103 kwargs.setdefault('blank', True)
104 kwargs.setdefault('default', '')
105 kwargs.setdefault('upload_to', self.get_upload_to(self.ext))
107 super().__init__(**kwargs)
109 def deconstruct(self):
110 name, path, args, kwargs = super().deconstruct()
111 if kwargs.get('max_length') == 255:
112 del kwargs['max_length']
113 if kwargs.get('blank') is True:
115 if kwargs.get('default') == '':
116 del kwargs['default']
117 if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
118 del kwargs['upload_to']
119 # with_etag creates a second field, which then deconstructs to manage
120 # its own migrations. So for migrations, etag_field_name is explicitly
121 # set to avoid double creation of the etag field.
123 kwargs['etag_field_name'] = self.etag_field_name
125 kwargs['with_etag'] = self.with_etag
127 return name, path, args, kwargs
def get_upload_to(cls, directory):
    """Return the ``UploadToPath`` callable for this field class.

    NOTE: the ``directory`` argument is accepted but not used. The target
    directory is always the class's ``directory`` attribute when it defines
    one, and ``cls.ext`` otherwise. The resulting template has the form
    ``book/<directory>/%s.<ext>``, where ``%s`` is left for the slug.
    """
    target_dir = getattr(cls, 'directory', cls.ext)
    return UploadToPath(f'book/{target_dir}/%s.{cls.ext}')
135 def contribute_to_class(self, cls, name):
136 super(EbookField, self).contribute_to_class(cls, name)
138 if self.with_etag and not self.etag_field_name:
139 self.etag_field_name = f'{name}_etag'
140 self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
141 self.etag_field.contribute_to_class(cls, f'{name}_etag')
143 def has(model_instance):
144 return bool(getattr(model_instance, self.attname, None))
146 has.__name__ = str("has_%s" % self.attname)
147 has.short_description = self.name
150 setattr(cls, 'has_%s' % self.attname, has)
152 def get_current_etag(self):
153 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
154 librarian_version = pkg_resources.get_distribution("librarian").version
155 etag = librarian_version
156 mis = MediaInsertSet.get_for_format(self.ext)
158 etag += '_' + mis.etag
161 def find_stale(self, limit):
162 """Find some books where this format is stale."""
163 # If there is no ETag field, bail. That's true for the XML file field.
164 if not self.with_etag:
167 etag = self.get_current_etag()
169 queryset = self.model.objects.all()
170 if not self.for_parents:
171 queryset = queryset.filter(children=None)
173 queryset = queryset.exclude(**{
174 f'{self.etag_field_name}__in': [
175 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
179 queryset = queryset.order_by('?')[:limit]
183 def find_all_stale(cls, model, limit):
184 """Schedules all stale ebooks of all formats to rebuild."""
186 for field in model._meta.fields:
187 if isinstance(field, cls):
188 for instance in field.find_stale(limit):
193 random.shuffle(found)
194 found = found[:limit]
def transform(wldoc, book):
    """Transform a librarian.WLDocument into a librarian.OutputFile.

    This base implementation is a placeholder that format-specific
    subclasses override.

    Raises:
        NotImplementedError: always.
    """
    # ``NotImplemented`` is a non-callable constant meant for rich
    # comparisons; calling it raised TypeError instead of signalling an
    # unimplemented method.
    raise NotImplementedError()
def set_file_permissions(self, fieldfile):
    """Mark the file as not readable when its instance is a preview."""
    is_preview = fieldfile.instance.preview
    if not is_preview:
        return
    fieldfile.set_readable(False)
207 def build(self, fieldfile):
208 book = fieldfile.instance
209 out = self.transform(
210 book.wldocument2() if self.librarian2_api else book.wldocument(),
213 with open(out.get_filename(), 'rb') as f:
214 fieldfile.save(None, File(f), save=False)
215 self.set_file_permissions(fieldfile)
216 if book.pk is not None:
217 book.save(update_fields=[self.attname])
222 class XmlField(EbookField):
225 def build(self, fieldfile):
229 class TxtField(EbookField):
def transform(wldoc, book):
    """Return the document rendered by ``wldoc.as_text()``; ``book`` is unused."""
    text_output = wldoc.as_text()
    return text_output
238 class Fb2Field(EbookField):
241 ZIP = 'wolnelektury_pl_fb2'
def transform(wldoc, book):
    """Return the document rendered by ``wldoc.as_fb2()``; ``book`` is unused."""
    fb2_output = wldoc.as_fb2()
    return fb2_output
248 class PdfField(EbookField):
250 ZIP = 'wolnelektury_pl_pdf'
253 def transform(wldoc, book):
255 morefloats=settings.LIBRARIAN_PDF_MOREFLOATS,
256 cover=get_make_cover(book),
257 base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'])
def build(self, fieldfile):
    """Run the inherited build, then clear the cache for the book's slug."""
    super().build(fieldfile)
    # Read the slug only after the parent build has run, as before.
    slug = fieldfile.instance.slug
    clear_cache(slug)
264 class EpubField(EbookField):
266 librarian2_api = True
267 ZIP = 'wolnelektury_pl_epub'
270 def transform(wldoc, book):
271 from librarian.builders import EpubBuilder
272 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
274 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
275 fundraising=MediaInsertSet.get_texts_for('epub'),
276 cover=get_make_cover(book),
280 class MobiField(EbookField):
282 librarian2_api = True
283 ZIP = 'wolnelektury_pl_mobi'
286 def transform(wldoc, book):
287 from librarian.builders import MobiBuilder
288 MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
290 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
291 fundraising=MediaInsertSet.get_texts_for('mobi'),
292 cover=get_make_cover(book),
296 class HtmlField(EbookField):
300 def build(self, fieldfile):
301 from django.core.files.base import ContentFile
302 from slugify import slugify
303 from sortify import sortify
304 from librarian import html
305 from catalogue.models import Fragment, Tag
307 book = fieldfile.instance
309 html_output = self.transform(book.wldocument(parse_dublincore=False), book)
311 # Delete old fragments, create from scratch if necessary.
312 book.fragments.all().delete()
315 meta_tags = list(book.tags.filter(
316 category__in=('author', 'epoch', 'genre', 'kind')))
319 lang = LANGUAGES_3TO2.get(lang, lang)
320 if lang not in [ln[0] for ln in settings.LANGUAGES]:
323 fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
324 self.set_file_permissions(fieldfile)
325 type(book).objects.filter(pk=book.pk).update(**{
326 fieldfile.field.attname: fieldfile
330 closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
331 for fragment in closed_fragments.values():
333 theme_names = [s.strip() for s in fragment.themes.split(',')]
334 except AttributeError:
337 for theme_name in theme_names:
340 if lang == settings.LANGUAGE_CODE:
341 # Allow creating themes if book in default language.
342 tag, created = Tag.objects.get_or_create(
343 slug=slugify(theme_name),
347 tag.name = theme_name
348 setattr(tag, "name_%s" % lang, theme_name)
349 tag.sort_key = sortify(theme_name.lower())
353 elif lang is not None:
354 # Don't create unknown themes in non-default languages.
356 tag = Tag.objects.get(
358 **{"name_%s" % lang: theme_name}
360 except Tag.DoesNotExist:
367 text = fragment.to_string()
368 short_text = truncate_html_words(text, 15)
369 if text == short_text:
371 new_fragment = Fragment.objects.create(
375 short_text=short_text
379 new_fragment.tags = set(meta_tags + themes)
381 if not theme.for_books:
382 theme.for_books = True
384 book.html_built.send(sender=type(self), instance=book)
389 def transform(wldoc, book):
390 # ugly, but we can't use wldoc.book_info here
391 from librarian import DCNS
392 url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
397 slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
398 gal_url = gallery_url(slug=slug)
399 gal_path = gallery_path(slug=slug)
400 return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
403 class CoverField(EbookField):
def transform(wldoc, book):
    """Build the book's cover with ``width=360`` and return its output file.

    The cover factory comes from ``get_make_cover(book)`` and is applied to
    ``wldoc.book_info``.
    """
    cover_factory = get_make_cover(book)
    cover = cover_factory(wldoc.book_info, width=360)
    return cover.output_file()
411 def set_file_permissions(self, fieldfile):
415 class CoverCleanField(CoverField):
416 directory = 'cover_clean'
def transform(wldoc, book):
    """Build a cover with ``width=360`` via ``get_make_cover`` and return its output file."""
    make = get_make_cover(book)
    rendered = make(wldoc.book_info, width=360)
    return rendered.output_file()
423 class CoverThumbField(CoverField):
424 directory = 'cover_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLCover`` built with ``height=193``; ``book`` is unused."""
    from librarian.cover import WLCover

    thumb = WLCover(wldoc.book_info, height=193)
    return thumb.output_file()
432 class CoverApiThumbField(CoverField):
433 directory = 'cover_api_thumb'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=500``; ``book`` is unused."""
    from librarian.cover import WLNoBoxCover

    api_thumb = WLNoBoxCover(wldoc.book_info, height=500)
    return api_thumb.output_file()
441 class SimpleCoverField(CoverField):
442 directory = 'cover_simple'
def transform(wldoc, book):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=1000``; ``book`` is unused."""
    from librarian.cover import WLNoBoxCover

    simple_cover = WLNoBoxCover(wldoc.book_info, height=1000)
    return simple_cover.output_file()
450 class CoverEbookpointField(CoverField):
451 directory = 'cover_ebookpoint'
def transform(wldoc, book):
    """Return the output file of an ``EbookpointCover`` for the document; ``book`` is unused."""
    from librarian.cover import EbookpointCover

    ebookpoint_cover = EbookpointCover(wldoc.book_info)
    return ebookpoint_cover.output_file()