1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from django.utils.deconstruct import deconstructible
10 from django.utils.translation import gettext_lazy as _
11 from catalogue.constants import LANGUAGES_3TO2, EBOOK_FORMATS_WITH_CHILDREN, EBOOK_FORMATS_WITHOUT_CHILDREN
12 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
13 from waiter.utils import clear_cache
15 ETAG_SCHEDULED_SUFFIX = '-scheduled'
16 EBOOK_BUILD_PRIORITY = 0
17 EBOOK_REBUILD_PRIORITY = 9
21 class UploadToPath(object):
22 def __init__(self, path):
25 def __call__(self, instance, filename):
26 return self.path % instance.slug
28 def __eq__(self, other):
29 return isinstance(other, type(self)) and other.path == self.path
32 class EbookFieldFile(FieldFile):
33 """Represents contents of an ebook file field."""
36 """Build the ebook immediately."""
37 etag = self.field.get_current_etag()
38 self.field.build(self)
39 self.update_etag(etag)
40 self.instance.clear_cache()
42 def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
43 """Builds the ebook in a delayed task."""
44 from .tasks import build_field
47 "".join([self.field.get_current_etag(), ETAG_SCHEDULED_SUFFIX])
49 return build_field.apply_async(
50 [self.instance.pk, self.field.attname],
54 def set_readable(self, readable):
56 permissions = 0o644 if readable else 0o600
57 os.chmod(self.path, permissions)
59 def update_etag(self, etag):
60 setattr(self.instance, self.field.etag_field_name, etag)
62 self.instance.save(update_fields=[self.field.etag_field_name])
65 class EbookField(models.FileField):
66 """Represents an ebook file field, attachable to a model."""
67 attr_class = EbookFieldFile
69 librarian2_api = False
def __init__(self, verbose_name_=None, with_etag=True, **kwargs):
    """Set up the field, filling in ebook-specific defaults.

    verbose_name_ -- optional label for the field; when it is empty,
        a translated "<ext> file" label built from ``self.ext`` is
        used instead.
    with_etag -- stored on the field as ``self.with_etag``.

    Any remaining keyword arguments are passed on to the parent
    constructor. Defaults are applied with ``setdefault``, so values
    supplied explicitly by the caller are never overridden.
    """
    # This is just for compatibility with older migrations,
    # where first argument was for ebook format.
    # Can be scrapped if old migrations are updated/removed.
    verbose_name = verbose_name_ or _("%s file") % self.ext
    # Pass the computed label: handing over the raw argument would
    # silently drop the "<ext> file" fallback built just above.
    kwargs.setdefault('verbose_name', verbose_name)

    self.with_etag = with_etag
    kwargs.setdefault('max_length', 255)
    kwargs.setdefault('blank', True)
    kwargs.setdefault('default', '')
    kwargs.setdefault('upload_to', self.get_upload_to(self.ext))

    super().__init__(**kwargs)
87 def deconstruct(self):
88 name, path, args, kwargs = super().deconstruct()
89 if kwargs.get('max_length') == 255:
90 del kwargs['max_length']
91 if kwargs.get('blank') is True:
93 if kwargs.get('default') == '':
95 if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
96 del kwargs['upload_to']
97 if not self.with_etag:
98 kwargs['with_etag'] = self.with_etag
100 verbose_name = kwargs.get('verbose_name')
102 del kwargs['verbose_name']
103 if verbose_name != _("%s file") % self.ext:
104 args = [verbose_name] + args
105 return name, path, args, kwargs
def get_upload_to(cls, directory):
    """Build the upload-path callable for this field class.

    The ``directory`` argument is not honoured: it is replaced right
    away by the class's ``directory`` attribute, falling back to
    ``cls.ext`` when the class defines none. The resulting template
    keeps a ``%s`` placeholder for ``UploadToPath`` to fill in.
    """
    directory = getattr(cls, 'directory', cls.ext)
    return UploadToPath(f'book/{directory}/%s.{cls.ext}')
114 def contribute_to_class(self, cls, name):
115 super(EbookField, self).contribute_to_class(cls, name)
117 self.etag_field_name = f'{name}_etag'
119 self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
120 self.etag_field.contribute_to_class(cls, f'{name}_etag')
122 def has(model_instance):
123 return bool(getattr(model_instance, self.attname, None))
125 has.__name__ = str("has_%s" % self.attname)
126 has.short_description = self.name
129 setattr(cls, 'has_%s' % self.attname, has)
131 def get_current_etag(self):
133 librarian_version = pkg_resources.get_distribution("librarian").version
134 return librarian_version
136 def schedule_stale(self, queryset=None):
137 """Schedule building this format for all the books where etag is stale."""
138 # If there is not ETag field, bail. That's true for xml file field.
139 if not self.with_etag:
142 etag = self.get_current_etag()
144 queryset = self.model.objects.all()
146 if self.format_name in EBOOK_FORMATS_WITHOUT_CHILDREN + ['html']:
147 queryset = queryset.filter(children=None)
149 queryset = queryset.exclude(**{
150 f'{self.etag_field_name}__in': [
151 etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
155 fieldfile = getattr(obj, self.attname)
156 priority = EBOOK_REBUILD_PRIORITY if fieldfile else EBOOK_BUILD_PRIORITY
157 fieldfile.build_delay(priority=priority)
def schedule_all_stale(cls, model):
    """Call ``schedule_stale()`` on every field of ``model`` that is an instance of ``cls``."""
    matching_fields = (
        candidate for candidate in model._meta.fields
        if isinstance(candidate, cls)
    )
    for ebook_field in matching_fields:
        ebook_field.schedule_stale()
def transform(wldoc):
    """Transform a librarian.WLDocument into a librarian.OutputFile.

    This base implementation performs no conversion; the format-specific
    field classes supply their own ``transform``.

    Raises:
        NotImplementedError: always.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception class;
    # calling it fails with TypeError instead of signalling a missing override.
    raise NotImplementedError()
def set_file_permissions(self, fieldfile):
    """Call ``fieldfile.set_readable(False)`` when the owning instance is a preview.

    Nothing happens when ``fieldfile.instance.preview`` is falsy.
    """
    is_preview = fieldfile.instance.preview
    if not is_preview:
        return
    fieldfile.set_readable(False)
def build(self, fieldfile):
    """Build the ebook for ``fieldfile.instance`` and store it in ``fieldfile``.

    The book's document (``wldocument2()`` when ``librarian2_api`` is
    set, ``wldocument()`` otherwise) is run through ``self.transform``,
    the resulting file is saved into the field, file permissions are
    adjusted, and the field is persisted when the book already has a
    primary key.
    """
    book = fieldfile.instance
    wldoc = book.wldocument2() if self.librarian2_api else book.wldocument()
    out = self.transform(wldoc)
    # Open the produced file in a context manager so the handle is
    # closed once its content has been saved, instead of leaking it.
    with open(out.get_filename(), 'rb') as built_file:
        fieldfile.save(None, File(built_file), save=False)
    self.set_file_permissions(fieldfile)
    if book.pk is not None:
        book.save(update_fields=[self.attname])
189 class XmlField(EbookField):
192 def build(self, fieldfile):
196 class TxtField(EbookField):
def transform(wldoc):
    """Return the result of ``wldoc.as_text()``."""
    text_output = wldoc.as_text()
    return text_output
204 class Fb2Field(EbookField):
206 ZIP = 'wolnelektury_pl_fb2'
def transform(wldoc):
    """Return the result of ``wldoc.as_fb2()``."""
    fb2_output = wldoc.as_fb2()
    return fb2_output
213 class PdfField(EbookField):
215 ZIP = 'wolnelektury_pl_pdf'
218 def transform(wldoc):
220 morefloats=settings.LIBRARIAN_PDF_MOREFLOATS, cover=True,
221 base_url=absolute_url(gallery_url(wldoc.book_info.url.slug)), customizations=['notoc'])
def build(self, fieldfile):
    """Build the PDF, then clear the cache entry for the book's slug.

    The build itself is delegated to the parent class; afterwards
    ``clear_cache`` is called with ``fieldfile.instance.slug``.
    """
    # The previous call went through ``BuildEbook``, a name that is not
    # defined in this module; delegate to the parent field class instead.
    super().build(fieldfile)
    clear_cache(fieldfile.instance.slug)
228 class EpubField(EbookField):
230 librarian2_api = True
231 ZIP = 'wolnelektury_pl_epub'
234 def transform(wldoc):
235 from librarian.builders import EpubBuilder
237 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
238 fundraising=settings.EPUB_FUNDRAISING
242 class MobiField(EbookField):
244 librarian2_api = True
245 ZIP = 'wolnelektury_pl_mobi'
248 def transform(wldoc):
249 from librarian.builders import MobiBuilder
251 base_url='file://' + os.path.abspath(gallery_path(wldoc.meta.url.slug)) + '/',
252 fundraising=settings.EPUB_FUNDRAISING
256 class HtmlField(EbookField):
259 def build(self, fieldfile):
260 from django.core.files.base import ContentFile
261 from slugify import slugify
262 from sortify import sortify
263 from librarian import html
264 from catalogue.models import Fragment, Tag
266 book = fieldfile.instance
268 html_output = self.transform(book.wldocument(parse_dublincore=False))
270 # Delete old fragments, create from scratch if necessary.
271 book.fragments.all().delete()
274 meta_tags = list(book.tags.filter(
275 category__in=('author', 'epoch', 'genre', 'kind')))
278 lang = LANGUAGES_3TO2.get(lang, lang)
279 if lang not in [ln[0] for ln in settings.LANGUAGES]:
282 fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
283 self.set_file_permissions(fieldfile)
284 type(book).objects.filter(pk=book.pk).update(**{
285 fieldfile.field.attname: fieldfile
289 closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
290 for fragment in closed_fragments.values():
292 theme_names = [s.strip() for s in fragment.themes.split(',')]
293 except AttributeError:
296 for theme_name in theme_names:
299 if lang == settings.LANGUAGE_CODE:
300 # Allow creating themes if book in default language.
301 tag, created = Tag.objects.get_or_create(
302 slug=slugify(theme_name),
306 tag.name = theme_name
307 setattr(tag, "name_%s" % lang, theme_name)
308 tag.sort_key = sortify(theme_name.lower())
312 elif lang is not None:
313 # Don't create unknown themes in non-default languages.
315 tag = Tag.objects.get(
317 **{"name_%s" % lang: theme_name}
319 except Tag.DoesNotExist:
326 text = fragment.to_string()
327 short_text = truncate_html_words(text, 15)
328 if text == short_text:
330 new_fragment = Fragment.objects.create(
334 short_text=short_text
338 new_fragment.tags = set(meta_tags + themes)
340 if not theme.for_books:
341 theme.for_books = True
343 book.html_built.send(sender=type(self), instance=book)
348 def transform(wldoc):
349 # ugly, but we can't use wldoc.book_info here
350 from librarian import DCNS
351 url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
356 slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
357 gal_url = gallery_url(slug=slug)
358 gal_path = gallery_path(slug=slug)
359 return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
362 class CoverField(EbookField):
366 def set_file_permissions(self, fieldfile):
370 class CoverCleanField(CoverField):
371 directory = 'cover_clean'
def transform(wldoc):
    """Return the output file of a cover built with ``width=240``.

    ``WLCover`` is used when the document's ``cover_box_position`` is
    'none'; ``MarquiseCover`` is used in every other case. Each class is
    imported only on the branch that needs it.
    """
    info = wldoc.book_info
    if info.cover_box_position == 'none':
        from librarian.cover import WLCover as cover_class
    else:
        from librarian.covers.marquise import MarquiseCover as cover_class
    return cover_class(info, width=240).output_file()
382 class CoverThumbField(CoverField):
383 directory = 'cover_thumb'
def transform(wldoc):
    """Return the output file of a ``WLCover`` built with ``height=193``."""
    from librarian.cover import WLCover

    cover = WLCover(wldoc.book_info, height=193)
    return cover.output_file()
391 class CoverApiThumbField(CoverField):
392 directory = 'cover_api_thumb'
def transform(wldoc):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=500``."""
    from librarian.cover import WLNoBoxCover

    cover = WLNoBoxCover(wldoc.book_info, height=500)
    return cover.output_file()
400 class SimpleCoverField(CoverField):
401 directory = 'cover_simple'
def transform(wldoc):
    """Return the output file of a ``WLNoBoxCover`` built with ``height=1000``."""
    from librarian.cover import WLNoBoxCover

    cover = WLNoBoxCover(wldoc.book_info, height=1000)
    return cover.output_file()
409 class CoverEbookpointField(CoverField):
410 directory = 'cover_ebookpoint'
def transform(wldoc):
    """Return the output file of an ``EbookpointCover`` for the document."""
    from librarian.cover import EbookpointCover

    cover = EbookpointCover(wldoc.book_info)
    return cover.output_file()