087fe0d88a457f7b808996b95f061c80c38730bc
[wolnelektury.git] / src / catalogue / fields.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 import io
5 import os
6 import pkg_resources
7 import random
8 import time
9 from urllib.request import urlopen
10 from django.apps import apps
11 from django.conf import settings
12 from django.core.files import File
13 from django.db import models
14 from django.db.models.fields.files import FieldFile
15 from django.utils.deconstruct import deconstructible
16 from librarian.cover import make_cover
17 from catalogue.constants import LANGUAGES_3TO2
18 from catalogue.utils import absolute_url, remove_zip, truncate_html_words, gallery_path, gallery_url
19 from waiter.utils import clear_cache
20
# Suffix appended to the current ETag while a build task is queued (see
# EbookFieldFile.build_delay); EbookField.find_stale excludes values
# carrying it, so an already-scheduled file is not reported as stale.
ETAG_SCHEDULED_SUFFIX = '-scheduled'
# Default task priority used by EbookFieldFile.build_delay.
EBOOK_BUILD_PRIORITY = 0
# Not referenced in this module; presumably the priority passed to
# build_delay when rebuilding stale files -- TODO confirm against callers.
EBOOK_REBUILD_PRIORITY = 9
24
25
@deconstructible
class UploadToPath(object):
    """Callable ``upload_to`` that derives the file path from the instance slug.

    ``path`` is a ``%``-style template with a single placeholder, which is
    filled with ``instance.slug`` when the object is called.
    """

    def __init__(self, path):
        self.path = path

    def __call__(self, instance, filename):
        """Return the storage path for *instance*; *filename* is ignored."""
        return self.path % instance.slug

    def __eq__(self, other):
        """Two instances are equal when they share the same template."""
        return isinstance(other, type(self)) and other.path == self.path

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None on Python 3; hash on
        # the same attribute that equality compares so the two stay in sync.
        return hash(self.path)
36
37
def get_make_cover(book):
    """Return a ``make_cover`` wrapper with the book's logo pre-fetched.

    The logo URL is read from the book's extra info (``logo_mono`` if the
    key is present, otherwise ``logo``).  When a URL is found, its content
    is downloaded once and handed to ``make_cover`` as the ``cover_logo``
    keyword argument on every call of the returned function.

    :param book: object exposing ``get_extra_info_json()`` returning a dict.
    :returns: a callable forwarding its arguments to ``make_cover``.
    """
    extra = book.get_extra_info_json()
    cover_logo = extra.get('logo_mono', extra.get('logo'))
    if cover_logo:
        logo_url = cover_logo
        # NOTE(review): the download is retried without limit, so a
        # permanently broken URL blocks the caller forever.
        while True:
            try:
                # Close the HTTP response as soon as the body is read.
                with urlopen(logo_url, timeout=3) as response:
                    cover_logo = io.BytesIO(response.read())
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt and SystemExit
                # must still be able to stop the retry loop.
                time.sleep(2)
            else:
                break

    def mc(*args, **kwargs):
        if cover_logo:
            kwargs['cover_logo'] = cover_logo
        return make_cover(*args, **kwargs)
    return mc
55     
56
class EbookFieldFile(FieldFile):
    """File wrapper for an ebook field, adding build and ETag helpers."""

    def build(self):
        """Generate the file right away and record the ETag it was built for."""
        # The ETag is read before building, so the stored value reflects
        # the state the build started from.
        current_etag = self.field.get_current_etag()
        self.field.build(self)
        self.update_etag(current_etag)
        self.instance.clear_cache()

    def build_delay(self, priority=EBOOK_BUILD_PRIORITY):
        """Queue a task that builds the file; return the task's async result."""
        from .tasks import build_field

        # Mark the instance as scheduled so it is not picked up as stale
        # again while the task is waiting.
        scheduled_etag = self.field.get_current_etag() + ETAG_SCHEDULED_SUFFIX
        self.update_etag(scheduled_etag)
        task_args = [self.instance.pk, self.field.attname]
        return build_field.apply_async(task_args, priority=priority)

    def set_readable(self, readable):
        """Set the file mode to 0o644 when *readable*, otherwise 0o600."""
        if readable:
            mode = 0o644
        else:
            mode = 0o600
        os.chmod(self.path, mode)

    def update_etag(self, etag):
        """Store *etag* on the instance, persisting it if the instance is saved."""
        etag_attr = self.field.etag_field_name
        setattr(self.instance, etag_attr, etag)
        if not self.instance.pk:
            return
        self.instance.save(update_fields=[etag_attr])
88
89
class EbookField(models.FileField):
    """Represents an ebook file field, attachable to a model.

    Concrete formats subclass this, set ``ext`` and implement
    ``transform``.  Unless ``with_etag`` is false, a companion
    ``<name>_etag`` CharField is added to the model to record which
    ETag the stored file was built for.
    """
    attr_class = EbookFieldFile
    # File extension; also the default upload directory and the format key
    # passed to MediaInsertSet.get_for_format.
    ext = None
    # When False, find_stale only considers instances without children.
    for_parents = True
    # When True, build() feeds transform() with book.wldocument2().
    librarian2_api = False
    # When set, passed to remove_zip() after each build.
    ZIP = None

    def __init__(self, verbose_name=None, with_etag=True, etag_field_name=None, **kwargs):
        """Set up the field with this project's FileField defaults.

        :param verbose_name: forwarded to FileField unless given in kwargs.
        :param with_etag: whether a companion ETag field is used.
        :param etag_field_name: name of an already existing ETag field; when
            empty and ``with_etag`` is true, one is created on the model.
        """
        kwargs.setdefault('verbose_name', verbose_name)
        self.with_etag = with_etag
        self.etag_field_name = etag_field_name
        kwargs.setdefault('max_length', 255)
        kwargs.setdefault('blank', True)
        kwargs.setdefault('default', '')
        kwargs.setdefault('upload_to', self.get_upload_to(self.ext))

        super().__init__(**kwargs)

    def deconstruct(self):
        """Return the migration description, omitting this class's defaults."""
        name, path, args, kwargs = super().deconstruct()
        if kwargs.get('max_length') == 255:
            del kwargs['max_length']
        if kwargs.get('blank') is True:
            del kwargs['blank']
        if kwargs.get('default') == '':
            del kwargs['default']
        if self.get_upload_to(self.ext) == kwargs.get('upload_to'):
            del kwargs['upload_to']
        # with_etag creates a second field, which then deconstructs to manage
        # its own migrations. So for migrations, etag_field_name is explicitly
        # set to avoid double creation of the etag field.
        if self.with_etag:
            kwargs['etag_field_name'] = self.etag_field_name
        else:
            kwargs['with_etag'] = self.with_etag

        return name, path, args, kwargs

    @classmethod
    def get_upload_to(cls, directory):
        """Return the ``upload_to`` callable for this field class.

        The ``directory`` argument is ignored: the directory is taken from
        the class's ``directory`` attribute, falling back to ``ext``.  The
        parameter is kept so existing callers keep working.
        """
        directory = getattr(cls, 'directory', cls.ext)
        upload_template = f'book/{directory}/%s.{cls.ext}'
        return UploadToPath(upload_template)

    def contribute_to_class(self, cls, name):
        """Attach the field, its ETag field if needed, and a ``has_<name>`` method."""
        super().contribute_to_class(cls, name)

        if self.with_etag and not self.etag_field_name:
            self.etag_field_name = f'{name}_etag'
            self.etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
            self.etag_field.contribute_to_class(cls, f'{name}_etag')

        def has(model_instance):
            return bool(getattr(model_instance, self.attname, None))
        has.__doc__ = None
        has.__name__ = str("has_%s" % self.attname)
        has.short_description = self.name
        has.boolean = True

        setattr(cls, 'has_%s' % self.attname, has)

    def get_current_etag(self):
        """Return the ETag a freshly built file of this format would get.

        It is the installed ``librarian`` version, followed by ``_`` and the
        ETag of the format's MediaInsertSet when there is one.
        """
        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')
        librarian_version = pkg_resources.get_distribution("librarian").version
        etag = librarian_version
        mis = MediaInsertSet.get_for_format(self.ext)
        if mis is not None:
            etag += '_' + mis.etag
        return etag

    def find_stale(self, limit):
        """Find some books where this format is stale.

        Returns at most *limit* randomly ordered instances whose stored
        ETag is neither the current one nor its scheduled variant.
        """
        # If there is not ETag field, bail. That's true for xml file field.
        if not self.with_etag:
            return []

        etag = self.get_current_etag()

        queryset = self.model.objects.all()
        if not self.for_parents:
            queryset = queryset.filter(children=None)

        queryset = queryset.exclude(**{
            f'{self.etag_field_name}__in': [
                etag, f'{etag}{ETAG_SCHEDULED_SUFFIX}'
            ]
        })

        queryset = queryset.order_by('?')[:limit]
        return queryset

    @classmethod
    def find_all_stale(cls, model, limit):
        """Collect stale files across all fields of this class on *model*.

        Returns a shuffled list of at most *limit* ``(field_name, instance)``
        pairs.  Nothing is scheduled here; that is left to the caller.
        """
        found = []
        for field in model._meta.fields:
            if isinstance(field, cls):
                for instance in field.find_stale(limit):
                    found.append((
                        field.name,
                        instance
                    ))
        random.shuffle(found)
        found = found[:limit]
        return found

    @staticmethod
    def transform(wldoc, book):
        """Transforms an librarian.WLDocument into an librarian.OutputFile.

        Must be overridden by concrete format fields.
        """
        # NotImplemented is a comparison sentinel and is not callable; the
        # exception meant for "subclass must override" is NotImplementedError.
        raise NotImplementedError()

    def set_file_permissions(self, fieldfile):
        """Make the stored file non-readable when the instance is a preview."""
        if fieldfile.instance.preview:
            fieldfile.set_readable(False)

    def build(self, fieldfile):
        """Generate the file for *fieldfile*'s instance and store it."""
        book = fieldfile.instance
        out = self.transform(
            book.wldocument2() if self.librarian2_api else book.wldocument(),
            book,
        )
        with open(out.get_filename(), 'rb') as f:
            fieldfile.save(None, File(f), save=False)
        self.set_file_permissions(fieldfile)
        # Persist only this field, and only for instances already saved.
        if book.pk is not None:
            book.save(update_fields=[self.attname])
        if self.ZIP:
            remove_zip(self.ZIP)
220
221
class XmlField(EbookField):
    """Field for the book's XML file; building it is a no-op."""
    ext = 'xml'

    def build(self, fieldfile):
        """Do nothing."""
        return None
227
228
class TxtField(EbookField):
    """Text rendition of a book (``.txt``)."""
    for_parents = False
    ext = 'txt'

    @staticmethod
    def transform(wldoc, book):
        """Return ``wldoc.as_text()``; *book* is not used."""
        text_output = wldoc.as_text()
        return text_output
236
237
class Fb2Field(EbookField):
    """FB2 rendition of a book (``.fb2``)."""
    ZIP = 'wolnelektury_pl_fb2'
    for_parents = False
    ext = 'fb2'

    @staticmethod
    def transform(wldoc, book):
        """Return ``wldoc.as_fb2()``; *book* is not used."""
        fb2_output = wldoc.as_fb2()
        return fb2_output
246
247
class PdfField(EbookField):
    """PDF rendition of a book (``.pdf``)."""
    ZIP = 'wolnelektury_pl_pdf'
    ext = 'pdf'

    @staticmethod
    def transform(wldoc, book):
        """Render *wldoc* to PDF, using the cover factory built for *book*."""
        morefloats = settings.LIBRARIAN_PDF_MOREFLOATS
        cover_factory = get_make_cover(book)
        gallery_base = absolute_url(gallery_url(wldoc.book_info.url.slug))
        return wldoc.as_pdf(
            morefloats=morefloats,
            cover=cover_factory,
            base_url=gallery_base,
            customizations=['notoc'],
        )

    def build(self, fieldfile):
        """Build as usual, then call ``clear_cache`` with the book's slug."""
        super().build(fieldfile)
        book_slug = fieldfile.instance.slug
        clear_cache(book_slug)
262
263
class EpubField(EbookField):
    """EPUB rendition of a book, produced with ``EpubBuilder``."""
    ZIP = 'wolnelektury_pl_epub'
    librarian2_api = True
    ext = 'epub'

    @staticmethod
    def transform(wldoc, book):
        """Build an EPUB from *wldoc* with the cover factory for *book*."""
        from librarian.builders import EpubBuilder
        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')

        # The base URL is a file:// URL of the gallery's absolute path.
        gallery_dir = os.path.abspath(gallery_path(wldoc.meta.url.slug))
        builder = EpubBuilder(
            base_url='file://' + gallery_dir + '/',
            fundraising=MediaInsertSet.get_texts_for('epub'),
            cover=get_make_cover(book),
        )
        return builder.build(wldoc)
278
279
class MobiField(EbookField):
    """MOBI rendition of a book, produced with ``MobiBuilder``."""
    ZIP = 'wolnelektury_pl_mobi'
    librarian2_api = True
    ext = 'mobi'

    @staticmethod
    def transform(wldoc, book):
        """Build a MOBI file from *wldoc* with the cover factory for *book*."""
        from librarian.builders import MobiBuilder
        MediaInsertSet = apps.get_model('annoy', 'MediaInsertSet')

        # The base URL is a file:// URL of the gallery's absolute path.
        gallery_dir = os.path.abspath(gallery_path(wldoc.meta.url.slug))
        builder = MobiBuilder(
            base_url='file://' + gallery_dir + '/',
            fundraising=MediaInsertSet.get_texts_for('mobi'),
            cover=get_make_cover(book),
        )
        return builder.build(wldoc)
294
295
class HtmlField(EbookField):
    """HTML rendition of a book; building it also recreates the book's fragments."""
    ext = 'html'
    for_parents = False

    def build(self, fieldfile):
        """Render the HTML file and rebuild the book's themed fragments.

        Existing fragments of the book are always deleted first.  When the
        transform yields output, the file is stored, fragments are extracted
        from it, and a ``Fragment`` is created for every closed fragment that
        resolves to at least one theme tag.

        :returns: True when HTML output was produced, False otherwise.
        """
        from django.core.files.base import ContentFile
        from slugify import slugify
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        # transform() is a static method taking (wldoc, book); it used to be
        # called with the document only, which raised TypeError.
        html_output = self.transform(
            book.wldocument(parse_dublincore=False),
            book,
        )

        # Delete old fragments, create from scratch if necessary.
        book.fragments.all().delete()

        if html_output:
            meta_tags = list(book.tags.filter(
                category__in=('author', 'epoch', 'genre', 'kind')))

            # Map the book language to a code listed in settings.LANGUAGES,
            # or None when it is not one of the configured languages.
            lang = book.language
            lang = LANGUAGES_3TO2.get(lang, lang)
            if lang not in [ln[0] for ln in settings.LANGUAGES]:
                lang = None

            fieldfile.save(None, ContentFile(html_output.get_bytes()), save=False)
            self.set_file_permissions(fieldfile)
            # Write the file column with a queryset update rather than
            # going through book.save().
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })

            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(fieldfile.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # No usable themes on this fragment; skip it.
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    if lang == settings.LANGUAGE_CODE:
                        # Allow creating themes if book in default language.
                        tag, created = Tag.objects.get_or_create(
                            slug=slugify(theme_name),
                            category='theme'
                        )
                        if created:
                            tag.name = theme_name
                            setattr(tag, "name_%s" % lang, theme_name)
                            tag.sort_key = sortify(theme_name.lower())
                            tag.for_books = True
                            tag.save()
                        themes.append(tag)
                    elif lang is not None:
                        # Don't create unknown themes in non-default languages.
                        try:
                            tag = Tag.objects.get(
                                category='theme',
                                **{"name_%s" % lang: theme_name}
                            )
                        except Tag.DoesNotExist:
                            pass
                        else:
                            themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                short_text = truncate_html_words(text, 15)
                # An empty short text means the full text is already short.
                if text == short_text:
                    short_text = ''
                new_fragment = Fragment.objects.create(
                    anchor=fragment.id,
                    book=book,
                    text=text,
                    short_text=short_text
                )

                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes)
                for theme in themes:
                    if not theme.for_books:
                        theme.for_books = True
                        theme.save()
            book.html_built.send(sender=type(self), instance=book)
            return True
        return False

    @staticmethod
    def transform(wldoc, book):
        """Render *wldoc* to HTML with gallery locations derived from its URL.

        The slug is the last path segment of the document's
        ``identifier.url`` element; when that element is missing, empty
        gallery path and URL are used.  *book* is not used.
        """
        # ugly, but we can't use wldoc.book_info here
        from librarian import DCNS
        url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
        if url_elem is None:
            gal_url = ''
            gal_path = ''
        else:
            slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
            gal_url = gallery_url(slug=slug)
            gal_path = gallery_path(slug=slug)
        return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
401
402
class CoverField(EbookField):
    """Cover image of a book, stored as ``.jpg`` under ``book/cover/``."""
    directory = 'cover'
    ext = 'jpg'

    @staticmethod
    def transform(wldoc, book):
        """Create a 360-wide cover with the factory built for *book*."""
        cover_factory = get_make_cover(book)
        cover = cover_factory(wldoc.book_info, width=360)
        return cover.output_file()

    def set_file_permissions(self, fieldfile):
        """Leave the file's permissions unchanged."""
        return None
413
414
class CoverCleanField(CoverField):
    """Cover image stored under ``book/cover_clean/``."""
    directory = 'cover_clean'

    @staticmethod
    def transform(wldoc, book):
        """Create a 360-wide cover with the factory built for *book*."""
        # NOTE(review): same steps as CoverField.transform; kept as an
        # explicit override.
        cover_factory = get_make_cover(book)
        cover = cover_factory(wldoc.book_info, width=360)
        return cover.output_file()
421
422
class CoverThumbField(CoverField):
    """Cover thumbnail stored under ``book/cover_thumb/``."""
    directory = 'cover_thumb'

    @staticmethod
    def transform(wldoc, book):
        """Return a ``WLCover`` output file of height 193; *book* is unused."""
        from librarian.cover import WLCover

        thumbnail = WLCover(wldoc.book_info, height=193)
        return thumbnail.output_file()
430
431
class CoverApiThumbField(CoverField):
    """Cover thumbnail stored under ``book/cover_api_thumb/``."""
    directory = 'cover_api_thumb'

    @staticmethod
    def transform(wldoc, book):
        """Return a ``WLNoBoxCover`` output file of height 500; *book* is unused."""
        from librarian.cover import WLNoBoxCover

        thumbnail = WLNoBoxCover(wldoc.book_info, height=500)
        return thumbnail.output_file()
439
440
class SimpleCoverField(CoverField):
    """Cover image stored under ``book/cover_simple/``."""
    directory = 'cover_simple'

    @staticmethod
    def transform(wldoc, book):
        """Return a ``WLNoBoxCover`` output file of height 1000; *book* is unused."""
        from librarian.cover import WLNoBoxCover

        simple_cover = WLNoBoxCover(wldoc.book_info, height=1000)
        return simple_cover.output_file()
448
449
class CoverEbookpointField(CoverField):
    """Cover image stored under ``book/cover_ebookpoint/``."""
    directory = 'cover_ebookpoint'

    @staticmethod
    def transform(wldoc, book):
        """Return an ``EbookpointCover`` output file; *book* is unused."""
        from librarian.cover import EbookpointCover

        ebookpoint_cover = EbookpointCover(wldoc.book_info)
        return ebookpoint_cover.output_file()
455         from librarian.cover import EbookpointCover
456         return EbookpointCover(wldoc.book_info).output_file()