Use secure transport for requirements.
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.constants import LANGUAGES_3TO2
11 from catalogue.utils import remove_zip, truncate_html_words
12 from celery.task import Task, task
13 from waiter.utils import clear_cache
14
15
class EbookFieldFile(FieldFile):
    """File object stored in an `EbookField`; it can rebuild itself."""

    def build(self):
        """Run the field's builder on this file right away.

        Returns whatever the builder's `build` returns.
        """
        builder = self.field.builder
        return builder.build(self)

    def build_delay(self):
        """Hand the build over to the builder's `delay`.

        The owning model instance and the field's attname are passed
        instead of this file object.
        """
        builder = self.field.builder
        return builder.delay(self.instance, self.field.attname)
26
27
class EbookField(models.FileField):
    """A model file field holding an ebook in one particular format."""
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        """Remember `format_name`; all other arguments go to FileField."""
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    def deconstruct(self):
        """Deconstruct the field, putting `format_name` first in args."""
        deconstructed = super(EbookField, self).deconstruct()
        name, path, args, kwargs = deconstructed
        args.insert(0, self.format_name)
        return name, path, args, kwargs

    @property
    def builder(self):
        """The builder registered in BuildEbook for this field's format."""
        return BuildEbook.for_format(self.format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field and add a boolean `has_<attname>` to `cls`."""
        super(EbookField, self).contribute_to_class(cls, name)
        field = self

        def has(model_instance):
            value = getattr(model_instance, field.attname, None)
            return bool(value)

        accessor_name = "has_%s" % self.attname
        has.__doc__ = None
        has.__name__ = str(accessor_name)
        # Extra attributes set on the accessor function itself.
        has.short_description = self.name
        has.boolean = True
        setattr(cls, accessor_name, has)
56
57
class BuildEbook(Task):
    """Base task which builds an ebook file for an `EbookField`.

    Subclasses customise the conversion by overriding `transform` (or
    `build`) and are registered per format with `BuildEbook.register`.
    """
    # Registry of builders keyed by format name. It lives on this base
    # class, so `register` calls made through any subclass share it.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """A decorator for registering subclasses for particular formats."""
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Returns a celery task suitable for specified format.

        Falls back to `BuildEbookTask` when nothing was registered for
        `format_name`.
        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transforms a librarian.WLDocument into a librarian.OutputFile.

        By default, it just calls the relevant wldoc.as_??? method, chosen
        by the field's `format_name`.

        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, obj, field_name):
        """Just run `build` on FieldFile, can't pass it directly to Celery."""
        return self.build(getattr(obj, field_name))

    def build(self, fieldfile):
        """Build the ebook and store it in `fieldfile`.

        The transformed output is saved into the file field without saving
        the model instance; when the instance has a primary key, only this
        field's column is updated. If the format has an entry in
        `app_settings.FORMAT_ZIPS`, `remove_zip` is called with that entry.
        """
        book = fieldfile.instance
        field = fieldfile.field
        out = self.transform(book.wldocument(), fieldfile)
        # Read the output as bytes and close the handle as soon as it has
        # been saved, instead of leaking an open file on every build.
        with open(out.get_filename(), 'rb') as built:
            fieldfile.save(None, File(built), save=False)
        if book.pk is not None:
            type(book).objects.filter(pk=book.pk).update(**{
                field.attname: fieldfile
            })
        if field.format_name in app_settings.FORMAT_ZIPS:
            remove_zip(app_settings.FORMAT_ZIPS[field.format_name])


# Don't decorate BuildEbook, because we want to subclass it.
BuildEbookTask = task(BuildEbook, ignore_result=True)
99
100
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the 'txt' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_text()` output; `fieldfile` is unused."""
        text_output = wldoc.as_text()
        return text_output
107
108
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the 'pdf' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_pdf()` output, with a cover.

        `morefloats` is taken from `settings.LIBRARIAN_PDF_MOREFLOATS`;
        `fieldfile` is unused.
        """
        pdf_options = {
            'morefloats': settings.LIBRARIAN_PDF_MOREFLOATS,
            'cover': True,
        }
        return wldoc.as_pdf(**pdf_options)

    def build(self, fieldfile):
        """Run the base build, then clear the cache for the book's slug."""
        # The base implementation is called through BuildEbook explicitly.
        # NOTE(review): the @task decorator presumably rebinds the name
        # BuildPdf, so super(BuildPdf, self) may not work here -- confirm
        # before changing this call.
        BuildEbook.build(self, fieldfile)
        book_slug = fieldfile.instance.slug
        clear_cache(book_slug)
120
121
@BuildEbook.register('epub')
@task(ignore_result=True)
class BuildEpub(BuildEbook):
    """Builder registered for the 'epub' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_epub()` output, with a cover.

        `fieldfile` is unused.
        """
        epub_output = wldoc.as_epub(cover=True)
        return epub_output
128
129
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder for the 'html' format; it also recreates book fragments."""

    def build(self, fieldfile):
        """Build the HTML file and recreate the book's themed fragments.

        The book's existing fragments are always deleted first. When the
        transformation gives no output nothing else happens; otherwise the
        HTML is saved, a Fragment is created for every closed fragment
        that resolves to at least one theme tag, and `book.html_built` is
        sent.

        Returns True if HTML was produced and saved, False otherwise.
        """
        from django.core.files.base import ContentFile
        from fnpdjango.utils.text.slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance
        document = book.wldocument(parse_dublincore=False)
        html_output = self.transform(document, fieldfile)

        # Old fragments go away regardless of whether new HTML was made.
        book.fragments.all().delete()

        if not html_output:
            return False

        meta_categories = ('author', 'epoch', 'genre', 'kind')
        meta_tags = list(book.tags.filter(category__in=meta_categories))

        # Map the book's language through LANGUAGES_3TO2 and drop it when
        # it is not one of settings.LANGUAGES.
        book_language = book.language
        lang = LANGUAGES_3TO2.get(book_language, book_language)
        if lang not in [entry[0] for entry in settings.LANGUAGES]:
            lang = None
        is_default_language = lang == settings.LANGUAGE_CODE

        def find_theme(theme_name):
            """Return the theme Tag for `theme_name`, or None."""
            if is_default_language:
                # Allow creating themes if book in default language.
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name), category='theme')
                if created:
                    tag.name = theme_name
                    setattr(tag, "name_%s" % lang, theme_name)
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                return tag
            if lang is None:
                return None
            # Don't create unknown themes in non-default languages.
            try:
                return Tag.objects.get(
                    category='theme', **{"name_%s" % lang: theme_name})
            except Tag.DoesNotExist:
                return None

        fieldfile.save(
            None, ContentFile(html_output.get_string()), save=False)
        type(book).objects.filter(pk=book.pk).update(**{
            fieldfile.field.attname: fieldfile
        })

        # Extract fragments
        closed_fragments, _open_fragments = html.extract_fragments(
            fieldfile.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [
                    part.strip() for part in fragment.themes.split(',')]
            except AttributeError:
                # `themes` has no `split`; skip this fragment.
                continue

            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                tag = find_theme(theme_name)
                if tag is not None:
                    themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            truncated = truncate_html_words(text, 15)
            # No separate short text when truncation changed nothing.
            short_text = '' if truncated == text else truncated
            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)

            new_fragment.save()
            new_fragment.tags = set(meta_tags + themes)

        book.html_built.send(sender=type(self), instance=book)
        return True
210
@BuildEbook.register('cover_thumb')
@task(ignore_result=True)
class BuildCoverThumb(BuildEbook):
    """Builder registered for the 'cover_thumb' format."""

    @classmethod
    def transform(cls, wldoc, fieldfile):
        """Return the output file of a WLCover made with height=193.

        The cover is created from `wldoc.book_info`; `fieldfile` is unused.
        """
        from librarian.cover import WLCover

        cover = WLCover(wldoc.book_info, height=193)
        return cover.output_file()
218
219
220
class OverwritingFieldFile(FieldFile):
    """
        Deletes the old file before saving the new one.
    """

    def save(self, name, content, *args, **kwargs):
        """Save `content`, deleting the currently stored file first.

        The old file is kept when the `leave` keyword argument is true,
        when no file is stored yet, or when `content` has a `path` equal
        to the stored file's path. `leave` is not passed on to
        FieldFile.save.
        """
        keep_existing = kwargs.pop('leave', None)
        if not keep_existing and self:
            # Only delete when something different is coming in.
            same_file = (hasattr(content, 'path') and
                         content.path == self.path)
            if not same_file:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
234
235
class OverwritingFileField(models.FileField):
    """A FileField whose file objects are `OverwritingFieldFile`s."""
    attr_class = OverwritingFieldFile