Tighter sponsors html, every pageview gets ~10kB lighter.
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.constants import LANGUAGES_3TO2
11 from catalogue.utils import remove_zip, truncate_html_words
12 from celery.task import Task, task
13 from waiter.utils import clear_cache
14
15
class EbookFieldFile(FieldFile):
    """File wrapper for an ebook field; delegates building to the field's builder."""

    def build(self):
        """Run the field's builder on this file right away and return its result."""
        builder = self.field.builder
        return builder.build(self)

    def build_delay(self):
        """Schedule the build through the builder's `delay`.

        The owning instance and the field's attname are passed instead of
        this object itself.
        """
        builder = self.field.builder
        return builder.delay(self.instance, self.field.attname)
26
27
class EbookField(models.FileField):
    """A model file field that stores an ebook in a single, named format."""
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        # Remaining arguments are handed over to FileField untouched.
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    @property
    def builder(self):
        """The builder that BuildEbook has on record for this field's format."""
        return BuildEbook.for_format(self.format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field to `cls` and add a `has_<attname>` method to it."""
        super(EbookField, self).contribute_to_class(cls, name)
        accessor_name = "has_%s" % self.attname

        def has(model_instance):
            # True when the instance has a non-empty value under this field.
            stored = getattr(model_instance, self.attname, None)
            return bool(stored)

        has.__doc__ = None
        has.__name__ = accessor_name
        has.short_description = self.name
        has.boolean = True
        setattr(cls, accessor_name, has)
51
52
class BuildEbook(Task):
    """Base builder: generates an ebook file and stores it in a file field.

    Format-specific builders subclass it and are looked up by format name
    through the `formats` registry.
    """
    # Maps a format name to the builder registered for it with `register`.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """A decorator for registering subclasses for particular formats."""
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Returns a celery task suitable for specified format.

        Falls back to the generic BuildEbookTask when nothing was
        registered for `format_name`.
        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transforms an librarian.WLDocument into an librarian.OutputFile.

        By default, it just calls relevant wldoc.as_??? method.

        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, obj, field_name):
        """Just run `build` on FieldFile, can't pass it directly to Celery."""
        return self.build(getattr(obj, field_name))

    def build(self, fieldfile):
        """Generate the file for `fieldfile` and store it in the field.

        The book's document is transformed, the resulting file is saved
        into the field's storage, and the stored value is written to the
        database with a queryset `update` (only when the book has a
        primary key).
        """
        book = fieldfile.instance
        out = self.transform(book.wldocument(), fieldfile)
        # Open in binary mode and close the handle once the content has
        # been stored; the original left the file object open.
        with open(out.get_filename(), 'rb') as source:
            fieldfile.save(None, File(source), save=False)
        if book.pk is not None:
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })
        format_name = fieldfile.field.format_name
        if format_name in app_settings.FORMAT_ZIPS:
            # NOTE(review): presumably drops a now-stale archive of this
            # format -- confirm against catalogue.utils.remove_zip.
            remove_zip(app_settings.FORMAT_ZIPS[format_name])
# Don't decorate BuildEbook, because we want to subclass it.
BuildEbookTask = task(BuildEbook, ignore_result=True)
94
95
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the 'txt' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_text()` output; `fieldfile` is unused."""
        text_output = wldoc.as_text()
        return text_output
102
103
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the 'pdf' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_pdf()` output, with a cover."""
        pdf_options = {
            'morefloats': settings.LIBRARIAN_PDF_MOREFLOATS,
            'cover': True,
        }
        return wldoc.as_pdf(**pdf_options)

    def build(self, fieldfile):
        """Run the base build, then call `clear_cache` with the book's slug."""
        # The base implementation is invoked explicitly on BuildEbook.
        BuildEbook.build(self, fieldfile)
        book = fieldfile.instance
        clear_cache(book.slug)
115
116
@BuildEbook.register('epub')
@task(ignore_result=True)
class BuildEpub(BuildEbook):
    """Builder registered for the 'epub' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document's `as_epub()` output, with a cover."""
        epub_output = wldoc.as_epub(cover=True)
        return epub_output
123
124
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder registered for the 'html' format; also rebuilds fragments."""

    def build(self, fieldfile):
        """Store the book's HTML and recreate its themed fragments.

        Returns True when the transform produced output (file stored,
        fragments rebuilt, `html_built` sent), False otherwise.
        """
        # Imported here rather than at module level, as in the original.
        from django.core.files.base import ContentFile
        from fnpdjango.utils.text.slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        meta_tags = list(book.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = book.book_tag()

        document = book.wldocument(parse_dublincore=False)
        html_output = self.transform(document, fieldfile)

        # Map the book's language code through LANGUAGES_3TO2; a code that
        # is not listed in settings.LANGUAGES is treated as "no language".
        raw_lang = book.language
        lang = LANGUAGES_3TO2.get(raw_lang, raw_lang)
        supported = [entry[0] for entry in settings.LANGUAGES]
        lang = lang if lang in supported else None

        if not html_output:
            return False

        fieldfile.save(None, ContentFile(html_output.get_string()),
                       save=False)
        type(book).objects.filter(pk=book.pk).update(
            **{fieldfile.field.attname: fieldfile})

        # Book tags of every ancestor, added to each new fragment.
        ancestor_tags = []
        ancestor = book.parent
        while ancestor:
            ancestor_tags.append(ancestor.book_tag())
            ancestor = ancestor.parent

        may_create_themes = lang == settings.LANGUAGE_CODE
        localized_name = "name_%s" % lang

        def find_theme(theme_name):
            """Return the theme tag for `theme_name`, or None if unavailable."""
            if may_create_themes:
                # Allow creating themes if book in default language.
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name), category='theme')
                if created:
                    tag.name = theme_name
                    setattr(tag, localized_name, theme_name)
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                return tag
            if lang is None:
                return None
            # Don't create unknown themes in non-default languages.
            try:
                return Tag.objects.get(category='theme',
                                       **{localized_name: theme_name})
            except Tag.DoesNotExist:
                return None

        # Old fragments are dropped and rebuilt from the stored file.
        book.fragments.all().delete()
        closed_fragments, _open_fragments = html.extract_fragments(
            fieldfile.path)

        for fragment in closed_fragments.values():
            try:
                theme_names = [part.strip()
                               for part in fragment.themes.split(',')]
            except AttributeError:
                # `themes` does not support split(); skip this fragment.
                continue

            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                tag = find_theme(theme_name)
                if tag is not None:
                    themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = truncate_html_words(text, 15)
            if short_text == text:
                # Truncation changed nothing, so no short version is kept.
                short_text = ''

            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)
            new_fragment.save()
            new_fragment.tags = set(
                meta_tags + themes + [book_tag] + ancestor_tags)

        book.html_built.send(sender=book)
        return True
211
@BuildEbook.register('cover_thumb')
@task(ignore_result=True)
class BuildCoverThumb(BuildEbook):
    """Builder registered for the 'cover_thumb' format."""

    @classmethod
    def transform(cls, wldoc, fieldfile):
        """Return the output file of a WLCover built from the book info.

        The cover is created with height=193; `fieldfile` is unused.
        """
        from librarian.cover import WLCover
        cover = WLCover(wldoc.book_info, height=193)
        return cover.output_file()

219
220
221
class OverwritingFieldFile(FieldFile):
    """A FieldFile that removes the file it already holds before saving."""

    def save(self, name, content, *args, **kwargs):
        """Save `content`, first deleting the currently held file.

        The old file is kept when the `leave` keyword is true, when no
        file is held yet, or when `content` has the same path as the
        held file.
        """
        keep_old = kwargs.pop('leave', None)
        if not keep_old and self:
            same_file = (hasattr(content, 'path') and
                         content.path == self.path)
            if not same_file:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
235
236
class OverwritingFileField(models.FileField):
    """A FileField whose file wrapper class is OverwritingFieldFile."""

    attr_class = OverwritingFieldFile
239
240
# Register the custom fields with South, but only when South is installed.
try:
    # check for south
    from south.modelsinspector import add_introspection_rules
except ImportError:
    pass
else:
    # Raw strings keep `\.` a literal regex escape instead of an invalid
    # string escape sequence; the pattern text itself is unchanged.
    add_introspection_rules([
        (
            [EbookField],
            [],
            {'format_name': ('format_name', {})}
        )
    ], [r"^catalogue\.fields\.EbookField"])
    add_introspection_rules([], [r"^catalogue\.fields\.OverwritingFileField"])