68aaa4025fd80707381055b8a93560129006e4af
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.utils import remove_zip, truncate_html_words
11 from celery.task import Task, task
12 from waiter.utils import clear_cache
13
14
class EbookFieldFile(FieldFile):
    """File value of an `EbookField` that knows how to (re)build itself."""

    def build(self):
        """Run the field's builder on this file right away.

        Returns whatever the builder's `build` method returns.
        """
        builder = self.field.builder
        return builder.build(self)

    def build_delay(self):
        """Hand the build over to the field's builder via its `delay` method.

        The model instance and the field's attribute name are passed
        instead of this object itself.
        """
        builder = self.field.builder
        return builder.delay(self.instance, self.field.attname)
25
26
class EbookField(models.FileField):
    """File field holding an ebook in one named format.

    `format_name` is the first positional argument; the remaining
    arguments are forwarded unchanged to `models.FileField`.
    """
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    @property
    def builder(self):
        """The builder that `BuildEbook.for_format` returns for this format."""
        return BuildEbook.for_format(self.format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field to `cls` and add a `has_<attname>` method to it.

        The generated method returns True when the instance's attribute
        for this field is truthy, and False otherwise.
        """
        super(EbookField, self).contribute_to_class(cls, name)
        field = self
        checker_name = "has_%s" % field.attname

        def has(model_instance):
            return bool(getattr(model_instance, field.attname, None))

        # `short_description` and `boolean` are presumably read by the
        # Django admin when the method is listed as a column -- confirm.
        has.__doc__ = None
        has.__name__ = checker_name
        has.short_description = field.name
        has.boolean = True
        setattr(cls, checker_name, has)
50
51
class BuildEbook(Task):
    """Base task which builds an ebook file for an `EbookField`.

    Subclasses customise the conversion by overriding `transform` and
    are attached to a format name with the `register` decorator.
    """

    # Registry of builders keyed by format name. It is a class attribute,
    # so the same dict is shared by BuildEbook and all its subclasses.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """A decorator for registering subclasses for particular formats.

        The decorated builder is stored under `format_name` and returned
        unchanged.
        """
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Returns a celery task suitable for specified format.

        Falls back to the generic `BuildEbookTask` when nothing was
        registered for `format_name`.
        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transforms an librarian.WLDocument into an librarian.OutputFile.

        By default, it just calls relevant wldoc.as_??? method, where
        ??? is the `format_name` of the field owning `fieldfile`.

        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, obj, field_name):
        """Just run `build` on FieldFile, can't pass it directly to Celery."""
        return self.build(getattr(obj, field_name))

    def build(self, fieldfile):
        """Generate the ebook and store it in `fieldfile`.

        The document obtained from `fieldfile.instance.wldocument()` is
        converted with `transform`, and the resulting file is saved into
        the field. If the instance has a primary key, only this field's
        column is updated in the database. Finally, if the format is a
        key in `app_settings.FORMAT_ZIPS`, `remove_zip` is called with
        the corresponding value.
        """
        book = fieldfile.instance
        out = self.transform(book.wldocument(), fieldfile)
        # Open in binary mode (ebook files are not necessarily text) and
        # close the source file as soon as it has been saved to the field.
        with open(out.get_filename(), 'rb') as source:
            fieldfile.save(None, File(source), save=False)
        if book.pk is not None:
            # Update just this column instead of saving the whole instance.
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })
        format_name = fieldfile.field.format_name
        if format_name in app_settings.FORMAT_ZIPS:
            remove_zip(app_settings.FORMAT_ZIPS[format_name])


# Don't decorate BuildEbook, because we want to subclass it.
BuildEbookTask = task(BuildEbook, ignore_result=True)
93
94
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the 'txt' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the result of `wldoc.as_text()`; `fieldfile` is not used."""
        text_output = wldoc.as_text()
        return text_output
101
102
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the 'pdf' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return `wldoc.as_pdf(...)` with a cover; `fieldfile` is not used.

        The `morefloats` option is taken from
        `settings.LIBRARIAN_PDF_MOREFLOATS`.
        """
        pdf_options = {
            'morefloats': settings.LIBRARIAN_PDF_MOREFLOATS,
            'cover': True,
        }
        return wldoc.as_pdf(**pdf_options)

    def build(self, fieldfile):
        """Run the base build, then call `clear_cache` with the book's slug."""
        # NOTE(review): the base class is named explicitly, presumably
        # because the decorators above rebind the name `BuildPdf`, which
        # would make super(BuildPdf, self) unreliable -- confirm.
        BuildEbook.build(self, fieldfile)
        book = fieldfile.instance
        clear_cache(book.slug)
114
115
@BuildEbook.register('epub')
@task(ignore_result=True)
class BuildEpub(BuildEbook):
    """Builder registered for the 'epub' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return `wldoc.as_epub(cover=True)`; `fieldfile` is not used."""
        epub_output = wldoc.as_epub(cover=True)
        return epub_output
122
123
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder registered for the 'html' format.

    Besides storing the HTML file it recreates the book's fragments.
    """

    def build(self, fieldfile):
        """Build the HTML file and recreate the book's themed fragments.

        Returns True when the transformation produced output and the
        file and fragments were rebuilt. Returns False, leaving the file
        and the fragments untouched, when the transformation result is
        falsy.
        """
        from django.core.files.base import ContentFile
        from slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        meta_tags = list(book.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = book.book_tag()

        html_output = self.transform(
            book.wldocument(parse_dublincore=False), fieldfile)
        if not html_output:
            return False

        fieldfile.save(
            None, ContentFile(html_output.get_string()), save=False)
        # Write only this field's column, not the whole instance.
        column_update = {fieldfile.field.attname: fieldfile}
        type(book).objects.filter(pk=book.pk).update(**column_update)

        # Book tags of all ancestors are added to every new fragment.
        ancestor_tags = []
        ancestor = book.parent
        while ancestor:
            ancestor_tags.append(ancestor.book_tag())
            ancestor = ancestor.parent
        trailing_tags = [book_tag] + ancestor_tags

        # Old fragments are dropped; new ones are created from scratch
        # out of the freshly saved HTML file.
        book.fragments.all().delete()
        closed_fragments, _open_fragments = html.extract_fragments(
            fieldfile.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [
                    name for name in
                    (raw.strip() for raw in fragment.themes.split(','))
                    if name]
            except AttributeError:
                # No usable `themes` on this fragment: skip it.
                continue

            themes = []
            for theme_name in theme_names:
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name), category='theme')
                if created:
                    # A brand new theme tag still needs its name and key.
                    tag.name = theme_name
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = truncate_html_words(text, 15)
            if short_text == text:
                # Nothing was cut off, so no separate short version.
                short_text = ''
            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)

            # NOTE(review): create() has presumably saved the row already,
            # so this extra save() looks redundant -- confirm before removing.
            new_fragment.save()
            new_fragment.tags = set(meta_tags + themes + trailing_tags)

        book.html_built.send(sender=book)
        return True
193
194
class OverwritingFieldFile(FieldFile):
    """Field file which deletes the old file before saving the new one."""

    def save(self, name, content, *args, **kwargs):
        """Save `content`, deleting the currently stored file first.

        The old file is kept when a truthy `leave` keyword argument is
        given, when there is no old file, or when `content` has a `path`
        equal to the path of the old file. `leave` is removed from the
        keyword arguments before they are passed on to `FieldFile.save`.
        """
        keep_old = kwargs.pop('leave', None)
        if not keep_old and self:
            # Only delete when the incoming content is a different file.
            if not hasattr(content, 'path') or content.path != self.path:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
208
209
class OverwritingFileField(models.FileField):
    """A `FileField` whose file values are `OverwritingFieldFile` objects."""
    attr_class = OverwritingFieldFile
212
213
try:
    # check for south
    from south.modelsinspector import add_introspection_rules
except ImportError:
    # South is not importable; there is nothing to register.
    pass
else:
    # Raw strings keep the `\.` in the patterns without relying on
    # Python passing an unrecognised escape sequence through.
    add_introspection_rules([
        (
            [EbookField],
            [],
            {'format_name': ('format_name', {})}
        )
    ], [r"^catalogue\.fields\.EbookField"])
    add_introspection_rules([], [r"^catalogue\.fields\.OverwritingFileField"])