2c4e4d57b5db5837417acae8e27361b5659e0a5a
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.utils import remove_zip, truncate_html_words
11 from celery import Task
12 from celery.task import task
13 from waiter.utils import clear_cache
14
15
class EbookFieldFile(FieldFile):
    """File value stored in an ebook field of a model instance."""

    def build(self):
        """Call the field's builder on this file now and return its result."""
        builder = self.field.builder
        return builder(self)

    def build_delay(self):
        """Pass this file to the builder's ``delay`` and return its result."""
        schedule = self.field.builder.delay
        return schedule(self)
26
27
class EbookField(models.FileField):
    """Model file field holding an ebook in a single, named format."""
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        """Set up the file field and remember which format it stores.

        ``format_name`` is kept on the field; every other argument is
        forwarded untouched to ``models.FileField``.
        """
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    @property
    def builder(self):
        """The builder ``BuildEbook.for_format`` returns for this format."""
        return BuildEbook.for_format(self.format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field to ``cls`` and add a ``has_<attname>`` method.

        The added method takes a model instance and reports, as a bool,
        whether that instance's value for this field is truthy.
        """
        super(EbookField, self).contribute_to_class(cls, name)

        method_name = "has_%s" % self.attname

        def has(model_instance):
            value = getattr(model_instance, self.attname, None)
            return bool(value)

        has.__doc__ = None
        has.__name__ = method_name
        # NOTE(review): short_description/boolean look like the attributes
        # Django admin reads for list columns -- confirm against usage.
        has.short_description = self.name
        has.boolean = True
        setattr(cls, method_name, has)
51
52
class BuildEbook(Task):
    """Celery task that builds the ebook file behind an ebook field.

    Builders for specific formats subclass this class and register
    themselves with ``register``; ``for_format`` looks them up again and
    falls back to ``BuildEbookTask`` for formats nobody registered.
    """

    # Registry shared by all subclasses: format name -> registered builder.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """Return a decorator registering a builder for ``format_name``.

        The decorator stores the decorated object in ``formats`` and hands
        it back unchanged.
        """
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Return the builder registered for ``format_name``.

        Falls back to ``BuildEbookTask`` when the format has no entry.
        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transform a librarian.WLDocument into a librarian.OutputFile.

        By default this just calls the ``wldoc.as_<format_name>`` method,
        where the format name is taken from the field of ``fieldfile``.
        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, fieldfile):
        """Build the ebook for ``fieldfile`` and store it in the field.

        The owning instance is read from ``fieldfile.instance``, its
        document is transformed, and the resulting file is saved into the
        field.  If the format has an entry in
        ``app_settings.FORMAT_ZIPS``, ``remove_zip`` is called with it.
        """
        book = fieldfile.instance
        out = self.transform(book.wldocument(), fieldfile)
        # Binary mode keeps the built file's bytes intact, and the context
        # manager closes the handle once the content has been stored.
        with open(out.get_filename(), 'rb') as built_file:
            fieldfile.save(None, File(built_file), save=False)
        if book.pk is not None:
            # The file was saved with save=False, so write just this
            # column with a queryset update instead of saving the instance.
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })
        format_name = fieldfile.field.format_name
        if format_name in app_settings.FORMAT_ZIPS:
            remove_zip(app_settings.FORMAT_ZIPS[format_name])


# Don't decorate BuildEbook, because we want to subclass it.
BuildEbookTask = task(BuildEbook, ignore_result=True)
90
91
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the ``txt`` format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return ``wldoc.as_text()``; ``fieldfile`` is not consulted."""
        text_output = wldoc.as_text()
        return text_output
98
99
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the ``pdf`` format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return ``wldoc.as_pdf`` output, using the project's morefloats setting."""
        pdf_options = {'morefloats': settings.LIBRARIAN_PDF_MOREFLOATS}
        return wldoc.as_pdf(**pdf_options)

    def run(self, fieldfile):
        """Run the common build, then clear the cache for the book's slug."""
        # Call the undecorated base class's run directly.
        BuildEbook.run(self, fieldfile)
        book = fieldfile.instance
        clear_cache(book.slug)
110
111
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder registered for the ``html`` format.

    Besides storing the HTML file, it recreates the book's fragments from
    the generated HTML and tags them.
    """

    def run(self, fieldfile):
        """Build the HTML for ``fieldfile`` and rebuild the book's fragments.

        Returns True when the transform produced output (the file is
        saved, fragments are recreated and ``book.html_built`` is sent),
        and False when the transform produced nothing, in which case
        nothing is changed.
        """
        # NOTE(review): these are imported inside the method, presumably to
        # avoid a circular import with catalogue.models -- confirm.
        from django.core.files.base import ContentFile
        from slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        meta_tags = list(book.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = book.book_tag()

        html_output = self.transform(
            book.wldocument(parse_dublincore=False), fieldfile)
        if not html_output:
            return False

        fieldfile.save(None, ContentFile(html_output.get_string()),
                       save=False)
        # The file was saved with save=False, so write just this column
        # with a queryset update instead of saving the instance.
        type(book).objects.filter(pk=book.pk).update(**{
            fieldfile.field.attname: fieldfile
        })

        # Collect the book tags of all ancestors for the new fragments.
        ancestor_tags = []
        parent = book.parent
        while parent:
            ancestor_tags.append(parent.book_tag())
            parent = parent.parent

        # These tags are the same for every fragment; build the list once.
        shared_tags = meta_tags + [book_tag] + ancestor_tags

        # Delete old fragments and create them from scratch.
        book.fragments.all().delete()
        # Only the closed fragments are used below.
        closed_fragments, _ = html.extract_fragments(fieldfile.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [s.strip() for s in fragment.themes.split(',')]
            except AttributeError:
                # No usable ``themes`` on this fragment; skip it.
                continue

            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name),
                    category='theme')
                if created:
                    tag.name = theme_name
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = truncate_html_words(text, 15)
            if text == short_text:
                # Truncation changed nothing, so no separate short text.
                short_text = ''

            # ``create`` already saves the new row; no extra save() needed.
            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)
            new_fragment.tags = set(shared_tags + themes)

        book.html_built.send(sender=book)
        return True
181
182
class OverwritingFieldFile(FieldFile):
    """Field file that deletes the old file before saving the new one."""

    def save(self, name, content, *args, **kwargs):
        """Save ``content``, first deleting the current file if it differs.

        A truthy ``leave`` keyword argument (removed before delegating)
        suppresses the deletion.  Content whose ``path`` equals this
        file's path is also left alone.
        """
        keep_existing = kwargs.pop('leave', None)
        if not keep_existing and self:
            # Only reached when a file is already set on this field.
            same_file = (hasattr(content, 'path') and
                         content.path == self.path)
            if not same_file:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
196
197
class OverwritingFileField(models.FileField):
    """File field that uses ``OverwritingFieldFile`` as its ``attr_class``."""

    attr_class = OverwritingFieldFile
200
201
try:
    # check for south
    from south.modelsinspector import add_introspection_rules
except ImportError:
    # South is not importable, so there is nothing to register.
    pass
else:
    # Raw strings keep ``\.`` as a regex escape rather than an (invalid)
    # string escape sequence; the resulting patterns are unchanged.
    add_introspection_rules([
        (
            [EbookField],
            [],
            {'format_name': ('format_name', {})}
        )
    ], [r"^catalogue\.fields\.EbookField"])
    add_introspection_rules([], [r"^catalogue\.fields\.OverwritingFileField"])