0e02ff90f641f03f589875d013fed1f45a8411ee
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.utils import remove_zip, truncate_html_words
11 from celery.task import Task, task
12 from waiter.utils import clear_cache
13
14
class EbookFieldFile(FieldFile):
    """File stored in an EbookField, able to trigger its own build."""

    def build(self):
        """Run the field's builder on this file right away.

        Returns whatever the builder's `build` returns.
        """
        builder = self.field.builder
        return builder.build(self)

    def build_delay(self):
        """Hand the build over to the builder's `delay`.

        The model instance and the field's attname are passed along
        instead of this FieldFile object.
        """
        builder = self.field.builder
        return builder.delay(self.instance, self.field.attname)
25
26
class EbookField(models.FileField):
    """File field holding an ebook in one particular format."""
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        """Remember `format_name`; other arguments go to FileField."""
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    @property
    def builder(self):
        """The builder BuildEbook returns for this field's format name."""
        return BuildEbook.for_format(self.format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field to `cls` and add a `has_<attname>` method to it.

        The added method returns True when the instance's attribute for
        this field is truthy.  It also carries `short_description` and
        `boolean` attributes.
        """
        super(EbookField, self).contribute_to_class(cls, name)

        method_name = 'has_%s' % self.attname

        def has(model_instance):
            value = getattr(model_instance, self.attname, None)
            return bool(value)

        has.__name__ = method_name
        has.__doc__ = None
        has.boolean = True
        has.short_description = self.name
        setattr(cls, method_name, has)
50
51
class BuildEbook(Task):
    """Base celery task building an ebook file for a model's file field.

    Builders for specific formats subclass this one and are looked up
    by format name through the `formats` registry.
    """
    # Maps a format name to the builder registered for it.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """A decorator for registering subclasses for particular formats."""
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Returns a celery task suitable for specified format.

        Falls back to the generic BuildEbookTask when nothing has been
        registered for `format_name`.
        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transforms an librarian.WLDocument into an librarian.OutputFile.

        By default, it just calls relevant wldoc.as_??? method.

        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, obj, field_name):
        """Just run `build` on FieldFile, can't pass it directly to Celery."""
        return self.build(getattr(obj, field_name))

    def build(self, fieldfile):
        """Build the file for `fieldfile` and store it on the field.

        The transformed output is saved into the field without saving
        the whole model instance.  If the instance already has a primary
        key, only this field's column is updated in the database.  When
        the format has an entry in `app_settings.FORMAT_ZIPS`, the
        corresponding zip is removed via `remove_zip`.
        """
        book = fieldfile.instance
        out = self.transform(book.wldocument(), fieldfile)
        # Open in binary mode and close the handle as soon as the storage
        # has consumed it; the bare open() used to leak the descriptor.
        with open(out.get_filename(), 'rb') as built_file:
            fieldfile.save(None, File(built_file), save=False)
        if book.pk is not None:
            # Update just this column, bypassing the instance's save().
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })
        format_name = fieldfile.field.format_name
        if format_name in app_settings.FORMAT_ZIPS:
            remove_zip(app_settings.FORMAT_ZIPS[format_name])
# Don't decorate BuildEbook, because we want to subclass it.
BuildEbookTask = task(BuildEbook, ignore_result=True)
93
94
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the 'txt' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the result of `wldoc.as_text()`; `fieldfile` is unused."""
        text_output = wldoc.as_text()
        return text_output
101
102
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the 'pdf' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return `wldoc.as_pdf(...)` with the configured `morefloats`.

        The `morefloats` value is read from
        `settings.LIBRARIAN_PDF_MOREFLOATS`; `fieldfile` is unused.
        """
        pdf_options = {'morefloats': settings.LIBRARIAN_PDF_MOREFLOATS}
        return wldoc.as_pdf(**pdf_options)

    def build(self, fieldfile):
        """Run the base build, then clear the cache for the book's slug."""
        # Call the base class explicitly: the name `BuildPdf` is rebound
        # by the decorators above.
        BuildEbook.build(self, fieldfile)
        slug = fieldfile.instance.slug
        clear_cache(slug)
112         clear_cache(fieldfile.instance.slug)
113
114
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder registered for the 'html' format.

    Besides storing the HTML file, it recreates the book's fragments
    and tags them.
    """

    def build(self, fieldfile):
        """Build the HTML file and recreate the book's themed fragments.

        Returns True when the transform produced output: the file is
        saved, the old fragments are replaced and the book's
        `html_built` signal is sent.  Returns False when the transform
        result is falsy; nothing is modified in that case.
        """
        from django.core.files.base import ContentFile
        from slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        meta_tags = list(book.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = book.book_tag()

        html_output = self.transform(
            book.wldocument(parse_dublincore=False), fieldfile)
        if not html_output:
            return False

        fieldfile.save(None, ContentFile(html_output.get_string()),
                       save=False)
        # Update just this column, bypassing the instance's save().
        type(book).objects.filter(pk=book.pk).update(**{
            fieldfile.field.attname: fieldfile
        })

        # get ancestor l-tags for adding to new fragments
        ancestor_tags = []
        p = book.parent
        while p:
            ancestor_tags.append(p.book_tag())
            p = p.parent

        # Tags every fragment of this book receives; built once here
        # rather than on each loop iteration.
        common_tags = set(meta_tags + [book_tag] + ancestor_tags)

        # Delete old fragments and create them from scratch
        book.fragments.all().delete()
        # Extract fragments; only the closed ones are used below.
        closed_fragments, open_fragments = html.extract_fragments(
            fieldfile.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [s.strip() for s in fragment.themes.split(',')]
            except AttributeError:
                # `fragment.themes` is missing or cannot be split: skip it.
                continue
            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name),
                    category='theme')
                if created:
                    tag.name = theme_name
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = truncate_html_words(text, 15)
            if text == short_text:
                # Truncation changed nothing, so no short version is kept.
                short_text = ''
            # objects.create() already saves the new row, so no separate
            # save() call is needed afterwards.
            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)
            new_fragment.tags = common_tags | set(themes)
        book.html_built.send(sender=book)
        return True
184
185
class OverwritingFieldFile(FieldFile):
    """FieldFile which removes the file it currently holds before saving.

    Passing a truthy `leave` keyword argument to `save` keeps the
    current file in place.
    """

    def save(self, name, content, *args, **kwargs):
        """Save `content`, deleting the currently held file first.

        The current file is not deleted when `leave` is truthy, when
        this field file is falsy, or when `content` has a `path` equal
        to this file's path.
        """
        keep_existing = kwargs.pop('leave', None)
        if not keep_existing and self:
            is_same_file = (hasattr(content, 'path') and
                            content.path == self.path)
            if not is_same_file:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
199
200
class OverwritingFileField(models.FileField):
    """FileField whose attribute class is OverwritingFieldFile."""
    attr_class = OverwritingFieldFile
203
204
# Register the custom fields with South, but only when South is installed.
try:
    # check for south
    from south.modelsinspector import add_introspection_rules
except ImportError:
    pass
else:
    # Raw strings keep the `\.` regex escapes intact without relying on
    # Python passing unknown string escapes through unchanged.
    add_introspection_rules([
        (
            [EbookField],
            [],
            {'format_name': ('format_name', {})}
        )
    ], [r"^catalogue\.fields\.EbookField"])
    add_introspection_rules([], [r"^catalogue\.fields\.OverwritingFileField"])