fad697f44aaf23aa91999189d21644ca9d54be25
[wolnelektury.git] / apps / catalogue / fields.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.core.files import File
7 from django.db import models
8 from django.db.models.fields.files import FieldFile
9 from catalogue import app_settings
10 from catalogue.utils import remove_zip, truncate_html_words
11 from celery.task import Task, task
12 from waiter.utils import clear_cache
13
14
class EbookFieldFile(FieldFile):
    """Contents of an ebook file field, able to trigger its own build."""

    def build(self):
        """Call the field's builder on this file right away."""
        builder = self.field.builder
        return builder(self)

    def build_delay(self):
        """Hand this file to the field's builder as a delayed task."""
        builder = self.field.builder
        return builder.delay(self)
25
26
class EbookField(models.FileField):
    """A model file field holding an ebook in one named format."""
    attr_class = EbookFieldFile

    def __init__(self, format_name, *args, **kwargs):
        # Everything except the format name goes straight to FileField.
        super(EbookField, self).__init__(*args, **kwargs)
        self.format_name = format_name

    @property
    def builder(self):
        """The celery task registered for this field's format."""
        format_name = self.format_name
        return BuildEbook.for_format(format_name)

    def contribute_to_class(self, cls, name):
        """Attach the field and add a boolean `has_<attname>` method to `cls`."""
        super(EbookField, self).contribute_to_class(cls, name)
        accessor_name = "has_%s" % self.attname

        def has(model_instance):
            stored = getattr(model_instance, self.attname, None)
            return True if stored else False

        # Present the generated method as a boolean column labelled
        # with the field name.
        has.boolean = True
        has.short_description = self.name
        has.__name__ = accessor_name
        has.__doc__ = None
        setattr(cls, accessor_name, has)
50
51
class BuildEbook(Task):
    """Base task which builds an ebook file for a book's ebook field.

    Format-specific builders are registered with `register` and looked up
    with `for_format`.

    """
    # Registry mapping a format name to the builder registered for it.
    formats = {}

    @classmethod
    def register(cls, format_name):
        """A decorator for registering subclasses for particular formats."""
        def wrapper(builder):
            cls.formats[format_name] = builder
            return builder
        return wrapper

    @classmethod
    def for_format(cls, format_name):
        """Returns a celery task suitable for specified format.

        Falls back to the generic BuildEbookTask when no builder has been
        registered for `format_name`.

        """
        return cls.formats.get(format_name, BuildEbookTask)

    @staticmethod
    def transform(wldoc, fieldfile):
        """Transforms a librarian.WLDocument into a librarian.OutputFile.

        By default, it just calls relevant wldoc.as_??? method.

        """
        return getattr(wldoc, "as_%s" % fieldfile.field.format_name)()

    def run(self, fieldfile):
        """Build the file for `fieldfile` and store it on the book.

        The field is written with a queryset update() instead of saving
        the whole model instance; this is skipped when the book has no
        primary key. If the format has an entry in
        app_settings.FORMAT_ZIPS, remove_zip() is called with that entry.

        """
        book = fieldfile.instance
        format_name = fieldfile.field.format_name
        out = self.transform(book.wldocument(), fieldfile)
        # Ebook files are binary, so read them as bytes, and close the
        # handle as soon as the storage has taken its copy instead of
        # leaking one descriptor per build.
        with open(out.get_filename(), 'rb') as built_file:
            fieldfile.save(None, File(built_file), save=False)
        if book.pk is not None:
            type(book).objects.filter(pk=book.pk).update(**{
                fieldfile.field.attname: fieldfile
            })
        if format_name in app_settings.FORMAT_ZIPS:
            remove_zip(app_settings.FORMAT_ZIPS[format_name])
# Don't decorate BuildEbook itself, because we want to subclass it: the
# plain class stays available as a base for the format-specific builders,
# and the task made from it here is what BuildEbook.for_format() falls back
# to for formats with no registered builder.
BuildEbookTask = task(BuildEbook, ignore_result=True)
89
90
@BuildEbook.register('txt')
@task(ignore_result=True)
class BuildTxt(BuildEbook):
    """Builder registered for the 'txt' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document converted with as_text(); `fieldfile` is unused."""
        text_output = wldoc.as_text()
        return text_output
97
98
@BuildEbook.register('pdf')
@task(ignore_result=True)
class BuildPdf(BuildEbook):
    """Builder registered for the 'pdf' format."""

    @staticmethod
    def transform(wldoc, fieldfile):
        """Return the document converted with as_pdf(); `fieldfile` is unused."""
        morefloats = settings.LIBRARIAN_PDF_MOREFLOATS
        return wldoc.as_pdf(morefloats=morefloats)

    def run(self, fieldfile):
        """Run the generic build, then call clear_cache() with the book's slug."""
        # The base implementation is called explicitly through BuildEbook.
        BuildEbook.run(self, fieldfile)
        book = fieldfile.instance
        clear_cache(book.slug)
109
110
@BuildEbook.register('html')
@task(ignore_result=True)
class BuildHtml(BuildEbook):
    """Builder registered for the 'html' format.

    Besides storing the HTML file, it recreates the book's fragments from
    the generated HTML.

    """

    def run(self, fieldfile):
        """Build the HTML file and rebuild the book's themed fragments.

        Returns True when the transform produced output and the fragments
        were rebuilt, False when the transform returned a false value (in
        which case nothing is stored or deleted).

        """
        # NOTE(review): these are imported locally, presumably to avoid
        # circular imports with catalogue.models -- confirm before moving.
        from django.core.files.base import ContentFile
        from slughifi import slughifi
        from sortify import sortify
        from librarian import html
        from catalogue.models import Fragment, Tag

        book = fieldfile.instance

        meta_tags = list(book.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = book.book_tag()

        html_output = self.transform(
            book.wldocument(parse_dublincore=False), fieldfile)
        if not html_output:
            return False

        fieldfile.save(None, ContentFile(html_output.get_string()),
                       save=False)
        type(book).objects.filter(pk=book.pk).update(**{
            fieldfile.field.attname: fieldfile
        })

        # Get ancestor l-tags for adding to new fragments.
        ancestor_tags = []
        parent = book.parent
        while parent:
            ancestor_tags.append(parent.book_tag())
            parent = parent.parent

        # Tags every fragment of this book receives, whatever its themes.
        common_tags = meta_tags + [book_tag] + ancestor_tags

        # Delete old fragments and create them from scratch.
        book.fragments.all().delete()
        # Only the closed fragments are used; the second value is ignored.
        closed_fragments, _ = html.extract_fragments(fieldfile.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [s.strip() for s in fragment.themes.split(',')]
            except AttributeError:
                # `themes` is not a string here; skip this fragment.
                continue

            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                # Theme tags are looked up by slug and created on first use.
                tag, created = Tag.objects.get_or_create(
                    slug=slughifi(theme_name),
                    category='theme')
                if created:
                    tag.name = theme_name
                    tag.sort_key = sortify(theme_name.lower())
                    tag.save()
                themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = truncate_html_words(text, 15)
            if text == short_text:
                # Truncation changed nothing, so no separate short version.
                short_text = ''
            # objects.create() already saves the new row, so no extra
            # save() call is needed before assigning tags.
            new_fragment = Fragment.objects.create(
                anchor=fragment.id, book=book,
                text=text, short_text=short_text)
            new_fragment.tags = set(common_tags + themes)

        book.html_built.send(sender=book)
        return True
180
181
class OverwritingFieldFile(FieldFile):
    """
        Field file which removes its old file before storing a new one.
    """

    def save(self, name, content, *args, **kwargs):
        """Save `content`, first deleting the current file if it is replaced.

        Passing a true `leave` keyword argument keeps the current file. It
        is also kept when `content` has a `path` equal to this file's path.
        """
        leave = kwargs.pop('leave', None)
        if not leave and self:
            # Nothing to delete when the new content is the stored file.
            same_file = (hasattr(content, 'path') and
                         content.path == self.path)
            if not same_file:
                self.delete(save=False)
        parent = super(OverwritingFieldFile, self)
        return parent.save(name, content, *args, **kwargs)
195
196
class OverwritingFileField(models.FileField):
    """A file field whose files are handled by OverwritingFieldFile."""
    attr_class = OverwritingFieldFile
199
200
try:
    # South is optional: its introspection rules are registered only when
    # the package can be imported.
    from south.modelsinspector import add_introspection_rules
except ImportError:
    pass
else:
    # The patterns are regular expressions, so they are written as raw
    # strings: "\." in a plain string literal is an invalid escape sequence.
    add_introspection_rules([
        (
            [EbookField],
            [],
            {'format_name': ('format_name', {})}
        )
    ], [r"^catalogue\.fields\.EbookField"])
    add_introspection_rules([], [r"^catalogue\.fields\.OverwritingFileField"])