publish PDF with images
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue import app_settings
26 from catalogue import tasks
27 from wolnelektury.utils import makedirs
28
# Storage instance shared by Book.cover and the per-format ``<format>_file`` fields defined in this module.
bofh_storage = BofhFileSystemStorage()
30
31
32 def _make_upload_to(path):
33     def _upload_to(i, n):
34         return path % i.slug
35     return _upload_to
36
37
# upload_to callables for Book.cover and Book.cover_thumb; both name the file after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
40
41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable which fills `upload_path` with the book's slug."""
    uploader = _make_upload_to(upload_path)
    return uploader
44
45
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree through ``parent``; ``ancestor`` caches the transitive
    closure of that relation and is rebuilt by ``repopulate_ancestors``.
    The per-format ``<format>_file`` fields (one for each entry of
    ``Book.formats``) are not declared in the class body; ``add_file_fields``
    attaches them at the bottom of this module.
    """
    title = models.CharField(_('title'), max_length=32767)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label below repeats created_at's 'creation date'; it looks like a copy-paste slip,
    # but changing it alters the field definition (new migration, new msgid), so it is left as is.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every format which gets its own `<format>_file` field on the model.
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with `instance` set to the published book.
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised when publishing a book whose slug is taken and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'

    def __unicode__(self):
        return self.title

    def get_initial(self):
        """Return the first word character of the title, or '' if it has none."""
        try:
            return re.search(r'\w', self.title, re.U).group(0)
        except AttributeError:
            # re.search() found nothing and returned None.
            return ''

    def author_str(self):
        """Return the book's author tags as a comma-separated string."""
        return ", ".join(str(t) for t in self.tags.filter(category='author'))

    def save(self, force_insert=False, force_update=False, **kwargs):
        """Recompute `sort_key` and `sort_key_author`, then save the model."""
        from sortify import sortify

        self.sort_key = sortify(self.title)[:120]
        self.title = unicode(self.title)  # ???

        try:
            author = self.tags.filter(category='author')[0].sort_key
        except IndexError:
            # No author tag: sort as if the author were empty.
            author = u''
        self.sort_key_author = author

        return super(Book, self).save(force_insert, force_update, **kwargs)

    @permalink
    def get_absolute_url(self):
        return 'catalogue.views.book_detail', [self.slug]

    @staticmethod
    @permalink
    def create_url(slug):
        """Return the book detail URL for `slug` without needing a Book instance."""
        return 'catalogue.views.book_detail', [slug]

    def gallery_path(self):
        return gallery_path(self.slug)

    def gallery_url(self):
        return gallery_url(self.slug)

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map `language` through LANGUAGES_3TO2; unknown codes are returned unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the name settings.LANGUAGES gives for language_code(), or "" if it is not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def has_media(self, type_):
        """Tell whether the book has a file of the given type.

        Types listed in `Book.formats` are looked up in the `<type>_file`
        field; any other type is looked up among the related media.
        """
        if type_ in Book.formats:
            return bool(getattr(self, "%s_file" % type_))
        else:
            return self.media.filter(type=type_).exists()

    def get_media(self, type_):
        """Return the media of the given type, or None if the book has none.

        For types in `Book.formats` this is the `<type>_file` field value;
        for any other type it is a queryset of the matching related media.
        """
        if not self.has_media(type_):
            return None
        if type_ in Book.formats:
            return getattr(self, "%s_file" % type_)
        return self.media.filter(type=type_)

    def get_mp3(self):
        return self.get_media("mp3")

    def get_odt(self):
        return self.get_media("odt")

    def get_ogg(self):
        return self.get_media("ogg")

    def get_daisy(self):
        return self.get_media("daisy")

    def has_description(self):
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    def has_mp3_file(self):
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def wldocument(self, parse_dublincore=True, inherit=True):
        """Load the book's XML file as a librarian WLDocument.

        With `inherit` set and a parent present, the parent's cover_info()
        is passed to the document as `meta_fallbacks`.
        """
        from catalogue.import_utils import ORMDocProvider
        from librarian.parser import WLDocument

        if inherit and self.parent:
            meta_fallbacks = self.parent.cover_info()
        else:
            meta_fallbacks = None

        return WLDocument.from_file(
            self.xml_file.path,
            provider=ORMDocProvider(self),
            parse_dublincore=parse_dublincore,
            meta_fallbacks=meta_fallbacks)

    @staticmethod
    def zip_format(format_):
        """Zip the `format_` files of all parentless books which have one.

        Files are stored in the archive as `<author>/<slug>.<format_>`, with
        the author taken from the book's `extra_info`. The archive name comes
        from `app_settings.FORMAT_ZIPS`.
        """
        def pretty_file_name(book):
            return "%s/%s.%s" % (
                book.extra_info['author'],
                book.slug,
                format_)

        field_name = "%s_file" % format_
        books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
        paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
        return create_zip(paths, app_settings.FORMAT_ZIPS[format_])

    def zip_audiobooks(self, format_):
        """Zip all of this book's media files of type `format_` into `<slug>_<format_>`."""
        media = BookMedia.objects.filter(book=self, type=format_)
        paths = [(None, m.file.path) for m in media]
        return create_zip(paths, "%s_%s" % (self.slug, format_))

    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
        """Index the book in the search index.

        A new `search.index.Index` is created unless `index` is given.
        Tags are indexed too if `index_tags` is set, and the index is
        committed if `commit` is set. On any error the index is rolled back
        and the error is re-raised.
        """
        if index is None:
            from search.index import Index
            index = Index()
        try:
            index.index_book(self, book_info)
            if index_tags:
                index.index_tags()
            if commit:
                index.index.commit()
        except Exception:
            index.index.rollback()
            # Bare raise keeps the original traceback.
            raise

    def download_pictures(self, remote_gallery_url):
        """Fetch every illustration referenced in the book's XML into its gallery directory.

        Each `ilustr` element's `src` is downloaded from
        `<remote_gallery_url>/<src>` and stored under gallery_path().
        """
        gallery_path = self.gallery_path()
        ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
        if ilustr_elements:
            makedirs(gallery_path)
            for ilustr in ilustr_elements:
                ilustr_src = ilustr.get('src')
                ilustr_path = os.path.join(gallery_path, ilustr_src)
                urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)

    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Publish a book from an XML file.

        `xml_file` is either a Django `File` or something `open()` accepts.
        Metadata is parsed with librarian's dcparser and everything else is
        delegated to from_text_and_meta(), which also receives `kwargs`.
        The file is closed afterwards in either case.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object with the book's XML, saved as `xml_file`.
            book_info: parsed metadata; supplies slug, title, language,
                optional parts and `variant_of`.
            overwrite: allow updating a book which already exists.
            dont_build: names of files not to build (e.g. 'cover' or an
                ebook format), in addition to `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (also requires
                `settings.NO_SEARCH_INDEX` to be false).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: if given, illustrations are downloaded
                from there with download_pictures().

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info` is not published.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tags with those from metadata, keeping the shelves saved above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Children whose inherited cover may need rebuilding; notified once all saving is done.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book

    @classmethod
    @transaction.atomic
    def repopulate_ancestors(cls):
        """Fixes the ancestry cache.

        Empties the `ancestor` relation table and refills it with a
        (book, ancestor) row for every book and each book above it in the
        parent chain. On PostgreSQL this is done with a single recursive
        query; on other databases the parent chain of every book is walked
        through the ORM.
        """
        # TODO: table names
        cursor = connection.cursor()
        # Django reports the PostgreSQL backend's vendor as 'postgresql'.
        if connection.vendor == 'postgresql':
            cursor.execute("TRUNCATE catalogue_book_ancestor")
            cursor.execute("""
                WITH RECURSIVE ancestry AS (
                    SELECT book.id, book.parent_id
                    FROM catalogue_book AS book
                    WHERE book.parent_id IS NOT NULL
                    UNION
                    SELECT ancestor.id, book.parent_id
                    FROM ancestry AS ancestor, catalogue_book AS book
                    WHERE ancestor.parent_id = book.id
                        AND book.parent_id IS NOT NULL
                    )
                INSERT INTO catalogue_book_ancestor
                    (from_book_id, to_book_id)
                    SELECT id, parent_id
                    FROM ancestry
                    ORDER BY id;
                """)
        else:
            cursor.execute("DELETE FROM catalogue_book_ancestor")
            for b in cls.objects.exclude(parent=None):
                parent = b.parent
                while parent is not None:
                    b.ancestor.add(parent)
                    parent = parent.parent

    def flush_includes(self, languages=True):
        """Flush the book's SSI includes.

        `languages` is a list of language codes to flush, True for all of
        settings.LANGUAGES, or a false value to do nothing.
        """
        if not languages:
            return
        if languages is True:
            languages = [lc for (lc, _ln) in settings.LANGUAGES]
        flush_ssi_includes([
            template % (self.pk, lang)
            for template in [
                '/katalog/b/%d/mini.%s.html',
                '/katalog/b/%d/mini_nolink.%s.html',
                '/katalog/b/%d/short.%s.html',
                '/katalog/b/%d/wide.%s.html',
                '/api/include/book/%d.%s.json',
                '/api/include/book/%d.%s.xml',
                ]
            for lang in languages
            ])

    def cover_info(self, inherit=True):
        """Returns a dictionary to serve as fallback for BookInfo.

        For now, the only thing inherited is the cover image.

        The dictionary holds whichever of `cover_url`, `cover_by` and
        `cover_source` are set in `extra_info`. If any of them is missing
        and `inherit` is set, the missing ones are filled in from the
        parent's cover_info(), if there is a parent.
        """
        need = False
        info = {}
        for field in ('cover_url', 'cover_by', 'cover_source'):
            val = self.extra_info.get(field)
            if val:
                info[field] = val
            else:
                need = True
        if inherit and need and self.parent is not None:
            # Own values take precedence over the parent's.
            parent_info = self.parent.cover_info()
            parent_info.update(info)
            info = parent_info
        return info

    def related_themes(self):
        """Return theme tags, with usage counts, of fragments of this book and of its descendants."""
        return Tag.objects.usage_for_queryset(
            Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
            counts=True).filter(category='theme')

    def parent_cover_changed(self):
        """Called when parent book's cover image is changed.

        Only acts if the book has no cover information of its own: the
        cover, the thumbnail and the formats listed in
        `constants.EBOOK_FORMATS_WITH_COVERS` are scheduled for rebuilding
        (respecting `app_settings.DONT_BUILD`) and the children are
        notified in turn.
        """
        if not self.cover_info(inherit=False):
            if 'cover' not in app_settings.DONT_BUILD:
                self.cover.build_delay()
                self.cover_thumb.build_delay()
            for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                if format_ not in app_settings.DONT_BUILD:
                    getattr(self, '%s_file' % format_).build_delay()
            for child in self.children.all():
                child.parent_cover_changed()

    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book."""
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)

    def parents(self):
        """Return the chain of the book's parents, topmost first."""
        books = []
        parent = self.parent
        while parent is not None:
            books.append(parent)
            parent = parent.parent
        # Collected bottom-up; callers expect the topmost parent first.
        books.reverse()
        return books

    def pretty_title(self, html_links=False):
        """Return "authors, parent titles, title" as one comma-separated string.

        With `html_links` set, every name is wrapped in a link to the
        respective tag's or book's page.
        """
        names = [(tag.name, tag.get_absolute_url()) for tag in self.tags.filter(category='author')]
        books = self.parents() + [self]
        names.extend([(b.title, b.get_absolute_url()) for b in books])

        if html_links:
            # NOTE(review): names and URLs are interpolated without HTML escaping -- confirm callers expect that.
            names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
        else:
            names = [tag[0] for tag in names]
        return ', '.join(names)

    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        objects = cls.tagged.with_all(tags)
        return objects.exclude(ancestor__in=objects)

    @classmethod
    def book_list(cls, book_filter=None):
        """Generates a hierarchical listing of all books.

        Books are optionally filtered with a test function.

        Returns a tuple `(books_by_author, orphans, books_by_parent)`:
        an OrderedDict mapping every author tag to its top-level books,
        a list of top-level books without an author tag, and a dict mapping
        a parent id (None for top-level books) to the list of its children.
        With `book_filter` given, a book whose parent was filtered out is
        treated as top-level.

        """

        books_by_parent = {}
        books = cls.objects.all().order_by('parent_number', 'sort_key').only(
                'title', 'parent', 'slug')
        if book_filter:
            books = books.filter(book_filter).distinct()

            book_ids = set(b['pk'] for b in books.values("pk").iterator())
            for book in books.iterator():
                parent = book.parent_id
                if parent not in book_ids:
                    parent = None
                books_by_parent.setdefault(parent, []).append(book)
        else:
            for book in books.iterator():
                books_by_parent.setdefault(book.parent_id, []).append(book)

        orphans = []
        books_by_author = OrderedDict()
        for tag in Tag.objects.filter(category='author').iterator():
            books_by_author[tag] = []

        for book in books_by_parent.get(None, ()):
            authors = list(book.tags.filter(category='author'))
            if authors:
                for author in authors:
                    books_by_author[author].append(book)
            else:
                orphans.append(book)

        return books_by_author, orphans, books_by_parent

    # Audience code -> (sort order, Polish name); several codes share one name.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }

    def audiences_pl(self):
        """Return the Polish names of the book's audiences, deduplicated and sorted.

        Audience codes missing from `_audiences_pl` are kept verbatim and
        sorted after the known ones.
        """
        audiences = self.extra_info.get('audiences', [])
        audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
        return [a[1] for a in audiences]

    def stage_note(self):
        """Return a `(note, url)` pair for works needing modernisation, or `(None, None)`."""
        stage = self.extra_info.get('stage')
        # NOTE(review): this compares strings, so e.g. '0.10' sorts before '0.4' -- confirm stage values.
        if stage and stage < '0.4':
            return (_('This work needs modernisation'),
                    reverse('infopage', args=['wymagajace-uwspolczesnienia']))
        else:
            return None, None

    def choose_fragment(self):
        """Pick a random fragment for the book.

        The book's own fragments are tried first, then (if the book has
        children) those of its descendants, and finally the choice is
        delegated to the parent. Returns None if nothing is found.
        """
        fragments = self.fragments.order_by()
        fragments_count = fragments.count()
        if not fragments_count and self.children.exists():
            fragments = Fragment.objects.filter(book__ancestor=self).order_by()
            fragments_count = fragments.count()
        if fragments_count:
            return fragments[randint(0, fragments_count - 1)]
        elif self.parent:
            return self.parent.choose_fragment()
        else:
            return None
568
569
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every entry of Book.formats."""
    for format_ in Book.formats:
        # The upload_to callable gets a unique name and is published in the
        # module globals under it; this keeps Django migrations comfortable.
        uploader_name = '_%s_upload_to' % format_
        uploader = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        uploader.__name__ = uploader_name
        globals()[uploader_name] = uploader

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=uploader,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default='')
        file_field.contribute_to_class(Book, "%s_file" % format_)


add_file_fields()