minor fixes for images in epub/mobi
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue import app_settings
26 from catalogue import tasks
27 from wolnelektury.utils import makedirs
28
# Storage instance shared by the cover field and the generated ebook file fields.
bofh_storage = BofhFileSystemStorage()
30
31
32 def _make_upload_to(path):
33     def _upload_to(i, n):
34         return path % i.slug
35     return _upload_to
36
37
# upload_to callables for the cover image and its thumbnail; the stored file
# name is derived from the book slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
40
41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for an ebook file field.

    Thin wrapper around ``_make_upload_to``; ``upload_path`` is a %-format
    template that receives the book slug.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
44
45
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Derived from the title in save(); used as the default ordering (see Meta).
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    # Sort key of the first author tag, refreshed in save().
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by variants of one work; set in from_text_and_meta() and
    # queried by other_versions().
    # NOTE(review): the verbose name repeats 'slug' -- confirm this is intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the verbose name repeats 'creation date' although the field
    # uses auto_now -- looks like a copy-paste; confirm before changing.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's parts (set in from_text_and_meta()).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Filled with book_info.to_dict() in from_text_and_meta().
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every format listed here gets a "<format>_file" field attached by
    # add_file_fields() at module import time.
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors, rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals. `published` is sent at the end of from_text_and_meta();
    # `html_built` is not sent anywhere in this module (presumably by the HTML
    # build code -- verify).
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
99
100     def __unicode__(self):
101         return self.title
102
103     def get_initial(self):
104         try:
105             return re.search(r'\w', self.title, re.U).group(0)
106         except AttributeError:
107             return ''
108
109     def author_str(self):
110         return ", ".join(str(t) for t in self.tags.filter(category='author'))
111
112     def save(self, force_insert=False, force_update=False, **kwargs):
113         from sortify import sortify
114
115         self.sort_key = sortify(self.title)[:120]
116         self.title = unicode(self.title)  # ???
117
118         try:
119             author = self.tags.filter(category='author')[0].sort_key
120         except IndexError:
121             author = u''
122         self.sort_key_author = author
123
124         ret = super(Book, self).save(force_insert, force_update, **kwargs)
125
126         return ret
127
128     @permalink
129     def get_absolute_url(self):
130         return 'catalogue.views.book_detail', [self.slug]
131
132     @staticmethod
133     @permalink
134     def create_url(slug):
135         return 'catalogue.views.book_detail', [slug]
136
137     def gallery_path(self):
138         return gallery_path(self.slug)
139
140     def gallery_url(self):
141         return gallery_url(self.slug)
142
143     @property
144     def name(self):
145         return self.title
146
147     def language_code(self):
148         return constants.LANGUAGES_3TO2.get(self.language, self.language)
149
150     def language_name(self):
151         return dict(settings.LANGUAGES).get(self.language_code(), "")
152
153     def has_media(self, type_):
154         if type_ in Book.formats:
155             return bool(getattr(self, "%s_file" % type_))
156         else:
157             return self.media.filter(type=type_).exists()
158
159     def get_media(self, type_):
160         if self.has_media(type_):
161             if type_ in Book.formats:
162                 return getattr(self, "%s_file" % type_)
163             else:
164                 return self.media.filter(type=type_)
165         else:
166             return None
167
168     def get_mp3(self):
169         return self.get_media("mp3")
170
171     def get_odt(self):
172         return self.get_media("odt")
173
174     def get_ogg(self):
175         return self.get_media("ogg")
176
177     def get_daisy(self):
178         return self.get_media("daisy")
179
180     def has_description(self):
181         return len(self.description) > 0
182     has_description.short_description = _('description')
183     has_description.boolean = True
184
185     # ugly ugly ugly
186     def has_mp3_file(self):
187         return bool(self.has_media("mp3"))
188     has_mp3_file.short_description = 'MP3'
189     has_mp3_file.boolean = True
190
191     def has_ogg_file(self):
192         return bool(self.has_media("ogg"))
193     has_ogg_file.short_description = 'OGG'
194     has_ogg_file.boolean = True
195
196     def has_daisy_file(self):
197         return bool(self.has_media("daisy"))
198     has_daisy_file.short_description = 'DAISY'
199     has_daisy_file.boolean = True
200
201     def wldocument(self, parse_dublincore=True, inherit=True):
202         from catalogue.import_utils import ORMDocProvider
203         from librarian.parser import WLDocument
204
205         if inherit and self.parent:
206             meta_fallbacks = self.parent.cover_info()
207         else:
208             meta_fallbacks = None
209
210         return WLDocument.from_file(
211             self.xml_file.path,
212             provider=ORMDocProvider(self),
213             parse_dublincore=parse_dublincore,
214             meta_fallbacks=meta_fallbacks)
215
216     @staticmethod
217     def zip_format(format_):
218         def pretty_file_name(book):
219             return "%s/%s.%s" % (
220                 book.extra_info['author'],
221                 book.slug,
222                 format_)
223
224         field_name = "%s_file" % format_
225         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
226         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
227         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
228
229     def zip_audiobooks(self, format_):
230         bm = BookMedia.objects.filter(book=self, type=format_)
231         paths = map(lambda bm: (None, bm.file.path), bm)
232         return create_zip(paths, "%s_%s" % (self.slug, format_))
233
234     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
235         if index is None:
236             from search.index import Index
237             index = Index()
238         try:
239             index.index_book(self, book_info)
240             if index_tags:
241                 index.index_tags()
242             if commit:
243                 index.index.commit()
244         except Exception, e:
245             index.index.rollback()
246             raise e
247
248     def download_pictures(self, remote_gallery_url):
249         gallery_path = self.gallery_path()
250         # delete previous files, so we don't include old files in ebooks
251         for filename in os.listdir(gallery_path):
252             file_path = os.path.join(gallery_path, filename)
253             os.unlink(file_path)
254         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
255         if ilustr_elements:
256             makedirs(gallery_path)
257             for ilustr in ilustr_elements:
258                 ilustr_src = ilustr.get('src')
259                 ilustr_path = os.path.join(gallery_path, ilustr_src)
260                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
261
262     @classmethod
263     def from_xml_file(cls, xml_file, **kwargs):
264         from django.core.files import File
265         from librarian import dcparser
266
267         # use librarian to parse meta-data
268         book_info = dcparser.parse(xml_file)
269
270         if not isinstance(xml_file, File):
271             xml_file = File(open(xml_file))
272
273         try:
274             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
275         finally:
276             xml_file.close()
277
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Saves the XML and metadata, assigns tags, re-links child books,
        rebuilds the ancestor cache and schedules the cover, HTML and ebook
        builds, then sends the ``published`` signal.

        :param raw_file: file object saved as the book's ``xml_file``.
        :param book_info: parsed metadata; ``url.slug``, ``language``,
            ``title``, ``variant_of``, ``to_dict()`` and the optional
            ``parts`` are read from it.
        :param overwrite: allow updating a book whose slug already exists.
        :param dont_build: iterable of build names to skip ('cover' or a
            format name); merged with ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (also requires
            ``settings.NO_SEARCH_INDEX`` to be false).
        :param search_index_tags: passed to the indexing task as ``index_tags``.
        :param remote_gallery_url: when given, illustrations are downloaded
            from it with ``download_pictures``.
        :returns: the created or updated book.
        :raises Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
        :raises ValueError: the slug contains characters outside ``[a-z0-9-]``.
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags are replaced by the metadata tags plus the shelves kept above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            # Former children are notified only if this book had cover info
            # before the update.
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # The HTML build is scheduled unconditionally; dont_build is not
        # consulted for it.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book
376
377     @classmethod
378     @transaction.atomic
379     def repopulate_ancestors(cls):
380         """Fixes the ancestry cache."""
381         # TODO: table names
382         cursor = connection.cursor()
383         if connection.vendor == 'postgres':
384             cursor.execute("TRUNCATE catalogue_book_ancestor")
385             cursor.execute("""
386                 WITH RECURSIVE ancestry AS (
387                     SELECT book.id, book.parent_id
388                     FROM catalogue_book AS book
389                     WHERE book.parent_id IS NOT NULL
390                     UNION
391                     SELECT ancestor.id, book.parent_id
392                     FROM ancestry AS ancestor, catalogue_book AS book
393                     WHERE ancestor.parent_id = book.id
394                         AND book.parent_id IS NOT NULL
395                     )
396                 INSERT INTO catalogue_book_ancestor
397                     (from_book_id, to_book_id)
398                     SELECT id, parent_id
399                     FROM ancestry
400                     ORDER BY id;
401                 """)
402         else:
403             cursor.execute("DELETE FROM catalogue_book_ancestor")
404             for b in cls.objects.exclude(parent=None):
405                 parent = b.parent
406                 while parent is not None:
407                     b.ancestor.add(parent)
408                     parent = parent.parent
409
410     def flush_includes(self, languages=True):
411         if not languages:
412             return
413         if languages is True:
414             languages = [lc for (lc, _ln) in settings.LANGUAGES]
415         flush_ssi_includes([
416             template % (self.pk, lang)
417             for template in [
418                 '/katalog/b/%d/mini.%s.html',
419                 '/katalog/b/%d/mini_nolink.%s.html',
420                 '/katalog/b/%d/short.%s.html',
421                 '/katalog/b/%d/wide.%s.html',
422                 '/api/include/book/%d.%s.json',
423                 '/api/include/book/%d.%s.xml',
424                 ]
425             for lang in languages
426             ])
427
428     def cover_info(self, inherit=True):
429         """Returns a dictionary to serve as fallback for BookInfo.
430
431         For now, the only thing inherited is the cover image.
432         """
433         need = False
434         info = {}
435         for field in ('cover_url', 'cover_by', 'cover_source'):
436             val = self.extra_info.get(field)
437             if val:
438                 info[field] = val
439             else:
440                 need = True
441         if inherit and need and self.parent is not None:
442             parent_info = self.parent.cover_info()
443             parent_info.update(info)
444             info = parent_info
445         return info
446
447     def related_themes(self):
448         return Tag.objects.usage_for_queryset(
449             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
450             counts=True).filter(category='theme')
451
452     def parent_cover_changed(self):
453         """Called when parent book's cover image is changed."""
454         if not self.cover_info(inherit=False):
455             if 'cover' not in app_settings.DONT_BUILD:
456                 self.cover.build_delay()
457                 self.cover_thumb.build_delay()
458             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
459                 if format_ not in app_settings.DONT_BUILD:
460                     getattr(self, '%s_file' % format_).build_delay()
461             for child in self.children.all():
462                 child.parent_cover_changed()
463
464     def other_versions(self):
465         """Find other versions (i.e. in other languages) of the book."""
466         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
467
468     def parents(self):
469         books = []
470         parent = self.parent
471         while parent is not None:
472             books.insert(0, parent)
473             parent = parent.parent
474         return books
475
476     def pretty_title(self, html_links=False):
477         names = [(tag.name, tag.get_absolute_url()) for tag in self.tags.filter(category='author')]
478         books = self.parents() + [self]
479         names.extend([(b.title, b.get_absolute_url()) for b in books])
480
481         if html_links:
482             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
483         else:
484             names = [tag[0] for tag in names]
485         return ', '.join(names)
486
487     @classmethod
488     def tagged_top_level(cls, tags):
489         """ Returns top-level books tagged with `tags`.
490
491         It only returns those books which don't have ancestors which are
492         also tagged with those tags.
493
494         """
495         objects = cls.tagged.with_all(tags)
496         return objects.exclude(ancestor__in=objects)
497
498     @classmethod
499     def book_list(cls, book_filter=None):
500         """Generates a hierarchical listing of all books.
501
502         Books are optionally filtered with a test function.
503
504         """
505
506         books_by_parent = {}
507         books = cls.objects.all().order_by('parent_number', 'sort_key').only(
508                 'title', 'parent', 'slug')
509         if book_filter:
510             books = books.filter(book_filter).distinct()
511
512             book_ids = set(b['pk'] for b in books.values("pk").iterator())
513             for book in books.iterator():
514                 parent = book.parent_id
515                 if parent not in book_ids:
516                     parent = None
517                 books_by_parent.setdefault(parent, []).append(book)
518         else:
519             for book in books.iterator():
520                 books_by_parent.setdefault(book.parent_id, []).append(book)
521
522         orphans = []
523         books_by_author = OrderedDict()
524         for tag in Tag.objects.filter(category='author').iterator():
525             books_by_author[tag] = []
526
527         for book in books_by_parent.get(None, ()):
528             authors = list(book.tags.filter(category='author'))
529             if authors:
530                 for author in authors:
531                     books_by_author[author].append(book)
532             else:
533                 orphans.append(book)
534
535         return books_by_author, orphans, books_by_parent
536
    # Maps audience codes to (sort rank, Polish label); audiences_pl() sorts
    # by the rank and returns the labels. The labels are Polish for primary
    # school, gimnazjum (middle school) and liceum (secondary school).
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
546
547     def audiences_pl(self):
548         audiences = self.extra_info.get('audiences', [])
549         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
550         return [a[1] for a in audiences]
551
552     def stage_note(self):
553         stage = self.extra_info.get('stage')
554         if stage and stage < '0.4':
555             return (_('This work needs modernisation'),
556                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
557         else:
558             return None, None
559
560     def choose_fragment(self):
561         fragments = self.fragments.order_by()
562         fragments_count = fragments.count()
563         if not fragments_count and self.children.exists():
564             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
565             fragments_count = fragments.count()
566         if fragments_count:
567             return fragments[randint(0, fragments_count - 1)]
568         elif self.parent:
569             return self.parent.choose_fragment()
570         else:
571             return None
572
573
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for each of ``Book.formats``.

    Files are stored under "book/<format>/<slug>.<format>".
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable:
        # each upload_to callable gets its own name and is published under
        # that name at module level.
        upload_to_name = '_%s_upload_to' % format_
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = upload_to_name
        globals()[upload_to_name] = upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)
590
# Run at import time so that Book has its "<format>_file" fields as soon as
# the module is loaded.
add_file_fields()