cover thumb with no box for api
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 import jsonfield
19 from fnpdjango.storage import BofhFileSystemStorage
20 from ssify import flush_ssi_includes
21 from newtagging import managers
22 from catalogue import constants
23 from catalogue.fields import EbookField
24 from catalogue.models import Tag, Fragment, BookMedia
25 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
26 from catalogue.models.tag import prefetched_relations
27 from catalogue import app_settings
28 from catalogue import tasks
29 from wolnelektury.utils import makedirs
30
# Storage instance passed to the `cover` field and to the per-format file
# fields created in add_file_fields().
bofh_storage = BofhFileSystemStorage()
32
33
@deconstructible
class UploadToPath(object):
    """Callable `upload_to` that fills a path template with the instance's slug."""

    def __init__(self, path):
        # Template interpolated with the slug via the `%` operator.
        self.path = path

    def __call__(self, instance, filename):
        # The uploaded file's own name is ignored; only the slug is used.
        slug = instance.slug
        return self.path % slug
41
42
# Upload-path builders for the cover image and its two thumbnail variants.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
# NOTE(review): "opload" looks like a typo for "upload". The name is used by
# Book.cover_api_thumb, so a rename has to touch both places at once.
_cover_api_thumb_opload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
46
47
def _ebook_upload_to(upload_path):
    """Build the upload-path callable for an ebook file field."""
    builder = UploadToPath(upload_path)
    return builder
50
51
class Book(models.Model):
    """Represents a book imported from WL-XML.

    The per-format file fields (``<format>_file`` for every entry of
    ``formats``) are not declared in the class body; ``add_file_fields()``
    attaches them right after the class is defined.
    """
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared between variants of a work; set in from_text_and_meta()
    # and queried by other_versions().
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it from book_info.to_dict().
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Thumbnail variant built alongside cover_thumb, labelled for the API.
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for API'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_opload_to,
        max_length=255)
    # `formats` lists every type stored directly on the book as `<format>_file`;
    # has_media()/get_media() look other types up in the related `media` set.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt from the `parent` links by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
115
    def __unicode__(self):
        """Return the book's title as its text representation."""
        return self.title
118
119     def get_initial(self):
120         try:
121             return re.search(r'\w', self.title, re.U).group(0)
122         except AttributeError:
123             return ''
124
    def authors(self):
        """Return this book's tags in the 'author' category."""
        return self.tags.filter(category='author')
127
128     def tag_unicode(self, category):
129         relations = prefetched_relations(self, category)
130         if relations:
131             return ', '.join(rel.tag.name for rel in relations)
132         else:
133             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
134
135     def tags_by_category(self):
136         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
137
    def author_unicode(self):
        """Return the author string cached on the model by save()."""
        return self.cached_author
140
141     def translator(self):
142         translators = self.extra_info.get('translators')
143         if not translators:
144             return None
145         if len(translators) > 3:
146             translators = translators[:2]
147             others = ' i inni'
148         else:
149             others = ''
150         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
151
152     def cover_source(self):
153         return self.extra_info.get('cover_source', self.parent.cover_source() if self.parent else '')
154
155     def save(self, force_insert=False, force_update=False, **kwargs):
156         from sortify import sortify
157
158         self.sort_key = sortify(self.title)[:120]
159         self.title = unicode(self.title)  # ???
160
161         try:
162             author = self.authors().first().sort_key
163         except AttributeError:
164             author = u''
165         self.sort_key_author = author
166
167         self.cached_author = self.tag_unicode('author')
168         self.has_audience = 'audience' in self.extra_info
169
170         ret = super(Book, self).save(force_insert, force_update, **kwargs)
171
172         return ret
173
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair for this book's detail page.

        The `permalink` decorator turns the pair into a URL.
        """
        return 'catalogue.views.book_detail', [self.slug]
177
    @staticmethod
    @permalink
    def create_url(slug):
        """Like get_absolute_url(), but for a slug given without a Book instance."""
        return 'catalogue.views.book_detail', [slug]
182
    def gallery_path(self):
        """Return the gallery path for this book's slug (see catalogue.utils.gallery_path)."""
        return gallery_path(self.slug)
185
    def gallery_url(self):
        """Return the gallery URL for this book's slug (see catalogue.utils.gallery_url)."""
        return gallery_url(self.slug)
188
    @property
    def name(self):
        """Alias for `title`."""
        return self.title
192
    def language_code(self):
        """Map `language` through constants.LANGUAGES_3TO2; unknown codes pass through unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
195
196     def language_name(self):
197         return dict(settings.LANGUAGES).get(self.language_code(), "")
198
    def is_foreign(self):
        """Tell whether the book's language code differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE
201
202     def has_media(self, type_):
203         if type_ in Book.formats:
204             return bool(getattr(self, "%s_file" % type_))
205         else:
206             return self.media.filter(type=type_).exists()
207
    def has_audio(self):
        """Tell whether the book has any 'mp3' media."""
        return self.has_media('mp3')
210
211     def get_media(self, type_):
212         if self.has_media(type_):
213             if type_ in Book.formats:
214                 return getattr(self, "%s_file" % type_)
215             else:
216                 return self.media.filter(type=type_)
217         else:
218             return None
219
    def get_mp3(self):
        """Shortcut for get_media("mp3")."""
        return self.get_media("mp3")
222
    def get_odt(self):
        """Shortcut for get_media("odt")."""
        return self.get_media("odt")
225
    def get_ogg(self):
        """Shortcut for get_media("ogg")."""
        return self.get_media("ogg")
228
    def get_daisy(self):
        """Shortcut for get_media("daisy")."""
        return self.get_media("daisy")
231
232     def has_description(self):
233         return len(self.description) > 0
234     has_description.short_description = _('description')
235     has_description.boolean = True
236
237     # ugly ugly ugly
238     def has_mp3_file(self):
239         return bool(self.has_media("mp3"))
240     has_mp3_file.short_description = 'MP3'
241     has_mp3_file.boolean = True
242
243     def has_ogg_file(self):
244         return bool(self.has_media("ogg"))
245     has_ogg_file.short_description = 'OGG'
246     has_ogg_file.boolean = True
247
248     def has_daisy_file(self):
249         return bool(self.has_media("daisy"))
250     has_daisy_file.short_description = 'DAISY'
251     has_daisy_file.boolean = True
252
253     def get_audiobooks(self):
254         ogg_files = {}
255         for m in self.media.filter(type='ogg').order_by().iterator():
256             ogg_files[m.name] = m
257
258         audiobooks = []
259         projects = set()
260         for mp3 in self.media.filter(type='mp3').iterator():
261             # ogg files are always from the same project
262             meta = mp3.extra_info
263             project = meta.get('project')
264             if not project:
265                 # temporary fallback
266                 project = u'CzytamySłuchając'
267
268             projects.add((project, meta.get('funded_by', '')))
269
270             media = {'mp3': mp3}
271
272             ogg = ogg_files.get(mp3.name)
273             if ogg:
274                 media['ogg'] = ogg
275             audiobooks.append(media)
276
277         projects = sorted(projects)
278         return audiobooks, projects
279
280     def wldocument(self, parse_dublincore=True, inherit=True):
281         from catalogue.import_utils import ORMDocProvider
282         from librarian.parser import WLDocument
283
284         if inherit and self.parent:
285             meta_fallbacks = self.parent.cover_info()
286         else:
287             meta_fallbacks = None
288
289         return WLDocument.from_file(
290             self.xml_file.path,
291             provider=ORMDocProvider(self),
292             parse_dublincore=parse_dublincore,
293             meta_fallbacks=meta_fallbacks)
294
295     @staticmethod
296     def zip_format(format_):
297         def pretty_file_name(book):
298             return "%s/%s.%s" % (
299                 book.extra_info['author'],
300                 book.slug,
301                 format_)
302
303         field_name = "%s_file" % format_
304         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
305         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
306         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
307
308     def zip_audiobooks(self, format_):
309         bm = BookMedia.objects.filter(book=self, type=format_)
310         paths = map(lambda bm: (None, bm.file.path), bm)
311         return create_zip(paths, "%s_%s" % (self.slug, format_))
312
313     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
314         if index is None:
315             from search.index import Index
316             index = Index()
317         try:
318             index.index_book(self, book_info)
319             if index_tags:
320                 index.index_tags()
321             if commit:
322                 index.index.commit()
323         except Exception, e:
324             index.index.rollback()
325             raise e
326
327     def download_pictures(self, remote_gallery_url):
328         gallery_path = self.gallery_path()
329         # delete previous files, so we don't include old files in ebooks
330         if os.path.isdir(gallery_path):
331             for filename in os.listdir(gallery_path):
332                 file_path = os.path.join(gallery_path, filename)
333                 os.unlink(file_path)
334         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
335         if ilustr_elements:
336             makedirs(gallery_path)
337             for ilustr in ilustr_elements:
338                 ilustr_src = ilustr.get('src')
339                 ilustr_path = os.path.join(gallery_path, ilustr_src)
340                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
341
342     @classmethod
343     def from_xml_file(cls, xml_file, **kwargs):
344         from django.core.files import File
345         from librarian import dcparser
346
347         # use librarian to parse meta-data
348         book_info = dcparser.parse(xml_file)
349
350         if not isinstance(xml_file, File):
351             xml_file = File(open(xml_file))
352
353         try:
354             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
355         finally:
356             xml_file.close()
357
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; read here for `url`, `language`,
                `title`, `variant_of`, `to_dict()` and optionally `parts`.
            overwrite: allow replacing an already existing book.
            dont_build: iterable of build names to skip ('cover' or a format
                name); always extended with app_settings.DONT_BUILD.
            search_index: schedule search indexing, unless
                settings.NO_SEARCH_INDEX is set.
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: when given, pictures are downloaded from it.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside [a-z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover data so a change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Flag every metadata tag as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags with the metadata tags plus the preserved shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # (The one exception is the final book.save() below.)

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
463
464     @classmethod
465     @transaction.atomic
466     def repopulate_ancestors(cls):
467         """Fixes the ancestry cache."""
468         # TODO: table names
469         cursor = connection.cursor()
470         if connection.vendor == 'postgres':
471             cursor.execute("TRUNCATE catalogue_book_ancestor")
472             cursor.execute("""
473                 WITH RECURSIVE ancestry AS (
474                     SELECT book.id, book.parent_id
475                     FROM catalogue_book AS book
476                     WHERE book.parent_id IS NOT NULL
477                     UNION
478                     SELECT ancestor.id, book.parent_id
479                     FROM ancestry AS ancestor, catalogue_book AS book
480                     WHERE ancestor.parent_id = book.id
481                         AND book.parent_id IS NOT NULL
482                     )
483                 INSERT INTO catalogue_book_ancestor
484                     (from_book_id, to_book_id)
485                     SELECT id, parent_id
486                     FROM ancestry
487                     ORDER BY id;
488                 """)
489         else:
490             cursor.execute("DELETE FROM catalogue_book_ancestor")
491             for b in cls.objects.exclude(parent=None):
492                 parent = b.parent
493                 while parent is not None:
494                     b.ancestor.add(parent)
495                     parent = parent.parent
496
497     def flush_includes(self, languages=True):
498         if not languages:
499             return
500         if languages is True:
501             languages = [lc for (lc, _ln) in settings.LANGUAGES]
502         flush_ssi_includes([
503             template % (self.pk, lang)
504             for template in [
505                 '/katalog/b/%d/mini.%s.html',
506                 '/katalog/b/%d/mini_nolink.%s.html',
507                 '/katalog/b/%d/short.%s.html',
508                 '/katalog/b/%d/wide.%s.html',
509                 '/api/include/book/%d.%s.json',
510                 '/api/include/book/%d.%s.xml',
511                 ]
512             for lang in languages
513             ])
514
515     def cover_info(self, inherit=True):
516         """Returns a dictionary to serve as fallback for BookInfo.
517
518         For now, the only thing inherited is the cover image.
519         """
520         need = False
521         info = {}
522         for field in ('cover_url', 'cover_by', 'cover_source'):
523             val = self.extra_info.get(field)
524             if val:
525                 info[field] = val
526             else:
527                 need = True
528         if inherit and need and self.parent is not None:
529             parent_info = self.parent.cover_info()
530             parent_info.update(info)
531             info = parent_info
532         return info
533
534     def related_themes(self):
535         return Tag.objects.usage_for_queryset(
536             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
537             counts=True).filter(category='theme')
538
539     def parent_cover_changed(self):
540         """Called when parent book's cover image is changed."""
541         if not self.cover_info(inherit=False):
542             if 'cover' not in app_settings.DONT_BUILD:
543                 self.cover.build_delay()
544                 self.cover_thumb.build_delay()
545                 self.cover_api_thumb.build_delay()
546             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
547                 if format_ not in app_settings.DONT_BUILD:
548                     getattr(self, '%s_file' % format_).build_delay()
549             for child in self.children.all():
550                 child.parent_cover_changed()
551
    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book.

        Returns the books sharing this book's `common_slug`, excluding itself.
        """
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
555
556     def parents(self):
557         books = []
558         parent = self.parent
559         while parent is not None:
560             books.insert(0, parent)
561             parent = parent.parent
562         return books
563
564     def pretty_title(self, html_links=False):
565         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
566         books = self.parents() + [self]
567         names.extend([(b.title, b.get_absolute_url()) for b in books])
568
569         if html_links:
570             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
571         else:
572             names = [tag[0] for tag in names]
573         return ', '.join(names)
574
575     def publisher(self):
576         publisher = self.extra_info['publisher']
577         if isinstance(publisher, basestring):
578             return publisher
579         elif isinstance(publisher, list):
580             return ', '.join(publisher)
581
582     @classmethod
583     def tagged_top_level(cls, tags):
584         """ Returns top-level books tagged with `tags`.
585
586         It only returns those books which don't have ancestors which are
587         also tagged with those tags.
588
589         """
590         objects = cls.tagged.with_all(tags)
591         return objects.exclude(ancestor__in=objects)
592
593     @classmethod
594     def book_list(cls, book_filter=None):
595         """Generates a hierarchical listing of all books.
596
597         Books are optionally filtered with a test function.
598
599         """
600
601         books_by_parent = {}
602         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
603         if book_filter:
604             books = books.filter(book_filter).distinct()
605
606             book_ids = set(b['pk'] for b in books.values("pk").iterator())
607             for book in books.iterator():
608                 parent = book.parent_id
609                 if parent not in book_ids:
610                     parent = None
611                 books_by_parent.setdefault(parent, []).append(book)
612         else:
613             for book in books.iterator():
614                 books_by_parent.setdefault(book.parent_id, []).append(book)
615
616         orphans = []
617         books_by_author = OrderedDict()
618         for tag in Tag.objects.filter(category='author').iterator():
619             books_by_author[tag] = []
620
621         for book in books_by_parent.get(None, ()):
622             authors = list(book.authors().only('pk'))
623             if authors:
624                 for author in authors:
625                     books_by_author[author].append(book)
626             else:
627                 orphans.append(book)
628
629         return books_by_author, orphans, books_by_parent
630
    # Maps audience codes to (sort rank, Polish label); used by audiences_pl().
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
641
642     def audiences_pl(self):
643         audiences = self.extra_info.get('audiences', [])
644         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
645         return [a[1] for a in audiences]
646
647     def stage_note(self):
648         stage = self.extra_info.get('stage')
649         if stage and stage < '0.4':
650             return (_('This work needs modernisation'),
651                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
652         else:
653             return None, None
654
655     def choose_fragment(self):
656         fragments = self.fragments.order_by()
657         fragments_count = fragments.count()
658         if not fragments_count and self.children.exists():
659             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
660             fragments_count = fragments.count()
661         if fragments_count:
662             return fragments[randint(0, fragments_count - 1)]
663         elif self.parent:
664             return self.parent.choose_fragment()
665         else:
666             return None
667
668     def fragment_data(self):
669         fragment = self.choose_fragment()
670         if fragment:
671             return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
672         else:
673             return None
674
675     def update_popularity(self):
676         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
677         try:
678             pop = self.popularity
679             pop.count = count
680             pop.save()
681         except BookPopularity.DoesNotExist:
682             BookPopularity.objects.create(book=self, count=count)
683
684     def ridero_link(self):
685         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
686
687
def add_file_fields():
    """Attach a `<format>_file` EbookField to Book for every entry of Book.formats."""
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)

add_file_fields()
706
707
class BookPopularity(models.Model):
    """Popularity counter for a book; written by Book.update_popularity()."""
    book = models.OneToOneField(Book, related_name='popularity')
    # Number of distinct users among the book's 'set' tags at the last update.
    count = models.IntegerField(default=0)