Merge branch 'api'
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance, passed as ``storage=`` to the cover field and to
# the per-format file fields attached by add_file_fields().
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# Module-level upload_to callables used by the Book.cover and
# Book.cover_thumb fields; both name the file after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for an ebook file field.

    Thin wrapper which delegates to ``_make_upload_to`` with the same
    path template.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared in the class body, one ``<format>_file``
    field per entry of ``Book.formats`` is attached after the class
    definition by ``add_file_fields()``.
    """
    title = models.CharField(_('title'), max_length=32767)
    # Derived from the title in save(); not editable by hand.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    # Sort key of the first author tag (or u''), refreshed in save().
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by variants of the same work; other_versions() filters on it.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Index of this book among its parent's parts; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it with book_info.to_dict().
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Format names; each entry of ``formats`` gets a ``<format>_file`` field.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalised values, both recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug is taken and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
105
106     def __unicode__(self):
107         return self.title
108
109     def get_initial(self):
110         try:
111             return re.search(r'\w', self.title, re.U).group(0)
112         except AttributeError:
113             return ''
114
115     def authors(self):
116         return self.tags.filter(category='author')
117
118     def tag_unicode(self, category):
119         relations = prefetched_relations(self, category)
120         if relations:
121             return ', '.join(rel.tag.name for rel in relations)
122         else:
123             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
124
125     def tags_by_category(self):
126         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
127
128     def author_unicode(self):
129         return self.cached_author
130
131     def translator(self):
132         translators = self.extra_info.get('translators')
133         if not translators:
134             return None
135         if len(translators) > 3:
136             translators = translators[:2]
137             others = ' i inni'
138         else:
139             others = ''
140         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
141
142     def cover_source(self):
143         return self.extra_info.get('cover_source', self.parent.cover_source() if self.parent else '')
144
145     def save(self, force_insert=False, force_update=False, **kwargs):
146         from sortify import sortify
147
148         self.sort_key = sortify(self.title)[:120]
149         self.title = unicode(self.title)  # ???
150
151         try:
152             author = self.authors().first().sort_key
153         except AttributeError:
154             author = u''
155         self.sort_key_author = author
156
157         self.cached_author = self.tag_unicode('author')
158         self.has_audience = 'audience' in self.extra_info
159
160         ret = super(Book, self).save(force_insert, force_update, **kwargs)
161
162         return ret
163
164     @permalink
165     def get_absolute_url(self):
166         return 'catalogue.views.book_detail', [self.slug]
167
168     @staticmethod
169     @permalink
170     def create_url(slug):
171         return 'catalogue.views.book_detail', [slug]
172
173     def gallery_path(self):
174         return gallery_path(self.slug)
175
176     def gallery_url(self):
177         return gallery_url(self.slug)
178
179     @property
180     def name(self):
181         return self.title
182
183     def language_code(self):
184         return constants.LANGUAGES_3TO2.get(self.language, self.language)
185
186     def language_name(self):
187         return dict(settings.LANGUAGES).get(self.language_code(), "")
188
189     def is_foreign(self):
190         return self.language_code() != settings.LANGUAGE_CODE
191
192     def has_media(self, type_):
193         if type_ in Book.formats:
194             return bool(getattr(self, "%s_file" % type_))
195         else:
196             return self.media.filter(type=type_).exists()
197
198     def get_media(self, type_):
199         if self.has_media(type_):
200             if type_ in Book.formats:
201                 return getattr(self, "%s_file" % type_)
202             else:
203                 return self.media.filter(type=type_)
204         else:
205             return None
206
207     def get_mp3(self):
208         return self.get_media("mp3")
209
210     def get_odt(self):
211         return self.get_media("odt")
212
213     def get_ogg(self):
214         return self.get_media("ogg")
215
216     def get_daisy(self):
217         return self.get_media("daisy")
218
219     def has_description(self):
220         return len(self.description) > 0
221     has_description.short_description = _('description')
222     has_description.boolean = True
223
224     # ugly ugly ugly
225     def has_mp3_file(self):
226         return bool(self.has_media("mp3"))
227     has_mp3_file.short_description = 'MP3'
228     has_mp3_file.boolean = True
229
230     def has_ogg_file(self):
231         return bool(self.has_media("ogg"))
232     has_ogg_file.short_description = 'OGG'
233     has_ogg_file.boolean = True
234
235     def has_daisy_file(self):
236         return bool(self.has_media("daisy"))
237     has_daisy_file.short_description = 'DAISY'
238     has_daisy_file.boolean = True
239
240     def get_audiobooks(self):
241         ogg_files = {}
242         for m in self.media.filter(type='ogg').order_by().iterator():
243             ogg_files[m.name] = m
244
245         audiobooks = []
246         projects = set()
247         for mp3 in self.media.filter(type='mp3').iterator():
248             # ogg files are always from the same project
249             meta = mp3.extra_info
250             project = meta.get('project')
251             if not project:
252                 # temporary fallback
253                 project = u'CzytamySłuchając'
254
255             projects.add((project, meta.get('funded_by', '')))
256
257             media = {'mp3': mp3}
258
259             ogg = ogg_files.get(mp3.name)
260             if ogg:
261                 media['ogg'] = ogg
262             audiobooks.append(media)
263
264         projects = sorted(projects)
265         return audiobooks, projects
266
267     def wldocument(self, parse_dublincore=True, inherit=True):
268         from catalogue.import_utils import ORMDocProvider
269         from librarian.parser import WLDocument
270
271         if inherit and self.parent:
272             meta_fallbacks = self.parent.cover_info()
273         else:
274             meta_fallbacks = None
275
276         return WLDocument.from_file(
277             self.xml_file.path,
278             provider=ORMDocProvider(self),
279             parse_dublincore=parse_dublincore,
280             meta_fallbacks=meta_fallbacks)
281
282     @staticmethod
283     def zip_format(format_):
284         def pretty_file_name(book):
285             return "%s/%s.%s" % (
286                 book.extra_info['author'],
287                 book.slug,
288                 format_)
289
290         field_name = "%s_file" % format_
291         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
292         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
293         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
294
295     def zip_audiobooks(self, format_):
296         bm = BookMedia.objects.filter(book=self, type=format_)
297         paths = map(lambda bm: (None, bm.file.path), bm)
298         return create_zip(paths, "%s_%s" % (self.slug, format_))
299
300     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
301         if index is None:
302             from search.index import Index
303             index = Index()
304         try:
305             index.index_book(self, book_info)
306             if index_tags:
307                 index.index_tags()
308             if commit:
309                 index.index.commit()
310         except Exception, e:
311             index.index.rollback()
312             raise e
313
314     def download_pictures(self, remote_gallery_url):
315         gallery_path = self.gallery_path()
316         # delete previous files, so we don't include old files in ebooks
317         if os.path.isdir(gallery_path):
318             for filename in os.listdir(gallery_path):
319                 file_path = os.path.join(gallery_path, filename)
320                 os.unlink(file_path)
321         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
322         if ilustr_elements:
323             makedirs(gallery_path)
324             for ilustr in ilustr_elements:
325                 ilustr_src = ilustr.get('src')
326                 ilustr_path = os.path.join(gallery_path, ilustr_src)
327                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
328
329     @classmethod
330     def from_xml_file(cls, xml_file, **kwargs):
331         from django.core.files import File
332         from librarian import dcparser
333
334         # use librarian to parse meta-data
335         book_info = dcparser.parse(xml_file)
336
337         if not isinstance(xml_file, File):
338             xml_file = File(open(xml_file))
339
340         try:
341             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
342         finally:
343             xml_file.close()
344
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's XML file.
        :param book_info: metadata object; this method reads its ``url``,
            ``language``, ``title``, ``variant_of``, optional ``parts`` and
            calls its ``to_dict()``.
        :param overwrite: allow updating a book which already exists.
        :param dont_build: names ('cover' or ebook formats) not to build;
            merged with ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (unless
            ``settings.NO_SEARCH_INDEX`` is set).
        :param search_index_tags: passed as ``index_tags`` to the index task.
        :param remote_gallery_url: if given, pictures are downloaded from it.
        :returns: the saved book.
        :raises Book.DoesNotExist: if a part listed in the metadata is missing.
        :raises ValueError: if the slug has characters outside ``[a-z0-9-]``.
        :raises Book.AlreadyExists: if the book exists and ``overwrite`` is off.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover info to detect a cover change below.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tags with those from metadata, keeping the shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts, numbering them in metadata order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # NOTE(review): book.save() is nevertheless called near the end of
        # this method, after the builds have been scheduled.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # HTML is scheduled unconditionally; dont_build is not consulted for it.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
444
445     @classmethod
446     @transaction.atomic
447     def repopulate_ancestors(cls):
448         """Fixes the ancestry cache."""
449         # TODO: table names
450         cursor = connection.cursor()
451         if connection.vendor == 'postgres':
452             cursor.execute("TRUNCATE catalogue_book_ancestor")
453             cursor.execute("""
454                 WITH RECURSIVE ancestry AS (
455                     SELECT book.id, book.parent_id
456                     FROM catalogue_book AS book
457                     WHERE book.parent_id IS NOT NULL
458                     UNION
459                     SELECT ancestor.id, book.parent_id
460                     FROM ancestry AS ancestor, catalogue_book AS book
461                     WHERE ancestor.parent_id = book.id
462                         AND book.parent_id IS NOT NULL
463                     )
464                 INSERT INTO catalogue_book_ancestor
465                     (from_book_id, to_book_id)
466                     SELECT id, parent_id
467                     FROM ancestry
468                     ORDER BY id;
469                 """)
470         else:
471             cursor.execute("DELETE FROM catalogue_book_ancestor")
472             for b in cls.objects.exclude(parent=None):
473                 parent = b.parent
474                 while parent is not None:
475                     b.ancestor.add(parent)
476                     parent = parent.parent
477
478     def flush_includes(self, languages=True):
479         if not languages:
480             return
481         if languages is True:
482             languages = [lc for (lc, _ln) in settings.LANGUAGES]
483         flush_ssi_includes([
484             template % (self.pk, lang)
485             for template in [
486                 '/katalog/b/%d/mini.%s.html',
487                 '/katalog/b/%d/mini_nolink.%s.html',
488                 '/katalog/b/%d/short.%s.html',
489                 '/katalog/b/%d/wide.%s.html',
490                 '/api/include/book/%d.%s.json',
491                 '/api/include/book/%d.%s.xml',
492                 ]
493             for lang in languages
494             ])
495
496     def cover_info(self, inherit=True):
497         """Returns a dictionary to serve as fallback for BookInfo.
498
499         For now, the only thing inherited is the cover image.
500         """
501         need = False
502         info = {}
503         for field in ('cover_url', 'cover_by', 'cover_source'):
504             val = self.extra_info.get(field)
505             if val:
506                 info[field] = val
507             else:
508                 need = True
509         if inherit and need and self.parent is not None:
510             parent_info = self.parent.cover_info()
511             parent_info.update(info)
512             info = parent_info
513         return info
514
515     def related_themes(self):
516         return Tag.objects.usage_for_queryset(
517             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
518             counts=True).filter(category='theme')
519
520     def parent_cover_changed(self):
521         """Called when parent book's cover image is changed."""
522         if not self.cover_info(inherit=False):
523             if 'cover' not in app_settings.DONT_BUILD:
524                 self.cover.build_delay()
525                 self.cover_thumb.build_delay()
526             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
527                 if format_ not in app_settings.DONT_BUILD:
528                     getattr(self, '%s_file' % format_).build_delay()
529             for child in self.children.all():
530                 child.parent_cover_changed()
531
532     def other_versions(self):
533         """Find other versions (i.e. in other languages) of the book."""
534         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
535
536     def parents(self):
537         books = []
538         parent = self.parent
539         while parent is not None:
540             books.insert(0, parent)
541             parent = parent.parent
542         return books
543
544     def pretty_title(self, html_links=False):
545         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
546         books = self.parents() + [self]
547         names.extend([(b.title, b.get_absolute_url()) for b in books])
548
549         if html_links:
550             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
551         else:
552             names = [tag[0] for tag in names]
553         return ', '.join(names)
554
555     def publisher(self):
556         publisher = self.extra_info['publisher']
557         if isinstance(publisher, basestring):
558             return publisher
559         elif isinstance(publisher, list):
560             return ', '.join(publisher)
561
562     @classmethod
563     def tagged_top_level(cls, tags):
564         """ Returns top-level books tagged with `tags`.
565
566         It only returns those books which don't have ancestors which are
567         also tagged with those tags.
568
569         """
570         objects = cls.tagged.with_all(tags)
571         return objects.exclude(ancestor__in=objects)
572
573     @classmethod
574     def book_list(cls, book_filter=None):
575         """Generates a hierarchical listing of all books.
576
577         Books are optionally filtered with a test function.
578
579         """
580
581         books_by_parent = {}
582         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
583         if book_filter:
584             books = books.filter(book_filter).distinct()
585
586             book_ids = set(b['pk'] for b in books.values("pk").iterator())
587             for book in books.iterator():
588                 parent = book.parent_id
589                 if parent not in book_ids:
590                     parent = None
591                 books_by_parent.setdefault(parent, []).append(book)
592         else:
593             for book in books.iterator():
594                 books_by_parent.setdefault(book.parent_id, []).append(book)
595
596         orphans = []
597         books_by_author = OrderedDict()
598         for tag in Tag.objects.filter(category='author').iterator():
599             books_by_author[tag] = []
600
601         for book in books_by_parent.get(None, ()):
602             authors = list(book.authors().only('pk'))
603             if authors:
604                 for author in authors:
605                     books_by_author[author].append(book)
606             else:
607                 orphans.append(book)
608
609         return books_by_author, orphans, books_by_parent
610
    # Maps audience codes to (sort rank, Polish label) pairs; used by
    # audiences_pl() to order and name the audiences of a book.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
621
622     def audiences_pl(self):
623         audiences = self.extra_info.get('audiences', [])
624         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
625         return [a[1] for a in audiences]
626
627     def stage_note(self):
628         stage = self.extra_info.get('stage')
629         if stage and stage < '0.4':
630             return (_('This work needs modernisation'),
631                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
632         else:
633             return None, None
634
635     def choose_fragment(self):
636         fragments = self.fragments.order_by()
637         fragments_count = fragments.count()
638         if not fragments_count and self.children.exists():
639             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
640             fragments_count = fragments.count()
641         if fragments_count:
642             return fragments[randint(0, fragments_count - 1)]
643         elif self.parent:
644             return self.parent.choose_fragment()
645         else:
646             return None
647
648     def fragment_data(self):
649         fragment = self.choose_fragment()
650         if fragment:
651             return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
652         else:
653             return None
654
655     def update_popularity(self):
656         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
657         try:
658             pop = self.popularity
659             pop.count = count
660             pop.save()
661         except BookPopularity.DoesNotExist:
662             BookPopularity.objects.create(book=self, count=count)
663
664     def ridero_link(self):
665         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
666
667
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every entry of Book.formats.

    Each field gets its own upload_to callable, which is also published in
    this module's globals under the name ``_<format>_upload_to``.
    """
    for format_ in Book.formats:
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)
684
# Run at import time, so that Book has its per-format file fields as soon as
# this module is loaded.
add_file_fields()
686
687
class BookPopularity(models.Model):
    """Popularity counter of a book; written by Book.update_popularity()."""
    # Reachable from the book as ``book.popularity``.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)