author/title match ordering in api filter-books + fix lektura/audiobook filters
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance, passed as ``storage=`` to the cover field and to
# the per-format file fields created in add_file_fields().
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# Module-level upload_to callables for the cover and cover thumbnail fields;
# both paths are derived from the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an upload_to callable for ``upload_path``.

    Thin wrapper around _make_upload_to(); used by add_file_fields() for the
    per-format file fields.
    """
    return _make_upload_to(upload_path)
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all variants of a book; other_versions() matches on it.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label below repeats 'creation date' although the field
    # is auto_now (modification time) -- looks like a copy-paste; confirm.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children; set in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; filled from book_info.to_dict() on import.
    # NOTE(review): the default is a single shared dict literal -- verify that
    # the field implementation copies it per instance.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every name listed here gets a "<name>_file" field in add_file_fields().
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors; rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both fields below are denormalised in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and
        # overwrite is False.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
105
    def __unicode__(self):
        """Return the book's title as its text representation."""
        return self.title
108
109     def get_initial(self):
110         try:
111             return re.search(r'\w', self.title, re.U).group(0)
112         except AttributeError:
113             return ''
114
    def authors(self):
        """Return the book's tags of category 'author'."""
        return self.tags.filter(category='author')
117
118     def tag_unicode(self, category):
119         relations = prefetched_relations(self, category)
120         if relations:
121             return ', '.join(rel.tag.name for rel in relations)
122         else:
123             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
124
    def author_unicode(self):
        """Return the author string cached on the book by save()."""
        return self.cached_author
127
128     def translator(self):
129         translators = self.extra_info.get('translators')
130         if not translators:
131             return None
132         if len(translators) > 3:
133             translators = translators[:2]
134             others = ' i inni'
135         else:
136             others = ''
137         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
138
139     def cover_source(self):
140         return self.extra_info.get('cover_source', self.parent.cover_source() if self.parent else '')
141
142     def save(self, force_insert=False, force_update=False, **kwargs):
143         from sortify import sortify
144
145         self.sort_key = sortify(self.title)[:120]
146         self.title = unicode(self.title)  # ???
147
148         try:
149             author = self.authors().first().sort_key
150         except AttributeError:
151             author = u''
152         self.sort_key_author = author
153
154         self.cached_author = self.tag_unicode('author')
155         self.has_audience = 'audience' in self.extra_info
156
157         ret = super(Book, self).save(force_insert, force_update, **kwargs)
158
159         return ret
160
    @permalink
    def get_absolute_url(self):
        """Return the URL of this book's detail view.

        The (view name, arguments) pair returned here is resolved into a URL
        by the ``permalink`` decorator.
        """
        return 'catalogue.views.book_detail', [self.slug]
164
    @staticmethod
    @permalink
    def create_url(slug):
        """Return the detail-view URL for ``slug`` without needing a Book instance."""
        return 'catalogue.views.book_detail', [slug]
169
    def gallery_path(self):
        """Return the gallery path for this book, as computed by catalogue.utils.gallery_path from the slug."""
        return gallery_path(self.slug)
172
    def gallery_url(self):
        """Return the gallery URL for this book, as computed by catalogue.utils.gallery_url from the slug."""
        return gallery_url(self.slug)
175
    @property
    def name(self):
        """Read-only alias for ``title``."""
        return self.title
179
    def language_code(self):
        """Return the book's language code mapped through constants.LANGUAGES_3TO2.

        Codes missing from the mapping are returned unchanged.
        """
        # presumably maps 3-letter codes to 2-letter ones -- confirm in constants
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
182
    def language_name(self):
        """Return the name configured in settings.LANGUAGES for language_code(), or "" if not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
185
    def is_foreign(self):
        """Return True when the book's language code differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE
188
189     def has_media(self, type_):
190         if type_ in Book.formats:
191             return bool(getattr(self, "%s_file" % type_))
192         else:
193             return self.media.filter(type=type_).exists()
194
195     def get_media(self, type_):
196         if self.has_media(type_):
197             if type_ in Book.formats:
198                 return getattr(self, "%s_file" % type_)
199             else:
200                 return self.media.filter(type=type_)
201         else:
202             return None
203
    def get_mp3(self):
        """Shortcut for get_media("mp3")."""
        return self.get_media("mp3")
206
    def get_odt(self):
        """Shortcut for get_media("odt")."""
        return self.get_media("odt")
209
    def get_ogg(self):
        """Shortcut for get_media("ogg")."""
        return self.get_media("ogg")
212
    def get_daisy(self):
        """Shortcut for get_media("daisy")."""
        return self.get_media("daisy")
215
    def has_description(self):
        """Return True when the description is non-empty."""
        return len(self.description) > 0
    # Display metadata attached to the method; presumably read by the Django
    # admin list view -- confirm against the admin configuration.
    has_description.short_description = _('description')
    has_description.boolean = True
220
221     # ugly ugly ugly
    def has_mp3_file(self):
        """Return True when has_media("mp3") reports an MP3 for this book."""
        return bool(self.has_media("mp3"))
    # Display metadata; presumably for the Django admin -- confirm.
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
226
    def has_ogg_file(self):
        """Return True when has_media("ogg") reports an OGG for this book."""
        return bool(self.has_media("ogg"))
    # Display metadata; presumably for the Django admin -- confirm.
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
231
    def has_daisy_file(self):
        """Return True when has_media("daisy") reports a DAISY file for this book."""
        return bool(self.has_media("daisy"))
    # Display metadata; presumably for the Django admin -- confirm.
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
236
237     def wldocument(self, parse_dublincore=True, inherit=True):
238         from catalogue.import_utils import ORMDocProvider
239         from librarian.parser import WLDocument
240
241         if inherit and self.parent:
242             meta_fallbacks = self.parent.cover_info()
243         else:
244             meta_fallbacks = None
245
246         return WLDocument.from_file(
247             self.xml_file.path,
248             provider=ORMDocProvider(self),
249             parse_dublincore=parse_dublincore,
250             meta_fallbacks=meta_fallbacks)
251
252     @staticmethod
253     def zip_format(format_):
254         def pretty_file_name(book):
255             return "%s/%s.%s" % (
256                 book.extra_info['author'],
257                 book.slug,
258                 format_)
259
260         field_name = "%s_file" % format_
261         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
262         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
263         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
264
265     def zip_audiobooks(self, format_):
266         bm = BookMedia.objects.filter(book=self, type=format_)
267         paths = map(lambda bm: (None, bm.file.path), bm)
268         return create_zip(paths, "%s_%s" % (self.slug, format_))
269
270     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
271         if index is None:
272             from search.index import Index
273             index = Index()
274         try:
275             index.index_book(self, book_info)
276             if index_tags:
277                 index.index_tags()
278             if commit:
279                 index.index.commit()
280         except Exception, e:
281             index.index.rollback()
282             raise e
283
284     def download_pictures(self, remote_gallery_url):
285         gallery_path = self.gallery_path()
286         # delete previous files, so we don't include old files in ebooks
287         if os.path.isdir(gallery_path):
288             for filename in os.listdir(gallery_path):
289                 file_path = os.path.join(gallery_path, filename)
290                 os.unlink(file_path)
291         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
292         if ilustr_elements:
293             makedirs(gallery_path)
294             for ilustr in ilustr_elements:
295                 ilustr_src = ilustr.get('src')
296                 ilustr_path = os.path.join(gallery_path, ilustr_src)
297                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
298
299     @classmethod
300     def from_xml_file(cls, xml_file, **kwargs):
301         from django.core.files import File
302         from librarian import dcparser
303
304         # use librarian to parse meta-data
305         book_info = dcparser.parse(xml_file)
306
307         if not isinstance(xml_file, File):
308             xml_file = File(open(xml_file))
309
310         try:
311             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
312         finally:
313             xml_file.close()
314
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a Book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: allow replacing an already existing book.
            dont_build: iterable of build names to skip, merged with
                app_settings.DONT_BUILD.
            search_index: schedule search indexing (unless
                settings.NO_SEARCH_INDEX is set).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when given, pictures are downloaded from it.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside [a-z0-9-].
            Book.AlreadyExists: the book exists and ``overwrite`` is False.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover info so a change can be detected below.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tag set: metadata tags plus the previously saved shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # NOTE(review): book.save() is nevertheless called near the end of
        # this method to refresh sort_key_author.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
414
415     @classmethod
416     @transaction.atomic
417     def repopulate_ancestors(cls):
418         """Fixes the ancestry cache."""
419         # TODO: table names
420         cursor = connection.cursor()
421         if connection.vendor == 'postgres':
422             cursor.execute("TRUNCATE catalogue_book_ancestor")
423             cursor.execute("""
424                 WITH RECURSIVE ancestry AS (
425                     SELECT book.id, book.parent_id
426                     FROM catalogue_book AS book
427                     WHERE book.parent_id IS NOT NULL
428                     UNION
429                     SELECT ancestor.id, book.parent_id
430                     FROM ancestry AS ancestor, catalogue_book AS book
431                     WHERE ancestor.parent_id = book.id
432                         AND book.parent_id IS NOT NULL
433                     )
434                 INSERT INTO catalogue_book_ancestor
435                     (from_book_id, to_book_id)
436                     SELECT id, parent_id
437                     FROM ancestry
438                     ORDER BY id;
439                 """)
440         else:
441             cursor.execute("DELETE FROM catalogue_book_ancestor")
442             for b in cls.objects.exclude(parent=None):
443                 parent = b.parent
444                 while parent is not None:
445                     b.ancestor.add(parent)
446                     parent = parent.parent
447
448     def flush_includes(self, languages=True):
449         if not languages:
450             return
451         if languages is True:
452             languages = [lc for (lc, _ln) in settings.LANGUAGES]
453         flush_ssi_includes([
454             template % (self.pk, lang)
455             for template in [
456                 '/katalog/b/%d/mini.%s.html',
457                 '/katalog/b/%d/mini_nolink.%s.html',
458                 '/katalog/b/%d/short.%s.html',
459                 '/katalog/b/%d/wide.%s.html',
460                 '/api/include/book/%d.%s.json',
461                 '/api/include/book/%d.%s.xml',
462                 ]
463             for lang in languages
464             ])
465
466     def cover_info(self, inherit=True):
467         """Returns a dictionary to serve as fallback for BookInfo.
468
469         For now, the only thing inherited is the cover image.
470         """
471         need = False
472         info = {}
473         for field in ('cover_url', 'cover_by', 'cover_source'):
474             val = self.extra_info.get(field)
475             if val:
476                 info[field] = val
477             else:
478                 need = True
479         if inherit and need and self.parent is not None:
480             parent_info = self.parent.cover_info()
481             parent_info.update(info)
482             info = parent_info
483         return info
484
485     def related_themes(self):
486         return Tag.objects.usage_for_queryset(
487             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
488             counts=True).filter(category='theme')
489
490     def parent_cover_changed(self):
491         """Called when parent book's cover image is changed."""
492         if not self.cover_info(inherit=False):
493             if 'cover' not in app_settings.DONT_BUILD:
494                 self.cover.build_delay()
495                 self.cover_thumb.build_delay()
496             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
497                 if format_ not in app_settings.DONT_BUILD:
498                     getattr(self, '%s_file' % format_).build_delay()
499             for child in self.children.all():
500                 child.parent_cover_changed()
501
    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book.

        Returns a queryset of books sharing this book's ``common_slug``,
        excluding the book itself.
        """
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
505
506     def parents(self):
507         books = []
508         parent = self.parent
509         while parent is not None:
510             books.insert(0, parent)
511             parent = parent.parent
512         return books
513
514     def pretty_title(self, html_links=False):
515         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
516         books = self.parents() + [self]
517         names.extend([(b.title, b.get_absolute_url()) for b in books])
518
519         if html_links:
520             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
521         else:
522             names = [tag[0] for tag in names]
523         return ', '.join(names)
524
525     def publisher(self):
526         publisher = self.extra_info['publisher']
527         if isinstance(publisher, basestring):
528             return publisher
529         elif isinstance(publisher, list):
530             return ', '.join(publisher)
531
    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        # Books carrying all of `tags`...
        objects = cls.tagged.with_all(tags)
        # ...minus those with an ancestor in that same set.
        return objects.exclude(ancestor__in=objects)
542
543     @classmethod
544     def book_list(cls, book_filter=None):
545         """Generates a hierarchical listing of all books.
546
547         Books are optionally filtered with a test function.
548
549         """
550
551         books_by_parent = {}
552         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
553         if book_filter:
554             books = books.filter(book_filter).distinct()
555
556             book_ids = set(b['pk'] for b in books.values("pk").iterator())
557             for book in books.iterator():
558                 parent = book.parent_id
559                 if parent not in book_ids:
560                     parent = None
561                 books_by_parent.setdefault(parent, []).append(book)
562         else:
563             for book in books.iterator():
564                 books_by_parent.setdefault(book.parent_id, []).append(book)
565
566         orphans = []
567         books_by_author = OrderedDict()
568         for tag in Tag.objects.filter(category='author').iterator():
569             books_by_author[tag] = []
570
571         for book in books_by_parent.get(None, ()):
572             authors = list(book.authors().only('pk'))
573             if authors:
574                 for author in authors:
575                     books_by_author[author].append(book)
576             else:
577                 orphans.append(book)
578
579         return books_by_author, orphans, books_by_parent
580
    # Maps an audience code to a (sort rank, Polish display label) pair.
    # Used by audiences_pl(), which sorts by rank and merges codes sharing
    # the same pair.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
591
592     def audiences_pl(self):
593         audiences = self.extra_info.get('audiences', [])
594         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
595         return [a[1] for a in audiences]
596
597     def stage_note(self):
598         stage = self.extra_info.get('stage')
599         if stage and stage < '0.4':
600             return (_('This work needs modernisation'),
601                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
602         else:
603             return None, None
604
605     def choose_fragment(self):
606         fragments = self.fragments.order_by()
607         fragments_count = fragments.count()
608         if not fragments_count and self.children.exists():
609             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
610             fragments_count = fragments.count()
611         if fragments_count:
612             return fragments[randint(0, fragments_count - 1)]
613         elif self.parent:
614             return self.parent.choose_fragment()
615         else:
616             return None
617
618     def fragment_data(self):
619         fragment = self.choose_fragment()
620         if fragment:
621             return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
622         else:
623             return None
624
625     def update_popularity(self):
626         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
627         try:
628             pop = self.popularity
629             pop.count = count
630             pop.save()
631         except BookPopularity.DoesNotExist:
632             BookPopularity.objects.create(book=self, count=count)
633
634     def ridero_link(self):
635         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
636
637
def add_file_fields():
    """Attach a "<format>_file" EbookField to Book for every entry of Book.formats.

    Each field gets its own upload_to callable, which stores files under
    "book/<format>/<slug>.<format>".
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, "%s_file" % format_)
654
# Runs at import time, so Book has its per-format file fields as soon as the
# module is loaded.
add_file_fields()
656
657
class BookPopularity(models.Model):
    """Popularity counter attached one-to-one to a Book.

    Book.update_popularity() stores in ``count`` the number of distinct users
    having the book among their 'set' tags.
    """
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)