add to api: keyset pagination, recommended, newest
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Storage instance shared by the ``cover`` field and by the per-format
# ``<format>_file`` fields that add_file_fields() attaches to Book.
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# ``upload_to`` callables for the cover image and its thumbnail; each one
# names the stored file after the slug of the instance being saved.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ebook path template."""
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one ``<format>_file`` field per entry
    of ``Book.formats`` is attached after the class body by
    ``add_file_fields()``.
    """
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed on every save() and are not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of the same work; see other_versions().
    # NOTE(review): verbose name duplicates the one on ``slug`` -- confirm
    # whether a distinct label was intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): verbose name is 'creation date' although the field is
    # updated on every save (auto_now) -- looks like a copy-paste; confirm
    # before changing, since the label is a translation msgid.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it with book_info.to_dict().
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every format listed here gets a ``<format>_file`` field.
    formats = ebook_formats + ['html', 'xml']

    # Direct parent, and the set of all ancestors; the latter is rebuilt by
    # repopulate_ancestors().
    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta(); ``html_built``
    # is not sent anywhere in this module.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and
        # ``overwrite`` is false.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
102
103     def __unicode__(self):
104         return self.title
105
106     def get_initial(self):
107         try:
108             return re.search(r'\w', self.title, re.U).group(0)
109         except AttributeError:
110             return ''
111
112     def authors(self):
113         return self.tags.filter(category='author')
114
115     def tag_unicode(self, category):
116         relations = prefetched_relations(self, category)
117         if relations:
118             return ', '.join(rel.tag.name for rel in relations)
119         else:
120             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
121
122     def author_unicode(self):
123         return self.tag_unicode('author')
124
125     def translator(self):
126         translators = self.extra_info.get('translators')
127         if not translators:
128             return None
129         if len(translators) > 3:
130             translators = translators[:2]
131             others = ' i inni'
132         else:
133             others = ''
134         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
135
136     def save(self, force_insert=False, force_update=False, **kwargs):
137         from sortify import sortify
138
139         self.sort_key = sortify(self.title)[:120]
140         self.title = unicode(self.title)  # ???
141
142         try:
143             author = self.authors().first().sort_key
144         except AttributeError:
145             author = u''
146         self.sort_key_author = author
147
148         ret = super(Book, self).save(force_insert, force_update, **kwargs)
149
150         return ret
151
152     @permalink
153     def get_absolute_url(self):
154         return 'catalogue.views.book_detail', [self.slug]
155
156     @staticmethod
157     @permalink
158     def create_url(slug):
159         return 'catalogue.views.book_detail', [slug]
160
161     def gallery_path(self):
162         return gallery_path(self.slug)
163
164     def gallery_url(self):
165         return gallery_url(self.slug)
166
167     @property
168     def name(self):
169         return self.title
170
171     def language_code(self):
172         return constants.LANGUAGES_3TO2.get(self.language, self.language)
173
174     def language_name(self):
175         return dict(settings.LANGUAGES).get(self.language_code(), "")
176
177     def is_foreign(self):
178         return self.language_code() != settings.LANGUAGE_CODE
179
180     def has_media(self, type_):
181         if type_ in Book.formats:
182             return bool(getattr(self, "%s_file" % type_))
183         else:
184             return self.media.filter(type=type_).exists()
185
186     def get_media(self, type_):
187         if self.has_media(type_):
188             if type_ in Book.formats:
189                 return getattr(self, "%s_file" % type_)
190             else:
191                 return self.media.filter(type=type_)
192         else:
193             return None
194
195     def get_mp3(self):
196         return self.get_media("mp3")
197
198     def get_odt(self):
199         return self.get_media("odt")
200
201     def get_ogg(self):
202         return self.get_media("ogg")
203
204     def get_daisy(self):
205         return self.get_media("daisy")
206
207     def has_description(self):
208         return len(self.description) > 0
209     has_description.short_description = _('description')
210     has_description.boolean = True
211
212     # ugly ugly ugly
213     def has_mp3_file(self):
214         return bool(self.has_media("mp3"))
215     has_mp3_file.short_description = 'MP3'
216     has_mp3_file.boolean = True
217
218     def has_ogg_file(self):
219         return bool(self.has_media("ogg"))
220     has_ogg_file.short_description = 'OGG'
221     has_ogg_file.boolean = True
222
223     def has_daisy_file(self):
224         return bool(self.has_media("daisy"))
225     has_daisy_file.short_description = 'DAISY'
226     has_daisy_file.boolean = True
227
228     def wldocument(self, parse_dublincore=True, inherit=True):
229         from catalogue.import_utils import ORMDocProvider
230         from librarian.parser import WLDocument
231
232         if inherit and self.parent:
233             meta_fallbacks = self.parent.cover_info()
234         else:
235             meta_fallbacks = None
236
237         return WLDocument.from_file(
238             self.xml_file.path,
239             provider=ORMDocProvider(self),
240             parse_dublincore=parse_dublincore,
241             meta_fallbacks=meta_fallbacks)
242
243     @staticmethod
244     def zip_format(format_):
245         def pretty_file_name(book):
246             return "%s/%s.%s" % (
247                 book.extra_info['author'],
248                 book.slug,
249                 format_)
250
251         field_name = "%s_file" % format_
252         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
253         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
254         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
255
256     def zip_audiobooks(self, format_):
257         bm = BookMedia.objects.filter(book=self, type=format_)
258         paths = map(lambda bm: (None, bm.file.path), bm)
259         return create_zip(paths, "%s_%s" % (self.slug, format_))
260
261     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
262         if index is None:
263             from search.index import Index
264             index = Index()
265         try:
266             index.index_book(self, book_info)
267             if index_tags:
268                 index.index_tags()
269             if commit:
270                 index.index.commit()
271         except Exception, e:
272             index.index.rollback()
273             raise e
274
275     def download_pictures(self, remote_gallery_url):
276         gallery_path = self.gallery_path()
277         # delete previous files, so we don't include old files in ebooks
278         if os.path.isdir(gallery_path):
279             for filename in os.listdir(gallery_path):
280                 file_path = os.path.join(gallery_path, filename)
281                 os.unlink(file_path)
282         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
283         if ilustr_elements:
284             makedirs(gallery_path)
285             for ilustr in ilustr_elements:
286                 ilustr_src = ilustr.get('src')
287                 ilustr_path = os.path.join(gallery_path, ilustr_src)
288                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
289
290     @classmethod
291     def from_xml_file(cls, xml_file, **kwargs):
292         from django.core.files import File
293         from librarian import dcparser
294
295         # use librarian to parse meta-data
296         book_info = dcparser.parse(xml_file)
297
298         if not isinstance(xml_file, File):
299             xml_file = File(open(xml_file))
300
301         try:
302             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
303         finally:
304             xml_file.close()
305
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its raw XML file and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``
        :param book_info: parsed metadata (slug, title, language, parts, ...)
        :param overwrite: allow updating a book whose slug already exists
        :param dont_build: iterable of format names not to build; it is merged
            with ``app_settings.DONT_BUILD``
        :param search_index: schedule search indexing (unless
            ``settings.NO_SEARCH_INDEX`` is set)
        :param search_index_tags: forwarded to the indexing task as
            ``index_tags``
        :param remote_gallery_url: when given, pictures are downloaded from it
        :returns: the saved ``Book``
        :raises Book.DoesNotExist: a part listed in ``book_info.parts`` is missing
        :raises ValueError: the slug contains characters outside ``[a-z0-9-]``
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tags with those from metadata plus the preserved shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Children collected here get parent_cover_changed() called below.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # NOTE(review): book.save() is nevertheless called near the end of
        # this method -- confirm whether the remark above still holds.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # The HTML build is scheduled regardless of ``dont_build``.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
405
406     @classmethod
407     @transaction.atomic
408     def repopulate_ancestors(cls):
409         """Fixes the ancestry cache."""
410         # TODO: table names
411         cursor = connection.cursor()
412         if connection.vendor == 'postgres':
413             cursor.execute("TRUNCATE catalogue_book_ancestor")
414             cursor.execute("""
415                 WITH RECURSIVE ancestry AS (
416                     SELECT book.id, book.parent_id
417                     FROM catalogue_book AS book
418                     WHERE book.parent_id IS NOT NULL
419                     UNION
420                     SELECT ancestor.id, book.parent_id
421                     FROM ancestry AS ancestor, catalogue_book AS book
422                     WHERE ancestor.parent_id = book.id
423                         AND book.parent_id IS NOT NULL
424                     )
425                 INSERT INTO catalogue_book_ancestor
426                     (from_book_id, to_book_id)
427                     SELECT id, parent_id
428                     FROM ancestry
429                     ORDER BY id;
430                 """)
431         else:
432             cursor.execute("DELETE FROM catalogue_book_ancestor")
433             for b in cls.objects.exclude(parent=None):
434                 parent = b.parent
435                 while parent is not None:
436                     b.ancestor.add(parent)
437                     parent = parent.parent
438
439     def flush_includes(self, languages=True):
440         if not languages:
441             return
442         if languages is True:
443             languages = [lc for (lc, _ln) in settings.LANGUAGES]
444         flush_ssi_includes([
445             template % (self.pk, lang)
446             for template in [
447                 '/katalog/b/%d/mini.%s.html',
448                 '/katalog/b/%d/mini_nolink.%s.html',
449                 '/katalog/b/%d/short.%s.html',
450                 '/katalog/b/%d/wide.%s.html',
451                 '/api/include/book/%d.%s.json',
452                 '/api/include/book/%d.%s.xml',
453                 ]
454             for lang in languages
455             ])
456
457     def cover_info(self, inherit=True):
458         """Returns a dictionary to serve as fallback for BookInfo.
459
460         For now, the only thing inherited is the cover image.
461         """
462         need = False
463         info = {}
464         for field in ('cover_url', 'cover_by', 'cover_source'):
465             val = self.extra_info.get(field)
466             if val:
467                 info[field] = val
468             else:
469                 need = True
470         if inherit and need and self.parent is not None:
471             parent_info = self.parent.cover_info()
472             parent_info.update(info)
473             info = parent_info
474         return info
475
476     def related_themes(self):
477         return Tag.objects.usage_for_queryset(
478             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
479             counts=True).filter(category='theme')
480
481     def parent_cover_changed(self):
482         """Called when parent book's cover image is changed."""
483         if not self.cover_info(inherit=False):
484             if 'cover' not in app_settings.DONT_BUILD:
485                 self.cover.build_delay()
486                 self.cover_thumb.build_delay()
487             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
488                 if format_ not in app_settings.DONT_BUILD:
489                     getattr(self, '%s_file' % format_).build_delay()
490             for child in self.children.all():
491                 child.parent_cover_changed()
492
493     def other_versions(self):
494         """Find other versions (i.e. in other languages) of the book."""
495         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
496
497     def parents(self):
498         books = []
499         parent = self.parent
500         while parent is not None:
501             books.insert(0, parent)
502             parent = parent.parent
503         return books
504
505     def pretty_title(self, html_links=False):
506         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
507         books = self.parents() + [self]
508         names.extend([(b.title, b.get_absolute_url()) for b in books])
509
510         if html_links:
511             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
512         else:
513             names = [tag[0] for tag in names]
514         return ', '.join(names)
515
516     def publisher(self):
517         publisher = self.extra_info['publisher']
518         if isinstance(publisher, basestring):
519             return publisher
520         elif isinstance(publisher, list):
521             return ', '.join(publisher)
522
523     @classmethod
524     def tagged_top_level(cls, tags):
525         """ Returns top-level books tagged with `tags`.
526
527         It only returns those books which don't have ancestors which are
528         also tagged with those tags.
529
530         """
531         objects = cls.tagged.with_all(tags)
532         return objects.exclude(ancestor__in=objects)
533
534     @classmethod
535     def book_list(cls, book_filter=None):
536         """Generates a hierarchical listing of all books.
537
538         Books are optionally filtered with a test function.
539
540         """
541
542         books_by_parent = {}
543         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
544         if book_filter:
545             books = books.filter(book_filter).distinct()
546
547             book_ids = set(b['pk'] for b in books.values("pk").iterator())
548             for book in books.iterator():
549                 parent = book.parent_id
550                 if parent not in book_ids:
551                     parent = None
552                 books_by_parent.setdefault(parent, []).append(book)
553         else:
554             for book in books.iterator():
555                 books_by_parent.setdefault(book.parent_id, []).append(book)
556
557         orphans = []
558         books_by_author = OrderedDict()
559         for tag in Tag.objects.filter(category='author').iterator():
560             books_by_author[tag] = []
561
562         for book in books_by_parent.get(None, ()):
563             authors = list(book.authors().only('pk'))
564             if authors:
565                 for author in authors:
566                     books_by_author[author].append(book)
567             else:
568                 orphans.append(book)
569
570         return books_by_author, orphans, books_by_parent
571
    # Maps an audience code to a (sort rank, Polish label) pair; used by
    # audiences_pl(), which orders by the pair and returns only the labels.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
582
583     def audiences_pl(self):
584         audiences = self.extra_info.get('audiences', [])
585         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
586         return [a[1] for a in audiences]
587
588     def stage_note(self):
589         stage = self.extra_info.get('stage')
590         if stage and stage < '0.4':
591             return (_('This work needs modernisation'),
592                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
593         else:
594             return None, None
595
596     def choose_fragment(self):
597         fragments = self.fragments.order_by()
598         fragments_count = fragments.count()
599         if not fragments_count and self.children.exists():
600             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
601             fragments_count = fragments.count()
602         if fragments_count:
603             return fragments[randint(0, fragments_count - 1)]
604         elif self.parent:
605             return self.parent.choose_fragment()
606         else:
607             return None
608
609     def update_popularity(self):
610         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
611         try:
612             pop = self.popularity
613             pop.count = count
614             pop.save()
615         except BookPopularity.DoesNotExist:
616             BookPopularity.objects.create(book=self, count=count)
617
618     def ridero_link(self):
619         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
620
621
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every format in ``Book.formats``."""
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_name = '_%s_upload_to' % format_
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_name
        globals()[upload_name] = _upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)
638
# Runs at import time so that Book has its per-format file fields as soon as
# the module is loaded.
add_file_fields()
640
641
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one relation with a Book.

    The counter is written by ``Book.update_popularity()``.
    """
    book = models.OneToOneField(
        Book,
        related_name='popularity')
    count = models.IntegerField(
        default=0)