44ee02ecc10083e500003e2b172dd7b8b0fece83
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance; passed as ``storage=`` to the ``cover`` field and to
# the per-format file fields declared in this module.
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# Module-level ``upload_to`` callables for the cover and cover thumbnail
# fields; each maps a book to a path built from its slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the ``upload_path`` template.

    Delegates to ``_make_upload_to``.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
47 class Book(models.Model):
48     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed on every save() and are not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of a work; set in from_text_and_meta() and
    # queried by other_versions().
    # NOTE(review): verbose name is 'slug', same as the field above -- confirm
    # whether that is intentional.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    # Up to 3 characters; language_code() maps it through LANGUAGES_3TO2.
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's parts; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() stores book_info.to_dict() here.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Format names; add_file_fields() adds one ``<format>_file`` field to the
    # class for every entry in ``formats``.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta(); ``html_built``
    # is not sent anywhere in this module.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() for an existing slug when overwrite is off."""
        pass
99
    class Meta:
        # Default ordering: by the author sort key first, then by the title
        # sort key (both are maintained in save()).
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        # Set explicitly; the model lives in a submodule of the models package.
        app_label = 'catalogue'
105
    def __unicode__(self):
        """Return the book's title."""
        return self.title
108
109     def get_initial(self):
110         try:
111             return re.search(r'\w', self.title, re.U).group(0)
112         except AttributeError:
113             return ''
114
    def authors(self):
        """Return this book's tags of category 'author'."""
        return self.tags.filter(category='author')
117
118     def tag_unicode(self, category):
119         relations = prefetched_relations(self, category)
120         if relations:
121             return ', '.join(rel.tag.name for rel in relations)
122         else:
123             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
124
    def tags_by_category(self):
        """Return split_tags() of the book's tags, leaving out 'set' and 'theme' tags."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
127
    def author_unicode(self):
        """Return the cached author string (``cached_author``, refreshed in save())."""
        return self.cached_author
130
131     def translator(self):
132         translators = self.extra_info.get('translators')
133         if not translators:
134             return None
135         if len(translators) > 3:
136             translators = translators[:2]
137             others = ' i inni'
138         else:
139             others = ''
140         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
141
142     def cover_source(self):
143         return self.extra_info.get('cover_source', self.parent.cover_source() if self.parent else '')
144
145     def save(self, force_insert=False, force_update=False, **kwargs):
146         from sortify import sortify
147
148         self.sort_key = sortify(self.title)[:120]
149         self.title = unicode(self.title)  # ???
150
151         try:
152             author = self.authors().first().sort_key
153         except AttributeError:
154             author = u''
155         self.sort_key_author = author
156
157         self.cached_author = self.tag_unicode('author')
158         self.has_audience = 'audience' in self.extra_info
159
160         ret = super(Book, self).save(force_insert, force_update, **kwargs)
161
162         return ret
163
    @permalink
    def get_absolute_url(self):
        """Return the view name and args for this book's detail page.

        The ``permalink`` decorator turns the returned pair into a URL.
        """
        return 'catalogue.views.book_detail', [self.slug]
167
    @staticmethod
    @permalink
    def create_url(slug):
        """Same as get_absolute_url(), but for a bare ``slug`` (no instance needed)."""
        return 'catalogue.views.book_detail', [slug]
172
    def gallery_path(self):
        """Return ``catalogue.utils.gallery_path`` for this book's slug."""
        return gallery_path(self.slug)
175
    def gallery_url(self):
        """Return ``catalogue.utils.gallery_url`` for this book's slug."""
        return gallery_url(self.slug)
178
    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
182
    def language_code(self):
        """Return ``language`` mapped through LANGUAGES_3TO2, or unchanged if unmapped."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
185
    def language_name(self):
        """Return the name listed in ``settings.LANGUAGES`` for language_code(), or ''."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
188
    def is_foreign(self):
        """Return True if language_code() differs from ``settings.LANGUAGE_CODE``."""
        return self.language_code() != settings.LANGUAGE_CODE
191
192     def has_media(self, type_):
193         if type_ in Book.formats:
194             return bool(getattr(self, "%s_file" % type_))
195         else:
196             return self.media.filter(type=type_).exists()
197
    def has_audio(self):
        """Return True if the book has any 'mp3' media."""
        return self.has_media('mp3')
200
201     def get_media(self, type_):
202         if self.has_media(type_):
203             if type_ in Book.formats:
204                 return getattr(self, "%s_file" % type_)
205             else:
206                 return self.media.filter(type=type_)
207         else:
208             return None
209
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
212
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
215
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
218
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
221
    def has_description(self):
        """Return True if ``description`` is non-empty."""
        return len(self.description) > 0
    # Function attributes below label the value and mark it as boolean
    # (presumably for the Django admin list display -- confirm).
    has_description.short_description = _('description')
    has_description.boolean = True
226
227     # ugly ugly ugly
    def has_mp3_file(self):
        """Return True if the book has 'mp3' media."""
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
232
    def has_ogg_file(self):
        """Return True if the book has 'ogg' media."""
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
237
    def has_daisy_file(self):
        """Return True if the book has 'daisy' media."""
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
242
243     def get_audiobooks(self):
244         ogg_files = {}
245         for m in self.media.filter(type='ogg').order_by().iterator():
246             ogg_files[m.name] = m
247
248         audiobooks = []
249         projects = set()
250         for mp3 in self.media.filter(type='mp3').iterator():
251             # ogg files are always from the same project
252             meta = mp3.extra_info
253             project = meta.get('project')
254             if not project:
255                 # temporary fallback
256                 project = u'CzytamySłuchając'
257
258             projects.add((project, meta.get('funded_by', '')))
259
260             media = {'mp3': mp3}
261
262             ogg = ogg_files.get(mp3.name)
263             if ogg:
264                 media['ogg'] = ogg
265             audiobooks.append(media)
266
267         projects = sorted(projects)
268         return audiobooks, projects
269
270     def wldocument(self, parse_dublincore=True, inherit=True):
271         from catalogue.import_utils import ORMDocProvider
272         from librarian.parser import WLDocument
273
274         if inherit and self.parent:
275             meta_fallbacks = self.parent.cover_info()
276         else:
277             meta_fallbacks = None
278
279         return WLDocument.from_file(
280             self.xml_file.path,
281             provider=ORMDocProvider(self),
282             parse_dublincore=parse_dublincore,
283             meta_fallbacks=meta_fallbacks)
284
285     @staticmethod
286     def zip_format(format_):
287         def pretty_file_name(book):
288             return "%s/%s.%s" % (
289                 book.extra_info['author'],
290                 book.slug,
291                 format_)
292
293         field_name = "%s_file" % format_
294         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
295         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
296         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
297
298     def zip_audiobooks(self, format_):
299         bm = BookMedia.objects.filter(book=self, type=format_)
300         paths = map(lambda bm: (None, bm.file.path), bm)
301         return create_zip(paths, "%s_%s" % (self.slug, format_))
302
303     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
304         if index is None:
305             from search.index import Index
306             index = Index()
307         try:
308             index.index_book(self, book_info)
309             if index_tags:
310                 index.index_tags()
311             if commit:
312                 index.index.commit()
313         except Exception, e:
314             index.index.rollback()
315             raise e
316
317     def download_pictures(self, remote_gallery_url):
318         gallery_path = self.gallery_path()
319         # delete previous files, so we don't include old files in ebooks
320         if os.path.isdir(gallery_path):
321             for filename in os.listdir(gallery_path):
322                 file_path = os.path.join(gallery_path, filename)
323                 os.unlink(file_path)
324         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
325         if ilustr_elements:
326             makedirs(gallery_path)
327             for ilustr in ilustr_elements:
328                 ilustr_src = ilustr.get('src')
329                 ilustr_path = os.path.join(gallery_path, ilustr_src)
330                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
331
332     @classmethod
333     def from_xml_file(cls, xml_file, **kwargs):
334         from django.core.files import File
335         from librarian import dcparser
336
337         # use librarian to parse meta-data
338         book_info = dcparser.parse(xml_file)
339
340         if not isinstance(xml_file, File):
341             xml_file = File(open(xml_file))
342
343         try:
344             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
345         finally:
346             xml_file.close()
347
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``.
        :param book_info: parsed metadata; this method reads its ``url``,
            ``language``, ``title``, ``variant_of``, optional ``parts`` and
            ``to_dict()``.
        :param overwrite: allow updating a book whose slug already exists.
        :param dont_build: iterable of format names that should not be built;
            merged with ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (skipped when
            ``settings.NO_SEARCH_INDEX`` is set).
        :param search_index_tags: passed to the indexing task as ``index_tags``.
        :param remote_gallery_url: when given, pictures are downloaded from it.
        :returns: the saved ``Book``.
        :raises Book.DoesNotExist: a book listed in ``book_info.parts`` is missing.
        :raises ValueError: the slug contains characters other than ``a-z0-9-``.
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    # Re-raise with a message naming the missing part.
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        # Versions of the same work share common_slug (see other_versions()).
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Mark every metadata tag as used by books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set: new metadata tags plus the shelves saved above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the part's index.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # NOTE(review): book.save() is nevertheless called near the end of
        # this method -- confirm which of the two is intended.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let affected children rebuild anything that depends on the cover
        # they inherit.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
452
453     @classmethod
454     @transaction.atomic
455     def repopulate_ancestors(cls):
456         """Fixes the ancestry cache."""
457         # TODO: table names
458         cursor = connection.cursor()
459         if connection.vendor == 'postgres':
460             cursor.execute("TRUNCATE catalogue_book_ancestor")
461             cursor.execute("""
462                 WITH RECURSIVE ancestry AS (
463                     SELECT book.id, book.parent_id
464                     FROM catalogue_book AS book
465                     WHERE book.parent_id IS NOT NULL
466                     UNION
467                     SELECT ancestor.id, book.parent_id
468                     FROM ancestry AS ancestor, catalogue_book AS book
469                     WHERE ancestor.parent_id = book.id
470                         AND book.parent_id IS NOT NULL
471                     )
472                 INSERT INTO catalogue_book_ancestor
473                     (from_book_id, to_book_id)
474                     SELECT id, parent_id
475                     FROM ancestry
476                     ORDER BY id;
477                 """)
478         else:
479             cursor.execute("DELETE FROM catalogue_book_ancestor")
480             for b in cls.objects.exclude(parent=None):
481                 parent = b.parent
482                 while parent is not None:
483                     b.ancestor.add(parent)
484                     parent = parent.parent
485
486     def flush_includes(self, languages=True):
487         if not languages:
488             return
489         if languages is True:
490             languages = [lc for (lc, _ln) in settings.LANGUAGES]
491         flush_ssi_includes([
492             template % (self.pk, lang)
493             for template in [
494                 '/katalog/b/%d/mini.%s.html',
495                 '/katalog/b/%d/mini_nolink.%s.html',
496                 '/katalog/b/%d/short.%s.html',
497                 '/katalog/b/%d/wide.%s.html',
498                 '/api/include/book/%d.%s.json',
499                 '/api/include/book/%d.%s.xml',
500                 ]
501             for lang in languages
502             ])
503
504     def cover_info(self, inherit=True):
505         """Returns a dictionary to serve as fallback for BookInfo.
506
507         For now, the only thing inherited is the cover image.
508         """
509         need = False
510         info = {}
511         for field in ('cover_url', 'cover_by', 'cover_source'):
512             val = self.extra_info.get(field)
513             if val:
514                 info[field] = val
515             else:
516                 need = True
517         if inherit and need and self.parent is not None:
518             parent_info = self.parent.cover_info()
519             parent_info.update(info)
520             info = parent_info
521         return info
522
523     def related_themes(self):
524         return Tag.objects.usage_for_queryset(
525             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
526             counts=True).filter(category='theme')
527
528     def parent_cover_changed(self):
529         """Called when parent book's cover image is changed."""
530         if not self.cover_info(inherit=False):
531             if 'cover' not in app_settings.DONT_BUILD:
532                 self.cover.build_delay()
533                 self.cover_thumb.build_delay()
534             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
535                 if format_ not in app_settings.DONT_BUILD:
536                     getattr(self, '%s_file' % format_).build_delay()
537             for child in self.children.all():
538                 child.parent_cover_changed()
539
    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book.

        Returns a queryset of the books that share this book's
        ``common_slug``, excluding the book itself.
        """
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
543
544     def parents(self):
545         books = []
546         parent = self.parent
547         while parent is not None:
548             books.insert(0, parent)
549             parent = parent.parent
550         return books
551
552     def pretty_title(self, html_links=False):
553         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
554         books = self.parents() + [self]
555         names.extend([(b.title, b.get_absolute_url()) for b in books])
556
557         if html_links:
558             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
559         else:
560             names = [tag[0] for tag in names]
561         return ', '.join(names)
562
563     def publisher(self):
564         publisher = self.extra_info['publisher']
565         if isinstance(publisher, basestring):
566             return publisher
567         elif isinstance(publisher, list):
568             return ', '.join(publisher)
569
570     @classmethod
571     def tagged_top_level(cls, tags):
572         """ Returns top-level books tagged with `tags`.
573
574         It only returns those books which don't have ancestors which are
575         also tagged with those tags.
576
577         """
578         objects = cls.tagged.with_all(tags)
579         return objects.exclude(ancestor__in=objects)
580
581     @classmethod
582     def book_list(cls, book_filter=None):
583         """Generates a hierarchical listing of all books.
584
585         Books are optionally filtered with a test function.
586
587         """
588
589         books_by_parent = {}
590         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
591         if book_filter:
592             books = books.filter(book_filter).distinct()
593
594             book_ids = set(b['pk'] for b in books.values("pk").iterator())
595             for book in books.iterator():
596                 parent = book.parent_id
597                 if parent not in book_ids:
598                     parent = None
599                 books_by_parent.setdefault(parent, []).append(book)
600         else:
601             for book in books.iterator():
602                 books_by_parent.setdefault(book.parent_id, []).append(book)
603
604         orphans = []
605         books_by_author = OrderedDict()
606         for tag in Tag.objects.filter(category='author').iterator():
607             books_by_author[tag] = []
608
609         for book in books_by_parent.get(None, ()):
610             authors = list(book.authors().only('pk'))
611             if authors:
612                 for author in authors:
613                     books_by_author[author].append(book)
614             else:
615                 orphans.append(book)
616
617         return books_by_author, orphans, books_by_parent
618
    # Maps audience codes to (sort rank, Polish label) pairs; several codes
    # share one label. Read by audiences_pl().
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
629
630     def audiences_pl(self):
631         audiences = self.extra_info.get('audiences', [])
632         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
633         return [a[1] for a in audiences]
634
635     def stage_note(self):
636         stage = self.extra_info.get('stage')
637         if stage and stage < '0.4':
638             return (_('This work needs modernisation'),
639                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
640         else:
641             return None, None
642
643     def choose_fragment(self):
644         fragments = self.fragments.order_by()
645         fragments_count = fragments.count()
646         if not fragments_count and self.children.exists():
647             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
648             fragments_count = fragments.count()
649         if fragments_count:
650             return fragments[randint(0, fragments_count - 1)]
651         elif self.parent:
652             return self.parent.choose_fragment()
653         else:
654             return None
655
656     def fragment_data(self):
657         fragment = self.choose_fragment()
658         if fragment:
659             return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
660         else:
661             return None
662
663     def update_popularity(self):
664         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
665         try:
666             pop = self.popularity
667             pop.count = count
668             pop.save()
669         except BookPopularity.DoesNotExist:
670             BookPopularity.objects.create(book=self, count=count)
671
    def ridero_link(self):
        """Return the ridero.eu URL for this book.

        The URL is built from the active language and the slug with '-'
        replaced by '_'.
        """
        return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
674
675
def add_file_fields():
    """Add a ``<format>_file`` EbookField to Book for every entry in ``Book.formats``."""
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_name = '_%s_upload_to' % format_
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_name
        globals()[upload_name] = _upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)

add_file_fields()
694
695
class BookPopularity(models.Model):
    """Popularity counter for a book, written by ``Book.update_popularity``."""
    # Reachable from the book as ``book.popularity``.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)