db50f3e3c3dcac732bb2664b7625d54fca71ea02
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Single storage instance passed as `storage=` to the cover field and to the
# per-format file fields that add_file_fields() attaches to Book.
bofh_storage = BofhFileSystemStorage()
31
32
def _make_upload_to(path):
    """Build an ``upload_to`` callable from a one-placeholder path template.

    The returned callable takes ``(instance, filename)``; the filename is
    ignored and the result is ``path`` formatted with ``instance.slug``.
    """
    def _upload_to(instance, filename):
        # Only the slug decides the destination; the uploaded name is unused.
        slug = instance.slug
        return path % slug

    return _upload_to
37
38
# Upload-path callables for the cover image and its thumbnail; each formats
# the book's slug into the given template.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ebook path template."""
    uploader = _make_upload_to(upload_path)
    return uploader
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # NOTE(review): verbose name duplicates the one of `slug` -- confirm intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): labelled 'creation date' although this is the auto_now
    # (last change) timestamp -- left as is, since changing it alters the
    # translation msgid and the field's migration state.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every format in this list gets a "<format>_file" field (see add_file_fields()).
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors; rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with `instance` set to the book.
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised when importing a book whose slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'

    def __unicode__(self):
        """Return the book's title."""
        return self.title

    def get_initial(self):
        """Return the first word character of the title, or '' if it has none."""
        match = re.search(r'\w', self.title, re.U)
        if match is None:
            return ''
        return match.group(0)

    def authors(self):
        """Return the queryset of this book's tags in the 'author' category."""
        return self.tags.filter(category='author')

    def tag_unicode(self, category):
        """Return the names of this book's tags of `category`, comma-separated.

        Uses prefetched relations when prefetched_relations() yields any,
        otherwise queries the tags.
        """
        relations = prefetched_relations(self, category)
        if relations:
            return ', '.join(rel.tag.name for rel in relations)
        else:
            return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))

    def author_unicode(self):
        """Return the comma-separated names of the book's authors."""
        return self.tag_unicode('author')

    def save(self, force_insert=False, force_update=False, **kwargs):
        """Recompute both sort keys, then save the model."""
        from sortify import sortify

        self.sort_key = sortify(self.title)[:120]
        self.title = unicode(self.title)  # ???

        try:
            author = self.authors().first().sort_key
        except AttributeError:
            # first() returned None: the book has no author tag.
            author = u''
        self.sort_key_author = author

        return super(Book, self).save(force_insert, force_update, **kwargs)

    @permalink
    def get_absolute_url(self):
        """Return the URL of this book's detail view."""
        return 'catalogue.views.book_detail', [self.slug]

    @staticmethod
    @permalink
    def create_url(slug):
        """Return the detail-view URL for an arbitrary book slug."""
        return 'catalogue.views.book_detail', [slug]

    def gallery_path(self):
        """Return the gallery path for this book's slug."""
        return gallery_path(self.slug)

    def gallery_url(self):
        """Return the gallery URL for this book's slug."""
        return gallery_url(self.slug)

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map the stored language through LANGUAGES_3TO2, falling back to it unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the settings.LANGUAGES name for the book's language, or ""."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def is_foreign(self):
        """Tell whether the book's language differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE

    def has_media(self, type_):
        """Tell whether the book has a file or media of the given type.

        Types listed in Book.formats are checked on the "<type>_file" field;
        any other type is looked up in the related `media`.
        """
        if type_ in Book.formats:
            return bool(getattr(self, "%s_file" % type_))
        else:
            return self.media.filter(type=type_).exists()

    def get_media(self, type_):
        """Return the file field or media queryset for `type_`, or None if absent."""
        if self.has_media(type_):
            if type_ in Book.formats:
                return getattr(self, "%s_file" % type_)
            else:
                return self.media.filter(type=type_)
        else:
            return None

    def get_mp3(self):
        """Shortcut for get_media("mp3")."""
        return self.get_media("mp3")

    def get_odt(self):
        """Shortcut for get_media("odt")."""
        return self.get_media("odt")

    def get_ogg(self):
        """Shortcut for get_media("ogg")."""
        return self.get_media("ogg")

    def get_daisy(self):
        """Shortcut for get_media("daisy")."""
        return self.get_media("daisy")

    def has_description(self):
        """Tell whether the description is non-empty."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    def has_mp3_file(self):
        """Tell whether the book has MP3 media."""
        return self.has_media("mp3")
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        """Tell whether the book has OGG media."""
        return self.has_media("ogg")
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        """Tell whether the book has DAISY media."""
        return self.has_media("daisy")
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def wldocument(self, parse_dublincore=True, inherit=True):
        """Load the book's XML file as a librarian WLDocument.

        When `inherit` is true and the book has a parent, the parent's
        cover_info() is passed as metadata fallbacks.
        """
        from catalogue.import_utils import ORMDocProvider
        from librarian.parser import WLDocument

        if inherit and self.parent:
            meta_fallbacks = self.parent.cover_info()
        else:
            meta_fallbacks = None

        return WLDocument.from_file(
            self.xml_file.path,
            provider=ORMDocProvider(self),
            parse_dublincore=parse_dublincore,
            meta_fallbacks=meta_fallbacks)

    @staticmethod
    def zip_format(format_):
        """Zip the `format_` files of all parentless books that have one.

        Entries are named "<author>/<slug>.<format>"; the archive name comes
        from app_settings.FORMAT_ZIPS.
        """
        def pretty_file_name(book):
            return "%s/%s.%s" % (
                book.extra_info['author'],
                book.slug,
                format_)

        field_name = "%s_file" % format_
        books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
        paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
        return create_zip(paths, app_settings.FORMAT_ZIPS[format_])

    def zip_audiobooks(self, format_):
        """Zip all of this book's media files of type `format_`."""
        media = BookMedia.objects.filter(book=self, type=format_)
        paths = [(None, item.file.path) for item in media]
        return create_zip(paths, "%s_%s" % (self.slug, format_))

    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
        """Index this book (and optionally tags) in the search index.

        A fresh search.index.Index is created when `index` is None.  On any
        error the index is rolled back and the exception is re-raised.
        """
        if index is None:
            from search.index import Index
            index = Index()
        try:
            index.index_book(self, book_info)
            if index_tags:
                index.index_tags()
            if commit:
                index.index.commit()
        except Exception:
            index.index.rollback()
            # Bare raise keeps the original traceback.
            raise

    def download_pictures(self, remote_gallery_url):
        """Replace the local gallery with illustrations fetched from a remote one.

        Every `ilustr` element of the book's document is downloaded from
        "<remote_gallery_url>/<src>" into the book's gallery directory.
        """
        gallery_path = self.gallery_path()
        # delete previous files, so we don't include old files in ebooks
        if os.path.isdir(gallery_path):
            for filename in os.listdir(gallery_path):
                file_path = os.path.join(gallery_path, filename)
                os.unlink(file_path)
        ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
        if ilustr_elements:
            makedirs(gallery_path)
            for ilustr in ilustr_elements:
                ilustr_src = ilustr.get('src')
                # NOTE(review): `src` is joined into a local path unchecked;
                # presumably the XML comes from a trusted source -- confirm.
                ilustr_path = os.path.join(gallery_path, ilustr_src)
                urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)

    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Import a book from an XML file given as a path or a Django File.

        Metadata is parsed with librarian's dcparser; the rest is delegated
        to from_text_and_meta().  The file is closed afterwards in either case.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Raises Book.DoesNotExist if a listed part is missing, ValueError for
        an invalid slug, and Book.AlreadyExists if the book exists and
        `overwrite` is false.  Formats named in `dont_build` or
        app_settings.DONT_BUILD are not rebuilt.  Returns the saved book.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tag set with the metadata tags plus the kept shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book

    @classmethod
    @transaction.atomic
    def repopulate_ancestors(cls):
        """Fixes the ancestry cache."""
        # TODO: table names
        cursor = connection.cursor()
        # Django's PostgreSQL backend reports its vendor as 'postgresql'.
        if connection.vendor == 'postgresql':
            cursor.execute("TRUNCATE catalogue_book_ancestor")
            cursor.execute("""
                WITH RECURSIVE ancestry AS (
                    SELECT book.id, book.parent_id
                    FROM catalogue_book AS book
                    WHERE book.parent_id IS NOT NULL
                    UNION
                    SELECT ancestor.id, book.parent_id
                    FROM ancestry AS ancestor, catalogue_book AS book
                    WHERE ancestor.parent_id = book.id
                        AND book.parent_id IS NOT NULL
                    )
                INSERT INTO catalogue_book_ancestor
                    (from_book_id, to_book_id)
                    SELECT id, parent_id
                    FROM ancestry
                    ORDER BY id;
                """)
        else:
            # Generic fallback: walk every book's parent chain through the ORM.
            cursor.execute("DELETE FROM catalogue_book_ancestor")
            for b in cls.objects.exclude(parent=None):
                parent = b.parent
                while parent is not None:
                    b.ancestor.add(parent)
                    parent = parent.parent

    def flush_includes(self, languages=True):
        """Flush this book's SSI includes for the given language codes.

        `languages=True` means every language in settings.LANGUAGES; a falsy
        value makes this a no-op.
        """
        if not languages:
            return
        if languages is True:
            languages = [lc for (lc, _ln) in settings.LANGUAGES]
        flush_ssi_includes([
            template % (self.pk, lang)
            for template in [
                '/katalog/b/%d/mini.%s.html',
                '/katalog/b/%d/mini_nolink.%s.html',
                '/katalog/b/%d/short.%s.html',
                '/katalog/b/%d/wide.%s.html',
                '/api/include/book/%d.%s.json',
                '/api/include/book/%d.%s.xml',
                ]
            for lang in languages
            ])

    def cover_info(self, inherit=True):
        """Returns a dictionary to serve as fallback for BookInfo.

        For now, the only thing inherited is the cover image.
        """
        need = False
        info = {}
        for field in ('cover_url', 'cover_by', 'cover_source'):
            val = self.extra_info.get(field)
            if val:
                info[field] = val
            else:
                need = True
        if inherit and need and self.parent is not None:
            # The book's own values win over anything taken from the parent.
            parent_info = self.parent.cover_info()
            parent_info.update(info)
            info = parent_info
        return info

    def related_themes(self):
        """Return 'theme' tags used by fragments of this book or its descendants."""
        return Tag.objects.usage_for_queryset(
            Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
            counts=True).filter(category='theme')

    def parent_cover_changed(self):
        """Called when parent book's cover image is changed."""
        # Only books without any cover info of their own are affected.
        if not self.cover_info(inherit=False):
            if 'cover' not in app_settings.DONT_BUILD:
                self.cover.build_delay()
                self.cover_thumb.build_delay()
            for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                if format_ not in app_settings.DONT_BUILD:
                    getattr(self, '%s_file' % format_).build_delay()
            for child in self.children.all():
                child.parent_cover_changed()

    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book."""
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)

    def parents(self):
        """Return the chain of ancestors, outermost first."""
        books = []
        parent = self.parent
        while parent is not None:
            books.insert(0, parent)
            parent = parent.parent
        return books

    def pretty_title(self, html_links=False):
        """Return "authors, ancestors, title" as one comma-separated string.

        With `html_links`, every name is wrapped in a link to its own URL.
        """
        names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
        books = self.parents() + [self]
        names.extend([(b.title, b.get_absolute_url()) for b in books])

        if html_links:
            names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
        else:
            names = [tag[0] for tag in names]
        return ', '.join(names)

    def publisher(self):
        """Return the publisher from extra_info as a string.

        A list is joined with ", ".  Returns None when the publisher is
        missing or is neither a string nor a list.
        """
        publisher = self.extra_info.get('publisher')
        if isinstance(publisher, basestring):
            return publisher
        elif isinstance(publisher, list):
            return ', '.join(publisher)
        return None

    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        objects = cls.tagged.with_all(tags)
        return objects.exclude(ancestor__in=objects)

    @classmethod
    def book_list(cls, book_filter=None):
        """Generates a hierarchical listing of all books.

        Books are optionally filtered with a test function.

        Returns a (books_by_author, orphans, books_by_parent) tuple.
        """

        books_by_parent = {}
        books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
        if book_filter:
            books = books.filter(book_filter).distinct()

            book_ids = set(b['pk'] for b in books.values("pk").iterator())
            for book in books.iterator():
                parent = book.parent_id
                # A book whose parent was filtered out is listed as top-level.
                if parent not in book_ids:
                    parent = None
                books_by_parent.setdefault(parent, []).append(book)
        else:
            for book in books.iterator():
                books_by_parent.setdefault(book.parent_id, []).append(book)

        orphans = []
        books_by_author = OrderedDict()
        for tag in Tag.objects.filter(category='author').iterator():
            books_by_author[tag] = []

        for book in books_by_parent.get(None, ()):
            authors = list(book.authors().only('pk'))
            if authors:
                for author in authors:
                    books_by_author[author].append(book)
            else:
                orphans.append(book)

        return books_by_author, orphans, books_by_parent

    # Audience code -> (sort rank, Polish display name).
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }

    def audiences_pl(self):
        """Return de-duplicated Polish audience names, sorted by rank.

        Codes missing from _audiences_pl are kept verbatim and sorted last.
        """
        audiences = self.extra_info.get('audiences', [])
        audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
        return [a[1] for a in audiences]

    def stage_note(self):
        """Return a (message, URL) pair for early-stage works, else (None, None)."""
        stage = self.extra_info.get('stage')
        # NOTE(review): this compares strings, so e.g. '0.10' < '0.4' is true;
        # confirm stages never need numeric comparison.
        if stage and stage < '0.4':
            return (_('This work needs modernisation'),
                    reverse('infopage', args=['wymagajace-uwspolczesnienia']))
        else:
            return None, None

    def choose_fragment(self):
        """Pick a random fragment for this book.

        Falls back to the descendants' fragments, then to the parent's
        choice; returns None if nothing is found.
        """
        fragments = self.fragments.order_by()
        fragments_count = fragments.count()
        if not fragments_count and self.children.exists():
            fragments = Fragment.objects.filter(book__ancestor=self).order_by()
            fragments_count = fragments.count()
        if fragments_count:
            return fragments[randint(0, fragments_count - 1)]
        elif self.parent:
            return self.parent.choose_fragment()
        else:
            return None

    def update_popularity(self):
        """Store the number of distinct users of the book's 'set' tags as its popularity."""
        count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
        try:
            pop = self.popularity
            pop.count = count
            pop.save()
        except BookPopularity.DoesNotExist:
            BookPopularity.objects.create(book=self, count=count)

    def ridero_link(self):
        """Return the ridero.eu URL for this book in the active language."""
        return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
607
608
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for each of Book.formats.

    Each field's upload callable is also published as a module-level name
    ``_<format>_upload_to``.
    """
    for fmt in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        uploader = _ebook_upload_to('book/%s/%%s.%s' % (fmt, fmt))
        uploader.__name__ = '_%s_upload_to' % fmt
        globals()[uploader.__name__] = uploader

        file_field = EbookField(
            fmt, _("%s file" % fmt.upper()),
            upload_to=uploader,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, "%s_file" % fmt)
625
# Runs at import time so the per-format file fields are attached to Book.
add_file_fields()
627
628
class BookPopularity(models.Model):
    """Popularity counter stored per book, reachable as ``book.popularity``."""
    book = models.OneToOneField(
        Book,
        related_name='popularity',
    )
    count = models.IntegerField(
        default=0,
    )