69d71aef264497b33a6628fe7f7ac877275f2414
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Storage instance shared by the file fields in this module: it is passed as
# `storage=` to Book.cover and to the per-format fields added by
# add_file_fields().
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# Module-level upload_to callables for the cover image and its thumbnail;
# each one fills the template's %s with the slug of the object being stored.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ebook path template.

    Thin wrapper that delegates to `_make_upload_to`.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
47 class Book(models.Model):
48     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Derived in save() from the title (sortify(title), cut to 120 characters).
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    # Derived in save() from the sort key of the first author tag ('' if none).
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by versions of the same work; other_versions() filters on it.
    # NOTE(review): the verbose name repeats 'slug' from the field above -- confirm intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the verbose name repeats 'creation date' although this field
    # uses auto_now (updated on every save) -- looks like a copy/paste label.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it with book_info.to_dict().
    # NOTE(review): the default is a mutable literal -- confirm JSONField copies it per instance.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Format names; add_file_fields() attaches one "<format>_file" field to the
    # class for every entry of `formats`.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent book, and the set of all ancestors. The ancestor relation is
    # rebuilt wholesale by repopulate_ancestors().
    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'
96
97     class AlreadyExists(Exception):
98         pass
99
    class Meta:
        # Default ordering: by the author sort key first, then by the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        # Set explicitly rather than derived from the module path.
        app_label = 'catalogue'
105
106     def __unicode__(self):
107         return self.title
108
109     def get_initial(self):
110         try:
111             return re.search(r'\w', self.title, re.U).group(0)
112         except AttributeError:
113             return ''
114
115     def authors(self):
116         return self.tags.filter(category='author')
117
118     def tag_unicode(self, category):
119         relations = prefetched_relations(self, category)
120         if relations:
121             return ', '.join(rel.tag.name for rel in relations)
122         else:
123             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
124
125     def author_unicode(self):
126         return self.cached_author
127
128     def translator(self):
129         translators = self.extra_info.get('translators')
130         if not translators:
131             return None
132         if len(translators) > 3:
133             translators = translators[:2]
134             others = ' i inni'
135         else:
136             others = ''
137         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
138
139     def save(self, force_insert=False, force_update=False, **kwargs):
140         from sortify import sortify
141
142         self.sort_key = sortify(self.title)[:120]
143         self.title = unicode(self.title)  # ???
144
145         try:
146             author = self.authors().first().sort_key
147         except AttributeError:
148             author = u''
149         self.sort_key_author = author
150
151         self.cached_author = self.tag_unicode('author')
152         self.has_audience = 'audience' in self.extra_info
153
154         ret = super(Book, self).save(force_insert, force_update, **kwargs)
155
156         return ret
157
158     @permalink
159     def get_absolute_url(self):
160         return 'catalogue.views.book_detail', [self.slug]
161
162     @staticmethod
163     @permalink
164     def create_url(slug):
165         return 'catalogue.views.book_detail', [slug]
166
167     def gallery_path(self):
168         return gallery_path(self.slug)
169
170     def gallery_url(self):
171         return gallery_url(self.slug)
172
173     @property
174     def name(self):
175         return self.title
176
177     def language_code(self):
178         return constants.LANGUAGES_3TO2.get(self.language, self.language)
179
180     def language_name(self):
181         return dict(settings.LANGUAGES).get(self.language_code(), "")
182
183     def is_foreign(self):
184         return self.language_code() != settings.LANGUAGE_CODE
185
186     def has_media(self, type_):
187         if type_ in Book.formats:
188             return bool(getattr(self, "%s_file" % type_))
189         else:
190             return self.media.filter(type=type_).exists()
191
192     def get_media(self, type_):
193         if self.has_media(type_):
194             if type_ in Book.formats:
195                 return getattr(self, "%s_file" % type_)
196             else:
197                 return self.media.filter(type=type_)
198         else:
199             return None
200
201     def get_mp3(self):
202         return self.get_media("mp3")
203
204     def get_odt(self):
205         return self.get_media("odt")
206
207     def get_ogg(self):
208         return self.get_media("ogg")
209
210     def get_daisy(self):
211         return self.get_media("daisy")
212
213     def has_description(self):
214         return len(self.description) > 0
215     has_description.short_description = _('description')
216     has_description.boolean = True
217
218     # ugly ugly ugly
219     def has_mp3_file(self):
220         return bool(self.has_media("mp3"))
221     has_mp3_file.short_description = 'MP3'
222     has_mp3_file.boolean = True
223
224     def has_ogg_file(self):
225         return bool(self.has_media("ogg"))
226     has_ogg_file.short_description = 'OGG'
227     has_ogg_file.boolean = True
228
229     def has_daisy_file(self):
230         return bool(self.has_media("daisy"))
231     has_daisy_file.short_description = 'DAISY'
232     has_daisy_file.boolean = True
233
234     def wldocument(self, parse_dublincore=True, inherit=True):
235         from catalogue.import_utils import ORMDocProvider
236         from librarian.parser import WLDocument
237
238         if inherit and self.parent:
239             meta_fallbacks = self.parent.cover_info()
240         else:
241             meta_fallbacks = None
242
243         return WLDocument.from_file(
244             self.xml_file.path,
245             provider=ORMDocProvider(self),
246             parse_dublincore=parse_dublincore,
247             meta_fallbacks=meta_fallbacks)
248
249     @staticmethod
250     def zip_format(format_):
251         def pretty_file_name(book):
252             return "%s/%s.%s" % (
253                 book.extra_info['author'],
254                 book.slug,
255                 format_)
256
257         field_name = "%s_file" % format_
258         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
259         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
260         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
261
262     def zip_audiobooks(self, format_):
263         bm = BookMedia.objects.filter(book=self, type=format_)
264         paths = map(lambda bm: (None, bm.file.path), bm)
265         return create_zip(paths, "%s_%s" % (self.slug, format_))
266
267     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
268         if index is None:
269             from search.index import Index
270             index = Index()
271         try:
272             index.index_book(self, book_info)
273             if index_tags:
274                 index.index_tags()
275             if commit:
276                 index.index.commit()
277         except Exception, e:
278             index.index.rollback()
279             raise e
280
281     def download_pictures(self, remote_gallery_url):
282         gallery_path = self.gallery_path()
283         # delete previous files, so we don't include old files in ebooks
284         if os.path.isdir(gallery_path):
285             for filename in os.listdir(gallery_path):
286                 file_path = os.path.join(gallery_path, filename)
287                 os.unlink(file_path)
288         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
289         if ilustr_elements:
290             makedirs(gallery_path)
291             for ilustr in ilustr_elements:
292                 ilustr_src = ilustr.get('src')
293                 ilustr_path = os.path.join(gallery_path, ilustr_src)
294                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
295
296     @classmethod
297     def from_xml_file(cls, xml_file, **kwargs):
298         from django.core.files import File
299         from librarian import dcparser
300
301         # use librarian to parse meta-data
302         book_info = dcparser.parse(xml_file)
303
304         if not isinstance(xml_file, File):
305             xml_file = File(open(xml_file))
306
307         try:
308             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
309         finally:
310             xml_file.close()
311
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and, when
                present, `parts`.
            overwrite: allow updating a book that already exists.
            dont_build: iterable of build targets to skip, merged with
                `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (unless
                `settings.NO_SEARCH_INDEX` is set).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: when given, pictures are downloaded from it.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside [a-z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the book's tags with the metadata tags plus the shelves
        # ('set' tags) it had before.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed among the parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # (The one exception is the final book.save() below.)

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
411
412     @classmethod
413     @transaction.atomic
414     def repopulate_ancestors(cls):
415         """Fixes the ancestry cache."""
416         # TODO: table names
417         cursor = connection.cursor()
418         if connection.vendor == 'postgres':
419             cursor.execute("TRUNCATE catalogue_book_ancestor")
420             cursor.execute("""
421                 WITH RECURSIVE ancestry AS (
422                     SELECT book.id, book.parent_id
423                     FROM catalogue_book AS book
424                     WHERE book.parent_id IS NOT NULL
425                     UNION
426                     SELECT ancestor.id, book.parent_id
427                     FROM ancestry AS ancestor, catalogue_book AS book
428                     WHERE ancestor.parent_id = book.id
429                         AND book.parent_id IS NOT NULL
430                     )
431                 INSERT INTO catalogue_book_ancestor
432                     (from_book_id, to_book_id)
433                     SELECT id, parent_id
434                     FROM ancestry
435                     ORDER BY id;
436                 """)
437         else:
438             cursor.execute("DELETE FROM catalogue_book_ancestor")
439             for b in cls.objects.exclude(parent=None):
440                 parent = b.parent
441                 while parent is not None:
442                     b.ancestor.add(parent)
443                     parent = parent.parent
444
445     def flush_includes(self, languages=True):
446         if not languages:
447             return
448         if languages is True:
449             languages = [lc for (lc, _ln) in settings.LANGUAGES]
450         flush_ssi_includes([
451             template % (self.pk, lang)
452             for template in [
453                 '/katalog/b/%d/mini.%s.html',
454                 '/katalog/b/%d/mini_nolink.%s.html',
455                 '/katalog/b/%d/short.%s.html',
456                 '/katalog/b/%d/wide.%s.html',
457                 '/api/include/book/%d.%s.json',
458                 '/api/include/book/%d.%s.xml',
459                 ]
460             for lang in languages
461             ])
462
463     def cover_info(self, inherit=True):
464         """Returns a dictionary to serve as fallback for BookInfo.
465
466         For now, the only thing inherited is the cover image.
467         """
468         need = False
469         info = {}
470         for field in ('cover_url', 'cover_by', 'cover_source'):
471             val = self.extra_info.get(field)
472             if val:
473                 info[field] = val
474             else:
475                 need = True
476         if inherit and need and self.parent is not None:
477             parent_info = self.parent.cover_info()
478             parent_info.update(info)
479             info = parent_info
480         return info
481
482     def related_themes(self):
483         return Tag.objects.usage_for_queryset(
484             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
485             counts=True).filter(category='theme')
486
487     def parent_cover_changed(self):
488         """Called when parent book's cover image is changed."""
489         if not self.cover_info(inherit=False):
490             if 'cover' not in app_settings.DONT_BUILD:
491                 self.cover.build_delay()
492                 self.cover_thumb.build_delay()
493             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
494                 if format_ not in app_settings.DONT_BUILD:
495                     getattr(self, '%s_file' % format_).build_delay()
496             for child in self.children.all():
497                 child.parent_cover_changed()
498
499     def other_versions(self):
500         """Find other versions (i.e. in other languages) of the book."""
501         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
502
503     def parents(self):
504         books = []
505         parent = self.parent
506         while parent is not None:
507             books.insert(0, parent)
508             parent = parent.parent
509         return books
510
511     def pretty_title(self, html_links=False):
512         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
513         books = self.parents() + [self]
514         names.extend([(b.title, b.get_absolute_url()) for b in books])
515
516         if html_links:
517             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
518         else:
519             names = [tag[0] for tag in names]
520         return ', '.join(names)
521
522     def publisher(self):
523         publisher = self.extra_info['publisher']
524         if isinstance(publisher, basestring):
525             return publisher
526         elif isinstance(publisher, list):
527             return ', '.join(publisher)
528
529     @classmethod
530     def tagged_top_level(cls, tags):
531         """ Returns top-level books tagged with `tags`.
532
533         It only returns those books which don't have ancestors which are
534         also tagged with those tags.
535
536         """
537         objects = cls.tagged.with_all(tags)
538         return objects.exclude(ancestor__in=objects)
539
540     @classmethod
541     def book_list(cls, book_filter=None):
542         """Generates a hierarchical listing of all books.
543
544         Books are optionally filtered with a test function.
545
546         """
547
548         books_by_parent = {}
549         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
550         if book_filter:
551             books = books.filter(book_filter).distinct()
552
553             book_ids = set(b['pk'] for b in books.values("pk").iterator())
554             for book in books.iterator():
555                 parent = book.parent_id
556                 if parent not in book_ids:
557                     parent = None
558                 books_by_parent.setdefault(parent, []).append(book)
559         else:
560             for book in books.iterator():
561                 books_by_parent.setdefault(book.parent_id, []).append(book)
562
563         orphans = []
564         books_by_author = OrderedDict()
565         for tag in Tag.objects.filter(category='author').iterator():
566             books_by_author[tag] = []
567
568         for book in books_by_parent.get(None, ()):
569             authors = list(book.authors().only('pk'))
570             if authors:
571                 for author in authors:
572                     books_by_author[author].append(book)
573             else:
574                 orphans.append(book)
575
576         return books_by_author, orphans, books_by_parent
577
    # Maps an audience code to a (sort rank, Polish label) pair. audiences_pl()
    # uses it to merge codes that share a label and to order the labels.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
588
589     def audiences_pl(self):
590         audiences = self.extra_info.get('audiences', [])
591         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
592         return [a[1] for a in audiences]
593
594     def stage_note(self):
595         stage = self.extra_info.get('stage')
596         if stage and stage < '0.4':
597             return (_('This work needs modernisation'),
598                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
599         else:
600             return None, None
601
602     def choose_fragment(self):
603         fragments = self.fragments.order_by()
604         fragments_count = fragments.count()
605         if not fragments_count and self.children.exists():
606             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
607             fragments_count = fragments.count()
608         if fragments_count:
609             return fragments[randint(0, fragments_count - 1)]
610         elif self.parent:
611             return self.parent.choose_fragment()
612         else:
613             return None
614
615     def update_popularity(self):
616         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
617         try:
618             pop = self.popularity
619             pop.count = count
620             pop.save()
621         except BookPopularity.DoesNotExist:
622             BookPopularity.objects.create(book=self, count=count)
623
624     def ridero_link(self):
625         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
626
627
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every entry of ``Book.formats``.

    Each field gets its own upload_to callable, which is also published
    in this module's globals under the name ``_<format>_upload_to``.
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default='')
        field.contribute_to_class(Book, "%s_file" % format_)


# Runs at import time so that the fields exist as soon as the module is loaded.
add_file_fields()
646
647
class BookPopularity(models.Model):
    """Popularity counter for a single book, reachable as ``book.popularity``.

    The count is written by ``Book.update_popularity()``.
    """
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)