Merge branch 'dev'
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue import app_settings
26 from catalogue import tasks
27 from wolnelektury.utils import makedirs
28
# Storage instance shared by the Book.cover field and the per-format
# `<format>_file` fields attached in add_file_fields().
bofh_storage = BofhFileSystemStorage()
30
31
32 def _make_upload_to(path):
33     def _upload_to(i, n):
34         return path % i.slug
35     return _upload_to
36
37
# upload_to callables for the cover image and its thumbnail; both produce a
# `.jpg` path named after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
40
41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the `upload_path` template.

    Thin alias for `_make_upload_to`; used when attaching the per-format
    file fields to Book.
    """
    return _make_upload_to(upload_path)
44
45
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, a ``<format>_file`` field is attached
    for every entry of `formats` by `add_file_fields()` at the bottom of
    this module.
    """
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are derived values, recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by variants of the same work; set in from_text_and_meta() to
    # the `variant_of` slug, or to the book's own slug when there is none.
    # NOTE(review): the verbose name duplicates the one of `slug` — confirm intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): auto_now=True makes this a last-modification timestamp, yet
    # the label repeats 'creation date' — looks like a copy-paste; confirm.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's parts (set in from_text_and_meta()).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary stored from `book_info.to_dict()` on import.
    # NOTE(review): `default={}` is a mutable literal; check how jsonfield
    # handles defaults before relying on it not being shared.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Plain class attributes (not database fields): names of the formats that
    # get a `<format>_file` field.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent in the book hierarchy; the reverse accessor is `children`.
    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors, rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
99
    def __unicode__(self):
        """Return the book's title as its text representation."""
        return self.title
102
103     def get_initial(self):
104         try:
105             return re.search(r'\w', self.title, re.U).group(0)
106         except AttributeError:
107             return ''
108
    def authors(self):
        """Return the book's tags of category 'author'."""
        return self.tags.filter(category='author')
111
112     def author_unicode(self):
113         return ", ".join(self.authors().values_list('name', flat=True))
114
115     def save(self, force_insert=False, force_update=False, **kwargs):
116         from sortify import sortify
117
118         self.sort_key = sortify(self.title)[:120]
119         self.title = unicode(self.title)  # ???
120
121         try:
122             author = self.authors().first().sort_key
123         except AttributeError:
124             author = u''
125         self.sort_key_author = author
126
127         ret = super(Book, self).save(force_insert, force_update, **kwargs)
128
129         return ret
130
    @permalink
    def get_absolute_url(self):
        """Return the URL of this book's detail view.

        The (view name, args) pair returned here is turned into a URL by
        the `permalink` decorator.
        """
        return 'catalogue.views.book_detail', [self.slug]

    @staticmethod
    @permalink
    def create_url(slug):
        """Return the detail-view URL for `slug` without needing a Book instance."""
        return 'catalogue.views.book_detail', [slug]
139
    def gallery_path(self):
        """Return the gallery path for this book's slug (see catalogue.utils.gallery_path)."""
        return gallery_path(self.slug)

    def gallery_url(self):
        """Return the gallery URL for this book's slug (see catalogue.utils.gallery_url)."""
        return gallery_url(self.slug)

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map the stored language code through `constants.LANGUAGES_3TO2`.

        Codes missing from the mapping are returned unchanged.
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the name listed in `settings.LANGUAGES` for this book's language, or ""."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def is_foreign(self):
        """Tell whether the book's language differs from `settings.LANGUAGE_CODE`."""
        return self.language_code() != settings.LANGUAGE_CODE
158
159     def has_media(self, type_):
160         if type_ in Book.formats:
161             return bool(getattr(self, "%s_file" % type_))
162         else:
163             return self.media.filter(type=type_).exists()
164
165     def get_media(self, type_):
166         if self.has_media(type_):
167             if type_ in Book.formats:
168                 return getattr(self, "%s_file" % type_)
169             else:
170                 return self.media.filter(type=type_)
171         else:
172             return None
173
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")

    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")

    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")

    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
185
186     def has_description(self):
187         return len(self.description) > 0
188     has_description.short_description = _('description')
189     has_description.boolean = True
190
    # ugly ugly ugly
    # One near-identical method per audio type, each carrying its own
    # `short_description` / `boolean` display attributes.
    def has_mp3_file(self):
        """Tell whether the book has any "mp3" media."""
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        """Tell whether the book has any "ogg" media."""
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        """Tell whether the book has any "daisy" media."""
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
206
207     def wldocument(self, parse_dublincore=True, inherit=True):
208         from catalogue.import_utils import ORMDocProvider
209         from librarian.parser import WLDocument
210
211         if inherit and self.parent:
212             meta_fallbacks = self.parent.cover_info()
213         else:
214             meta_fallbacks = None
215
216         return WLDocument.from_file(
217             self.xml_file.path,
218             provider=ORMDocProvider(self),
219             parse_dublincore=parse_dublincore,
220             meta_fallbacks=meta_fallbacks)
221
222     @staticmethod
223     def zip_format(format_):
224         def pretty_file_name(book):
225             return "%s/%s.%s" % (
226                 book.extra_info['author'],
227                 book.slug,
228                 format_)
229
230         field_name = "%s_file" % format_
231         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
232         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
233         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
234
235     def zip_audiobooks(self, format_):
236         bm = BookMedia.objects.filter(book=self, type=format_)
237         paths = map(lambda bm: (None, bm.file.path), bm)
238         return create_zip(paths, "%s_%s" % (self.slug, format_))
239
240     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
241         if index is None:
242             from search.index import Index
243             index = Index()
244         try:
245             index.index_book(self, book_info)
246             if index_tags:
247                 index.index_tags()
248             if commit:
249                 index.index.commit()
250         except Exception, e:
251             index.index.rollback()
252             raise e
253
254     def download_pictures(self, remote_gallery_url):
255         gallery_path = self.gallery_path()
256         # delete previous files, so we don't include old files in ebooks
257         if os.path.isdir(gallery_path):
258             for filename in os.listdir(gallery_path):
259                 file_path = os.path.join(gallery_path, filename)
260                 os.unlink(file_path)
261         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
262         if ilustr_elements:
263             makedirs(gallery_path)
264             for ilustr in ilustr_elements:
265                 ilustr_src = ilustr.get('src')
266                 ilustr_path = os.path.join(gallery_path, ilustr_src)
267                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
268
269     @classmethod
270     def from_xml_file(cls, xml_file, **kwargs):
271         from django.core.files import File
272         from librarian import dcparser
273
274         # use librarian to parse meta-data
275         book_info = dcparser.parse(xml_file)
276
277         if not isinstance(xml_file, File):
278             xml_file = File(open(xml_file))
279
280         try:
281             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
282         finally:
283             xml_file.close()
284
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a Book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``.
        :param book_info: parsed metadata; read here for ``url.slug``,
            ``language``, ``title``, ``variant_of``, optional ``parts`` and
            ``to_dict()``.
        :param overwrite: allow updating a book that already exists.
        :param dont_build: iterable of build names to skip, merged with
            ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (unless
            ``settings.NO_SEARCH_INDEX`` is set).
        :param search_index_tags: forwarded to the indexing task as ``index_tags``.
        :param remote_gallery_url: when given, illustrations are downloaded from it.
        :returns: the saved Book.
        :raises Book.DoesNotExist: when a part listed in ``book_info.parts`` is missing.
        :raises ValueError: when the slug has characters outside ``[a-z0-9-]``.
        :raises Book.AlreadyExists: when the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            # ('set' tags are remembered so they survive the tag reset below)
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tag set: metadata tags plus the shelves saved above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Books whose inherited cover may have changed; notified near the end.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # (HTML is always built; `dont_build` is not consulted for it.)
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book
383
384     @classmethod
385     @transaction.atomic
386     def repopulate_ancestors(cls):
387         """Fixes the ancestry cache."""
388         # TODO: table names
389         cursor = connection.cursor()
390         if connection.vendor == 'postgres':
391             cursor.execute("TRUNCATE catalogue_book_ancestor")
392             cursor.execute("""
393                 WITH RECURSIVE ancestry AS (
394                     SELECT book.id, book.parent_id
395                     FROM catalogue_book AS book
396                     WHERE book.parent_id IS NOT NULL
397                     UNION
398                     SELECT ancestor.id, book.parent_id
399                     FROM ancestry AS ancestor, catalogue_book AS book
400                     WHERE ancestor.parent_id = book.id
401                         AND book.parent_id IS NOT NULL
402                     )
403                 INSERT INTO catalogue_book_ancestor
404                     (from_book_id, to_book_id)
405                     SELECT id, parent_id
406                     FROM ancestry
407                     ORDER BY id;
408                 """)
409         else:
410             cursor.execute("DELETE FROM catalogue_book_ancestor")
411             for b in cls.objects.exclude(parent=None):
412                 parent = b.parent
413                 while parent is not None:
414                     b.ancestor.add(parent)
415                     parent = parent.parent
416
417     def flush_includes(self, languages=True):
418         if not languages:
419             return
420         if languages is True:
421             languages = [lc for (lc, _ln) in settings.LANGUAGES]
422         flush_ssi_includes([
423             template % (self.pk, lang)
424             for template in [
425                 '/katalog/b/%d/mini.%s.html',
426                 '/katalog/b/%d/mini_nolink.%s.html',
427                 '/katalog/b/%d/short.%s.html',
428                 '/katalog/b/%d/wide.%s.html',
429                 '/api/include/book/%d.%s.json',
430                 '/api/include/book/%d.%s.xml',
431                 ]
432             for lang in languages
433             ])
434
435     def cover_info(self, inherit=True):
436         """Returns a dictionary to serve as fallback for BookInfo.
437
438         For now, the only thing inherited is the cover image.
439         """
440         need = False
441         info = {}
442         for field in ('cover_url', 'cover_by', 'cover_source'):
443             val = self.extra_info.get(field)
444             if val:
445                 info[field] = val
446             else:
447                 need = True
448         if inherit and need and self.parent is not None:
449             parent_info = self.parent.cover_info()
450             parent_info.update(info)
451             info = parent_info
452         return info
453
454     def related_themes(self):
455         return Tag.objects.usage_for_queryset(
456             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
457             counts=True).filter(category='theme')
458
459     def parent_cover_changed(self):
460         """Called when parent book's cover image is changed."""
461         if not self.cover_info(inherit=False):
462             if 'cover' not in app_settings.DONT_BUILD:
463                 self.cover.build_delay()
464                 self.cover_thumb.build_delay()
465             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
466                 if format_ not in app_settings.DONT_BUILD:
467                     getattr(self, '%s_file' % format_).build_delay()
468             for child in self.children.all():
469                 child.parent_cover_changed()
470
    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book.

        These are all other books sharing this book's `common_slug`.
        """
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
474
475     def parents(self):
476         books = []
477         parent = self.parent
478         while parent is not None:
479             books.insert(0, parent)
480             parent = parent.parent
481         return books
482
483     def pretty_title(self, html_links=False):
484         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
485         books = self.parents() + [self]
486         names.extend([(b.title, b.get_absolute_url()) for b in books])
487
488         if html_links:
489             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
490         else:
491             names = [tag[0] for tag in names]
492         return ', '.join(names)
493
    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        # Books carrying all of `tags` ...
        objects = cls.tagged.with_all(tags)
        # ... minus those having an ancestor in that same set.
        return objects.exclude(ancestor__in=objects)
504
505     @classmethod
506     def book_list(cls, book_filter=None):
507         """Generates a hierarchical listing of all books.
508
509         Books are optionally filtered with a test function.
510
511         """
512
513         books_by_parent = {}
514         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
515         if book_filter:
516             books = books.filter(book_filter).distinct()
517
518             book_ids = set(b['pk'] for b in books.values("pk").iterator())
519             for book in books.iterator():
520                 parent = book.parent_id
521                 if parent not in book_ids:
522                     parent = None
523                 books_by_parent.setdefault(parent, []).append(book)
524         else:
525             for book in books.iterator():
526                 books_by_parent.setdefault(book.parent_id, []).append(book)
527
528         orphans = []
529         books_by_author = OrderedDict()
530         for tag in Tag.objects.filter(category='author').iterator():
531             books_by_author[tag] = []
532
533         for book in books_by_parent.get(None, ()):
534             authors = list(book.authors().only('pk'))
535             if authors:
536                 for author in authors:
537                     books_by_author[author].append(book)
538             else:
539                 orphans.append(book)
540
541         return books_by_author, orphans, books_by_parent
542
    # Maps an audience code to (sort rank, Polish label); several codes share
    # a label. Used by audiences_pl().
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
552
553     def audiences_pl(self):
554         audiences = self.extra_info.get('audiences', [])
555         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
556         return [a[1] for a in audiences]
557
558     def stage_note(self):
559         stage = self.extra_info.get('stage')
560         if stage and stage < '0.4':
561             return (_('This work needs modernisation'),
562                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
563         else:
564             return None, None
565
566     def choose_fragment(self):
567         fragments = self.fragments.order_by()
568         fragments_count = fragments.count()
569         if not fragments_count and self.children.exists():
570             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
571             fragments_count = fragments.count()
572         if fragments_count:
573             return fragments[randint(0, fragments_count - 1)]
574         elif self.parent:
575             return self.parent.choose_fragment()
576         else:
577             return None
578
579     def update_popularity(self):
580         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
581         try:
582             pop = self.popularity
583             pop.count = count
584             pop.save()
585         except BookPopularity.DoesNotExist:
586             BookPopularity.objects.create(book=self, count=count)
587
588
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every entry of `Book.formats`.

    Each field gets its own upload_to callable, which is also published as
    a module-level name ``_<format>_upload_to``.
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, "%s_file" % format_)


add_file_fields()
607
608
class BookPopularity(models.Model):
    """Popularity counter of a book, reachable as ``book.popularity``.

    Written by `Book.update_popularity()`.
    """
    book = models.OneToOneField(Book, related_name='popularity')
    # Number of distinct users having the book on a shelf.
    count = models.IntegerField(default=0)