db optimizations
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue import app_settings
26 from catalogue import tasks
27 from wolnelektury.utils import makedirs
28
# Shared storage backend passed to the cover field and the per-format file
# fields declared in this module.
bofh_storage = BofhFileSystemStorage()
30
31
32 def _make_upload_to(path):
33     def _upload_to(i, n):
34         return path % i.slug
35     return _upload_to
36
37
# Module-level upload_to callables for the cover and cover-thumbnail fields;
# both name the stored file after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
40
41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for an ebook file field.

    ``upload_path`` is a path template with one ``%s`` placeholder for the
    book slug; this simply delegates to ``_make_upload_to``.
    """
    uploader = _make_upload_to(upload_path)
    return uploader
44
45
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one ``<format>_file`` field per entry
    in ``formats`` is attached to the class by ``add_file_fields()`` at the
    bottom of this module.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are derived values, recomputed in save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of the same work; see other_versions().
    # NOTE(review): the label duplicates the one used for `slug` - confirm intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): label says 'creation date' but the field is auto_now
    # (updated on every save) - looks like a copy-paste slip; confirm.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children (set on import).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary stored on import (book_info.to_dict()).
    # NOTE(review): mutable {} default - presumably copied by jsonfield; verify.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Formats that get a `<format>_file` field: the ebook formats plus the
    # HTML rendition and the source XML.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent (reverse accessor: `children`).
    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache (all transitive parents); rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
99
100     def __unicode__(self):
101         return self.title
102
103     def get_initial(self):
104         try:
105             return re.search(r'\w', self.title, re.U).group(0)
106         except AttributeError:
107             return ''
108
109     def authors(self):
110         return self.tags.filter(category='author')
111
112     def author_unicode(self):
113         return ", ".join(self.authors().values_list('name', flat=True))
114
115     def save(self, force_insert=False, force_update=False, **kwargs):
116         from sortify import sortify
117
118         self.sort_key = sortify(self.title)[:120]
119         self.title = unicode(self.title)  # ???
120
121         try:
122             author = self.authors().first().sort_key
123         except AttributeError:
124             author = u''
125         self.sort_key_author = author
126
127         ret = super(Book, self).save(force_insert, force_update, **kwargs)
128
129         return ret
130
131     @permalink
132     def get_absolute_url(self):
133         return 'catalogue.views.book_detail', [self.slug]
134
135     @staticmethod
136     @permalink
137     def create_url(slug):
138         return 'catalogue.views.book_detail', [slug]
139
140     def gallery_path(self):
141         return gallery_path(self.slug)
142
143     def gallery_url(self):
144         return gallery_url(self.slug)
145
146     @property
147     def name(self):
148         return self.title
149
150     def language_code(self):
151         return constants.LANGUAGES_3TO2.get(self.language, self.language)
152
153     def language_name(self):
154         return dict(settings.LANGUAGES).get(self.language_code(), "")
155
156     def has_media(self, type_):
157         if type_ in Book.formats:
158             return bool(getattr(self, "%s_file" % type_))
159         else:
160             return self.media.filter(type=type_).exists()
161
162     def get_media(self, type_):
163         if self.has_media(type_):
164             if type_ in Book.formats:
165                 return getattr(self, "%s_file" % type_)
166             else:
167                 return self.media.filter(type=type_)
168         else:
169             return None
170
171     def get_mp3(self):
172         return self.get_media("mp3")
173
174     def get_odt(self):
175         return self.get_media("odt")
176
177     def get_ogg(self):
178         return self.get_media("ogg")
179
180     def get_daisy(self):
181         return self.get_media("daisy")
182
183     def has_description(self):
184         return len(self.description) > 0
185     has_description.short_description = _('description')
186     has_description.boolean = True
187
188     # ugly ugly ugly
189     def has_mp3_file(self):
190         return bool(self.has_media("mp3"))
191     has_mp3_file.short_description = 'MP3'
192     has_mp3_file.boolean = True
193
194     def has_ogg_file(self):
195         return bool(self.has_media("ogg"))
196     has_ogg_file.short_description = 'OGG'
197     has_ogg_file.boolean = True
198
199     def has_daisy_file(self):
200         return bool(self.has_media("daisy"))
201     has_daisy_file.short_description = 'DAISY'
202     has_daisy_file.boolean = True
203
204     def wldocument(self, parse_dublincore=True, inherit=True):
205         from catalogue.import_utils import ORMDocProvider
206         from librarian.parser import WLDocument
207
208         if inherit and self.parent:
209             meta_fallbacks = self.parent.cover_info()
210         else:
211             meta_fallbacks = None
212
213         return WLDocument.from_file(
214             self.xml_file.path,
215             provider=ORMDocProvider(self),
216             parse_dublincore=parse_dublincore,
217             meta_fallbacks=meta_fallbacks)
218
219     @staticmethod
220     def zip_format(format_):
221         def pretty_file_name(book):
222             return "%s/%s.%s" % (
223                 book.extra_info['author'],
224                 book.slug,
225                 format_)
226
227         field_name = "%s_file" % format_
228         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
229         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
230         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
231
232     def zip_audiobooks(self, format_):
233         bm = BookMedia.objects.filter(book=self, type=format_)
234         paths = map(lambda bm: (None, bm.file.path), bm)
235         return create_zip(paths, "%s_%s" % (self.slug, format_))
236
237     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
238         if index is None:
239             from search.index import Index
240             index = Index()
241         try:
242             index.index_book(self, book_info)
243             if index_tags:
244                 index.index_tags()
245             if commit:
246                 index.index.commit()
247         except Exception, e:
248             index.index.rollback()
249             raise e
250
251     def download_pictures(self, remote_gallery_url):
252         gallery_path = self.gallery_path()
253         # delete previous files, so we don't include old files in ebooks
254         for filename in os.listdir(gallery_path):
255             file_path = os.path.join(gallery_path, filename)
256             os.unlink(file_path)
257         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
258         if ilustr_elements:
259             makedirs(gallery_path)
260             for ilustr in ilustr_elements:
261                 ilustr_src = ilustr.get('src')
262                 ilustr_path = os.path.join(gallery_path, ilustr_src)
263                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
264
265     @classmethod
266     def from_xml_file(cls, xml_file, **kwargs):
267         from django.core.files import File
268         from librarian import dcparser
269
270         # use librarian to parse meta-data
271         book_info = dcparser.parse(xml_file)
272
273         if not isinstance(xml_file, File):
274             xml_file = File(open(xml_file))
275
276         try:
277             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
278         finally:
279             xml_file.close()
280
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url``,
                ``language``, ``title``, ``variant_of``, optional ``parts``
                and ``to_dict()``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of build names to skip, merged with
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if given, pictures are downloaded from it.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than ``a-z0-9-``.
            Book.AlreadyExists: the slug exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            # ('set' tags are not part of the metadata, so they are carried
            # over explicitly when the tag set is replaced below)
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        book.tags = set(meta_tags + book_shelves)

        # For a newly created book old_cover is None, so this is always true.
        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        # Parent links may have changed above, so rebuild the ancestry cache.
        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # (the HTML build is scheduled regardless of dont_build)
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book
379
380     @classmethod
381     @transaction.atomic
382     def repopulate_ancestors(cls):
383         """Fixes the ancestry cache."""
384         # TODO: table names
385         cursor = connection.cursor()
386         if connection.vendor == 'postgres':
387             cursor.execute("TRUNCATE catalogue_book_ancestor")
388             cursor.execute("""
389                 WITH RECURSIVE ancestry AS (
390                     SELECT book.id, book.parent_id
391                     FROM catalogue_book AS book
392                     WHERE book.parent_id IS NOT NULL
393                     UNION
394                     SELECT ancestor.id, book.parent_id
395                     FROM ancestry AS ancestor, catalogue_book AS book
396                     WHERE ancestor.parent_id = book.id
397                         AND book.parent_id IS NOT NULL
398                     )
399                 INSERT INTO catalogue_book_ancestor
400                     (from_book_id, to_book_id)
401                     SELECT id, parent_id
402                     FROM ancestry
403                     ORDER BY id;
404                 """)
405         else:
406             cursor.execute("DELETE FROM catalogue_book_ancestor")
407             for b in cls.objects.exclude(parent=None):
408                 parent = b.parent
409                 while parent is not None:
410                     b.ancestor.add(parent)
411                     parent = parent.parent
412
413     def flush_includes(self, languages=True):
414         if not languages:
415             return
416         if languages is True:
417             languages = [lc for (lc, _ln) in settings.LANGUAGES]
418         flush_ssi_includes([
419             template % (self.pk, lang)
420             for template in [
421                 '/katalog/b/%d/mini.%s.html',
422                 '/katalog/b/%d/mini_nolink.%s.html',
423                 '/katalog/b/%d/short.%s.html',
424                 '/katalog/b/%d/wide.%s.html',
425                 '/api/include/book/%d.%s.json',
426                 '/api/include/book/%d.%s.xml',
427                 ]
428             for lang in languages
429             ])
430
431     def cover_info(self, inherit=True):
432         """Returns a dictionary to serve as fallback for BookInfo.
433
434         For now, the only thing inherited is the cover image.
435         """
436         need = False
437         info = {}
438         for field in ('cover_url', 'cover_by', 'cover_source'):
439             val = self.extra_info.get(field)
440             if val:
441                 info[field] = val
442             else:
443                 need = True
444         if inherit and need and self.parent is not None:
445             parent_info = self.parent.cover_info()
446             parent_info.update(info)
447             info = parent_info
448         return info
449
450     def related_themes(self):
451         return Tag.objects.usage_for_queryset(
452             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
453             counts=True).filter(category='theme')
454
455     def parent_cover_changed(self):
456         """Called when parent book's cover image is changed."""
457         if not self.cover_info(inherit=False):
458             if 'cover' not in app_settings.DONT_BUILD:
459                 self.cover.build_delay()
460                 self.cover_thumb.build_delay()
461             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
462                 if format_ not in app_settings.DONT_BUILD:
463                     getattr(self, '%s_file' % format_).build_delay()
464             for child in self.children.all():
465                 child.parent_cover_changed()
466
467     def other_versions(self):
468         """Find other versions (i.e. in other languages) of the book."""
469         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
470
471     def parents(self):
472         books = []
473         parent = self.parent
474         while parent is not None:
475             books.insert(0, parent)
476             parent = parent.parent
477         return books
478
479     def pretty_title(self, html_links=False):
480         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
481         books = self.parents() + [self]
482         names.extend([(b.title, b.get_absolute_url()) for b in books])
483
484         if html_links:
485             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
486         else:
487             names = [tag[0] for tag in names]
488         return ', '.join(names)
489
490     @classmethod
491     def tagged_top_level(cls, tags):
492         """ Returns top-level books tagged with `tags`.
493
494         It only returns those books which don't have ancestors which are
495         also tagged with those tags.
496
497         """
498         objects = cls.tagged.with_all(tags)
499         return objects.exclude(ancestor__in=objects)
500
501     @classmethod
502     def book_list(cls, book_filter=None):
503         """Generates a hierarchical listing of all books.
504
505         Books are optionally filtered with a test function.
506
507         """
508
509         books_by_parent = {}
510         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
511         if book_filter:
512             books = books.filter(book_filter).distinct()
513
514             book_ids = set(b['pk'] for b in books.values("pk").iterator())
515             for book in books.iterator():
516                 parent = book.parent_id
517                 if parent not in book_ids:
518                     parent = None
519                 books_by_parent.setdefault(parent, []).append(book)
520         else:
521             for book in books.iterator():
522                 books_by_parent.setdefault(book.parent_id, []).append(book)
523
524         orphans = []
525         books_by_author = OrderedDict()
526         for tag in Tag.objects.filter(category='author').iterator():
527             books_by_author[tag] = []
528
529         for book in books_by_parent.get(None, ()):
530             authors = list(book.authors().only('pk'))
531             if authors:
532                 for author in authors:
533                     books_by_author[author].append(book)
534             else:
535                 orphans.append(book)
536
537         return books_by_author, orphans, books_by_parent
538
    # Maps audience codes found in extra_info['audiences'] to a
    # (sort rank, Polish label) pair; several codes share one label.
    # Used by audiences_pl() below.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
548
549     def audiences_pl(self):
550         audiences = self.extra_info.get('audiences', [])
551         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
552         return [a[1] for a in audiences]
553
554     def stage_note(self):
555         stage = self.extra_info.get('stage')
556         if stage and stage < '0.4':
557             return (_('This work needs modernisation'),
558                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
559         else:
560             return None, None
561
562     def choose_fragment(self):
563         fragments = self.fragments.order_by()
564         fragments_count = fragments.count()
565         if not fragments_count and self.children.exists():
566             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
567             fragments_count = fragments.count()
568         if fragments_count:
569             return fragments[randint(0, fragments_count - 1)]
570         elif self.parent:
571             return self.parent.choose_fragment()
572         else:
573             return None
574
575
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to ``Book`` for every format in ``Book.formats``.

    Each field stores its file as ``book/<format>/<slug>.<format>``.
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        uploader = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        uploader.__name__ = '_%s_upload_to' % format_
        globals()[uploader.__name__] = uploader

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=uploader,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)
592
# Run at import time so the per-format file fields exist on Book as soon as
# this module is loaded.
add_file_fields()