optimize db usage in tagged object list
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance passed as ``storage=`` to the cover field and to the
# per-format ebook file fields created in this module.
bofh_storage = BofhFileSystemStorage()
31
32
def _make_upload_to(path):
    """Build an ``upload_to`` callable from a ``%s`` path template.

    The returned function takes ``(instance, filename)``, ignores the file
    name and returns ``path`` with the instance's slug interpolated.
    """
    def _upload_to(instance, filename):
        target = path % instance.slug
        return target
    return _upload_to
37
38
# Named module-level upload_to callables for the cover and cover thumbnail
# fields of Book; both place the file under a path built from the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for ebook files.

    ``upload_path`` is a ``%s`` template that gets filled with the book's
    slug; this simply delegates to ``_make_upload_to``.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one ``<format>_file`` field per entry
    of ``Book.formats`` is attached to the class by ``add_file_fields()``
    at the bottom of this module.
    """
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed in save() and are not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of a work; see other_versions().
    # NOTE(review): the label duplicates the one used for `slug` -- confirm this is intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): label reads 'creation date' although this is the auto_now
    # (last change) timestamp -- looks like a copy-paste; confirm before relabelling.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children; set from the order of parts in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary stored from book_info.to_dict() on import.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # `formats` lists every type that has its own `<format>_file` field.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors (not only the direct parent); rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised on import when the slug is already taken and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
100
101     def __unicode__(self):
102         return self.title
103
104     def get_initial(self):
105         try:
106             return re.search(r'\w', self.title, re.U).group(0)
107         except AttributeError:
108             return ''
109
110     def authors(self):
111         return self.tags.filter(category='author')
112
113     def tag_unicode(self, category):
114         relations = prefetched_relations(self, category)
115         if relations:
116             return ', '.join(rel.tag.name for rel in relations)
117         else:
118             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
119
120     def author_unicode(self):
121         return self.tag_unicode('author')
122
123     def save(self, force_insert=False, force_update=False, **kwargs):
124         from sortify import sortify
125
126         self.sort_key = sortify(self.title)[:120]
127         self.title = unicode(self.title)  # ???
128
129         try:
130             author = self.authors().first().sort_key
131         except AttributeError:
132             author = u''
133         self.sort_key_author = author
134
135         ret = super(Book, self).save(force_insert, force_update, **kwargs)
136
137         return ret
138
139     @permalink
140     def get_absolute_url(self):
141         return 'catalogue.views.book_detail', [self.slug]
142
143     @staticmethod
144     @permalink
145     def create_url(slug):
146         return 'catalogue.views.book_detail', [slug]
147
148     def gallery_path(self):
149         return gallery_path(self.slug)
150
151     def gallery_url(self):
152         return gallery_url(self.slug)
153
154     @property
155     def name(self):
156         return self.title
157
158     def language_code(self):
159         return constants.LANGUAGES_3TO2.get(self.language, self.language)
160
161     def language_name(self):
162         return dict(settings.LANGUAGES).get(self.language_code(), "")
163
164     def is_foreign(self):
165         return self.language_code() != settings.LANGUAGE_CODE
166
167     def has_media(self, type_):
168         if type_ in Book.formats:
169             return bool(getattr(self, "%s_file" % type_))
170         else:
171             return self.media.filter(type=type_).exists()
172
173     def get_media(self, type_):
174         if self.has_media(type_):
175             if type_ in Book.formats:
176                 return getattr(self, "%s_file" % type_)
177             else:
178                 return self.media.filter(type=type_)
179         else:
180             return None
181
182     def get_mp3(self):
183         return self.get_media("mp3")
184
185     def get_odt(self):
186         return self.get_media("odt")
187
188     def get_ogg(self):
189         return self.get_media("ogg")
190
191     def get_daisy(self):
192         return self.get_media("daisy")
193
194     def has_description(self):
195         return len(self.description) > 0
196     has_description.short_description = _('description')
197     has_description.boolean = True
198
199     # ugly ugly ugly
200     def has_mp3_file(self):
201         return bool(self.has_media("mp3"))
202     has_mp3_file.short_description = 'MP3'
203     has_mp3_file.boolean = True
204
205     def has_ogg_file(self):
206         return bool(self.has_media("ogg"))
207     has_ogg_file.short_description = 'OGG'
208     has_ogg_file.boolean = True
209
210     def has_daisy_file(self):
211         return bool(self.has_media("daisy"))
212     has_daisy_file.short_description = 'DAISY'
213     has_daisy_file.boolean = True
214
215     def wldocument(self, parse_dublincore=True, inherit=True):
216         from catalogue.import_utils import ORMDocProvider
217         from librarian.parser import WLDocument
218
219         if inherit and self.parent:
220             meta_fallbacks = self.parent.cover_info()
221         else:
222             meta_fallbacks = None
223
224         return WLDocument.from_file(
225             self.xml_file.path,
226             provider=ORMDocProvider(self),
227             parse_dublincore=parse_dublincore,
228             meta_fallbacks=meta_fallbacks)
229
230     @staticmethod
231     def zip_format(format_):
232         def pretty_file_name(book):
233             return "%s/%s.%s" % (
234                 book.extra_info['author'],
235                 book.slug,
236                 format_)
237
238         field_name = "%s_file" % format_
239         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
240         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
241         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
242
243     def zip_audiobooks(self, format_):
244         bm = BookMedia.objects.filter(book=self, type=format_)
245         paths = map(lambda bm: (None, bm.file.path), bm)
246         return create_zip(paths, "%s_%s" % (self.slug, format_))
247
248     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
249         if index is None:
250             from search.index import Index
251             index = Index()
252         try:
253             index.index_book(self, book_info)
254             if index_tags:
255                 index.index_tags()
256             if commit:
257                 index.index.commit()
258         except Exception, e:
259             index.index.rollback()
260             raise e
261
262     def download_pictures(self, remote_gallery_url):
263         gallery_path = self.gallery_path()
264         # delete previous files, so we don't include old files in ebooks
265         if os.path.isdir(gallery_path):
266             for filename in os.listdir(gallery_path):
267                 file_path = os.path.join(gallery_path, filename)
268                 os.unlink(file_path)
269         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
270         if ilustr_elements:
271             makedirs(gallery_path)
272             for ilustr in ilustr_elements:
273                 ilustr_src = ilustr.get('src')
274                 ilustr_path = os.path.join(gallery_path, ilustr_src)
275                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
276
277     @classmethod
278     def from_xml_file(cls, xml_file, **kwargs):
279         from django.core.files import File
280         from librarian import dcparser
281
282         # use librarian to parse meta-data
283         book_info = dcparser.parse(xml_file)
284
285         if not isinstance(xml_file, File):
286             xml_file = File(open(xml_file))
287
288         try:
289             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
290         finally:
291             xml_file.close()
292
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``.
        :param book_info: parsed metadata; this method reads its ``url.slug``,
            ``language``, ``title``, ``variant_of``, ``to_dict()`` and, when
            present, ``parts``.
        :param overwrite: allow updating a book whose slug already exists.
        :param dont_build: iterable of format names to skip when scheduling
            builds; merged with ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (unless
            ``settings.NO_SEARCH_INDEX`` is set).
        :param search_index_tags: passed to the indexing task as ``index_tags``.
        :param remote_gallery_url: when given, pictures are downloaded from it.
        :returns: the saved ``Book``.
        :raises Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
        :raises ValueError: the slug contains characters outside ``[a-z0-9-]``.
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover data so a change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags from metadata replace the old ones; the 'set' tags saved above are kept.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number records the position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let affected children rebuild whatever depends on the parent's cover.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book
391
392     @classmethod
393     @transaction.atomic
394     def repopulate_ancestors(cls):
395         """Fixes the ancestry cache."""
396         # TODO: table names
397         cursor = connection.cursor()
398         if connection.vendor == 'postgres':
399             cursor.execute("TRUNCATE catalogue_book_ancestor")
400             cursor.execute("""
401                 WITH RECURSIVE ancestry AS (
402                     SELECT book.id, book.parent_id
403                     FROM catalogue_book AS book
404                     WHERE book.parent_id IS NOT NULL
405                     UNION
406                     SELECT ancestor.id, book.parent_id
407                     FROM ancestry AS ancestor, catalogue_book AS book
408                     WHERE ancestor.parent_id = book.id
409                         AND book.parent_id IS NOT NULL
410                     )
411                 INSERT INTO catalogue_book_ancestor
412                     (from_book_id, to_book_id)
413                     SELECT id, parent_id
414                     FROM ancestry
415                     ORDER BY id;
416                 """)
417         else:
418             cursor.execute("DELETE FROM catalogue_book_ancestor")
419             for b in cls.objects.exclude(parent=None):
420                 parent = b.parent
421                 while parent is not None:
422                     b.ancestor.add(parent)
423                     parent = parent.parent
424
425     def flush_includes(self, languages=True):
426         if not languages:
427             return
428         if languages is True:
429             languages = [lc for (lc, _ln) in settings.LANGUAGES]
430         flush_ssi_includes([
431             template % (self.pk, lang)
432             for template in [
433                 '/katalog/b/%d/mini.%s.html',
434                 '/katalog/b/%d/mini_nolink.%s.html',
435                 '/katalog/b/%d/short.%s.html',
436                 '/katalog/b/%d/wide.%s.html',
437                 '/api/include/book/%d.%s.json',
438                 '/api/include/book/%d.%s.xml',
439                 ]
440             for lang in languages
441             ])
442
443     def cover_info(self, inherit=True):
444         """Returns a dictionary to serve as fallback for BookInfo.
445
446         For now, the only thing inherited is the cover image.
447         """
448         need = False
449         info = {}
450         for field in ('cover_url', 'cover_by', 'cover_source'):
451             val = self.extra_info.get(field)
452             if val:
453                 info[field] = val
454             else:
455                 need = True
456         if inherit and need and self.parent is not None:
457             parent_info = self.parent.cover_info()
458             parent_info.update(info)
459             info = parent_info
460         return info
461
462     def related_themes(self):
463         return Tag.objects.usage_for_queryset(
464             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
465             counts=True).filter(category='theme')
466
467     def parent_cover_changed(self):
468         """Called when parent book's cover image is changed."""
469         if not self.cover_info(inherit=False):
470             if 'cover' not in app_settings.DONT_BUILD:
471                 self.cover.build_delay()
472                 self.cover_thumb.build_delay()
473             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
474                 if format_ not in app_settings.DONT_BUILD:
475                     getattr(self, '%s_file' % format_).build_delay()
476             for child in self.children.all():
477                 child.parent_cover_changed()
478
479     def other_versions(self):
480         """Find other versions (i.e. in other languages) of the book."""
481         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
482
483     def parents(self):
484         books = []
485         parent = self.parent
486         while parent is not None:
487             books.insert(0, parent)
488             parent = parent.parent
489         return books
490
491     def pretty_title(self, html_links=False):
492         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
493         books = self.parents() + [self]
494         names.extend([(b.title, b.get_absolute_url()) for b in books])
495
496         if html_links:
497             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
498         else:
499             names = [tag[0] for tag in names]
500         return ', '.join(names)
501
502     @classmethod
503     def tagged_top_level(cls, tags):
504         """ Returns top-level books tagged with `tags`.
505
506         It only returns those books which don't have ancestors which are
507         also tagged with those tags.
508
509         """
510         objects = cls.tagged.with_all(tags)
511         return objects.exclude(ancestor__in=objects)
512
513     @classmethod
514     def book_list(cls, book_filter=None):
515         """Generates a hierarchical listing of all books.
516
517         Books are optionally filtered with a test function.
518
519         """
520
521         books_by_parent = {}
522         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
523         if book_filter:
524             books = books.filter(book_filter).distinct()
525
526             book_ids = set(b['pk'] for b in books.values("pk").iterator())
527             for book in books.iterator():
528                 parent = book.parent_id
529                 if parent not in book_ids:
530                     parent = None
531                 books_by_parent.setdefault(parent, []).append(book)
532         else:
533             for book in books.iterator():
534                 books_by_parent.setdefault(book.parent_id, []).append(book)
535
536         orphans = []
537         books_by_author = OrderedDict()
538         for tag in Tag.objects.filter(category='author').iterator():
539             books_by_author[tag] = []
540
541         for book in books_by_parent.get(None, ()):
542             authors = list(book.authors().only('pk'))
543             if authors:
544                 for author in authors:
545                     books_by_author[author].append(book)
546             else:
547                 orphans.append(book)
548
549         return books_by_author, orphans, books_by_parent
550
    # Maps an audience code to a (sort rank, Polish label) pair; used by
    # audiences_pl(), which gives unknown codes rank 99.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
560
561     def audiences_pl(self):
562         audiences = self.extra_info.get('audiences', [])
563         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
564         return [a[1] for a in audiences]
565
566     def stage_note(self):
567         stage = self.extra_info.get('stage')
568         if stage and stage < '0.4':
569             return (_('This work needs modernisation'),
570                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
571         else:
572             return None, None
573
574     def choose_fragment(self):
575         fragments = self.fragments.order_by()
576         fragments_count = fragments.count()
577         if not fragments_count and self.children.exists():
578             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
579             fragments_count = fragments.count()
580         if fragments_count:
581             return fragments[randint(0, fragments_count - 1)]
582         elif self.parent:
583             return self.parent.choose_fragment()
584         else:
585             return None
586
587     def update_popularity(self):
588         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
589         try:
590             pop = self.popularity
591             pop.count = count
592             pop.save()
593         except BookPopularity.DoesNotExist:
594             BookPopularity.objects.create(book=self, count=count)
595
596
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every entry of ``Book.formats``.

    Each field gets its own upload_to callable that places files under
    ``book/<format>/<slug>.<format>``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        # Give the callable a distinct name and expose it under that name at module level.
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)

# Runs at import time, right after the Book class has been defined.
add_file_fields()
615
616
class BookPopularity(models.Model):
    """Per-book popularity counter, written by ``Book.update_popularity``."""
    # Reachable from a book as ``book.popularity``.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)