option to turn off ssify just for api + some optimizations
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue import app_settings
26 from catalogue import tasks
27 from wolnelektury.utils import makedirs
28
# Shared storage instance passed to the cover field and to the per-format
# ebook file fields declared in this module.
bofh_storage = BofhFileSystemStorage()
30
31
32 def _make_upload_to(path):
33     def _upload_to(i, n):
34         return path % i.slug
35     return _upload_to
36
37
# upload_to callables for the cover and cover-thumbnail fields; each one
# produces a .jpg path named after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
40
41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ``%s`` path template.

    Delegates entirely to ``_make_upload_to``.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
44
45
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # NOTE(review): verbose name is the same as for `slug` -- confirm this is intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): verbose name duplicates created_at's ('creation date');
    # left untouched because changing it affects translations and migrations.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Every format listed here gets a "<format>_file" field on the model.
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised on import when the slug is taken and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'

    def __unicode__(self):
        return self.title

    def get_initial(self):
        """Return the first word character of the title, or '' if there is none."""
        try:
            return re.search(r'\w', self.title, re.U).group(0)
        except AttributeError:
            # re.search() returned None: the title has no word character.
            return ''

    def authors(self):
        """Return the tags of this book whose category is 'author'."""
        return self.tags.filter(category='author')

    def author_unicode(self):
        """Return the author names joined with ', '."""
        return ", ".join(self.authors().values_list('name', flat=True))

    def save(self, force_insert=False, force_update=False, **kwargs):
        """Refresh `sort_key` and `sort_key_author`, then save the model.

        `sort_key` is the sortified title cut to 120 characters;
        `sort_key_author` is the sort key of the first author tag, or an
        empty string when that lookup fails with AttributeError.
        """
        from sortify import sortify

        self.sort_key = sortify(self.title)[:120]
        self.title = unicode(self.title)  # ???

        try:
            author = self.authors().first().sort_key
        except AttributeError:
            # E.g. first() returned None because the book has no author tag.
            author = u''
        self.sort_key_author = author

        return super(Book, self).save(force_insert, force_update, **kwargs)

    @permalink
    def get_absolute_url(self):
        return 'catalogue.views.book_detail', [self.slug]

    @staticmethod
    @permalink
    def create_url(slug):
        """Return the book detail URL for an arbitrary slug."""
        return 'catalogue.views.book_detail', [slug]

    def gallery_path(self):
        return gallery_path(self.slug)

    def gallery_url(self):
        return gallery_url(self.slug)

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map `language` through LANGUAGES_3TO2, falling back to `language` itself."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the settings.LANGUAGES name for this book's language code, or ''."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def has_media(self, type_):
        """Tell whether the book has a file or media of the given type.

        Types listed in `Book.formats` are checked on the matching
        "<type>_file" field; any other type is looked up in `self.media`.
        """
        if type_ in Book.formats:
            return bool(getattr(self, "%s_file" % type_))
        return self.media.filter(type=type_).exists()

    def get_media(self, type_):
        """Return the file field or media queryset for `type_`, or None if absent."""
        if not self.has_media(type_):
            return None
        if type_ in Book.formats:
            return getattr(self, "%s_file" % type_)
        return self.media.filter(type=type_)

    def get_mp3(self):
        return self.get_media("mp3")

    def get_odt(self):
        return self.get_media("odt")

    def get_ogg(self):
        return self.get_media("ogg")

    def get_daisy(self):
        return self.get_media("daisy")

    def has_description(self):
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    # has_media() already returns a bool, so no extra conversion is needed.
    def has_mp3_file(self):
        return self.has_media("mp3")
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        return self.has_media("ogg")
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        return self.has_media("daisy")
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def wldocument(self, parse_dublincore=True, inherit=True):
        """Load this book's XML file as a librarian WLDocument.

        When `inherit` is true and the book has a parent, the parent's
        cover_info() is passed as `meta_fallbacks`.
        """
        from catalogue.import_utils import ORMDocProvider
        from librarian.parser import WLDocument

        if inherit and self.parent:
            meta_fallbacks = self.parent.cover_info()
        else:
            meta_fallbacks = None

        return WLDocument.from_file(
            self.xml_file.path,
            provider=ORMDocProvider(self),
            parse_dublincore=parse_dublincore,
            meta_fallbacks=meta_fallbacks)

    @staticmethod
    def zip_format(format_):
        """Zip the `format_` files of all parentless books that have one.

        Entries are named "<extra_info author>/<slug>.<format_>"; the
        archive name comes from app_settings.FORMAT_ZIPS[format_].
        """
        def pretty_file_name(book):
            return "%s/%s.%s" % (
                book.extra_info['author'],
                book.slug,
                format_)

        field_name = "%s_file" % format_
        books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
        paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
        return create_zip(paths, app_settings.FORMAT_ZIPS[format_])

    def zip_audiobooks(self, format_):
        """Zip this book's BookMedia files of type `format_` as "<slug>_<format_>"."""
        media = BookMedia.objects.filter(book=self, type=format_)
        paths = [(None, item.file.path) for item in media]
        return create_zip(paths, "%s_%s" % (self.slug, format_))

    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
        """Index the book (and optionally tags) in the search index.

        A new search.index.Index is created when `index` is None.  On any
        exception the index is rolled back and the exception re-raised.
        """
        if index is None:
            from search.index import Index
            index = Index()
        try:
            index.index_book(self, book_info)
            if index_tags:
                index.index_tags()
            if commit:
                index.index.commit()
        except Exception:
            index.index.rollback()
            # Bare raise keeps the original traceback intact.
            raise

    def download_pictures(self, remote_gallery_url):
        """Replace the local gallery with the illustrations referenced in the XML.

        Each `ilustr` element's `src` is fetched from
        "<remote_gallery_url>/<src>" into the book's gallery directory.
        """
        gallery_dir = self.gallery_path()
        # delete previous files, so we don't include old files in ebooks
        if os.path.isdir(gallery_dir):
            for filename in os.listdir(gallery_dir):
                os.unlink(os.path.join(gallery_dir, filename))
        ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
        if ilustr_elements:
            makedirs(gallery_dir)
            for ilustr in ilustr_elements:
                ilustr_src = ilustr.get('src')
                # NOTE(review): `src` comes straight from the document and is
                # joined unsanitised -- confirm it cannot contain "../".
                ilustr_path = os.path.join(gallery_dir, ilustr_src)
                urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)

    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Import a book from an XML file given as a path or a Django File.

        Metadata is parsed with librarian's dcparser; remaining keyword
        arguments are passed on to from_text_and_meta().  The file is
        closed afterwards in either case.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a Book from its XML file and parsed metadata.

        raw_file -- file object saved as the book's xml_file
        book_info -- parsed metadata (url, title, language, parts, ...)
        overwrite -- allow updating an existing book with the same slug
        dont_build -- formats to skip, merged with app_settings.DONT_BUILD
        search_index, search_index_tags -- control the search indexing task
        remote_gallery_url -- if set, illustrations are downloaded from it

        Raises Book.DoesNotExist if a listed part is missing, ValueError
        for a slug with characters outside [a-z0-9-], and
        Book.AlreadyExists if the book exists and `overwrite` is false.
        Returns the saved book.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set(dont_build) | set(app_settings.DONT_BUILD)

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book

    @classmethod
    @transaction.atomic
    def repopulate_ancestors(cls):
        """Fixes the ancestry cache.

        Empties the ancestor table and refills it: with one recursive
        query on PostgreSQL, otherwise by walking each book's parents.
        """
        # TODO: table names
        cursor = connection.cursor()
        # Django's PostgreSQL backend reports its vendor as 'postgresql';
        # comparing against 'postgres' never matched.
        if connection.vendor == 'postgresql':
            cursor.execute("TRUNCATE catalogue_book_ancestor")
            cursor.execute("""
                WITH RECURSIVE ancestry AS (
                    SELECT book.id, book.parent_id
                    FROM catalogue_book AS book
                    WHERE book.parent_id IS NOT NULL
                    UNION
                    SELECT ancestor.id, book.parent_id
                    FROM ancestry AS ancestor, catalogue_book AS book
                    WHERE ancestor.parent_id = book.id
                        AND book.parent_id IS NOT NULL
                    )
                INSERT INTO catalogue_book_ancestor
                    (from_book_id, to_book_id)
                    SELECT id, parent_id
                    FROM ancestry
                    ORDER BY id;
                """)
        else:
            cursor.execute("DELETE FROM catalogue_book_ancestor")
            for b in cls.objects.exclude(parent=None):
                parent = b.parent
                while parent is not None:
                    b.ancestor.add(parent)
                    parent = parent.parent

    def flush_includes(self, languages=True):
        """Flush the SSI includes of this book for the given languages.

        `languages` may be a list of language codes, True (all codes from
        settings.LANGUAGES) or a false value (do nothing).
        """
        if not languages:
            return
        if languages is True:
            languages = [lc for (lc, _ln) in settings.LANGUAGES]
        flush_ssi_includes([
            template % (self.pk, lang)
            for template in [
                '/katalog/b/%d/mini.%s.html',
                '/katalog/b/%d/mini_nolink.%s.html',
                '/katalog/b/%d/short.%s.html',
                '/katalog/b/%d/wide.%s.html',
                '/api/include/book/%d.%s.json',
                '/api/include/book/%d.%s.xml',
                ]
            for lang in languages
            ])

    def cover_info(self, inherit=True):
        """Returns a dictionary to serve as fallback for BookInfo.

        For now, the only thing inherited is the cover image.
        """
        need = False
        info = {}
        for field in ('cover_url', 'cover_by', 'cover_source'):
            val = self.extra_info.get(field)
            if val:
                info[field] = val
            else:
                need = True
        if inherit and need and self.parent is not None:
            # The book's own values take precedence over the parent's.
            parent_info = self.parent.cover_info()
            parent_info.update(info)
            info = parent_info
        return info

    def related_themes(self):
        """Return theme tags, with counts, used by fragments of this book or its descendants."""
        return Tag.objects.usage_for_queryset(
            Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
            counts=True).filter(category='theme')

    def parent_cover_changed(self):
        """Called when parent book's cover image is changed."""
        if not self.cover_info(inherit=False):
            # The book has no cover data of its own, so rebuild what depends
            # on the cover and pass the notification down to its children.
            if 'cover' not in app_settings.DONT_BUILD:
                self.cover.build_delay()
                self.cover_thumb.build_delay()
            for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                if format_ not in app_settings.DONT_BUILD:
                    getattr(self, '%s_file' % format_).build_delay()
            for child in self.children.all():
                child.parent_cover_changed()

    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book."""
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)

    def parents(self):
        """Return the chain of ancestors, topmost first, immediate parent last."""
        books = []
        parent = self.parent
        while parent is not None:
            books.append(parent)
            parent = parent.parent
        # Collected bottom-up; callers expect top-down order.
        books.reverse()
        return books

    def pretty_title(self, html_links=False):
        """Return authors, ancestor titles and own title joined with ', '.

        With `html_links` each item is wrapped in a link to its URL.
        """
        names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
        books = self.parents() + [self]
        names.extend([(b.title, b.get_absolute_url()) for b in books])

        if html_links:
            # NOTE(review): names and titles are interpolated without HTML
            # escaping -- confirm callers treat the result accordingly.
            names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
        else:
            names = [tag[0] for tag in names]
        return ', '.join(names)

    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        objects = cls.tagged.with_all(tags)
        return objects.exclude(ancestor__in=objects)

    @classmethod
    def book_list(cls, book_filter=None):
        """Generates a hierarchical listing of all books.

        Books are optionally filtered with a test function.

        Returns a tuple (books_by_author, orphans, books_by_parent):
        top-level books grouped by author tag, top-level books without
        any author, and all listed books grouped by parent id.
        """

        books_by_parent = {}
        books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
        if book_filter:
            books = books.filter(book_filter).distinct()

            book_ids = set(b['pk'] for b in books.values("pk").iterator())
            for book in books.iterator():
                parent = book.parent_id
                # A book whose parent was filtered out is listed as top-level.
                if parent not in book_ids:
                    parent = None
                books_by_parent.setdefault(parent, []).append(book)
        else:
            for book in books.iterator():
                books_by_parent.setdefault(book.parent_id, []).append(book)

        orphans = []
        books_by_author = OrderedDict()
        for tag in Tag.objects.filter(category='author').iterator():
            books_by_author[tag] = []

        for book in books_by_parent.get(None, ()):
            authors = list(book.authors().only('pk'))
            if authors:
                for author in authors:
                    books_by_author[author].append(book)
            else:
                orphans.append(book)

        return books_by_author, orphans, books_by_parent

    # Audience code -> (sort order, displayed name).
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }

    def audiences_pl(self):
        """Return the de-duplicated, ordered audience names from extra_info.

        Codes missing from `_audiences_pl` are kept as-is and sorted last.
        """
        audiences = self.extra_info.get('audiences', [])
        ranked = sorted(set(self._audiences_pl.get(a, (99, a)) for a in audiences))
        return [name for (_order, name) in ranked]

    def stage_note(self):
        """Return (message, url) when extra_info's stage is below '0.4', else (None, None)."""
        stage = self.extra_info.get('stage')
        # NOTE(review): this is a string comparison, so e.g. '0.10' sorts
        # before '0.4' -- confirm stage values never need numeric ordering.
        if stage and stage < '0.4':
            return (_('This work needs modernisation'),
                    reverse('infopage', args=['wymagajace-uwspolczesnienia']))
        else:
            return None, None

    def choose_fragment(self):
        """Pick a random fragment of this book.

        Falls back to fragments of descendant books, then to the parent's
        choice; returns None when nothing is found.
        """
        fragments = self.fragments.order_by()
        fragments_count = fragments.count()
        if not fragments_count and self.children.exists():
            fragments = Fragment.objects.filter(book__ancestor=self).order_by()
            fragments_count = fragments.count()
        if fragments_count:
            return fragments[randint(0, fragments_count - 1)]
        elif self.parent:
            return self.parent.choose_fragment()
        else:
            return None
575
576
def add_file_fields():
    """Add a "<format>_file" EbookField to Book for every entry in Book.formats.

    Each field gets its own upload_to function, which is also published in
    this module's globals under the name "_<format>_upload_to".
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        uploader_name = '_%s_upload_to' % format_
        uploader = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        uploader.__name__ = uploader_name
        globals()[uploader_name] = uploader

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=uploader,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)


add_file_fields()