37d9b71b2a98a7bd9cdcc39eb29603d0f137f5d0
[wolnelektury.git] / apps / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import re
8 from django.conf import settings
9 from django.core.cache import caches
10 from django.db import connection, models, transaction
11 from django.db.models import permalink
12 import django.dispatch
13 from django.contrib.contenttypes.fields import GenericRelation
14 from django.core.urlresolvers import reverse
15 from django.utils.translation import ugettext_lazy as _
16 import jsonfield
17 from fnpdjango.storage import BofhFileSystemStorage
18 from catalogue import constants
19 from catalogue.fields import EbookField
20 from catalogue.models import Tag, Fragment, BookMedia
21 from catalogue.utils import create_zip, split_tags
22 from catalogue import app_settings
23 from catalogue import tasks
24 from newtagging import managers
25
# Shared storage backend instance; passed as `storage=` to the cover field
# and to the per-format ebook file fields defined in this module.
bofh_storage = BofhFileSystemStorage()

# Handle to the cache configured under the 'permanent' alias.
# NOTE(review): not referenced in the visible part of this module --
# presumably imported from here by other modules; confirm before removing.
permanent_cache = caches['permanent']
29
30
31 def _cover_upload_to(i, n):
32     return 'book/cover/%s.jpg' % i.slug
33
34 def _cover_thumb_upload_to(i, n):
35     return 'book/cover_thumb/%s.jpg' % i.slug,
36
37 def _ebook_upload_to(upload_path):
38     def _upload_to(i, n):
39         return upload_path % i.slug
40     return _upload_to
41
42
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree through `parent` / `children`; the `ancestor`
    many-to-many relation caches the transitive closure of that tree and
    is rebuilt by `fix_tree_tags()`.  The per-format `<format>_file`
    fields read by `has_media()` / `get_media()` are not declared in the
    class body; they are looked up by name with `getattr`.
    """
    title = models.CharField(_('title'), max_length=120)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(_('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True,
            unique=True)
    # NOTE(review): verbose name is the same as for `slug`; looks like a
    # copy-paste leftover.  Left unchanged to avoid a migration.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True,
                    default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): verbose name says 'creation date' although the field is
    # `auto_now` (last change).  Left unchanged to avoid a migration.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    # files generated during publication

    cover = EbookField('cover', _('cover'),
            null=True, blank=True,
            upload_to=_cover_upload_to,
            storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField('cover_thumb', _('cover thumbnail'),
            null=True, blank=True,
            upload_to=_cover_thumb_upload_to,
            max_length=255)
    ebook_formats = constants.EBOOK_FORMATS
    # Formats stored directly on the book as `<format>_file` fields.
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True,
        related_name='children')
    # NOTE(review): `null=True` on a ManyToManyField looks redundant; kept
    # as-is so the field definition matches existing migrations.
    ancestor = models.ManyToManyField('self', blank=True, null=True,
        editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta(); `html_built`
    # is not sent anywhere in this class.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    class AlreadyExists(Exception):
        """Raised when importing a book whose slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
        """Save the book, refreshing `sort_key` from the title first.

        When `reset_short_html` is true (the default), `reset_short_html()`
        is called after the row has been written.
        """
        from sortify import sortify

        self.sort_key = sortify(self.title)
        self.title = unicode(self.title) # ???

        ret = super(Book, self).save(force_insert, force_update, **kwargs)

        if reset_short_html:
            self.reset_short_html()

        return ret

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @staticmethod
    @permalink
    def create_url(slug):
        """Return the book detail URL for `slug` without needing an instance."""
        return ('catalogue.views.book_detail', [slug])

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map the stored language code through `constants.LANGUAGES_3TO2`.

        Falls back to the stored code itself when there is no mapping.
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the name from `settings.LANGUAGES`, or "" if not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def has_media(self, type_):
        """Tell whether the book has media of the given type.

        Types listed in `Book.formats` are checked on the `<type>_file`
        field; any other type is looked up in the related `media` objects.
        """
        if type_ in Book.formats:
            return bool(getattr(self, "%s_file" % type_))
        else:
            return self.media.filter(type=type_).exists()

    def get_media(self, type_):
        """Return the media of the given type, or None if there is none.

        For types in `Book.formats` the result is the `<type>_file` field
        value; for other types it is a queryset over `self.media`.
        """
        if self.has_media(type_):
            if type_ in Book.formats:
                return getattr(self, "%s_file" % type_)
            else:
                return self.media.filter(type=type_)
        else:
            return None

    def get_mp3(self):
        return self.get_media("mp3")

    def get_odt(self):
        return self.get_media("odt")

    def get_ogg(self):
        return self.get_media("ogg")

    def get_daisy(self):
        return self.get_media("daisy")

    def reset_short_html(self):
        """Reset fragments' short HTML and refresh `sort_key_author`.

        Does nothing for a book that has not been saved yet.
        """
        if self.id is None:
            return

        # Fragment.short_html relies on book's tags, so reset it here too
        for fragm in self.fragments.all().iterator():
            fragm.reset_short_html()

        try:
            author = self.tags.filter(category='author')[0].sort_key
        except IndexError:
            author = u''
        # update() writes the column directly, without going through save().
        type(self).objects.filter(pk=self.pk).update(sort_key_author=author)

    # The has_* methods below carry `short_description` / `boolean`
    # attributes, i.e. Django admin list-display metadata.
    def has_description(self):
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    def has_mp3_file(self):
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def wldocument(self, parse_dublincore=True, inherit=True):
        """Load the book's XML file as a librarian `WLDocument`.

        With `inherit` set and a parent present, the parent's
        `cover_info()` is passed as `meta_fallbacks`.
        """
        from catalogue.import_utils import ORMDocProvider
        from librarian.parser import WLDocument

        if inherit and self.parent:
            meta_fallbacks = self.parent.cover_info()
        else:
            meta_fallbacks = None

        return WLDocument.from_file(self.xml_file.path,
                provider=ORMDocProvider(self),
                parse_dublincore=parse_dublincore,
                meta_fallbacks=meta_fallbacks)

    @staticmethod
    def zip_format(format_):
        """Zip the `<format_>_file` of every parentless book that has one.

        Entries are named "<author>/<slug>.<format_>", with the author read
        from `extra_info['author']`; the archive name comes from
        `app_settings.FORMAT_ZIPS[format_]`.
        """
        def pretty_file_name(book):
            return "%s/%s.%s" % (
                book.extra_info['author'],
                book.slug,
                format_)

        field_name = "%s_file" % format_
        books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
        paths = [(pretty_file_name(b), getattr(b, field_name).path)
                    for b in books.iterator()]
        return create_zip(paths, app_settings.FORMAT_ZIPS[format_])

    def zip_audiobooks(self, format_):
        """Zip all of this book's `BookMedia` files of type `format_`."""
        media = BookMedia.objects.filter(book=self, type=format_)
        # The first tuple element (the name inside the archive) is left as None.
        paths = [(None, item.file.path) for item in media]
        return create_zip(paths, "%s_%s" % (self.slug, format_))

    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
        """Index this book (and optionally tags) in the search index.

        A fresh `search.index.Index` is created when `index` is None.
        On any exception the index is rolled back and the exception is
        re-raised.
        """
        if index is None:
            from search.index import Index
            index = Index()
        try:
            index.index_book(self, book_info)
            if index_tags:
                index.index_tags()
            if commit:
                index.index.commit()
        except Exception:
            index.index.rollback()
            # Bare raise keeps the original traceback.
            raise

    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Import a book from a WL-XML file.

        `xml_file` may be a Django `File` or anything `open()` accepts.
        Extra keyword arguments are passed on to `from_text_and_meta()`.
        The file is closed before returning, whether or not it was opened
        here.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
            dont_build=None, search_index=True,
            search_index_tags=True):
        """Create or update a book from its raw XML file and parsed metadata.

        Args:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; the code reads its `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and,
                when present, `parts`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of format names not to build; merged with
                `app_settings.DONT_BUILD`.
            search_index: schedule search indexing of the book (unless
                `settings.NO_SEARCH_INDEX` is set).
            search_index_tags: forwarded to the indexing task as
                `index_tags`.

        Returns:
            The saved `Book`.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside `[a-z0-9-]`.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') %
                            part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % (
                        book_slug))
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags from metadata replace the old ones; 'set' tags are kept.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            tasks.fix_tree_tags.delay(child)
            if old_cover:
                notify_cover_changed.append(child)

        cls.fix_tree_tags()

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=book)
        return book

    @classmethod
    def fix_tree_tags(cls):
        """Fixes the ancestry cache.

        Empties the `ancestor` relation table and refills it with every
        (book, ancestor) pair, inside a single transaction.
        """
        # TODO: table names
        with transaction.atomic():
            cursor = connection.cursor()
            # Django reports the PostgreSQL backend as 'postgresql'; the
            # previous comparison with 'postgres' never matched, so the
            # single-statement rebuild below was never used.
            if connection.vendor == 'postgresql':
                cursor.execute("TRUNCATE catalogue_book_ancestor")
                cursor.execute("""
                    WITH RECURSIVE ancestry AS (
                        SELECT book.id, book.parent_id
                        FROM catalogue_book AS book
                        WHERE book.parent_id IS NOT NULL
                        UNION
                        SELECT ancestor.id, book.parent_id
                        FROM ancestry AS ancestor, catalogue_book AS book
                        WHERE ancestor.parent_id = book.id
                            AND book.parent_id IS NOT NULL
                        )
                    INSERT INTO catalogue_book_ancestor
                        (from_book_id, to_book_id)
                        SELECT id, parent_id
                        FROM ancestry
                        ORDER BY id;
                    """)
            else:
                # Generic fallback: walk up the parent chain of every
                # non-root book through the ORM.
                cursor.execute("DELETE FROM catalogue_book_ancestor")
                for b in cls.objects.exclude(parent=None):
                    parent = b.parent
                    while parent is not None:
                        b.ancestor.add(parent)
                        parent = parent.parent

    def cover_info(self, inherit=True):
        """Returns a dictionary to serve as fallback for BookInfo.

        For now, the only thing inherited is the cover image.

        The keys are 'cover_url', 'cover_by' and 'cover_source', taken
        from `extra_info` when non-empty.  If any is missing, `inherit` is
        true and there is a parent, the parent's cover info fills the gaps.
        """
        need = False
        info = {}
        for field in ('cover_url', 'cover_by', 'cover_source'):
            val = self.extra_info.get(field)
            if val:
                info[field] = val
            else:
                need = True
        if inherit and need and self.parent is not None:
            parent_info = self.parent.cover_info()
            # Own values take precedence over inherited ones.
            parent_info.update(info)
            info = parent_info
        return info

    def related_themes(self):
        """Return 'theme' tags, with counts, used on fragments of this book
        or of any book that has it as an ancestor."""
        return Tag.objects.usage_for_queryset(
            Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
            counts=True).filter(category='theme')

    def parent_cover_changed(self):
        """Called when parent book's cover image is changed.

        Only acts when the book has no cover information of its own; then
        it schedules rebuilds of the cover and of the formats listed in
        `constants.EBOOK_FORMATS_WITH_COVERS` (honouring
        `app_settings.DONT_BUILD`) and notifies its children in turn.
        """
        if not self.cover_info(inherit=False):
            if 'cover' not in app_settings.DONT_BUILD:
                self.cover.build_delay()
                self.cover_thumb.build_delay()
            for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
                if format_ not in app_settings.DONT_BUILD:
                    getattr(self, '%s_file' % format_).build_delay()
            for child in self.children.all():
                child.parent_cover_changed()

    def other_versions(self):
        """Find other versions (i.e. in other languages) of the book."""
        return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)

    def parents(self):
        """Return the chain of ancestors, topmost first, excluding self."""
        books = []
        parent = self.parent
        while parent is not None:
            books.append(parent)
            parent = parent.parent
        # Collected bottom-up; callers expect the root first.
        books.reverse()
        return books

    def pretty_title(self, html_links=False):
        """Return "author(s), ancestor titles, title" joined with ', '.

        With `html_links` set, every name is wrapped in an `<a>` element
        pointing at the tag's or book's absolute URL.
        """
        names = [(tag.name, tag.get_absolute_url())
            for tag in self.tags.filter(category='author')]
        books = self.parents() + [self]
        names.extend([(b.title, b.get_absolute_url()) for b in books])

        if html_links:
            # NOTE(review): names and URLs are interpolated without HTML
            # escaping -- confirm callers only pass trusted catalogue data.
            names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
        else:
            names = [tag[0] for tag in names]
        return ', '.join(names)

    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags`.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        objects = cls.tagged.with_all(tags)
        return objects.exclude(ancestor__in=objects)

    @classmethod
    def book_list(cls, filter=None):
        """Generates a hierarchical listing of all books.

        Books are optionally filtered with a test function.

        Returns a tuple `(books_by_author, orphans, books_by_parent)`:
        an ordered mapping of every author tag to its top-level books,
        the top-level books without an author tag, and a dict mapping a
        parent id (None for top level) to its books.
        """

        books_by_parent = {}
        books = cls.objects.all().order_by('parent_number', 'sort_key').only(
                'title', 'parent', 'slug')
        if filter:
            books = books.filter(filter).distinct()

            book_ids = set(b['pk'] for b in books.values("pk").iterator())
            for book in books.iterator():
                parent = book.parent_id
                # A book whose parent was filtered out is listed at top level.
                if parent not in book_ids:
                    parent = None
                books_by_parent.setdefault(parent, []).append(book)
        else:
            for book in books.iterator():
                books_by_parent.setdefault(book.parent_id, []).append(book)

        orphans = []
        books_by_author = OrderedDict()
        for tag in Tag.objects.filter(category='author').iterator():
            books_by_author[tag] = []

        for book in books_by_parent.get(None, ()):
            authors = list(book.tags.filter(category='author'))
            if authors:
                for author in authors:
                    books_by_author[author].append(book)
            else:
                orphans.append(book)

        return books_by_author, orphans, books_by_parent

    # Audience code -> (sort order, Polish display name).
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }

    def audiences_pl(self):
        """Return the display names of the book's audiences, deduplicated
        and sorted; unknown codes are kept verbatim and sorted last."""
        audiences = self.extra_info.get('audiences', [])
        audiences = sorted({self._audiences_pl.get(a, (99, a)) for a in audiences})
        return [a[1] for a in audiences]

    def stage_note(self):
        """Return `(message, url)` for works needing modernisation.

        Returns `(None, None)` when `extra_info['stage']` is missing or
        not below '0.4'.
        """
        stage = self.extra_info.get('stage')
        # NOTE(review): this is a plain comparison against the string '0.4';
        # if stage is stored as a string, a value like '0.10' sorts below
        # '0.4' -- confirm the range of stage values.
        if stage and stage < '0.4':
            return (_('This work needs modernisation'),
                    reverse('infopage', args=['wymagajace-uwspolczesnienia']))
        else:
            return None, None

    def choose_fragment(self):
        """Pick a random fragment for the book.

        Looks at the book's own fragments first, then (if it has children)
        at fragments of its descendants, and finally delegates to the
        parent.  Returns None when nothing is found.
        """
        # order_by() with no arguments clears any ordering on the queryset.
        fragments = self.fragments.order_by()
        fragments_count = fragments.count()
        if not fragments_count and self.children.exists():
            fragments = Fragment.objects.filter(book__ancestor=self).order_by()
            fragments_count = fragments.count()
        if fragments_count:
            return fragments[randint(0, fragments_count - 1)]
        elif self.parent:
            return self.parent.choose_fragment()
        else:
            return None
532
533
# add the file fields
# For every format in Book.formats an EbookField is attached to Book under
# the name "<format>_file"; Book.has_media() / Book.get_media() look these
# fields up by that name.
for format_ in Book.formats:
    field_name = "%s_file" % format_
    # This weird globals() assignment makes Django migrations comfortable.
    # The upload-path function is renamed to '_<format>_upload_to' and
    # registered under that name at module level.
    # '%%s' survives the formatting below as the '%s' slug placeholder.
    _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
    _upload_to.__name__ = '_%s_upload_to' % format_
    globals()[_upload_to.__name__] = _upload_to

    # NOTE(review): the verbose name is interpolated before translation, so
    # the translated message id is e.g. "PDF file" -- confirm the catalogs
    # contain one entry per format.
    EbookField(format_, _("%s file" % format_.upper()),
        upload_to=_upload_to,
        storage=bofh_storage,
        max_length=255,
        blank=True,
        default=''
    ).contribute_to_class(Book, field_name)