Some cleaning.
[wolnelektury.git] / apps / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import re
8 from django.conf import settings
9 from django.core.cache import caches
10 from django.db import connection, models, transaction
11 from django.db.models import permalink
12 import django.dispatch
13 from django.contrib.contenttypes.fields import GenericRelation
14 from django.core.urlresolvers import reverse
15 from django.utils.translation import ugettext_lazy as _
16 import jsonfield
17 from fnpdjango.storage import BofhFileSystemStorage
18 from catalogue import constants
19 from catalogue.fields import EbookField
20 from catalogue.models import Tag, Fragment, BookMedia
21 from catalogue.utils import create_zip
22 from catalogue import app_settings
23 from catalogue import tasks
24 from newtagging import managers
25
# Storage instance passed to the cover field and to the per-format file
# fields that are attached to Book at the bottom of this module.
bofh_storage = BofhFileSystemStorage()

# Handle to the cache registered under the 'permanent' alias.
permanent_cache = caches['permanent']
29
30
31 def _cover_upload_to(i, n):
32     return 'book/cover/%s.jpg' % i.slug
33
34 def _cover_thumb_upload_to(i, n):
35     return 'book/cover_thumb/%s.jpg' % i.slug,
36
37 def _ebook_upload_to(upload_path):
38     def _upload_to(i, n):
39         return upload_path % i.slug
40     return _upload_to
41
42
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one `<format>_file` field per entry of
    `formats` is attached to the class at module level, after the class body.
    """
    title         = models.CharField(_('title'), max_length=120)
    # Recomputed from the title on every save(); not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    # Sort key of the first author tag; written by reset_short_html().
    sort_key_author = models.CharField(_('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True,
            unique=True)
    # Slug shared by variants of one work; other_versions() matches on it.
    # NOTE(review): verbose name is the same as for `slug` -- confirm intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True,
                    default=app_settings.DEFAULT_LANGUAGE)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): labelled 'creation date' although auto_now=True refreshes
    # it on every save -- looks like a copy-paste of the label above; confirm.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it from book_info.to_dict().
    # NOTE(review): the default is a dict literal shared at class level --
    # confirm jsonfield copies it per instance.
    extra_info    = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication

    cover = EbookField('cover', _('cover'),
            null=True, blank=True,
            upload_to=_cover_upload_to,
            storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    # NOTE(review): unlike `cover`, no explicit storage is passed here -- confirm intended.
    cover_thumb = EbookField('cover_thumb', _('cover thumbnail'),
            null=True, blank=True,
            upload_to=_cover_thumb_upload_to,
            max_length=255)
    # Format names that get a `<format>_file` field: the ebook formats from
    # constants plus 'html' and 'xml'.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent in the book tree (reverse accessor: `children`).
    parent = models.ForeignKey('self', blank=True, null=True,
        related_name='children')
    # Ancestry table rebuilt wholesale by fix_tree_tags().
    # NOTE(review): null=True on a ManyToManyField presumably has no effect -- confirm.
    ancestor = models.ManyToManyField('self', blank=True, null=True,
        editable=False, related_name='descendant', symmetrical=False)

    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta(); `html_built`
    # is not sent anywhere in this class body.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
95
96     def __unicode__(self):
97         return self.title
98
99     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
100         from sortify import sortify
101
102         self.sort_key = sortify(self.title)
103         self.title = unicode(self.title) # ???
104
105         ret = super(Book, self).save(force_insert, force_update, **kwargs)
106
107         if reset_short_html:
108             self.reset_short_html()
109
110         return ret
111
112     @permalink
113     def get_absolute_url(self):
114         return ('catalogue.views.book_detail', [self.slug])
115
116     @staticmethod
117     @permalink
118     def create_url(slug):
119         return ('catalogue.views.book_detail', [slug])
120
121     @property
122     def name(self):
123         return self.title
124
125     def language_code(self):
126         return constants.LANGUAGES_3TO2.get(self.language, self.language)
127
128     def language_name(self):
129         return dict(settings.LANGUAGES).get(self.language_code(), "")
130
131     def has_media(self, type_):
132         if type_ in Book.formats:
133             return bool(getattr(self, "%s_file" % type_))
134         else:
135             return self.media.filter(type=type_).exists()
136
137     def get_media(self, type_):
138         if self.has_media(type_):
139             if type_ in Book.formats:
140                 return getattr(self, "%s_file" % type_)
141             else:
142                 return self.media.filter(type=type_)
143         else:
144             return None
145
146     def get_mp3(self):
147         return self.get_media("mp3")
148     def get_odt(self):
149         return self.get_media("odt")
150     def get_ogg(self):
151         return self.get_media("ogg")
152     def get_daisy(self):
153         return self.get_media("daisy")
154
155     def reset_short_html(self):
156         if self.id is None:
157             return
158
159         # Fragment.short_html relies on book's tags, so reset it here too
160         for fragm in self.fragments.all().iterator():
161             fragm.reset_short_html()
162
163         try:
164             author = self.tags.filter(category='author')[0].sort_key
165         except IndexError:
166             author = u''
167         type(self).objects.filter(pk=self.pk).update(sort_key_author=author)
168
169     def has_description(self):
170         return len(self.description) > 0
171     has_description.short_description = _('description')
172     has_description.boolean = True
173
174     # ugly ugly ugly
175     def has_mp3_file(self):
176         return bool(self.has_media("mp3"))
177     has_mp3_file.short_description = 'MP3'
178     has_mp3_file.boolean = True
179
180     def has_ogg_file(self):
181         return bool(self.has_media("ogg"))
182     has_ogg_file.short_description = 'OGG'
183     has_ogg_file.boolean = True
184
185     def has_daisy_file(self):
186         return bool(self.has_media("daisy"))
187     has_daisy_file.short_description = 'DAISY'
188     has_daisy_file.boolean = True
189
190     def wldocument(self, parse_dublincore=True, inherit=True):
191         from catalogue.import_utils import ORMDocProvider
192         from librarian.parser import WLDocument
193
194         if inherit and self.parent:
195             meta_fallbacks = self.parent.cover_info()
196         else:
197             meta_fallbacks = None
198
199         return WLDocument.from_file(self.xml_file.path,
200                 provider=ORMDocProvider(self),
201                 parse_dublincore=parse_dublincore,
202                 meta_fallbacks=meta_fallbacks)
203
204     @staticmethod
205     def zip_format(format_):
206         def pretty_file_name(book):
207             return "%s/%s.%s" % (
208                 book.extra_info['author'],
209                 book.slug,
210                 format_)
211
212         field_name = "%s_file" % format_
213         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
214         paths = [(pretty_file_name(b), getattr(b, field_name).path)
215                     for b in books.iterator()]
216         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
217
218     def zip_audiobooks(self, format_):
219         bm = BookMedia.objects.filter(book=self, type=format_)
220         paths = map(lambda bm: (None, bm.file.path), bm)
221         return create_zip(paths, "%s_%s" % (self.slug, format_))
222
223     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
224         if index is None:
225             from search.index import Index
226             index = Index()
227         try:
228             index.index_book(self, book_info)
229             if index_tags:
230                 index.index_tags()
231             if commit:
232                 index.index.commit()
233         except Exception, e:
234             index.index.rollback()
235             raise e
236
237
238     @classmethod
239     def from_xml_file(cls, xml_file, **kwargs):
240         from django.core.files import File
241         from librarian import dcparser
242
243         # use librarian to parse meta-data
244         book_info = dcparser.parse(xml_file)
245
246         if not isinstance(xml_file, File):
247             xml_file = File(open(xml_file))
248
249         try:
250             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
251         finally:
252             xml_file.close()
253
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
            dont_build=None, search_index=True,
            search_index_tags=True):
        """Create or update a book from its XML file and parsed metadata.

        `raw_file` is stored as the book's XML file and `book_info` supplies
        slug, language, title, variant and the rest of the metadata.
        `dont_build` is an iterable of build names to skip, merged with
        app_settings.DONT_BUILD. `search_index` / `search_index_tags` control
        whether and how the indexing task is queued.

        Raises Book.DoesNotExist when a listed part is missing, ValueError
        for a slug with characters outside [a-z0-9-], and Book.AlreadyExists
        when the book exists and `overwrite` is false.

        Sends the `published` signal and returns the book.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') %
                            part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % (
                        book_slug))
            # Save shelves for this book
            # ('set' tags are kept so they can be re-applied after the tags
            # are replaced with the ones from the metadata below.)
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        # (save=False: the model itself is saved once, further down.)
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        book.tags = set(meta_tags + book_shelves)

        # For a newly created book old_cover is None, so this is always true.
        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in metadata order; parent_number records
        # each part's position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            # NOTE(review): a per-child task is queued here and the whole
            # ancestry table is also rebuilt by cls.fix_tree_tags() right
            # after the loop -- confirm tasks.fix_tree_tags still exists and
            # is still needed.
            tasks.fix_tree_tags.delay(child)
            if old_cover:
                notify_cover_changed.append(child)

        cls.fix_tree_tags()

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # The HTML build is not subject to dont_build.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=book)
        return book
352
353     @classmethod
354     def fix_tree_tags(cls):
355         """Fixes the ancestry cache."""
356         # TODO: table names
357         with transaction.atomic():
358             cursor = connection.cursor()
359             if connection.vendor == 'postgres':
360                 cursor.execute("TRUNCATE catalogue_book_ancestor")
361                 cursor.execute("""
362                     WITH RECURSIVE ancestry AS (
363                         SELECT book.id, book.parent_id
364                         FROM catalogue_book AS book
365                         WHERE book.parent_id IS NOT NULL
366                         UNION
367                         SELECT ancestor.id, book.parent_id
368                         FROM ancestry AS ancestor, catalogue_book AS book
369                         WHERE ancestor.parent_id = book.id
370                             AND book.parent_id IS NOT NULL
371                         )
372                     INSERT INTO catalogue_book_ancestor
373                         (from_book_id, to_book_id)
374                         SELECT id, parent_id
375                         FROM ancestry
376                         ORDER BY id;
377                     """)
378             else:
379                 cursor.execute("DELETE FROM catalogue_book_ancestor")
380                 for b in cls.objects.exclude(parent=None):
381                     parent = b.parent
382                     while parent is not None:
383                         b.ancestor.add(parent)
384                         parent = parent.parent
385
386     def cover_info(self, inherit=True):
387         """Returns a dictionary to serve as fallback for BookInfo.
388
389         For now, the only thing inherited is the cover image.
390         """
391         need = False
392         info = {}
393         for field in ('cover_url', 'cover_by', 'cover_source'):
394             val = self.extra_info.get(field)
395             if val:
396                 info[field] = val
397             else:
398                 need = True
399         if inherit and need and self.parent is not None:
400             parent_info = self.parent.cover_info()
401             parent_info.update(info)
402             info = parent_info
403         return info
404
405     def related_themes(self):
406         return Tag.objects.usage_for_queryset(
407             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
408             counts=True).filter(category='theme')
409
410     def parent_cover_changed(self):
411         """Called when parent book's cover image is changed."""
412         if not self.cover_info(inherit=False):
413             if 'cover' not in app_settings.DONT_BUILD:
414                 self.cover.build_delay()
415                 self.cover_thumb.build_delay()
416             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
417                 if format_ not in app_settings.DONT_BUILD:
418                     getattr(self, '%s_file' % format_).build_delay()
419             for child in self.children.all():
420                 child.parent_cover_changed()
421
422     def other_versions(self):
423         """Find other versions (i.e. in other languages) of the book."""
424         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
425
426     def parents(self):
427         books = []
428         parent = self.parent
429         while parent is not None:
430             books.insert(0, parent)
431             parent = parent.parent
432         return books
433
434     def pretty_title(self, html_links=False):
435         names = [(tag.name, tag.get_absolute_url())
436             for tag in self.tags.filter(category='author')]
437         books = self.parents() + [self]
438         names.extend([(b.title, b.get_absolute_url()) for b in books])
439
440         if html_links:
441             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
442         else:
443             names = [tag[0] for tag in names]
444         return ', '.join(names)
445
446     @classmethod
447     def tagged_top_level(cls, tags):
448         """ Returns top-level books tagged with `tags`.
449
450         It only returns those books which don't have ancestors which are
451         also tagged with those tags.
452
453         """
454         objects = cls.tagged.with_all(tags)
455         return objects.exclude(ancestor__in=objects)
456
457     @classmethod
458     def book_list(cls, filter=None):
459         """Generates a hierarchical listing of all books.
460
461         Books are optionally filtered with a test function.
462
463         """
464
465         books_by_parent = {}
466         books = cls.objects.all().order_by('parent_number', 'sort_key').only(
467                 'title', 'parent', 'slug')
468         if filter:
469             books = books.filter(filter).distinct()
470
471             book_ids = set(b['pk'] for b in books.values("pk").iterator())
472             for book in books.iterator():
473                 parent = book.parent_id
474                 if parent not in book_ids:
475                     parent = None
476                 books_by_parent.setdefault(parent, []).append(book)
477         else:
478             for book in books.iterator():
479                 books_by_parent.setdefault(book.parent_id, []).append(book)
480
481         orphans = []
482         books_by_author = OrderedDict()
483         for tag in Tag.objects.filter(category='author').iterator():
484             books_by_author[tag] = []
485
486         for book in books_by_parent.get(None, ()):
487             authors = list(book.tags.filter(category='author'))
488             if authors:
489                 for author in authors:
490                     books_by_author[author].append(book)
491             else:
492                 orphans.append(book)
493
494         return books_by_author, orphans, books_by_parent
495
496     _audiences_pl = {
497         "SP": (1, u"szkoła podstawowa"),
498         "SP1": (1, u"szkoła podstawowa"),
499         "SP2": (1, u"szkoła podstawowa"),
500         "P": (1, u"szkoła podstawowa"),
501         "G": (2, u"gimnazjum"),
502         "L": (3, u"liceum"),
503         "LP": (3, u"liceum"),
504     }
505     def audiences_pl(self):
506         audiences = self.extra_info.get('audiences', [])
507         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
508         return [a[1] for a in audiences]
509
510     def stage_note(self):
511         stage = self.extra_info.get('stage')
512         if stage and stage < '0.4':
513             return (_('This work needs modernisation'),
514                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
515         else:
516             return None, None
517
518     def choose_fragment(self):
519         fragments = self.fragments.order_by()
520         fragments_count = fragments.count()
521         if not fragments_count and self.children.exists():
522             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
523             fragments_count = fragments.count()
524         if fragments_count:
525             return fragments[randint(0, fragments_count - 1)]
526         elif self.parent:
527             return self.parent.choose_fragment()
528         else:
529             return None
530
531
# Add the file fields: one `<format>_file` EbookField is attached to Book for
# every entry of Book.formats.
for format_ in Book.formats:
    field_name = "%s_file" % format_
    # This weird globals() assignment makes Django migrations comfortable.
    # Each upload-path callable is given a distinct name and published under
    # that name in this module's namespace.
    _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
    _upload_to.__name__ = '_%s_upload_to' % format_
    globals()[_upload_to.__name__] = _upload_to

    # NOTE(review): the label is interpolated before it is passed to the
    # translation function, so the string looked up is e.g. "<FORMAT> file"
    # rather than the "%s file" template -- confirm that is intended.
    EbookField(format_, _("%s file" % format_.upper()),
        upload_to=_upload_to,
        storage=bofh_storage,
        max_length=255,
        blank=True,
        default=''
    ).contribute_to_class(Book, field_name)