volume and imprint for isbn/csv
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance passed as `storage=` to the cover field and to the
# per-format file fields created in add_file_fields().
bofh_storage = BofhFileSystemStorage()
31
32
33 def _make_upload_to(path):
34     def _upload_to(i, n):
35         return path % i.slug
36     return _upload_to
37
38
# upload_to callables for the cover image and its thumbnail; both name the
# stored file after the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given path template.

    Thin wrapper around ``_make_upload_to``.
    """
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed on every save() and are not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by variants of the same book; other_versions() matches on it.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label below repeats 'creation date' although the field
    # is auto_now (last change) -- looks like a copy-paste slip; changing it
    # would need a migration and a translation update, so it is only flagged.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's parts (set on import).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary filled from book_info.to_dict() on import.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # add_file_fields() attaches one `<format>_file` field per entry of `formats`.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Cache of all ancestors; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
100
101     def __unicode__(self):
102         return self.title
103
104     def get_initial(self):
105         try:
106             return re.search(r'\w', self.title, re.U).group(0)
107         except AttributeError:
108             return ''
109
110     def authors(self):
111         return self.tags.filter(category='author')
112
113     def tag_unicode(self, category):
114         relations = prefetched_relations(self, category)
115         if relations:
116             return ', '.join(rel.tag.name for rel in relations)
117         else:
118             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
119
120     def author_unicode(self):
121         return self.tag_unicode('author')
122
123     def save(self, force_insert=False, force_update=False, **kwargs):
124         from sortify import sortify
125
126         self.sort_key = sortify(self.title)[:120]
127         self.title = unicode(self.title)  # ???
128
129         try:
130             author = self.authors().first().sort_key
131         except AttributeError:
132             author = u''
133         self.sort_key_author = author
134
135         ret = super(Book, self).save(force_insert, force_update, **kwargs)
136
137         return ret
138
139     @permalink
140     def get_absolute_url(self):
141         return 'catalogue.views.book_detail', [self.slug]
142
143     @staticmethod
144     @permalink
145     def create_url(slug):
146         return 'catalogue.views.book_detail', [slug]
147
148     def gallery_path(self):
149         return gallery_path(self.slug)
150
151     def gallery_url(self):
152         return gallery_url(self.slug)
153
154     @property
155     def name(self):
156         return self.title
157
158     def language_code(self):
159         return constants.LANGUAGES_3TO2.get(self.language, self.language)
160
161     def language_name(self):
162         return dict(settings.LANGUAGES).get(self.language_code(), "")
163
164     def is_foreign(self):
165         return self.language_code() != settings.LANGUAGE_CODE
166
167     def has_media(self, type_):
168         if type_ in Book.formats:
169             return bool(getattr(self, "%s_file" % type_))
170         else:
171             return self.media.filter(type=type_).exists()
172
173     def get_media(self, type_):
174         if self.has_media(type_):
175             if type_ in Book.formats:
176                 return getattr(self, "%s_file" % type_)
177             else:
178                 return self.media.filter(type=type_)
179         else:
180             return None
181
182     def get_mp3(self):
183         return self.get_media("mp3")
184
185     def get_odt(self):
186         return self.get_media("odt")
187
188     def get_ogg(self):
189         return self.get_media("ogg")
190
191     def get_daisy(self):
192         return self.get_media("daisy")
193
194     def has_description(self):
195         return len(self.description) > 0
196     has_description.short_description = _('description')
197     has_description.boolean = True
198
199     # ugly ugly ugly
200     def has_mp3_file(self):
201         return bool(self.has_media("mp3"))
202     has_mp3_file.short_description = 'MP3'
203     has_mp3_file.boolean = True
204
205     def has_ogg_file(self):
206         return bool(self.has_media("ogg"))
207     has_ogg_file.short_description = 'OGG'
208     has_ogg_file.boolean = True
209
210     def has_daisy_file(self):
211         return bool(self.has_media("daisy"))
212     has_daisy_file.short_description = 'DAISY'
213     has_daisy_file.boolean = True
214
215     def wldocument(self, parse_dublincore=True, inherit=True):
216         from catalogue.import_utils import ORMDocProvider
217         from librarian.parser import WLDocument
218
219         if inherit and self.parent:
220             meta_fallbacks = self.parent.cover_info()
221         else:
222             meta_fallbacks = None
223
224         return WLDocument.from_file(
225             self.xml_file.path,
226             provider=ORMDocProvider(self),
227             parse_dublincore=parse_dublincore,
228             meta_fallbacks=meta_fallbacks)
229
230     @staticmethod
231     def zip_format(format_):
232         def pretty_file_name(book):
233             return "%s/%s.%s" % (
234                 book.extra_info['author'],
235                 book.slug,
236                 format_)
237
238         field_name = "%s_file" % format_
239         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
240         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
241         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
242
243     def zip_audiobooks(self, format_):
244         bm = BookMedia.objects.filter(book=self, type=format_)
245         paths = map(lambda bm: (None, bm.file.path), bm)
246         return create_zip(paths, "%s_%s" % (self.slug, format_))
247
248     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
249         if index is None:
250             from search.index import Index
251             index = Index()
252         try:
253             index.index_book(self, book_info)
254             if index_tags:
255                 index.index_tags()
256             if commit:
257                 index.index.commit()
258         except Exception, e:
259             index.index.rollback()
260             raise e
261
262     def download_pictures(self, remote_gallery_url):
263         gallery_path = self.gallery_path()
264         # delete previous files, so we don't include old files in ebooks
265         if os.path.isdir(gallery_path):
266             for filename in os.listdir(gallery_path):
267                 file_path = os.path.join(gallery_path, filename)
268                 os.unlink(file_path)
269         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
270         if ilustr_elements:
271             makedirs(gallery_path)
272             for ilustr in ilustr_elements:
273                 ilustr_src = ilustr.get('src')
274                 ilustr_path = os.path.join(gallery_path, ilustr_src)
275                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
276
277     @classmethod
278     def from_xml_file(cls, xml_file, **kwargs):
279         from django.core.files import File
280         from librarian import dcparser
281
282         # use librarian to parse meta-data
283         book_info = dcparser.parse(xml_file)
284
285         if not isinstance(xml_file, File):
286             xml_file = File(open(xml_file))
287
288         try:
289             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
290         finally:
291             xml_file.close()
292
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's `xml_file`.
        :param book_info: parsed metadata; this method reads its `url`,
            `language`, `title`, `variant_of`, optional `parts` and `to_dict()`.
        :param overwrite: allow updating a book whose slug already exists.
        :param dont_build: iterable of build names to skip ('cover' or format
            names); always merged with app_settings.DONT_BUILD.
        :param search_index: schedule search indexing (unless
            settings.NO_SEARCH_INDEX is set).
        :param search_index_tags: passed to the indexing task as `index_tags`.
        :param remote_gallery_url: when set, pictures are downloaded from it.
        :returns: the saved Book.
        :raises Book.DoesNotExist: when a book listed in `parts` is missing.
        :raises ValueError: when the slug has characters outside [a-z0-9-].
        :raises Book.AlreadyExists: when the book exists and `overwrite` is off.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tag set with metadata tags plus the preserved shelves.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed among the parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.
        # NOTE(review): book.save() is still called once more near the end.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # NOTE(review): the HTML build is not checked against dont_build.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
392
393     @classmethod
394     @transaction.atomic
395     def repopulate_ancestors(cls):
396         """Fixes the ancestry cache."""
397         # TODO: table names
398         cursor = connection.cursor()
399         if connection.vendor == 'postgres':
400             cursor.execute("TRUNCATE catalogue_book_ancestor")
401             cursor.execute("""
402                 WITH RECURSIVE ancestry AS (
403                     SELECT book.id, book.parent_id
404                     FROM catalogue_book AS book
405                     WHERE book.parent_id IS NOT NULL
406                     UNION
407                     SELECT ancestor.id, book.parent_id
408                     FROM ancestry AS ancestor, catalogue_book AS book
409                     WHERE ancestor.parent_id = book.id
410                         AND book.parent_id IS NOT NULL
411                     )
412                 INSERT INTO catalogue_book_ancestor
413                     (from_book_id, to_book_id)
414                     SELECT id, parent_id
415                     FROM ancestry
416                     ORDER BY id;
417                 """)
418         else:
419             cursor.execute("DELETE FROM catalogue_book_ancestor")
420             for b in cls.objects.exclude(parent=None):
421                 parent = b.parent
422                 while parent is not None:
423                     b.ancestor.add(parent)
424                     parent = parent.parent
425
426     def flush_includes(self, languages=True):
427         if not languages:
428             return
429         if languages is True:
430             languages = [lc for (lc, _ln) in settings.LANGUAGES]
431         flush_ssi_includes([
432             template % (self.pk, lang)
433             for template in [
434                 '/katalog/b/%d/mini.%s.html',
435                 '/katalog/b/%d/mini_nolink.%s.html',
436                 '/katalog/b/%d/short.%s.html',
437                 '/katalog/b/%d/wide.%s.html',
438                 '/api/include/book/%d.%s.json',
439                 '/api/include/book/%d.%s.xml',
440                 ]
441             for lang in languages
442             ])
443
444     def cover_info(self, inherit=True):
445         """Returns a dictionary to serve as fallback for BookInfo.
446
447         For now, the only thing inherited is the cover image.
448         """
449         need = False
450         info = {}
451         for field in ('cover_url', 'cover_by', 'cover_source'):
452             val = self.extra_info.get(field)
453             if val:
454                 info[field] = val
455             else:
456                 need = True
457         if inherit and need and self.parent is not None:
458             parent_info = self.parent.cover_info()
459             parent_info.update(info)
460             info = parent_info
461         return info
462
463     def related_themes(self):
464         return Tag.objects.usage_for_queryset(
465             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
466             counts=True).filter(category='theme')
467
468     def parent_cover_changed(self):
469         """Called when parent book's cover image is changed."""
470         if not self.cover_info(inherit=False):
471             if 'cover' not in app_settings.DONT_BUILD:
472                 self.cover.build_delay()
473                 self.cover_thumb.build_delay()
474             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
475                 if format_ not in app_settings.DONT_BUILD:
476                     getattr(self, '%s_file' % format_).build_delay()
477             for child in self.children.all():
478                 child.parent_cover_changed()
479
480     def other_versions(self):
481         """Find other versions (i.e. in other languages) of the book."""
482         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
483
484     def parents(self):
485         books = []
486         parent = self.parent
487         while parent is not None:
488             books.insert(0, parent)
489             parent = parent.parent
490         return books
491
492     def pretty_title(self, html_links=False):
493         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
494         books = self.parents() + [self]
495         names.extend([(b.title, b.get_absolute_url()) for b in books])
496
497         if html_links:
498             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
499         else:
500             names = [tag[0] for tag in names]
501         return ', '.join(names)
502
503     def publisher(self):
504         publisher = self.extra_info['publisher']
505         if isinstance(publisher, basestring):
506             return publisher
507         elif isinstance(publisher, list):
508             return ', '.join(publisher)
509
510     @classmethod
511     def tagged_top_level(cls, tags):
512         """ Returns top-level books tagged with `tags`.
513
514         It only returns those books which don't have ancestors which are
515         also tagged with those tags.
516
517         """
518         objects = cls.tagged.with_all(tags)
519         return objects.exclude(ancestor__in=objects)
520
521     @classmethod
522     def book_list(cls, book_filter=None):
523         """Generates a hierarchical listing of all books.
524
525         Books are optionally filtered with a test function.
526
527         """
528
529         books_by_parent = {}
530         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
531         if book_filter:
532             books = books.filter(book_filter).distinct()
533
534             book_ids = set(b['pk'] for b in books.values("pk").iterator())
535             for book in books.iterator():
536                 parent = book.parent_id
537                 if parent not in book_ids:
538                     parent = None
539                 books_by_parent.setdefault(parent, []).append(book)
540         else:
541             for book in books.iterator():
542                 books_by_parent.setdefault(book.parent_id, []).append(book)
543
544         orphans = []
545         books_by_author = OrderedDict()
546         for tag in Tag.objects.filter(category='author').iterator():
547             books_by_author[tag] = []
548
549         for book in books_by_parent.get(None, ()):
550             authors = list(book.authors().only('pk'))
551             if authors:
552                 for author in authors:
553                     books_by_author[author].append(book)
554             else:
555                 orphans.append(book)
556
557         return books_by_author, orphans, books_by_parent
558
    # Maps an audience code to (sort rank, Polish label); used by
    # audiences_pl(), which sorts by rank and returns the labels.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
568
569     def audiences_pl(self):
570         audiences = self.extra_info.get('audiences', [])
571         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
572         return [a[1] for a in audiences]
573
574     def stage_note(self):
575         stage = self.extra_info.get('stage')
576         if stage and stage < '0.4':
577             return (_('This work needs modernisation'),
578                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
579         else:
580             return None, None
581
582     def choose_fragment(self):
583         fragments = self.fragments.order_by()
584         fragments_count = fragments.count()
585         if not fragments_count and self.children.exists():
586             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
587             fragments_count = fragments.count()
588         if fragments_count:
589             return fragments[randint(0, fragments_count - 1)]
590         elif self.parent:
591             return self.parent.choose_fragment()
592         else:
593             return None
594
595     def update_popularity(self):
596         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
597         try:
598             pop = self.popularity
599             pop.count = count
600             pop.save()
601         except BookPopularity.DoesNotExist:
602             BookPopularity.objects.create(book=self, count=count)
603
604
def add_file_fields():
    """Attach a `<format>_file` EbookField to Book for each of Book.formats.

    Each field gets its own upload_to function, stored under
    `book/<format>/<slug>.<format>` and published in this module's globals
    as `_<format>_upload_to`.
    """
    module_namespace = globals()
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_name = '_%s_upload_to' % format_
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_name
        module_namespace[upload_name] = _upload_to

        field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % format_)
621
# Runs at import time so that Book has its per-format file fields.
add_file_fields()
623
624
class BookPopularity(models.Model):
    """Popularity counter kept per book; written by Book.update_popularity()."""
    # Reachable from a book as `book.popularity`.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)