src/catalogue/models/book.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from collections import OrderedDict
   6 from random import randint
   7 import os.path
   8 import re
   9 import urllib
  10 from django.conf import settings
  11 from django.db import connection, models, transaction
  12 from django.db.models import permalink
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.core.urlresolvers import reverse
  16 from django.utils.translation import ugettext_lazy as _
  17 import jsonfield
  18 from fnpdjango.storage import BofhFileSystemStorage
  19 from ssify import flush_ssi_includes
  20 from newtagging import managers
  21 from catalogue import constants
  22 from catalogue.fields import EbookField
  23 from catalogue.models import Tag, Fragment, BookMedia
  24 from catalogue.utils import create_zip, gallery_url, gallery_path
  25 from catalogue import app_settings
  26 from catalogue import tasks
  27 from wolnelektury.utils import makedirs
  28
# Single storage instance shared by the file fields in this module that pass
# ``storage=bofh_storage`` (the cover and the per-format ebook files).
bofh_storage = BofhFileSystemStorage()
  30
  31
  32 def _make_upload_to(path):
  33     def _upload_to(i, n):
  34         return path % i.slug
  35     return _upload_to
  36
  37
# Module-level ``upload_to`` callables for the ``cover`` and ``cover_thumb``
# fields of Book; both derive the target path from the instance's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
  40
  41
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for ``upload_path`` (a ``%s`` slug template).

    Thin alias of ``_make_upload_to``, used when the per-format file fields
    are created.
    """
    return _make_upload_to(upload_path)
  44
  45
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one ``<format>_file`` field per entry
    of ``Book.formats`` is attached to the class by ``add_file_fields()``
    at the bottom of this module.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed in save(); not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared between variants of a book; see other_versions().
    # NOTE(review): verbose name repeats 'slug' -- confirm whether intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): label duplicates created_at's 'creation date' although
    # this field uses auto_now -- presumably a copy-paste slip; confirm.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children; set from the parts order on import.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Dictionary form of the parsed book info (``book_info.to_dict()``).
    # NOTE(review): mutable ``{}`` literal used as default -- verify the
    # JSONField implementation copies it per instance.
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # Format names; ``formats`` drives which ``<format>_file`` fields exist.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent, plus a denormalised many-to-many of all ancestors that
    # is rebuilt by repopulate_ancestors().
    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised on import when the slug exists and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
  99
    def __unicode__(self):
        """Return the book's title as its text representation."""
        return self.title
 102
 103     def get_initial(self):
 104         try:
 105             return re.search(r'\w', self.title, re.U).group(0)
 106         except AttributeError:
 107             return ''
 108
    def authors(self):
        """Return this book's tags whose category is 'author'."""
        return self.tags.filter(category='author')
 111
 112     def author_unicode(self):
 113         return ", ".join(self.authors().values_list('name', flat=True))
 114
 115     def save(self, force_insert=False, force_update=False, **kwargs):
 116         from sortify import sortify
 117
 118         self.sort_key = sortify(self.title)[:120]
 119         self.title = unicode(self.title)  # ???
 120
 121         try:
 122             author = self.authors().first().sort_key
 123         except AttributeError:
 124             author = u''
 125         self.sort_key_author = author
 126
 127         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 128
 129         return ret
 130
 131     @permalink
 132     def get_absolute_url(self):
 133         return 'catalogue.views.book_detail', [self.slug]
 134
 135     @staticmethod
 136     @permalink
 137     def create_url(slug):
 138         return 'catalogue.views.book_detail', [slug]
 139
    def gallery_path(self):
        """Return ``catalogue.utils.gallery_path`` for this book's slug."""
        return gallery_path(self.slug)
 142
    def gallery_url(self):
        """Return ``catalogue.utils.gallery_url`` for this book's slug."""
        return gallery_url(self.slug)
 145
    @property
    def name(self):
        """Read-only alias for ``title``."""
        return self.title
 149
    def language_code(self):
        """Return ``language`` mapped through ``constants.LANGUAGES_3TO2``.

        Falls back to the stored value when the mapping has no entry
        (presumably a 3-letter to 2-letter code table -- confirm).
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
 152
 153     def language_name(self):
 154         return dict(settings.LANGUAGES).get(self.language_code(), "")
 155
    def is_foreign(self):
        """Return True when the book's language code differs from ``settings.LANGUAGE_CODE``."""
        return self.language_code() != settings.LANGUAGE_CODE
 158
 159     def has_media(self, type_):
 160         if type_ in Book.formats:
 161             return bool(getattr(self, "%s_file" % type_))
 162         else:
 163             return self.media.filter(type=type_).exists()
 164
 165     def get_media(self, type_):
 166         if self.has_media(type_):
 167             if type_ in Book.formats:
 168                 return getattr(self, "%s_file" % type_)
 169             else:
 170                 return self.media.filter(type=type_)
 171         else:
 172             return None
 173
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
 176
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
 179
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
 182
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
 185
 186     def has_description(self):
 187         return len(self.description) > 0
 188     has_description.short_description = _('description')
 189     has_description.boolean = True
 190
 191     # ugly ugly ugly
    def has_mp3_file(self):
        """Return True when the book has any "mp3" media."""
        return bool(self.has_media("mp3"))
    # Function attributes carrying a column label and a boolean-display flag.
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
 196
    def has_ogg_file(self):
        """Return True when the book has any "ogg" media."""
        return bool(self.has_media("ogg"))
    # Function attributes carrying a column label and a boolean-display flag.
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
 201
    def has_daisy_file(self):
        """Return True when the book has any "daisy" media."""
        return bool(self.has_media("daisy"))
    # Function attributes carrying a column label and a boolean-display flag.
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
 206
 207     def wldocument(self, parse_dublincore=True, inherit=True):
 208         from catalogue.import_utils import ORMDocProvider
 209         from librarian.parser import WLDocument
 210
 211         if inherit and self.parent:
 212             meta_fallbacks = self.parent.cover_info()
 213         else:
 214             meta_fallbacks = None
 215
 216         return WLDocument.from_file(
 217             self.xml_file.path,
 218             provider=ORMDocProvider(self),
 219             parse_dublincore=parse_dublincore,
 220             meta_fallbacks=meta_fallbacks)
 221
 222     @staticmethod
 223     def zip_format(format_):
 224         def pretty_file_name(book):
 225             return "%s/%s.%s" % (
 226                 book.extra_info['author'],
 227                 book.slug,
 228                 format_)
 229
 230         field_name = "%s_file" % format_
 231         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
 232         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 233         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 234
 235     def zip_audiobooks(self, format_):
 236         bm = BookMedia.objects.filter(book=self, type=format_)
 237         paths = map(lambda bm: (None, bm.file.path), bm)
 238         return create_zip(paths, "%s_%s" % (self.slug, format_))
 239
 240     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 241         if index is None:
 242             from search.index import Index
 243             index = Index()
 244         try:
 245             index.index_book(self, book_info)
 246             if index_tags:
 247                 index.index_tags()
 248             if commit:
 249                 index.index.commit()
 250         except Exception, e:
 251             index.index.rollback()
 252             raise e
 253
 254     def download_pictures(self, remote_gallery_url):
 255         gallery_path = self.gallery_path()
 256         # delete previous files, so we don't include old files in ebooks
 257         if os.path.isdir(gallery_path):
 258             for filename in os.listdir(gallery_path):
 259                 file_path = os.path.join(gallery_path, filename)
 260                 os.unlink(file_path)
 261         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 262         if ilustr_elements:
 263             makedirs(gallery_path)
 264             for ilustr in ilustr_elements:
 265                 ilustr_src = ilustr.get('src')
 266                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 267                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 268
 269     @classmethod
 270     def from_xml_file(cls, xml_file, **kwargs):
 271         from django.core.files import File
 272         from librarian import dcparser
 273
 274         # use librarian to parse meta-data
 275         book_info = dcparser.parse(xml_file)
 276
 277         if not isinstance(xml_file, File):
 278             xml_file = File(open(xml_file))
 279
 280         try:
 281             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 282         finally:
 283             xml_file.close()
 284
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``.
        :param book_info: parsed metadata; read here for ``url.slug``,
            ``language``, ``title``, ``variant_of``, ``to_dict()`` and the
            optional ``parts``.
        :param overwrite: allow updating a book whose slug already exists.
        :param dont_build: iterable of build names to skip, merged with
            ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing, unless
            ``settings.NO_SEARCH_INDEX`` is set.
        :param search_index_tags: passed to the indexing task as ``index_tags``.
        :param remote_gallery_url: when given, pictures are downloaded from it.
        :returns: the saved ``Book``.
        :raises Book.DoesNotExist: a part listed in ``book_info`` is missing.
        :raises ValueError: the slug has characters outside ``[a-z0-9-]``.
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tags with those from metadata, keeping the 'set' tags.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Books whose parent_cover_changed() is called at the very end.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        cls.published.send(sender=cls, instance=book)
        return book
 383
 384     @classmethod
 385     @transaction.atomic
 386     def repopulate_ancestors(cls):
 387         """Fixes the ancestry cache."""
 388         # TODO: table names
 389         cursor = connection.cursor()
 390         if connection.vendor == 'postgres':
 391             cursor.execute("TRUNCATE catalogue_book_ancestor")
 392             cursor.execute("""
 393                 WITH RECURSIVE ancestry AS (
 394                     SELECT book.id, book.parent_id
 395                     FROM catalogue_book AS book
 396                     WHERE book.parent_id IS NOT NULL
 397                     UNION
 398                     SELECT ancestor.id, book.parent_id
 399                     FROM ancestry AS ancestor, catalogue_book AS book
 400                     WHERE ancestor.parent_id = book.id
 401                         AND book.parent_id IS NOT NULL
 402                     )
 403                 INSERT INTO catalogue_book_ancestor
 404                     (from_book_id, to_book_id)
 405                     SELECT id, parent_id
 406                     FROM ancestry
 407                     ORDER BY id;
 408                 """)
 409         else:
 410             cursor.execute("DELETE FROM catalogue_book_ancestor")
 411             for b in cls.objects.exclude(parent=None):
 412                 parent = b.parent
 413                 while parent is not None:
 414                     b.ancestor.add(parent)
 415                     parent = parent.parent
 416
 417     def flush_includes(self, languages=True):
 418         if not languages:
 419             return
 420         if languages is True:
 421             languages = [lc for (lc, _ln) in settings.LANGUAGES]
 422         flush_ssi_includes([
 423             template % (self.pk, lang)
 424             for template in [
 425                 '/katalog/b/%d/mini.%s.html',
 426                 '/katalog/b/%d/mini_nolink.%s.html',
 427                 '/katalog/b/%d/short.%s.html',
 428                 '/katalog/b/%d/wide.%s.html',
 429                 '/api/include/book/%d.%s.json',
 430                 '/api/include/book/%d.%s.xml',
 431                 ]
 432             for lang in languages
 433             ])
 434
 435     def cover_info(self, inherit=True):
 436         """Returns a dictionary to serve as fallback for BookInfo.
 437
 438         For now, the only thing inherited is the cover image.
 439         """
 440         need = False
 441         info = {}
 442         for field in ('cover_url', 'cover_by', 'cover_source'):
 443             val = self.extra_info.get(field)
 444             if val:
 445                 info[field] = val
 446             else:
 447                 need = True
 448         if inherit and need and self.parent is not None:
 449             parent_info = self.parent.cover_info()
 450             parent_info.update(info)
 451             info = parent_info
 452         return info
 453
 454     def related_themes(self):
 455         return Tag.objects.usage_for_queryset(
 456             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 457             counts=True).filter(category='theme')
 458
 459     def parent_cover_changed(self):
 460         """Called when parent book's cover image is changed."""
 461         if not self.cover_info(inherit=False):
 462             if 'cover' not in app_settings.DONT_BUILD:
 463                 self.cover.build_delay()
 464                 self.cover_thumb.build_delay()
 465             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 466                 if format_ not in app_settings.DONT_BUILD:
 467                     getattr(self, '%s_file' % format_).build_delay()
 468             for child in self.children.all():
 469                 child.parent_cover_changed()
 470
 471     def other_versions(self):
 472         """Find other versions (i.e. in other languages) of the book."""
 473         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
 474
 475     def parents(self):
 476         books = []
 477         parent = self.parent
 478         while parent is not None:
 479             books.insert(0, parent)
 480             parent = parent.parent
 481         return books
 482
 483     def pretty_title(self, html_links=False):
 484         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 485         books = self.parents() + [self]
 486         names.extend([(b.title, b.get_absolute_url()) for b in books])
 487
 488         if html_links:
 489             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 490         else:
 491             names = [tag[0] for tag in names]
 492         return ', '.join(names)
 493
 494     @classmethod
 495     def tagged_top_level(cls, tags):
 496         """ Returns top-level books tagged with `tags`.
 497
 498         It only returns those books which don't have ancestors which are
 499         also tagged with those tags.
 500
 501         """
 502         objects = cls.tagged.with_all(tags)
 503         return objects.exclude(ancestor__in=objects)
 504
 505     @classmethod
 506     def book_list(cls, book_filter=None):
 507         """Generates a hierarchical listing of all books.
 508
 509         Books are optionally filtered with a test function.
 510
 511         """
 512
 513         books_by_parent = {}
 514         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
 515         if book_filter:
 516             books = books.filter(book_filter).distinct()
 517
 518             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 519             for book in books.iterator():
 520                 parent = book.parent_id
 521                 if parent not in book_ids:
 522                     parent = None
 523                 books_by_parent.setdefault(parent, []).append(book)
 524         else:
 525             for book in books.iterator():
 526                 books_by_parent.setdefault(book.parent_id, []).append(book)
 527
 528         orphans = []
 529         books_by_author = OrderedDict()
 530         for tag in Tag.objects.filter(category='author').iterator():
 531             books_by_author[tag] = []
 532
 533         for book in books_by_parent.get(None, ()):
 534             authors = list(book.authors().only('pk'))
 535             if authors:
 536                 for author in authors:
 537                     books_by_author[author].append(book)
 538             else:
 539                 orphans.append(book)
 540
 541         return books_by_author, orphans, books_by_parent
 542
    # Maps an audience code to a ``(sort order, Polish label)`` pair; used by
    # audiences_pl() to order and name the audiences. Several codes share one
    # label, so they collapse into a single entry there.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
 552
 553     def audiences_pl(self):
 554         audiences = self.extra_info.get('audiences', [])
 555         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 556         return [a[1] for a in audiences]
 557
 558     def stage_note(self):
 559         stage = self.extra_info.get('stage')
 560         if stage and stage < '0.4':
 561             return (_('This work needs modernisation'),
 562                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 563         else:
 564             return None, None
 565
 566     def choose_fragment(self):
 567         fragments = self.fragments.order_by()
 568         fragments_count = fragments.count()
 569         if not fragments_count and self.children.exists():
 570             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 571             fragments_count = fragments.count()
 572         if fragments_count:
 573             return fragments[randint(0, fragments_count - 1)]
 574         elif self.parent:
 575             return self.parent.choose_fragment()
 576         else:
 577             return None
 578
 579
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for each of ``Book.formats``.

    Every field gets its own ``upload_to`` function for the path
    ``book/<format>/<slug>.<format>``, published in this module's globals
    as ``_<format>_upload_to``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        # NOTE(review): the label is interpolated before being handed to
        # the translation function -- confirm this is intended.
        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)

# Run at import time so that the fields exist as soon as the module is loaded.
add_file_fields()