src/catalogue/models/book.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from collections import OrderedDict
   6 from random import randint
   7 import os.path
   8 import re
   9 import urllib
  10 from django.conf import settings
  11 from django.db import connection, models, transaction
  12 from django.db.models import permalink
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.core.urlresolvers import reverse
  16 from django.utils.translation import ugettext_lazy as _, get_language
  17 import jsonfield
  18 from fnpdjango.storage import BofhFileSystemStorage
  19 from ssify import flush_ssi_includes
  20 from newtagging import managers
  21 from catalogue import constants
  22 from catalogue.fields import EbookField
  23 from catalogue.models import Tag, Fragment, BookMedia
  24 from catalogue.utils import create_zip, gallery_url, gallery_path
  25 from catalogue.models.tag import prefetched_relations
  26 from catalogue import app_settings
  27 from catalogue import tasks
  28 from wolnelektury.utils import makedirs
  29
# Storage instance shared by the cover field and by the per-format file fields
# generated in add_file_fields() (passed as their ``storage=`` argument).
bofh_storage = BofhFileSystemStorage()
  31
  32
  33 def _make_upload_to(path):
  34     def _upload_to(i, n):
  35         return path % i.slug
  36     return _upload_to
  37
  38
# upload_to callables for the cover image and its thumbnail; both place the
# file under a path built from the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
  41
  42
  43 def _ebook_upload_to(upload_path):
  44     return _make_upload_to(upload_path)
  45
  46
  47 class Book(models.Model):
  48     """Represents a book imported from WL-XML."""
  49     title = models.CharField(_('title'), max_length=32767)
  50     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  51     sort_key_author = models.CharField(
  52         _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
  53     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  54     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  55     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  56     description = models.TextField(_('description'), blank=True)
  57     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  58     changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
  59     parent_number = models.IntegerField(_('parent number'), default=0)
  60     extra_info = jsonfield.JSONField(_('extra information'), default={})
  61     gazeta_link = models.CharField(blank=True, max_length=240)
  62     wiki_link = models.CharField(blank=True, max_length=240)
  63     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  64     recommended = models.BooleanField(_('recommended'), default=False)
  65
  66     # files generated during publication
  67     cover = EbookField(
  68         'cover', _('cover'),
  69         null=True, blank=True,
  70         upload_to=_cover_upload_to,
  71         storage=bofh_storage, max_length=255)
  72     # Cleaner version of cover for thumbs
  73     cover_thumb = EbookField(
  74         'cover_thumb', _('cover thumbnail'),
  75         null=True, blank=True,
  76         upload_to=_cover_thumb_upload_to,
  77         max_length=255)
  78     ebook_formats = constants.EBOOK_FORMATS
  79     formats = ebook_formats + ['html', 'xml']
  80
  81     parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
  82     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
  83
  84     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
  85     has_audience = models.BooleanField(default=False)
  86
  87     objects = models.Manager()
  88     tagged = managers.ModelTaggedItemManager(Tag)
  89     tags = managers.TagDescriptor(Tag)
  90     tag_relations = GenericRelation(Tag.intermediary_table_model)
  91
  92     html_built = django.dispatch.Signal()
  93     published = django.dispatch.Signal()
  94
  95     short_html_url_name = 'catalogue_book_short'
  96
  97     class AlreadyExists(Exception):
  98         pass
  99
 100     class Meta:
 101         ordering = ('sort_key_author', 'sort_key')
 102         verbose_name = _('book')
 103         verbose_name_plural = _('books')
 104         app_label = 'catalogue'
 105
 106     def __unicode__(self):
 107         return self.title
 108
 109     def get_initial(self):
 110         try:
 111             return re.search(r'\w', self.title, re.U).group(0)
 112         except AttributeError:
 113             return ''
 114
 115     def authors(self):
 116         return self.tags.filter(category='author')
 117
 118     def tag_unicode(self, category):
 119         relations = prefetched_relations(self, category)
 120         if relations:
 121             return ', '.join(rel.tag.name for rel in relations)
 122         else:
 123             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 124
 125     def author_unicode(self):
 126         return self.cached_author
 127
 128     def translator(self):
 129         translators = self.extra_info.get('translators')
 130         if not translators:
 131             return None
 132         if len(translators) > 3:
 133             translators = translators[:2]
 134             others = ' i inni'
 135         else:
 136             others = ''
 137         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 138
 139     def save(self, force_insert=False, force_update=False, **kwargs):
 140         from sortify import sortify
 141
 142         self.sort_key = sortify(self.title)[:120]
 143         self.title = unicode(self.title)  # ???
 144
 145         try:
 146             author = self.authors().first().sort_key
 147         except AttributeError:
 148             author = u''
 149         self.sort_key_author = author
 150
 151         self.cached_author = self.tag_unicode('author')
 152         self.has_audience = 'audience' in self.extra_info
 153
 154         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 155
 156         return ret
 157
 158     @permalink
 159     def get_absolute_url(self):
 160         return 'catalogue.views.book_detail', [self.slug]
 161
 162     @staticmethod
 163     @permalink
 164     def create_url(slug):
 165         return 'catalogue.views.book_detail', [slug]
 166
 167     def gallery_path(self):
 168         return gallery_path(self.slug)
 169
 170     def gallery_url(self):
 171         return gallery_url(self.slug)
 172
 173     @property
 174     def name(self):
 175         return self.title
 176
 177     def language_code(self):
 178         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 179
 180     def language_name(self):
 181         return dict(settings.LANGUAGES).get(self.language_code(), "")
 182
 183     def is_foreign(self):
 184         return self.language_code() != settings.LANGUAGE_CODE
 185
 186     def has_media(self, type_):
 187         if type_ in Book.formats:
 188             return bool(getattr(self, "%s_file" % type_))
 189         else:
 190             return self.media.filter(type=type_).exists()
 191
 192     def get_media(self, type_):
 193         if self.has_media(type_):
 194             if type_ in Book.formats:
 195                 return getattr(self, "%s_file" % type_)
 196             else:
 197                 return self.media.filter(type=type_)
 198         else:
 199             return None
 200
 201     def get_mp3(self):
 202         return self.get_media("mp3")
 203
 204     def get_odt(self):
 205         return self.get_media("odt")
 206
 207     def get_ogg(self):
 208         return self.get_media("ogg")
 209
 210     def get_daisy(self):
 211         return self.get_media("daisy")
 212
 213     def has_description(self):
 214         return len(self.description) > 0
 215     has_description.short_description = _('description')
 216     has_description.boolean = True
 217
 218     # ugly ugly ugly
 219     def has_mp3_file(self):
 220         return bool(self.has_media("mp3"))
 221     has_mp3_file.short_description = 'MP3'
 222     has_mp3_file.boolean = True
 223
 224     def has_ogg_file(self):
 225         return bool(self.has_media("ogg"))
 226     has_ogg_file.short_description = 'OGG'
 227     has_ogg_file.boolean = True
 228
 229     def has_daisy_file(self):
 230         return bool(self.has_media("daisy"))
 231     has_daisy_file.short_description = 'DAISY'
 232     has_daisy_file.boolean = True
 233
 234     def wldocument(self, parse_dublincore=True, inherit=True):
 235         from catalogue.import_utils import ORMDocProvider
 236         from librarian.parser import WLDocument
 237
 238         if inherit and self.parent:
 239             meta_fallbacks = self.parent.cover_info()
 240         else:
 241             meta_fallbacks = None
 242
 243         return WLDocument.from_file(
 244             self.xml_file.path,
 245             provider=ORMDocProvider(self),
 246             parse_dublincore=parse_dublincore,
 247             meta_fallbacks=meta_fallbacks)
 248
 249     @staticmethod
 250     def zip_format(format_):
 251         def pretty_file_name(book):
 252             return "%s/%s.%s" % (
 253                 book.extra_info['author'],
 254                 book.slug,
 255                 format_)
 256
 257         field_name = "%s_file" % format_
 258         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
 259         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 260         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 261
 262     def zip_audiobooks(self, format_):
 263         bm = BookMedia.objects.filter(book=self, type=format_)
 264         paths = map(lambda bm: (None, bm.file.path), bm)
 265         return create_zip(paths, "%s_%s" % (self.slug, format_))
 266
 267     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 268         if index is None:
 269             from search.index import Index
 270             index = Index()
 271         try:
 272             index.index_book(self, book_info)
 273             if index_tags:
 274                 index.index_tags()
 275             if commit:
 276                 index.index.commit()
 277         except Exception, e:
 278             index.index.rollback()
 279             raise e
 280
 281     def download_pictures(self, remote_gallery_url):
 282         gallery_path = self.gallery_path()
 283         # delete previous files, so we don't include old files in ebooks
 284         if os.path.isdir(gallery_path):
 285             for filename in os.listdir(gallery_path):
 286                 file_path = os.path.join(gallery_path, filename)
 287                 os.unlink(file_path)
 288         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 289         if ilustr_elements:
 290             makedirs(gallery_path)
 291             for ilustr in ilustr_elements:
 292                 ilustr_src = ilustr.get('src')
 293                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 294                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 295
 296     @classmethod
 297     def from_xml_file(cls, xml_file, **kwargs):
 298         from django.core.files import File
 299         from librarian import dcparser
 300
 301         # use librarian to parse meta-data
 302         book_info = dcparser.parse(xml_file)
 303
 304         if not isinstance(xml_file, File):
 305             xml_file = File(open(xml_file))
 306
 307         try:
 308             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 309         finally:
 310             xml_file.close()
 311
 312     @classmethod
 313     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
 314                            search_index_tags=True, remote_gallery_url=None):
 315         if dont_build is None:
 316             dont_build = set()
 317         dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
 318
 319         # check for parts before we do anything
 320         children = []
 321         if hasattr(book_info, 'parts'):
 322             for part_url in book_info.parts:
 323                 try:
 324                     children.append(Book.objects.get(slug=part_url.slug))
 325                 except Book.DoesNotExist:
 326                     raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
 327
 328         # Read book metadata
 329         book_slug = book_info.url.slug
 330         if re.search(r'[^a-z0-9-]', book_slug):
 331             raise ValueError('Invalid characters in slug')
 332         book, created = Book.objects.get_or_create(slug=book_slug)
 333
 334         if created:
 335             book_shelves = []
 336             old_cover = None
 337         else:
 338             if not overwrite:
 339                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 340             # Save shelves for this book
 341             book_shelves = list(book.tags.filter(category='set'))
 342             old_cover = book.cover_info()
 343
 344         # Save XML file
 345         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 346
 347         book.language = book_info.language
 348         book.title = book_info.title
 349         if book_info.variant_of:
 350             book.common_slug = book_info.variant_of.slug
 351         else:
 352             book.common_slug = book.slug
 353         book.extra_info = book_info.to_dict()
 354         book.save()
 355
 356         meta_tags = Tag.tags_from_info(book_info)
 357
 358         book.tags = set(meta_tags + book_shelves)
 359
 360         cover_changed = old_cover != book.cover_info()
 361         obsolete_children = set(b for b in book.children.all()
 362                                 if b not in children)
 363         notify_cover_changed = []
 364         for n, child_book in enumerate(children):
 365             new_child = child_book.parent != book
 366             child_book.parent = book
 367             child_book.parent_number = n
 368             child_book.save()
 369             if new_child or cover_changed:
 370                 notify_cover_changed.append(child_book)
 371         # Disown unfaithful children and let them cope on their own.
 372         for child in obsolete_children:
 373             child.parent = None
 374             child.parent_number = 0
 375             child.save()
 376             if old_cover:
 377                 notify_cover_changed.append(child)
 378
 379         cls.repopulate_ancestors()
 380         tasks.update_counters.delay()
 381
 382         if remote_gallery_url:
 383             book.download_pictures(remote_gallery_url)
 384
 385         # No saves beyond this point.
 386
 387         # Build cover.
 388         if 'cover' not in dont_build:
 389             book.cover.build_delay()
 390             book.cover_thumb.build_delay()
 391
 392         # Build HTML and ebooks.
 393         book.html_file.build_delay()
 394         if not children:
 395             for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
 396                 if format_ not in dont_build:
 397                     getattr(book, '%s_file' % format_).build_delay()
 398         for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
 399             if format_ not in dont_build:
 400                 getattr(book, '%s_file' % format_).build_delay()
 401
 402         if not settings.NO_SEARCH_INDEX and search_index:
 403             tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
 404
 405         for child in notify_cover_changed:
 406             child.parent_cover_changed()
 407
 408         book.save()  # update sort_key_author
 409         cls.published.send(sender=cls, instance=book)
 410         return book
 411
 412     @classmethod
 413     @transaction.atomic
 414     def repopulate_ancestors(cls):
 415         """Fixes the ancestry cache."""
 416         # TODO: table names
 417         cursor = connection.cursor()
 418         if connection.vendor == 'postgres':
 419             cursor.execute("TRUNCATE catalogue_book_ancestor")
 420             cursor.execute("""
 421                 WITH RECURSIVE ancestry AS (
 422                     SELECT book.id, book.parent_id
 423                     FROM catalogue_book AS book
 424                     WHERE book.parent_id IS NOT NULL
 425                     UNION
 426                     SELECT ancestor.id, book.parent_id
 427                     FROM ancestry AS ancestor, catalogue_book AS book
 428                     WHERE ancestor.parent_id = book.id
 429                         AND book.parent_id IS NOT NULL
 430                     )
 431                 INSERT INTO catalogue_book_ancestor
 432                     (from_book_id, to_book_id)
 433                     SELECT id, parent_id
 434                     FROM ancestry
 435                     ORDER BY id;
 436                 """)
 437         else:
 438             cursor.execute("DELETE FROM catalogue_book_ancestor")
 439             for b in cls.objects.exclude(parent=None):
 440                 parent = b.parent
 441                 while parent is not None:
 442                     b.ancestor.add(parent)
 443                     parent = parent.parent
 444
 445     def flush_includes(self, languages=True):
 446         if not languages:
 447             return
 448         if languages is True:
 449             languages = [lc for (lc, _ln) in settings.LANGUAGES]
 450         flush_ssi_includes([
 451             template % (self.pk, lang)
 452             for template in [
 453                 '/katalog/b/%d/mini.%s.html',
 454                 '/katalog/b/%d/mini_nolink.%s.html',
 455                 '/katalog/b/%d/short.%s.html',
 456                 '/katalog/b/%d/wide.%s.html',
 457                 '/api/include/book/%d.%s.json',
 458                 '/api/include/book/%d.%s.xml',
 459                 ]
 460             for lang in languages
 461             ])
 462
 463     def cover_info(self, inherit=True):
 464         """Returns a dictionary to serve as fallback for BookInfo.
 465
 466         For now, the only thing inherited is the cover image.
 467         """
 468         need = False
 469         info = {}
 470         for field in ('cover_url', 'cover_by', 'cover_source'):
 471             val = self.extra_info.get(field)
 472             if val:
 473                 info[field] = val
 474             else:
 475                 need = True
 476         if inherit and need and self.parent is not None:
 477             parent_info = self.parent.cover_info()
 478             parent_info.update(info)
 479             info = parent_info
 480         return info
 481
 482     def related_themes(self):
 483         return Tag.objects.usage_for_queryset(
 484             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 485             counts=True).filter(category='theme')
 486
 487     def parent_cover_changed(self):
 488         """Called when parent book's cover image is changed."""
 489         if not self.cover_info(inherit=False):
 490             if 'cover' not in app_settings.DONT_BUILD:
 491                 self.cover.build_delay()
 492                 self.cover_thumb.build_delay()
 493             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 494                 if format_ not in app_settings.DONT_BUILD:
 495                     getattr(self, '%s_file' % format_).build_delay()
 496             for child in self.children.all():
 497                 child.parent_cover_changed()
 498
 499     def other_versions(self):
 500         """Find other versions (i.e. in other languages) of the book."""
 501         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
 502
 503     def parents(self):
 504         books = []
 505         parent = self.parent
 506         while parent is not None:
 507             books.insert(0, parent)
 508             parent = parent.parent
 509         return books
 510
 511     def pretty_title(self, html_links=False):
 512         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 513         books = self.parents() + [self]
 514         names.extend([(b.title, b.get_absolute_url()) for b in books])
 515
 516         if html_links:
 517             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 518         else:
 519             names = [tag[0] for tag in names]
 520         return ', '.join(names)
 521
 522     def publisher(self):
 523         publisher = self.extra_info['publisher']
 524         if isinstance(publisher, basestring):
 525             return publisher
 526         elif isinstance(publisher, list):
 527             return ', '.join(publisher)
 528
 529     @classmethod
 530     def tagged_top_level(cls, tags):
 531         """ Returns top-level books tagged with `tags`.
 532
 533         It only returns those books which don't have ancestors which are
 534         also tagged with those tags.
 535
 536         """
 537         objects = cls.tagged.with_all(tags)
 538         return objects.exclude(ancestor__in=objects)
 539
 540     @classmethod
 541     def book_list(cls, book_filter=None):
 542         """Generates a hierarchical listing of all books.
 543
 544         Books are optionally filtered with a test function.
 545
 546         """
 547
 548         books_by_parent = {}
 549         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
 550         if book_filter:
 551             books = books.filter(book_filter).distinct()
 552
 553             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 554             for book in books.iterator():
 555                 parent = book.parent_id
 556                 if parent not in book_ids:
 557                     parent = None
 558                 books_by_parent.setdefault(parent, []).append(book)
 559         else:
 560             for book in books.iterator():
 561                 books_by_parent.setdefault(book.parent_id, []).append(book)
 562
 563         orphans = []
 564         books_by_author = OrderedDict()
 565         for tag in Tag.objects.filter(category='author').iterator():
 566             books_by_author[tag] = []
 567
 568         for book in books_by_parent.get(None, ()):
 569             authors = list(book.authors().only('pk'))
 570             if authors:
 571                 for author in authors:
 572                     books_by_author[author].append(book)
 573             else:
 574                 orphans.append(book)
 575
 576         return books_by_author, orphans, books_by_parent
 577
 578     _audiences_pl = {
 579         "SP": (1, u"szkoła podstawowa"),
 580         "SP1": (1, u"szkoła podstawowa"),
 581         "SP2": (1, u"szkoła podstawowa"),
 582         "SP3": (1, u"szkoła podstawowa"),
 583         "P": (1, u"szkoła podstawowa"),
 584         "G": (2, u"gimnazjum"),
 585         "L": (3, u"liceum"),
 586         "LP": (3, u"liceum"),
 587     }
 588
 589     def audiences_pl(self):
 590         audiences = self.extra_info.get('audiences', [])
 591         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 592         return [a[1] for a in audiences]
 593
 594     def stage_note(self):
 595         stage = self.extra_info.get('stage')
 596         if stage and stage < '0.4':
 597             return (_('This work needs modernisation'),
 598                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 599         else:
 600             return None, None
 601
 602     def choose_fragment(self):
 603         fragments = self.fragments.order_by()
 604         fragments_count = fragments.count()
 605         if not fragments_count and self.children.exists():
 606             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 607             fragments_count = fragments.count()
 608         if fragments_count:
 609             return fragments[randint(0, fragments_count - 1)]
 610         elif self.parent:
 611             return self.parent.choose_fragment()
 612         else:
 613             return None
 614
 615     def update_popularity(self):
 616         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 617         try:
 618             pop = self.popularity
 619             pop.count = count
 620             pop.save()
 621         except BookPopularity.DoesNotExist:
 622             BookPopularity.objects.create(book=self, count=count)
 623
 624     def ridero_link(self):
 625         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 626
 627
 628 def add_file_fields():
 629     for format_ in Book.formats:
 630         field_name = "%s_file" % format_
 631         # This weird globals() assignment makes Django migrations comfortable.
 632         _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
 633         _upload_to.__name__ = '_%s_upload_to' % format_
 634         globals()[_upload_to.__name__] = _upload_to
 635
 636         EbookField(
 637             format_, _("%s file" % format_.upper()),
 638             upload_to=_upload_to,
 639             storage=bofh_storage,
 640             max_length=255,
 641             blank=True,
 642             default=''
 643         ).contribute_to_class(Book, field_name)
 644
# Runs at import time so that Book has its ``<format>_file`` fields as soon as
# this module is loaded.
add_file_fields()
 646
 647
class BookPopularity(models.Model):
    """Popularity counter for a single book.

    The row is created and updated by ``Book.update_popularity()``.
    """
    # One row per book, reachable from the book as ``book.popularity``.
    book = models.OneToOneField(Book, related_name='popularity')
    # Value written by ``Book.update_popularity()``.
    count = models.IntegerField(default=0)