src/catalogue/models/book.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from collections import OrderedDict
   6 from random import randint
   7 import os.path
   8 import re
   9 import urllib
  10 from django.conf import settings
  11 from django.db import connection, models, transaction
  12 from django.db.models import permalink
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.core.urlresolvers import reverse
  16 from django.utils.translation import ugettext_lazy as _, get_language
  17 import jsonfield
  18 from fnpdjango.storage import BofhFileSystemStorage
  19 from ssify import flush_ssi_includes
  20 from newtagging import managers
  21 from catalogue import constants
  22 from catalogue.fields import EbookField
  23 from catalogue.models import Tag, Fragment, BookMedia
  24 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
  25 from catalogue.models.tag import prefetched_relations
  26 from catalogue import app_settings
  27 from catalogue import tasks
  28 from wolnelektury.utils import makedirs
  29
# Storage instance shared by the cover field and the per-format file fields.
bofh_storage = BofhFileSystemStorage()
  31
  32
  33 def _make_upload_to(path):
  34     def _upload_to(i, n):
  35         return path % i.slug
  36     return _upload_to
  37
  38
# upload_to callables for the cover image and its thumbnail; both derive the
# target path from the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
  41
  42
  43 def _ebook_upload_to(upload_path):
  44     return _make_upload_to(upload_path)
  45
  46
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the fields declared here, one ``FORMAT_file`` field per entry of
    ``formats`` is attached by ``add_file_fields()`` after the class body.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save()
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # slug shared by books that other_versions() treats as versions of each other
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # position among the parent's children, assigned in from_text_and_meta()
    parent_number = models.IntegerField(_('parent number'), default=0)
    # metadata dictionary; from_text_and_meta() fills it from book_info.to_dict()
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # formats lists every type that gets a FORMAT_file field (see add_file_fields())
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # cache of all transitive parents, rebuilt by repopulate_ancestors()
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # signals; ``published`` is sent at the end of from_text_and_meta()
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() for an existing slug when overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 102
 103     def __unicode__(self):
 104         return self.title
 105
 106     def get_initial(self):
 107         try:
 108             return re.search(r'\w', self.title, re.U).group(0)
 109         except AttributeError:
 110             return ''
 111
 112     def authors(self):
 113         return self.tags.filter(category='author')
 114
 115     def tag_unicode(self, category):
 116         relations = prefetched_relations(self, category)
 117         if relations:
 118             return ', '.join(rel.tag.name for rel in relations)
 119         else:
 120             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 121
 122     def tags_by_category(self):
 123         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 124
 125     def author_unicode(self):
 126         return self.tag_unicode('author')
 127
 128     def translator(self):
 129         translators = self.extra_info.get('translators')
 130         if not translators:
 131             return None
 132         if len(translators) > 3:
 133             translators = translators[:2]
 134             others = ' i inni'
 135         else:
 136             others = ''
 137         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 138
 139     def save(self, force_insert=False, force_update=False, **kwargs):
 140         from sortify import sortify
 141
 142         self.sort_key = sortify(self.title)[:120]
 143         self.title = unicode(self.title)  # ???
 144
 145         try:
 146             author = self.authors().first().sort_key
 147         except AttributeError:
 148             author = u''
 149         self.sort_key_author = author
 150
 151         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 152
 153         return ret
 154
 155     @permalink
 156     def get_absolute_url(self):
 157         return 'catalogue.views.book_detail', [self.slug]
 158
 159     @staticmethod
 160     @permalink
 161     def create_url(slug):
 162         return 'catalogue.views.book_detail', [slug]
 163
 164     def gallery_path(self):
 165         return gallery_path(self.slug)
 166
 167     def gallery_url(self):
 168         return gallery_url(self.slug)
 169
 170     @property
 171     def name(self):
 172         return self.title
 173
 174     def language_code(self):
 175         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 176
 177     def language_name(self):
 178         return dict(settings.LANGUAGES).get(self.language_code(), "")
 179
 180     def is_foreign(self):
 181         return self.language_code() != settings.LANGUAGE_CODE
 182
 183     def has_media(self, type_):
 184         if type_ in Book.formats:
 185             return bool(getattr(self, "%s_file" % type_))
 186         else:
 187             return self.media.filter(type=type_).exists()
 188
 189     def get_media(self, type_):
 190         if self.has_media(type_):
 191             if type_ in Book.formats:
 192                 return getattr(self, "%s_file" % type_)
 193             else:
 194                 return self.media.filter(type=type_)
 195         else:
 196             return None
 197
 198     def get_mp3(self):
 199         return self.get_media("mp3")
 200
 201     def get_odt(self):
 202         return self.get_media("odt")
 203
 204     def get_ogg(self):
 205         return self.get_media("ogg")
 206
 207     def get_daisy(self):
 208         return self.get_media("daisy")
 209
 210     def has_description(self):
 211         return len(self.description) > 0
 212     has_description.short_description = _('description')
 213     has_description.boolean = True
 214
 215     # ugly ugly ugly
 216     def has_mp3_file(self):
 217         return bool(self.has_media("mp3"))
 218     has_mp3_file.short_description = 'MP3'
 219     has_mp3_file.boolean = True
 220
 221     def has_ogg_file(self):
 222         return bool(self.has_media("ogg"))
 223     has_ogg_file.short_description = 'OGG'
 224     has_ogg_file.boolean = True
 225
 226     def has_daisy_file(self):
 227         return bool(self.has_media("daisy"))
 228     has_daisy_file.short_description = 'DAISY'
 229     has_daisy_file.boolean = True
 230
 231     def get_audiobooks(self):
 232         ogg_files = {}
 233         for m in self.media.filter(type='ogg').order_by().iterator():
 234             ogg_files[m.name] = m
 235
 236         audiobooks = []
 237         projects = set()
 238         for mp3 in self.media.filter(type='mp3').iterator():
 239             # ogg files are always from the same project
 240             meta = mp3.extra_info
 241             project = meta.get('project')
 242             if not project:
 243                 # temporary fallback
 244                 project = u'CzytamySłuchając'
 245
 246             projects.add((project, meta.get('funded_by', '')))
 247
 248             media = {'mp3': mp3}
 249
 250             ogg = ogg_files.get(mp3.name)
 251             if ogg:
 252                 media['ogg'] = ogg
 253             audiobooks.append(media)
 254
 255         projects = sorted(projects)
 256         return audiobooks, projects
 257
 258     def wldocument(self, parse_dublincore=True, inherit=True):
 259         from catalogue.import_utils import ORMDocProvider
 260         from librarian.parser import WLDocument
 261
 262         if inherit and self.parent:
 263             meta_fallbacks = self.parent.cover_info()
 264         else:
 265             meta_fallbacks = None
 266
 267         return WLDocument.from_file(
 268             self.xml_file.path,
 269             provider=ORMDocProvider(self),
 270             parse_dublincore=parse_dublincore,
 271             meta_fallbacks=meta_fallbacks)
 272
 273     @staticmethod
 274     def zip_format(format_):
 275         def pretty_file_name(book):
 276             return "%s/%s.%s" % (
 277                 book.extra_info['author'],
 278                 book.slug,
 279                 format_)
 280
 281         field_name = "%s_file" % format_
 282         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
 283         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 284         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 285
 286     def zip_audiobooks(self, format_):
 287         bm = BookMedia.objects.filter(book=self, type=format_)
 288         paths = map(lambda bm: (None, bm.file.path), bm)
 289         return create_zip(paths, "%s_%s" % (self.slug, format_))
 290
 291     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 292         if index is None:
 293             from search.index import Index
 294             index = Index()
 295         try:
 296             index.index_book(self, book_info)
 297             if index_tags:
 298                 index.index_tags()
 299             if commit:
 300                 index.index.commit()
 301         except Exception, e:
 302             index.index.rollback()
 303             raise e
 304
 305     def download_pictures(self, remote_gallery_url):
 306         gallery_path = self.gallery_path()
 307         # delete previous files, so we don't include old files in ebooks
 308         if os.path.isdir(gallery_path):
 309             for filename in os.listdir(gallery_path):
 310                 file_path = os.path.join(gallery_path, filename)
 311                 os.unlink(file_path)
 312         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 313         if ilustr_elements:
 314             makedirs(gallery_path)
 315             for ilustr in ilustr_elements:
 316                 ilustr_src = ilustr.get('src')
 317                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 318                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 319
 320     @classmethod
 321     def from_xml_file(cls, xml_file, **kwargs):
 322         from django.core.files import File
 323         from librarian import dcparser
 324
 325         # use librarian to parse meta-data
 326         book_info = dcparser.parse(xml_file)
 327
 328         if not isinstance(xml_file, File):
 329             xml_file = File(open(xml_file))
 330
 331         try:
 332             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 333         finally:
 334             xml_file.close()
 335
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML source and parsed metadata.

        :param raw_file: file object saved as the book's ``xml_file``
        :param book_info: parsed metadata; this method reads ``url.slug``,
            ``language``, ``title``, ``variant_of``, ``to_dict()`` and, if
            present, ``parts``
        :param overwrite: when false, an already existing book with the same
            slug is an error
        :param dont_build: iterable of format names not to build; merged with
            ``app_settings.DONT_BUILD``
        :param search_index: when true (and ``settings.NO_SEARCH_INDEX`` is
            false), an indexing task is scheduled
        :param search_index_tags: passed to the indexing task as ``index_tags``
        :param remote_gallery_url: when set, pictures are downloaded from it
            with ``download_pictures``
        :returns: the created or updated book
        :raises Book.DoesNotExist: a part listed in ``book_info.parts`` is
            not in the database
        :raises ValueError: the slug contains characters other than
            ``a-z``, ``0-9`` and ``-``
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # replace the tags with those from the metadata, keeping the saved shelves
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # current children that the new metadata no longer lists as parts
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        # The HTML build is scheduled unconditionally; dont_build is not consulted for it.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
 435
 436     @classmethod
 437     @transaction.atomic
 438     def repopulate_ancestors(cls):
 439         """Fixes the ancestry cache."""
 440         # TODO: table names
 441         cursor = connection.cursor()
 442         if connection.vendor == 'postgres':
 443             cursor.execute("TRUNCATE catalogue_book_ancestor")
 444             cursor.execute("""
 445                 WITH RECURSIVE ancestry AS (
 446                     SELECT book.id, book.parent_id
 447                     FROM catalogue_book AS book
 448                     WHERE book.parent_id IS NOT NULL
 449                     UNION
 450                     SELECT ancestor.id, book.parent_id
 451                     FROM ancestry AS ancestor, catalogue_book AS book
 452                     WHERE ancestor.parent_id = book.id
 453                         AND book.parent_id IS NOT NULL
 454                     )
 455                 INSERT INTO catalogue_book_ancestor
 456                     (from_book_id, to_book_id)
 457                     SELECT id, parent_id
 458                     FROM ancestry
 459                     ORDER BY id;
 460                 """)
 461         else:
 462             cursor.execute("DELETE FROM catalogue_book_ancestor")
 463             for b in cls.objects.exclude(parent=None):
 464                 parent = b.parent
 465                 while parent is not None:
 466                     b.ancestor.add(parent)
 467                     parent = parent.parent
 468
 469     def flush_includes(self, languages=True):
 470         if not languages:
 471             return
 472         if languages is True:
 473             languages = [lc for (lc, _ln) in settings.LANGUAGES]
 474         flush_ssi_includes([
 475             template % (self.pk, lang)
 476             for template in [
 477                 '/katalog/b/%d/mini.%s.html',
 478                 '/katalog/b/%d/mini_nolink.%s.html',
 479                 '/katalog/b/%d/short.%s.html',
 480                 '/katalog/b/%d/wide.%s.html',
 481                 '/api/include/book/%d.%s.json',
 482                 '/api/include/book/%d.%s.xml',
 483                 ]
 484             for lang in languages
 485             ])
 486
 487     def cover_info(self, inherit=True):
 488         """Returns a dictionary to serve as fallback for BookInfo.
 489
 490         For now, the only thing inherited is the cover image.
 491         """
 492         need = False
 493         info = {}
 494         for field in ('cover_url', 'cover_by', 'cover_source'):
 495             val = self.extra_info.get(field)
 496             if val:
 497                 info[field] = val
 498             else:
 499                 need = True
 500         if inherit and need and self.parent is not None:
 501             parent_info = self.parent.cover_info()
 502             parent_info.update(info)
 503             info = parent_info
 504         return info
 505
 506     def related_themes(self):
 507         return Tag.objects.usage_for_queryset(
 508             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 509             counts=True).filter(category='theme')
 510
 511     def parent_cover_changed(self):
 512         """Called when parent book's cover image is changed."""
 513         if not self.cover_info(inherit=False):
 514             if 'cover' not in app_settings.DONT_BUILD:
 515                 self.cover.build_delay()
 516                 self.cover_thumb.build_delay()
 517             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 518                 if format_ not in app_settings.DONT_BUILD:
 519                     getattr(self, '%s_file' % format_).build_delay()
 520             for child in self.children.all():
 521                 child.parent_cover_changed()
 522
 523     def other_versions(self):
 524         """Find other versions (i.e. in other languages) of the book."""
 525         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
 526
 527     def parents(self):
 528         books = []
 529         parent = self.parent
 530         while parent is not None:
 531             books.insert(0, parent)
 532             parent = parent.parent
 533         return books
 534
 535     def pretty_title(self, html_links=False):
 536         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 537         books = self.parents() + [self]
 538         names.extend([(b.title, b.get_absolute_url()) for b in books])
 539
 540         if html_links:
 541             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 542         else:
 543             names = [tag[0] for tag in names]
 544         return ', '.join(names)
 545
 546     def publisher(self):
 547         publisher = self.extra_info['publisher']
 548         if isinstance(publisher, basestring):
 549             return publisher
 550         elif isinstance(publisher, list):
 551             return ', '.join(publisher)
 552
 553     @classmethod
 554     def tagged_top_level(cls, tags):
 555         """ Returns top-level books tagged with `tags`.
 556
 557         It only returns those books which don't have ancestors which are
 558         also tagged with those tags.
 559
 560         """
 561         objects = cls.tagged.with_all(tags)
 562         return objects.exclude(ancestor__in=objects)
 563
 564     @classmethod
 565     def book_list(cls, book_filter=None):
 566         """Generates a hierarchical listing of all books.
 567
 568         Books are optionally filtered with a test function.
 569
 570         """
 571
 572         books_by_parent = {}
 573         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
 574         if book_filter:
 575             books = books.filter(book_filter).distinct()
 576
 577             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 578             for book in books.iterator():
 579                 parent = book.parent_id
 580                 if parent not in book_ids:
 581                     parent = None
 582                 books_by_parent.setdefault(parent, []).append(book)
 583         else:
 584             for book in books.iterator():
 585                 books_by_parent.setdefault(book.parent_id, []).append(book)
 586
 587         orphans = []
 588         books_by_author = OrderedDict()
 589         for tag in Tag.objects.filter(category='author').iterator():
 590             books_by_author[tag] = []
 591
 592         for book in books_by_parent.get(None, ()):
 593             authors = list(book.authors().only('pk'))
 594             if authors:
 595                 for author in authors:
 596                     books_by_author[author].append(book)
 597             else:
 598                 orphans.append(book)
 599
 600         return books_by_author, orphans, books_by_parent
 601
    # Maps audience codes to (sort rank, display label) pairs; used by
    # audiences_pl(), where unknown codes get rank 99 and are shown verbatim.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
 612
 613     def audiences_pl(self):
 614         audiences = self.extra_info.get('audiences', [])
 615         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 616         return [a[1] for a in audiences]
 617
 618     def stage_note(self):
 619         stage = self.extra_info.get('stage')
 620         if stage and stage < '0.4':
 621             return (_('This work needs modernisation'),
 622                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 623         else:
 624             return None, None
 625
 626     def choose_fragment(self):
 627         fragments = self.fragments.order_by()
 628         fragments_count = fragments.count()
 629         if not fragments_count and self.children.exists():
 630             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 631             fragments_count = fragments.count()
 632         if fragments_count:
 633             return fragments[randint(0, fragments_count - 1)]
 634         elif self.parent:
 635             return self.parent.choose_fragment()
 636         else:
 637             return None
 638
 639     def update_popularity(self):
 640         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 641         try:
 642             pop = self.popularity
 643             pop.count = count
 644             pop.save()
 645         except BookPopularity.DoesNotExist:
 646             BookPopularity.objects.create(book=self, count=count)
 647
 648     def ridero_link(self):
 649         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 650
 651
 652 def add_file_fields():
 653     for format_ in Book.formats:
 654         field_name = "%s_file" % format_
 655         # This weird globals() assignment makes Django migrations comfortable.
 656         _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
 657         _upload_to.__name__ = '_%s_upload_to' % format_
 658         globals()[_upload_to.__name__] = _upload_to
 659
 660         EbookField(
 661             format_, _("%s file" % format_.upper()),
 662             upload_to=_upload_to,
 663             storage=bofh_storage,
 664             max_length=255,
 665             blank=True,
 666             default=''
 667         ).contribute_to_class(Book, field_name)
 668
# Runs at import time, so Book has its per-format file fields as soon as this module is loaded.
add_file_fields()
 670
 671
class BookPopularity(models.Model):
    """Popularity counter kept one-to-one with a Book.

    ``Book.update_popularity()`` creates and updates these rows.
    """
    # reachable from the book as ``book.popularity``
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)