src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Shared storage instance; passed as ``storage`` to the Book.cover field below.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# Upload-path callables for the cover image fields of Book.
# Each one maps an instance to '<directory>/<slug>.jpg'.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
  54 def _ebook_upload_to(upload_path):
  55     return UploadToPath(upload_path)
  56
  57
  58 class Book(models.Model):
  59     """Represents a book imported from WL-XML."""
  60     title = models.CharField(_('title'), max_length=32767)
  61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  62     sort_key_author = models.CharField(
  63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
  64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  67     description = models.TextField(_('description'), blank=True)
  68     abstract = models.TextField(_('abstract'), blank=True)
  69     toc = models.TextField(_('toc'), blank=True)
  70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
  72     parent_number = models.IntegerField(_('parent number'), default=0)
  73     extra_info = models.TextField(_('extra information'), default='{}')
  74     gazeta_link = models.CharField(blank=True, max_length=240)
  75     wiki_link = models.CharField(blank=True, max_length=240)
  76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  77     recommended = models.BooleanField(_('recommended'), default=False)
  78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
  79     preview = models.BooleanField(_('preview'), default=False)
  80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
  81     preview_key = models.CharField(max_length=32, blank=True, null=True)
  82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
  83
  84     # files generated during publication
  85     cover = EbookField(
  86         'cover', _('cover'),
  87         null=True, blank=True,
  88         upload_to=_cover_upload_to,
  89         storage=bofh_storage, max_length=255)
  90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  91     # Cleaner version of cover for thumbs
  92     cover_clean = EbookField(
  93         'cover_clean', _('clean cover'),
  94         null=True, blank=True,
  95         upload_to=_cover_clean_upload_to,
  96         max_length=255
  97     )
  98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  99     cover_thumb = EbookField(
 100         'cover_thumb', _('cover thumbnail'),
 101         null=True, blank=True,
 102         upload_to=_cover_thumb_upload_to,
 103         max_length=255)
 104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 105     cover_api_thumb = EbookField(
 106         'cover_api_thumb', _('cover thumbnail for mobile app'),
 107         null=True, blank=True,
 108         upload_to=_cover_api_thumb_upload_to,
 109         max_length=255)
 110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 111     simple_cover = EbookField(
 112         'simple_cover', _('cover for mobile app'),
 113         null=True, blank=True,
 114         upload_to=_simple_cover_upload_to,
 115         max_length=255)
 116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 117     cover_ebookpoint = EbookField(
 118         'cover_ebookpoint', _('cover for Ebookpoint'),
 119         null=True, blank=True,
 120         upload_to=_cover_ebookpoint_upload_to,
 121         max_length=255)
 122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 123     ebook_formats = constants.EBOOK_FORMATS
 124     formats = ebook_formats + ['html', 'xml']
 125
 126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
 127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
 128
 129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
 130     has_audience = models.BooleanField(default=False)
 131
 132     objects = models.Manager()
 133     tagged = managers.ModelTaggedItemManager(Tag)
 134     tags = managers.TagDescriptor(Tag)
 135     tag_relations = GenericRelation(Tag.intermediary_table_model)
 136
 137     html_built = django.dispatch.Signal()
 138     published = django.dispatch.Signal()
 139
 140     SORT_KEY_SEP = '$'
 141
 142     class AlreadyExists(Exception):
 143         pass
 144
 145     class Meta:
 146         ordering = ('sort_key_author', 'sort_key')
 147         verbose_name = _('book')
 148         verbose_name_plural = _('books')
 149         app_label = 'catalogue'
 150
 151     def __str__(self):
 152         return self.title
 153
 154     def get_extra_info_json(self):
 155         return json.loads(self.extra_info or '{}')
 156
 157     def get_initial(self):
 158         try:
 159             return re.search(r'\w', self.title, re.U).group(0)
 160         except AttributeError:
 161             return ''
 162
 163     def authors(self):
 164         return self.tags.filter(category='author')
 165
 166     def epochs(self):
 167         return self.tags.filter(category='epoch')
 168
 169     def genres(self):
 170         return self.tags.filter(category='genre')
 171
 172     def kinds(self):
 173         return self.tags.filter(category='kind')
 174
 175     def tag_unicode(self, category):
 176         relations = prefetched_relations(self, category)
 177         if relations:
 178             return ', '.join(rel.tag.name for rel in relations)
 179         else:
 180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 181
 182     def tags_by_category(self):
 183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 184
 185     def author_unicode(self):
 186         return self.cached_author
 187
 188     def kind_unicode(self):
 189         return self.tag_unicode('kind')
 190
 191     def epoch_unicode(self):
 192         return self.tag_unicode('epoch')
 193
 194     def genre_unicode(self):
 195         return self.tag_unicode('genre')
 196
 197     def translators(self):
 198         translators = self.get_extra_info_json().get('translators') or []
 199         return [
 200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 201         ]
 202
 203     def translator(self):
 204         translators = self.get_extra_info_json().get('translators')
 205         if not translators:
 206             return None
 207         if len(translators) > 3:
 208             translators = translators[:2]
 209             others = ' i inni'
 210         else:
 211             others = ''
 212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 213
 214     def cover_source(self):
 215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 216
 217     @property
 218     def isbn_pdf(self):
 219         return self.get_extra_info_json().get('isbn_pdf')
 220
 221     @property
 222     def isbn_epub(self):
 223         return self.get_extra_info_json().get('isbn_epub')
 224
 225     @property
 226     def isbn_mobi(self):
 227         return self.get_extra_info_json().get('isbn_mobi')
 228
 229
 230     def save(self, force_insert=False, force_update=False, **kwargs):
 231         from sortify import sortify
 232
 233         self.sort_key = sortify(self.title)[:120]
 234         self.title = str(self.title)  # ???
 235
 236         try:
 237             author = self.authors().first().sort_key
 238         except AttributeError:
 239             author = ''
 240         self.sort_key_author = author
 241
 242         self.cached_author = self.tag_unicode('author')
 243         self.has_audience = 'audience' in self.get_extra_info_json()
 244
 245         if self.preview and not self.preview_key:
 246             self.preview_key = get_random_hash(self.slug)[:32]
 247
 248         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 249
 250         return ret
 251
 252     def get_absolute_url(self):
 253         return reverse('book_detail', args=[self.slug])
 254
 255     def gallery_path(self):
 256         return gallery_path(self.slug)
 257
 258     def gallery_url(self):
 259         return gallery_url(self.slug)
 260
 261     def get_first_text(self):
 262         if self.html_file:
 263             return self
 264         child = self.children.all().order_by('parent_number').first()
 265         if child is not None:
 266             return child.get_first_text()
 267
 268     def get_last_text(self):
 269         if self.html_file:
 270             return self
 271         child = self.children.all().order_by('parent_number').last()
 272         if child is not None:
 273             return child.get_last_text()
 274
 275     def get_prev_text(self):
 276         if not self.parent:
 277             return None
 278         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 279         if sibling is not None:
 280             return sibling.get_last_text()
 281
 282         if self.parent.html_file:
 283             return self.parent
 284
 285         return self.parent.get_prev_text()
 286
 287     def get_next_text(self):
 288         child = self.children.order_by('parent_number').first()
 289         if child is not None:
 290             return child.get_first_text()
 291
 292         if not self.parent:
 293             return None
 294         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 295         if sibling is not None:
 296             return sibling.get_first_text()
 297         return self.parent.get_next_text()
 298
 299     def get_child_audiobook(self):
 300         BookMedia = apps.get_model('catalogue', 'BookMedia')
 301         if not BookMedia.objects.filter(book__ancestor=self).exists():
 302             return None
 303         for child in self.children.all():
 304             if child.has_mp3_file():
 305                 return child
 306             child_sub = child.get_child_audiobook()
 307             if child_sub is not None:
 308                 return child_sub
 309
 310     def get_siblings(self):
 311         if not self.parent:
 312             return []
 313         return self.parent.children.all().order_by('parent_number')
 314
 315     def get_children(self):
 316         return self.children.all().order_by('parent_number')
 317
 318     @property
 319     def name(self):
 320         return self.title
 321
 322     def language_code(self):
 323         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 324
 325     def language_name(self):
 326         return dict(settings.LANGUAGES).get(self.language_code(), "")
 327
 328     def is_foreign(self):
 329         return self.language_code() != settings.LANGUAGE_CODE
 330
 331     def set_audio_length(self):
 332         length = self.get_audio_length()
 333         if length > 0:
 334             self.audio_length = self.format_audio_length(length)
 335             self.save()
 336
 337     @staticmethod
 338     def format_audio_length(seconds):
 339         """
 340         >>> Book.format_audio_length(1)
 341         '0:01'
 342         >>> Book.format_audio_length(3661)
 343         '1:01:01'
 344         """
 345         if seconds < 60*60:
 346             minutes = seconds // 60
 347             seconds = seconds % 60
 348             return '%d:%02d' % (minutes, seconds)
 349         else:
 350             hours = seconds // 3600
 351             minutes = seconds % 3600 // 60
 352             seconds = seconds % 60
 353             return '%d:%02d:%02d' % (hours, minutes, seconds)
 354
 355     def get_audio_length(self):
 356         total = 0
 357         for media in self.get_mp3() or ():
 358             total += app_settings.GET_MP3_LENGTH(media.file.path)
 359         return int(total)
 360
 361     def has_media(self, type_):
 362         if type_ in Book.formats:
 363             return bool(getattr(self, "%s_file" % type_))
 364         else:
 365             return self.media.filter(type=type_).exists()
 366
 367     def has_audio(self):
 368         return self.has_media('mp3')
 369
 370     def get_media(self, type_):
 371         if self.has_media(type_):
 372             if type_ in Book.formats:
 373                 return getattr(self, "%s_file" % type_)
 374             else:
 375                 return self.media.filter(type=type_)
 376         else:
 377             return None
 378
 379     def get_mp3(self):
 380         return self.get_media("mp3")
 381
 382     def get_odt(self):
 383         return self.get_media("odt")
 384
 385     def get_ogg(self):
 386         return self.get_media("ogg")
 387
 388     def get_daisy(self):
 389         return self.get_media("daisy")
 390
 391     def media_url(self, format_):
 392         media = self.get_media(format_)
 393         if media:
 394             if self.preview:
 395                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 396             else:
 397                 return media.url
 398         else:
 399             return None
 400
 401     def html_url(self):
 402         return self.media_url('html')
 403
 404     def pdf_url(self):
 405         return self.media_url('pdf')
 406
 407     def epub_url(self):
 408         return self.media_url('epub')
 409
 410     def mobi_url(self):
 411         return self.media_url('mobi')
 412
 413     def txt_url(self):
 414         return self.media_url('txt')
 415
 416     def fb2_url(self):
 417         return self.media_url('fb2')
 418
 419     def xml_url(self):
 420         return self.media_url('xml')
 421
 422     def has_description(self):
 423         return len(self.description) > 0
 424     has_description.short_description = _('description')
 425     has_description.boolean = True
 426
 427     def has_mp3_file(self):
 428         return self.has_media("mp3")
 429     has_mp3_file.short_description = 'MP3'
 430     has_mp3_file.boolean = True
 431
 432     def has_ogg_file(self):
 433         return self.has_media("ogg")
 434     has_ogg_file.short_description = 'OGG'
 435     has_ogg_file.boolean = True
 436
 437     def has_daisy_file(self):
 438         return self.has_media("daisy")
 439     has_daisy_file.short_description = 'DAISY'
 440     has_daisy_file.boolean = True
 441
 442     @property
 443     def media_daisy(self):
 444         return self.get_media('daisy')
 445
 446     def get_audiobooks(self):
 447         ogg_files = {}
 448         for m in self.media.filter(type='ogg').order_by().iterator():
 449             ogg_files[m.name] = m
 450
 451         audiobooks = []
 452         projects = set()
 453         total_duration = 0
 454         for mp3 in self.media.filter(type='mp3').iterator():
 455             # ogg files are always from the same project
 456             meta = mp3.get_extra_info_json()
 457             project = meta.get('project')
 458             if not project:
 459                 # temporary fallback
 460                 project = 'CzytamySłuchając'
 461
 462             projects.add((project, meta.get('funded_by', '')))
 463             total_duration += mp3.duration or 0
 464
 465             media = {'mp3': mp3}
 466
 467             ogg = ogg_files.get(mp3.name)
 468             if ogg:
 469                 media['ogg'] = ogg
 470             audiobooks.append(media)
 471
 472         projects = sorted(projects)
 473         total_duration = '%d:%02d' % (
 474             total_duration // 60,
 475             total_duration % 60
 476         )
 477         return audiobooks, projects, total_duration
 478
 479     def wldocument(self, parse_dublincore=True, inherit=True):
 480         from catalogue.import_utils import ORMDocProvider
 481         from librarian.parser import WLDocument
 482
 483         if inherit and self.parent:
 484             meta_fallbacks = self.parent.cover_info()
 485         else:
 486             meta_fallbacks = None
 487
 488         return WLDocument.from_file(
 489             self.xml_file.path,
 490             provider=ORMDocProvider(self),
 491             parse_dublincore=parse_dublincore,
 492             meta_fallbacks=meta_fallbacks)
 493
 494     def wldocument2(self):
 495         from catalogue.import_utils import ORMDocProvider
 496         from librarian.document import WLDocument
 497         doc = WLDocument(
 498             self.xml_file.path,
 499             provider=ORMDocProvider(self)
 500         )
 501         doc.meta.update(self.cover_info())
 502         return doc
 503
 504
 505     @staticmethod
 506     def zip_format(format_):
 507         def pretty_file_name(book):
 508             return "%s/%s.%s" % (
 509                 book.get_extra_info_json()['author'],
 510                 book.slug,
 511                 format_)
 512
 513         field_name = "%s_file" % format_
 514         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 515         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 516         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 517
 518     def zip_audiobooks(self, format_):
 519         bm = BookMedia.objects.filter(book=self, type=format_)
 520         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 521         licenses = set()
 522         for m in bm:
 523             license = constants.LICENSES.get(
 524                 m.get_extra_info_json().get('license'), {}
 525             ).get('locative')
 526             if license:
 527                 licenses.add(license)
 528         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 529             'licenses': licenses,
 530         })
 531         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 532
 533     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 534         if not self.findable:
 535             return
 536         if index is None:
 537             from search.index import Index
 538             index = Index()
 539         try:
 540             index.index_book(self, book_info)
 541             if index_tags:
 542                 index.index_tags()
 543             if commit:
 544                 index.index.commit()
 545         except Exception as e:
 546             index.index.rollback()
 547             raise e
 548
 549     # will make problems in conjunction with paid previews
 550     def download_pictures(self, remote_gallery_url):
 551         gallery_path = self.gallery_path()
 552         # delete previous files, so we don't include old files in ebooks
 553         if os.path.isdir(gallery_path):
 554             for filename in os.listdir(gallery_path):
 555                 file_path = os.path.join(gallery_path, filename)
 556                 os.unlink(file_path)
 557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 558         if ilustr_elements:
 559             makedirs(gallery_path)
 560             for ilustr in ilustr_elements:
 561                 ilustr_src = ilustr.get('src')
 562                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 563                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 564
 565     def load_abstract(self):
 566         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 567         if abstract is not None:
 568             self.abstract = transform_abstrakt(abstract)
 569         else:
 570             self.abstract = ''
 571
 572     def load_toc(self):
 573         self.toc = ''
 574         if self.html_file:
 575             parser = html.HTMLParser(encoding='utf-8')
 576             tree = html.parse(self.html_file.path, parser=parser)
 577             toc = tree.find('//div[@id="toc"]/ol')
 578             if toc is None or not len(toc):
 579                 return
 580             html_link = reverse('book_text', args=[self.slug])
 581             for a in toc.findall('.//a'):
 582                 a.attrib['href'] = html_link + a.attrib['href']
 583             self.toc = html.tostring(toc, encoding='unicode')
 584             # div#toc
 585
 586     @classmethod
 587     def from_xml_file(cls, xml_file, **kwargs):
 588         from django.core.files import File
 589         from librarian import dcparser
 590
 591         # use librarian to parse meta-data
 592         book_info = dcparser.parse(xml_file)
 593
 594         if not isinstance(xml_file, File):
 595             xml_file = File(open(xml_file))
 596
 597         try:
 598             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 599         finally:
 600             xml_file.close()
 601
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a Book from its XML file and parsed metadata.

        Args:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: when false, an already existing book with the same slug
                raises ``Book.AlreadyExists``.
            dont_build: iterable of build names to skip; merged with
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (only done when
                ``settings.NO_SEARCH_INDEX`` is false and *findable* is true).
            search_index_tags: passed as ``index_tags`` to the indexing task.
            remote_gallery_url: when truthy, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book's ``findable`` field.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and *overwrite* is false.
        """
        if dont_build is None:
            dont_build = set()
        # Merge the caller's exclusions with the application-wide ones.
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Flag every metadata tag as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags are replaced by the metadata tags plus the previously saved shelves.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position in the list.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Announce the publication through the class-level `published` signal.
        cls.published.send(sender=cls, instance=book)
        return book
 721
 722     def get_master(self):
 723         master_tags = [
 724             'opowiadanie',
 725             'powiesc',
 726             'dramat_wierszowany_l',
 727             'dramat_wierszowany_lp',
 728             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 729             'wywiad',
 730         ]
 731         from librarian.parser import WLDocument
 732         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 733         root = wld.edoc.getroot()
 734         for master in root.iter():
 735             if master.tag in master_tags:
 736                 return master
 737
 738     def update_references(self):
 739         from references.models import Entity, Reference
 740         master = self.get_master()
 741         if master is None:
 742             master = []
 743         found = set()
 744         for i, sec in enumerate(master):
 745             for ref in sec.findall('.//ref'):
 746                 href = ref.attrib.get('href', '')
 747                 if not href or href in found:
 748                     continue
 749                 found.add(href)
 750                 entity, created = Entity.objects.get_or_create(
 751                     uri=href
 752                 )
 753                 ref, created = Reference.objects.get_or_create(
 754                     book=self,
 755                     entity=entity
 756                 )
 757                 ref.first_section = 'sec%d' % (i + 1)
 758                 entity.populate()
 759                 entity.save()
 760         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 761
 762     @property
 763     def references(self):
 764         return self.reference_set.all().select_related('entity')
 765
 766     @classmethod
 767     @transaction.atomic
 768     def repopulate_ancestors(cls):
 769         """Fixes the ancestry cache."""
 770         # TODO: table names
 771         cursor = connection.cursor()
 772         if connection.vendor == 'postgres':
 773             cursor.execute("TRUNCATE catalogue_book_ancestor")
 774             cursor.execute("""
 775                 WITH RECURSIVE ancestry AS (
 776                     SELECT book.id, book.parent_id
 777                     FROM catalogue_book AS book
 778                     WHERE book.parent_id IS NOT NULL
 779                     UNION
 780                     SELECT ancestor.id, book.parent_id
 781                     FROM ancestry AS ancestor, catalogue_book AS book
 782                     WHERE ancestor.parent_id = book.id
 783                         AND book.parent_id IS NOT NULL
 784                     )
 785                 INSERT INTO catalogue_book_ancestor
 786                     (from_book_id, to_book_id)
 787                     SELECT id, parent_id
 788                     FROM ancestry
 789                     ORDER BY id;
 790                 """)
 791         else:
 792             cursor.execute("DELETE FROM catalogue_book_ancestor")
 793             for b in cls.objects.exclude(parent=None):
 794                 parent = b.parent
 795                 while parent is not None:
 796                     b.ancestor.add(parent)
 797                     parent = parent.parent
 798
 799     @property
 800     def ancestors(self):
 801         if self.parent:
 802             for anc in self.parent.ancestors:
 803                 yield anc
 804             yield self.parent
 805         else:
 806             return []
 807
 808     def clear_cache(self):
 809         clear_cached_renders(self.mini_box)
 810         clear_cached_renders(self.mini_box_nolink)
 811
 812     def cover_info(self, inherit=True):
 813         """Returns a dictionary to serve as fallback for BookInfo.
 814
 815         For now, the only thing inherited is the cover image.
 816         """
 817         need = False
 818         info = {}
 819         for field in ('cover_url', 'cover_by', 'cover_source'):
 820             val = self.get_extra_info_json().get(field)
 821             if val:
 822                 info[field] = val
 823             else:
 824                 need = True
 825         if inherit and need and self.parent is not None:
 826             parent_info = self.parent.cover_info()
 827             parent_info.update(info)
 828             info = parent_info
 829         return info
 830
 831     def related_themes(self):
 832         return Tag.objects.usage_for_queryset(
 833             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 834             counts=True).filter(category='theme')
 835
 836     def parent_cover_changed(self):
 837         """Called when parent book's cover image is changed."""
 838         if not self.cover_info(inherit=False):
 839             if 'cover' not in app_settings.DONT_BUILD:
 840                 self.cover.build_delay()
 841                 self.cover_clean.build_delay()
 842                 self.cover_thumb.build_delay()
 843                 self.cover_api_thumb.build_delay()
 844                 self.simple_cover.build_delay()
 845                 self.cover_ebookpoint.build_delay()
 846             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 847                 if format_ not in app_settings.DONT_BUILD:
 848                     getattr(self, '%s_file' % format_).build_delay()
 849             for child in self.children.all():
 850                 child.parent_cover_changed()
 851
 852     def other_versions(self):
 853         """Find other versions (i.e. in other languages) of the book."""
 854         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 855
 856     def parents(self):
 857         books = []
 858         parent = self.parent
 859         while parent is not None:
 860             books.insert(0, parent)
 861             parent = parent.parent
 862         return books
 863
 864     def pretty_title(self, html_links=False):
 865         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 866         books = self.parents() + [self]
 867         names.extend([(b.title, b.get_absolute_url()) for b in books])
 868
 869         if html_links:
 870             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 871         else:
 872             names = [tag[0] for tag in names]
 873         return ', '.join(names)
 874
 875     def publisher(self):
 876         publisher = self.get_extra_info_json()['publisher']
 877         if isinstance(publisher, str):
 878             return publisher
 879         elif isinstance(publisher, list):
 880             return ', '.join(publisher)
 881
 882     @classmethod
 883     def tagged_top_level(cls, tags):
 884         """ Returns top-level books tagged with `tags`.
 885
 886         It only returns those books which don't have ancestors which are
 887         also tagged with those tags.
 888
 889         """
 890         objects = cls.tagged.with_all(tags)
 891         return objects.filter(findable=True).exclude(ancestor__in=objects)
 892
 893     @classmethod
 894     def book_list(cls, book_filter=None):
 895         """Generates a hierarchical listing of all books.
 896
 897         Books are optionally filtered with a test function.
 898
 899         """
 900
 901         books_by_parent = {}
 902         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 903         if book_filter:
 904             books = books.filter(book_filter).distinct()
 905
 906             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 907             for book in books.iterator():
 908                 parent = book.parent_id
 909                 if parent not in book_ids:
 910                     parent = None
 911                 books_by_parent.setdefault(parent, []).append(book)
 912         else:
 913             for book in books.iterator():
 914                 books_by_parent.setdefault(book.parent_id, []).append(book)
 915
 916         orphans = []
 917         books_by_author = OrderedDict()
 918         for tag in Tag.objects.filter(category='author').iterator():
 919             books_by_author[tag] = []
 920
 921         for book in books_by_parent.get(None, ()):
 922             authors = list(book.authors().only('pk'))
 923             if authors:
 924                 for author in authors:
 925                     books_by_author[author].append(book)
 926             else:
 927                 orphans.append(book)
 928
 929         return books_by_author, orphans, books_by_parent
 930
 931     _audiences_pl = {
 932         "SP": (1, "szkoła podstawowa"),
 933         "SP1": (1, "szkoła podstawowa"),
 934         "SP2": (1, "szkoła podstawowa"),
 935         "SP3": (1, "szkoła podstawowa"),
 936         "P": (1, "szkoła podstawowa"),
 937         "G": (2, "gimnazjum"),
 938         "L": (3, "liceum"),
 939         "LP": (3, "liceum"),
 940     }
 941
 942     def audiences_pl(self):
 943         audiences = self.get_extra_info_json().get('audiences', [])
 944         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 945         return [a[1] for a in audiences]
 946
 947     def stage_note(self):
 948         stage = self.get_extra_info_json().get('stage')
 949         if stage and stage < '0.4':
 950             return (_('This work needs modernisation'),
 951                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 952         else:
 953             return None, None
 954
 955     def choose_fragments(self, number):
 956         fragments = self.fragments.order_by()
 957         fragments_count = fragments.count()
 958         if not fragments_count and self.children.exists():
 959             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 960             fragments_count = fragments.count()
 961         if fragments_count:
 962             if fragments_count > number:
 963                 offset = randint(0, fragments_count - number)
 964             else:
 965                 offset = 0
 966             return fragments[offset : offset + number]
 967         elif self.parent:
 968             return self.parent.choose_fragments(number)
 969         else:
 970             return []
 971
 972     def choose_fragment(self):
 973         fragments = self.choose_fragments(1)
 974         if fragments:
 975             return fragments[0]
 976         else:
 977             return None
 978
 979     def fragment_data(self):
 980         fragment = self.choose_fragment()
 981         if fragment:
 982             return {
 983                 'title': fragment.book.pretty_title(),
 984                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 985             }
 986         else:
 987             return None
 988
 989     def update_popularity(self):
 990         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 991         try:
 992             pop = self.popularity
 993             pop.count = count
 994             pop.save()
 995         except BookPopularity.DoesNotExist:
 996             BookPopularity.objects.create(book=self, count=count)
 997
 998     def ridero_link(self):
 999         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1000
1001     def like(self, user):
1002         from social.utils import likes, get_set, set_sets
1003         if not likes(user, self):
1004             tag = get_set(user, '')
1005             set_sets(user, self, [tag])
1006
1007     def unlike(self, user):
1008         from social.utils import likes, set_sets
1009         if likes(user, self):
1010             set_sets(user, self, [])
1011
1012     def full_sort_key(self):
1013         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1014
1015     def cover_color(self):
1016         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1017
1018     @cached_render('catalogue/book_mini_box.html')
1019     def mini_box(self):
1020         return {
1021             'book': self
1022         }
1023
1024     @cached_render('catalogue/book_mini_box.html')
1025     def mini_box_nolink(self):
1026         return {
1027             'book': self,
1028             'no_link': True,
1029         }
1030
def add_file_fields():
    """Add a ``<format>_file`` field to ``Book`` for each of ``Book.formats``.

    Every format except ``xml`` additionally gets a ``<format>_file_etag``
    character field. The upload-path callable of each format is also stored
    in this module's globals under the name ``_<format>_upload_to``.
    """
    for fmt in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (fmt, fmt))
        upload_to.__name__ = '_%s_upload_to' % fmt
        globals()[upload_to.__name__] = upload_to

        file_field_name = "%s_file" % fmt
        file_field = EbookField(
            fmt,
            _("%s file" % fmt.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default='',
        )
        file_field.contribute_to_class(Book, file_field_name)

        if fmt == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{file_field_name}_etag')


add_file_fields()
1052
1053
1054 class BookPopularity(models.Model):
1055     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1056     count = models.IntegerField(default=0, db_index=True)