src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Shared storage instance; passed as ``storage=`` to the ``Book.cover`` field.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# Upload-path callables for the cover variants; each inserts the book's slug
# into its template and is referenced by the matching ``Book`` field.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
  54 def _ebook_upload_to(upload_path):
  55     return UploadToPath(upload_path)
  56
  57
  58 class Book(models.Model):
  59     """Represents a book imported from WL-XML."""
    # --- Basic descriptive columns ---
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed in save() and are not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set on import to the slug of the work this book is a variant of, or to its own slug.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # Filled by load_abstract() and load_toc() respectively.
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; siblings are ordered by this value.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Preformatted duration string, written by set_audio_length().
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview (embargo) state; save() generates preview_key when preview is set.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    # Books that are not findable are skipped by search_index() and zip_format().
    findable = models.BooleanField(_('findable'), default=True, db_index=True)
  83
    # Files generated during publication.  Each cover variant is paired with
    # an ``*_etag`` column (NOTE(review): presumably maintained by EbookField
    # to detect stale builds -- confirm in catalogue.fields).
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Formats stored directly on the book as ``<format>_file`` attributes;
    # has_media()/get_media() treat any other type as a related media record.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']
 125
    # Tree structure: ``parent`` is the direct parent; ``ancestor`` is a cache
    # of all transitive parents, rebuilt by repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalised values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    # Default manager plus the tagging helpers from ``newtagging``.
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
 141
 142     class AlreadyExists(Exception):
 143         pass
 144
 145     class Meta:
 146         ordering = ('sort_key_author', 'sort_key')
 147         verbose_name = _('book')
 148         verbose_name_plural = _('books')
 149         app_label = 'catalogue'
 150
 151     def __str__(self):
 152         return self.title
 153
 154     def get_extra_info_json(self):
 155         return json.loads(self.extra_info or '{}')
 156
 157     def get_initial(self):
 158         try:
 159             return re.search(r'\w', self.title, re.U).group(0)
 160         except AttributeError:
 161             return ''
 162
 163     def authors(self):
 164         return self.tags.filter(category='author')
 165
 166     def epochs(self):
 167         return self.tags.filter(category='epoch')
 168
 169     def genres(self):
 170         return self.tags.filter(category='genre')
 171
 172     def kinds(self):
 173         return self.tags.filter(category='kind')
 174
 175     def tag_unicode(self, category):
 176         relations = prefetched_relations(self, category)
 177         if relations:
 178             return ', '.join(rel.tag.name for rel in relations)
 179         else:
 180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 181
 182     def tags_by_category(self):
 183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 184
 185     def author_unicode(self):
 186         return self.cached_author
 187
 188     def kind_unicode(self):
 189         return self.tag_unicode('kind')
 190
 191     def epoch_unicode(self):
 192         return self.tag_unicode('epoch')
 193
 194     def genre_unicode(self):
 195         return self.tag_unicode('genre')
 196
 197     def translators(self):
 198         translators = self.get_extra_info_json().get('translators') or []
 199         return [
 200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 201         ]
 202
 203     def translator(self):
 204         translators = self.get_extra_info_json().get('translators')
 205         if not translators:
 206             return None
 207         if len(translators) > 3:
 208             translators = translators[:2]
 209             others = ' i inni'
 210         else:
 211             others = ''
 212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 213
 214     def cover_source(self):
 215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 216
 217     @property
 218     def isbn_pdf(self):
 219         return self.get_extra_info_json().get('isbn_pdf')
 220
 221     @property
 222     def isbn_epub(self):
 223         return self.get_extra_info_json().get('isbn_epub')
 224
 225     @property
 226     def isbn_mobi(self):
 227         return self.get_extra_info_json().get('isbn_mobi')
 228
 229     def is_accessible_to(self, user):
 230         if not self.preview:
 231             return True
 232         Membership = apps.get_model('club', 'Membership')
 233         if Membership.is_active_for(user):
 234             return True
 235         Funding = apps.get_model('funding', 'Funding')
 236         if Funding.objects.filter(user=user, offer__book=self):
 237             return True
 238         return False
 239
 240     def save(self, force_insert=False, force_update=False, **kwargs):
 241         from sortify import sortify
 242
 243         self.sort_key = sortify(self.title)[:120]
 244         self.title = str(self.title)  # ???
 245
 246         try:
 247             author = self.authors().first().sort_key
 248         except AttributeError:
 249             author = ''
 250         self.sort_key_author = author
 251
 252         self.cached_author = self.tag_unicode('author')
 253         self.has_audience = 'audience' in self.get_extra_info_json()
 254
 255         if self.preview and not self.preview_key:
 256             self.preview_key = get_random_hash(self.slug)[:32]
 257
 258         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 259
 260         return ret
 261
 262     def get_absolute_url(self):
 263         return reverse('book_detail', args=[self.slug])
 264
 265     def gallery_path(self):
 266         return gallery_path(self.slug)
 267
 268     def gallery_url(self):
 269         return gallery_url(self.slug)
 270
 271     def get_first_text(self):
 272         if self.html_file:
 273             return self
 274         child = self.children.all().order_by('parent_number').first()
 275         if child is not None:
 276             return child.get_first_text()
 277
 278     def get_last_text(self):
 279         if self.html_file:
 280             return self
 281         child = self.children.all().order_by('parent_number').last()
 282         if child is not None:
 283             return child.get_last_text()
 284
 285     def get_prev_text(self):
 286         if not self.parent:
 287             return None
 288         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 289         if sibling is not None:
 290             return sibling.get_last_text()
 291
 292         if self.parent.html_file:
 293             return self.parent
 294
 295         return self.parent.get_prev_text()
 296
 297     def get_next_text(self):
 298         child = self.children.order_by('parent_number').first()
 299         if child is not None:
 300             return child.get_first_text()
 301
 302         if not self.parent:
 303             return None
 304         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 305         if sibling is not None:
 306             return sibling.get_first_text()
 307         return self.parent.get_next_text()
 308
 309     def get_child_audiobook(self):
 310         BookMedia = apps.get_model('catalogue', 'BookMedia')
 311         if not BookMedia.objects.filter(book__ancestor=self).exists():
 312             return None
 313         for child in self.children.all():
 314             if child.has_mp3_file():
 315                 return child
 316             child_sub = child.get_child_audiobook()
 317             if child_sub is not None:
 318                 return child_sub
 319
 320     def get_siblings(self):
 321         if not self.parent:
 322             return []
 323         return self.parent.children.all().order_by('parent_number')
 324
 325     def get_children(self):
 326         return self.children.all().order_by('parent_number')
 327
 328     @property
 329     def name(self):
 330         return self.title
 331
 332     def language_code(self):
 333         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 334
 335     def language_name(self):
 336         return dict(settings.LANGUAGES).get(self.language_code(), "")
 337
 338     def is_foreign(self):
 339         return self.language_code() != settings.LANGUAGE_CODE
 340
 341     def set_audio_length(self):
 342         length = self.get_audio_length()
 343         if length > 0:
 344             self.audio_length = self.format_audio_length(length)
 345             self.save()
 346
 347     @staticmethod
 348     def format_audio_length(seconds):
 349         """
 350         >>> Book.format_audio_length(1)
 351         '0:01'
 352         >>> Book.format_audio_length(3661)
 353         '1:01:01'
 354         """
 355         if seconds < 60*60:
 356             minutes = seconds // 60
 357             seconds = seconds % 60
 358             return '%d:%02d' % (minutes, seconds)
 359         else:
 360             hours = seconds // 3600
 361             minutes = seconds % 3600 // 60
 362             seconds = seconds % 60
 363             return '%d:%02d:%02d' % (hours, minutes, seconds)
 364
 365     def get_audio_length(self):
 366         total = 0
 367         for media in self.get_mp3() or ():
 368             total += app_settings.GET_MP3_LENGTH(media.file.path)
 369         return int(total)
 370
 371     def has_media(self, type_):
 372         if type_ in Book.formats:
 373             return bool(getattr(self, "%s_file" % type_))
 374         else:
 375             return self.media.filter(type=type_).exists()
 376
 377     def has_audio(self):
 378         return self.has_media('mp3')
 379
 380     def get_media(self, type_):
 381         if self.has_media(type_):
 382             if type_ in Book.formats:
 383                 return getattr(self, "%s_file" % type_)
 384             else:
 385                 return self.media.filter(type=type_)
 386         else:
 387             return None
 388
 389     def get_mp3(self):
 390         return self.get_media("mp3")
 391
 392     def get_odt(self):
 393         return self.get_media("odt")
 394
 395     def get_ogg(self):
 396         return self.get_media("ogg")
 397
 398     def get_daisy(self):
 399         return self.get_media("daisy")
 400
 401     def media_url(self, format_):
 402         media = self.get_media(format_)
 403         if media:
 404             if self.preview:
 405                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 406             else:
 407                 return media.url
 408         else:
 409             return None
 410
 411     def html_url(self):
 412         return self.media_url('html')
 413
 414     def pdf_url(self):
 415         return self.media_url('pdf')
 416
 417     def epub_url(self):
 418         return self.media_url('epub')
 419
 420     def mobi_url(self):
 421         return self.media_url('mobi')
 422
 423     def txt_url(self):
 424         return self.media_url('txt')
 425
 426     def fb2_url(self):
 427         return self.media_url('fb2')
 428
 429     def xml_url(self):
 430         return self.media_url('xml')
 431
 432     def has_description(self):
 433         return len(self.description) > 0
 434     has_description.short_description = _('description')
 435     has_description.boolean = True
 436
 437     def has_mp3_file(self):
 438         return self.has_media("mp3")
 439     has_mp3_file.short_description = 'MP3'
 440     has_mp3_file.boolean = True
 441
 442     def has_ogg_file(self):
 443         return self.has_media("ogg")
 444     has_ogg_file.short_description = 'OGG'
 445     has_ogg_file.boolean = True
 446
 447     def has_daisy_file(self):
 448         return self.has_media("daisy")
 449     has_daisy_file.short_description = 'DAISY'
 450     has_daisy_file.boolean = True
 451
 452     @property
 453     def media_daisy(self):
 454         return self.get_media('daisy')
 455
 456     def get_audiobooks(self):
 457         ogg_files = {}
 458         for m in self.media.filter(type='ogg').order_by().iterator():
 459             ogg_files[m.name] = m
 460
 461         audiobooks = []
 462         projects = set()
 463         total_duration = 0
 464         for mp3 in self.media.filter(type='mp3').iterator():
 465             # ogg files are always from the same project
 466             meta = mp3.get_extra_info_json()
 467             project = meta.get('project')
 468             if not project:
 469                 # temporary fallback
 470                 project = 'CzytamySłuchając'
 471
 472             projects.add((project, meta.get('funded_by', '')))
 473             total_duration += mp3.duration or 0
 474
 475             media = {'mp3': mp3}
 476
 477             ogg = ogg_files.get(mp3.name)
 478             if ogg:
 479                 media['ogg'] = ogg
 480             audiobooks.append(media)
 481
 482         projects = sorted(projects)
 483         total_duration = '%d:%02d' % (
 484             total_duration // 60,
 485             total_duration % 60
 486         )
 487         return audiobooks, projects, total_duration
 488
 489     def wldocument(self, parse_dublincore=True, inherit=True):
 490         from catalogue.import_utils import ORMDocProvider
 491         from librarian.parser import WLDocument
 492
 493         if inherit and self.parent:
 494             meta_fallbacks = self.parent.cover_info()
 495         else:
 496             meta_fallbacks = None
 497
 498         return WLDocument.from_file(
 499             self.xml_file.path,
 500             provider=ORMDocProvider(self),
 501             parse_dublincore=parse_dublincore,
 502             meta_fallbacks=meta_fallbacks)
 503
 504     def wldocument2(self):
 505         from catalogue.import_utils import ORMDocProvider
 506         from librarian.document import WLDocument
 507         doc = WLDocument(
 508             self.xml_file.path,
 509             provider=ORMDocProvider(self)
 510         )
 511         doc.meta.update(self.cover_info())
 512         return doc
 513
 514
 515     @staticmethod
 516     def zip_format(format_):
 517         def pretty_file_name(book):
 518             return "%s/%s.%s" % (
 519                 book.get_extra_info_json()['author'],
 520                 book.slug,
 521                 format_)
 522
 523         field_name = "%s_file" % format_
 524         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 525         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 526         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 527
 528     def zip_audiobooks(self, format_):
 529         bm = BookMedia.objects.filter(book=self, type=format_)
 530         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 531         licenses = set()
 532         for m in bm:
 533             license = constants.LICENSES.get(
 534                 m.get_extra_info_json().get('license'), {}
 535             ).get('locative')
 536             if license:
 537                 licenses.add(license)
 538         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 539             'licenses': licenses,
 540         })
 541         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 542
 543     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 544         if not self.findable:
 545             return
 546         if index is None:
 547             from search.index import Index
 548             index = Index()
 549         try:
 550             index.index_book(self, book_info)
 551             if index_tags:
 552                 index.index_tags()
 553             if commit:
 554                 index.index.commit()
 555         except Exception as e:
 556             index.index.rollback()
 557             raise e
 558
 559     # will make problems in conjunction with paid previews
 560     def download_pictures(self, remote_gallery_url):
 561         gallery_path = self.gallery_path()
 562         # delete previous files, so we don't include old files in ebooks
 563         if os.path.isdir(gallery_path):
 564             for filename in os.listdir(gallery_path):
 565                 file_path = os.path.join(gallery_path, filename)
 566                 os.unlink(file_path)
 567         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 568         if ilustr_elements:
 569             makedirs(gallery_path)
 570             for ilustr in ilustr_elements:
 571                 ilustr_src = ilustr.get('src')
 572                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 573                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 574
 575     def load_abstract(self):
 576         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 577         if abstract is not None:
 578             self.abstract = transform_abstrakt(abstract)
 579         else:
 580             self.abstract = ''
 581
 582     def load_toc(self):
 583         self.toc = ''
 584         if self.html_file:
 585             parser = html.HTMLParser(encoding='utf-8')
 586             tree = html.parse(self.html_file.path, parser=parser)
 587             toc = tree.find('//div[@id="toc"]/ol')
 588             if toc is None or not len(toc):
 589                 return
 590             html_link = reverse('book_text', args=[self.slug])
 591             for a in toc.findall('.//a'):
 592                 a.attrib['href'] = html_link + a.attrib['href']
 593             self.toc = html.tostring(toc, encoding='unicode')
 594             # div#toc
 595
 596     @classmethod
 597     def from_xml_file(cls, xml_file, **kwargs):
 598         from django.core.files import File
 599         from librarian import dcparser
 600
 601         # use librarian to parse meta-data
 602         book_info = dcparser.parse(xml_file)
 603
 604         if not isinstance(xml_file, File):
 605             xml_file = File(open(xml_file))
 606
 607         try:
 608             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 609         finally:
 610             xml_file.close()
 611
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; read here for ``url.slug``, ``parts``,
                ``language``, ``title``, ``variant_of`` and ``to_dict()``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip ('cover' or ebook formats);
                merged with ``app_settings.DONT_BUILD``.
            search_index: queue search indexing (also needs ``findable`` and
                ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if set, pictures are downloaded from there.
            days: for a newly created book, a non-zero value turns on preview
                mode until today plus that many days.
            findable: stored on the book.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used by this book is flagged as a book tag.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags plus the shelves ('set' tags) kept from before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is their position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats from EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Tell affected children that the cover they may inherit has changed.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 731
 732     def get_master(self):
 733         master_tags = [
 734             'opowiadanie',
 735             'powiesc',
 736             'dramat_wierszowany_l',
 737             'dramat_wierszowany_lp',
 738             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 739             'wywiad',
 740         ]
 741         from librarian.parser import WLDocument
 742         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 743         root = wld.edoc.getroot()
 744         for master in root.iter():
 745             if master.tag in master_tags:
 746                 return master
 747
 748     def update_references(self):
 749         from references.models import Entity, Reference
 750         master = self.get_master()
 751         if master is None:
 752             master = []
 753         found = set()
 754         for i, sec in enumerate(master):
 755             for ref in sec.findall('.//ref'):
 756                 href = ref.attrib.get('href', '')
 757                 if not href or href in found:
 758                     continue
 759                 found.add(href)
 760                 entity, created = Entity.objects.get_or_create(
 761                     uri=href
 762                 )
 763                 ref, created = Reference.objects.get_or_create(
 764                     book=self,
 765                     entity=entity
 766                 )
 767                 ref.first_section = 'sec%d' % (i + 1)
 768                 entity.populate()
 769                 entity.save()
 770         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 771
 772     @property
 773     def references(self):
 774         return self.reference_set.all().select_related('entity')
 775
 776     @classmethod
 777     @transaction.atomic
 778     def repopulate_ancestors(cls):
 779         """Fixes the ancestry cache."""
 780         # TODO: table names
 781         cursor = connection.cursor()
 782         if connection.vendor == 'postgres':
 783             cursor.execute("TRUNCATE catalogue_book_ancestor")
 784             cursor.execute("""
 785                 WITH RECURSIVE ancestry AS (
 786                     SELECT book.id, book.parent_id
 787                     FROM catalogue_book AS book
 788                     WHERE book.parent_id IS NOT NULL
 789                     UNION
 790                     SELECT ancestor.id, book.parent_id
 791                     FROM ancestry AS ancestor, catalogue_book AS book
 792                     WHERE ancestor.parent_id = book.id
 793                         AND book.parent_id IS NOT NULL
 794                     )
 795                 INSERT INTO catalogue_book_ancestor
 796                     (from_book_id, to_book_id)
 797                     SELECT id, parent_id
 798                     FROM ancestry
 799                     ORDER BY id;
 800                 """)
 801         else:
 802             cursor.execute("DELETE FROM catalogue_book_ancestor")
 803             for b in cls.objects.exclude(parent=None):
 804                 parent = b.parent
 805                 while parent is not None:
 806                     b.ancestor.add(parent)
 807                     parent = parent.parent
 808
 809     @property
 810     def ancestors(self):
 811         if self.parent:
 812             for anc in self.parent.ancestors:
 813                 yield anc
 814             yield self.parent
 815         else:
 816             return []
 817
 818     def clear_cache(self):
 819         clear_cached_renders(self.mini_box)
 820         clear_cached_renders(self.mini_box_nolink)
 821
 822     def cover_info(self, inherit=True):
 823         """Returns a dictionary to serve as fallback for BookInfo.
 824
 825         For now, the only thing inherited is the cover image.
 826         """
 827         need = False
 828         info = {}
 829         for field in ('cover_url', 'cover_by', 'cover_source'):
 830             val = self.get_extra_info_json().get(field)
 831             if val:
 832                 info[field] = val
 833             else:
 834                 need = True
 835         if inherit and need and self.parent is not None:
 836             parent_info = self.parent.cover_info()
 837             parent_info.update(info)
 838             info = parent_info
 839         return info
 840
 841     def related_themes(self):
 842         return Tag.objects.usage_for_queryset(
 843             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 844             counts=True).filter(category='theme').order_by('-count')
 845
 846     def parent_cover_changed(self):
 847         """Called when parent book's cover image is changed."""
 848         if not self.cover_info(inherit=False):
 849             if 'cover' not in app_settings.DONT_BUILD:
 850                 self.cover.build_delay()
 851                 self.cover_clean.build_delay()
 852                 self.cover_thumb.build_delay()
 853                 self.cover_api_thumb.build_delay()
 854                 self.simple_cover.build_delay()
 855                 self.cover_ebookpoint.build_delay()
 856             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 857                 if format_ not in app_settings.DONT_BUILD:
 858                     getattr(self, '%s_file' % format_).build_delay()
 859             for child in self.children.all():
 860                 child.parent_cover_changed()
 861
 862     def other_versions(self):
 863         """Find other versions (i.e. in other languages) of the book."""
 864         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 865
 866     def parents(self):
 867         books = []
 868         parent = self.parent
 869         while parent is not None:
 870             books.insert(0, parent)
 871             parent = parent.parent
 872         return books
 873
 874     def pretty_title(self, html_links=False):
 875         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 876         books = self.parents() + [self]
 877         names.extend([(b.title, b.get_absolute_url()) for b in books])
 878
 879         if html_links:
 880             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 881         else:
 882             names = [tag[0] for tag in names]
 883         return ', '.join(names)
 884
 885     def publisher(self):
 886         publisher = self.get_extra_info_json()['publisher']
 887         if isinstance(publisher, str):
 888             return publisher
 889         elif isinstance(publisher, list):
 890             return ', '.join(publisher)
 891
 892     @classmethod
 893     def tagged_top_level(cls, tags):
 894         """ Returns top-level books tagged with `tags`.
 895
 896         It only returns those books which don't have ancestors which are
 897         also tagged with those tags.
 898
 899         """
 900         objects = cls.tagged.with_all(tags)
 901         return objects.filter(findable=True).exclude(ancestor__in=objects)
 902
 903     @classmethod
 904     def book_list(cls, book_filter=None):
 905         """Generates a hierarchical listing of all books.
 906
 907         Books are optionally filtered with a test function.
 908
 909         """
 910
 911         books_by_parent = {}
 912         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 913         if book_filter:
 914             books = books.filter(book_filter).distinct()
 915
 916             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 917             for book in books.iterator():
 918                 parent = book.parent_id
 919                 if parent not in book_ids:
 920                     parent = None
 921                 books_by_parent.setdefault(parent, []).append(book)
 922         else:
 923             for book in books.iterator():
 924                 books_by_parent.setdefault(book.parent_id, []).append(book)
 925
 926         orphans = []
 927         books_by_author = OrderedDict()
 928         for tag in Tag.objects.filter(category='author').iterator():
 929             books_by_author[tag] = []
 930
 931         for book in books_by_parent.get(None, ()):
 932             authors = list(book.authors().only('pk'))
 933             if authors:
 934                 for author in authors:
 935                     books_by_author[author].append(book)
 936             else:
 937                 orphans.append(book)
 938
 939         return books_by_author, orphans, books_by_parent
 940
    # Maps an audience code to a (sort rank, Polish label) pair.  Several
    # codes share one pair; audiences_pl() de-duplicates the pairs and sorts
    # by them, so the rank decides the order of the labels.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 951
 952     def audiences_pl(self):
 953         audiences = self.get_extra_info_json().get('audiences', [])
 954         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 955         return [a[1] for a in audiences]
 956
 957     def stage_note(self):
 958         stage = self.get_extra_info_json().get('stage')
 959         if stage and stage < '0.4':
 960             return (_('This work needs modernisation'),
 961                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 962         else:
 963             return None, None
 964
 965     def choose_fragments(self, number):
 966         fragments = self.fragments.order_by()
 967         fragments_count = fragments.count()
 968         if not fragments_count and self.children.exists():
 969             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 970             fragments_count = fragments.count()
 971         if fragments_count:
 972             if fragments_count > number:
 973                 offset = randint(0, fragments_count - number)
 974             else:
 975                 offset = 0
 976             return fragments[offset : offset + number]
 977         elif self.parent:
 978             return self.parent.choose_fragments(number)
 979         else:
 980             return []
 981
 982     def choose_fragment(self):
 983         fragments = self.choose_fragments(1)
 984         if fragments:
 985             return fragments[0]
 986         else:
 987             return None
 988
 989     def fragment_data(self):
 990         fragment = self.choose_fragment()
 991         if fragment:
 992             return {
 993                 'title': fragment.book.pretty_title(),
 994                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 995             }
 996         else:
 997             return None
 998
 999     def update_popularity(self):
1000         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1001         try:
1002             pop = self.popularity
1003             pop.count = count
1004             pop.save()
1005         except BookPopularity.DoesNotExist:
1006             BookPopularity.objects.create(book=self, count=count)
1007
1008     def ridero_link(self):
1009         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1010
1011     def like(self, user):
1012         from social.utils import likes, get_set, set_sets
1013         if not likes(user, self):
1014             tag = get_set(user, '')
1015             set_sets(user, self, [tag])
1016
1017     def unlike(self, user):
1018         from social.utils import likes, set_sets
1019         if likes(user, self):
1020             set_sets(user, self, [])
1021
1022     def full_sort_key(self):
1023         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1024
1025     def cover_color(self):
1026         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1027
1028     @cached_render('catalogue/book_mini_box.html')
1029     def mini_box(self):
1030         return {
1031             'book': self
1032         }
1033
1034     @cached_render('catalogue/book_mini_box.html')
1035     def mini_box_nolink(self):
1036         return {
1037             'book': self,
1038             'no_link': True,
1039         }
1040
def add_file_fields():
    """Attach an ebook file field to ``Book`` for each of ``Book.formats``.

    Every format gets an ``EbookField`` named ``<format>_file``.  Every
    format except ``xml`` also gets an indexed, non-editable
    ``<format>_file_etag`` ``CharField``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        file_field = EbookField(
            format_,
            _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default='',
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(
            max_length=255,
            editable=False,
            default='',
            db_index=True,
        )
        etag_field.contribute_to_class(Book, f'{field_name}_etag')


# Executed at import time, so the fields exist as soon as the module loads.
add_file_fields()
1062
1063
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one row next to a book."""
    # The book this row belongs to, reachable from it as ``book.popularity``;
    # the row is deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Popularity value, indexed in the database; Book.update_popularity()
    # writes it.
    count = models.IntegerField(default=0, db_index=True)