src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Storage instance passed as ``storage=`` to the ``cover`` field of Book.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# Upload-path builders for the cover image variants of Book.
# The ``%s`` placeholder is filled with the book slug by UploadToPath.__call__.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
def _ebook_upload_to(upload_path):
    """Wrap the ``upload_path`` template in an UploadToPath callable."""
    return UploadToPath(upload_path)
  56
  57
  58 class Book(models.Model):
  59     """Represents a book imported from WL-XML."""
  60     title = models.CharField(_('title'), max_length=32767)
  61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  62     sort_key_author = models.CharField(
  63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
  64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  67     description = models.TextField(_('description'), blank=True)
  68     abstract = models.TextField(_('abstract'), blank=True)
  69     toc = models.TextField(_('toc'), blank=True)
  70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
  72     parent_number = models.IntegerField(_('parent number'), default=0)
  73     extra_info = models.TextField(_('extra information'), default='{}')
  74     gazeta_link = models.CharField(blank=True, max_length=240)
  75     wiki_link = models.CharField(blank=True, max_length=240)
  76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  77     recommended = models.BooleanField(_('recommended'), default=False)
  78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
  79     preview = models.BooleanField(_('preview'), default=False)
  80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
  81     preview_key = models.CharField(max_length=32, blank=True, null=True)
  82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
  83
  84     # files generated during publication
  85     cover = EbookField(
  86         'cover', _('cover'),
  87         null=True, blank=True,
  88         upload_to=_cover_upload_to,
  89         storage=bofh_storage, max_length=255)
  90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  91     # Cleaner version of cover for thumbs
  92     cover_clean = EbookField(
  93         'cover_clean', _('clean cover'),
  94         null=True, blank=True,
  95         upload_to=_cover_clean_upload_to,
  96         max_length=255
  97     )
  98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  99     cover_thumb = EbookField(
 100         'cover_thumb', _('cover thumbnail'),
 101         null=True, blank=True,
 102         upload_to=_cover_thumb_upload_to,
 103         max_length=255)
 104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 105     cover_api_thumb = EbookField(
 106         'cover_api_thumb', _('cover thumbnail for mobile app'),
 107         null=True, blank=True,
 108         upload_to=_cover_api_thumb_upload_to,
 109         max_length=255)
 110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 111     simple_cover = EbookField(
 112         'simple_cover', _('cover for mobile app'),
 113         null=True, blank=True,
 114         upload_to=_simple_cover_upload_to,
 115         max_length=255)
 116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 117     cover_ebookpoint = EbookField(
 118         'cover_ebookpoint', _('cover for Ebookpoint'),
 119         null=True, blank=True,
 120         upload_to=_cover_ebookpoint_upload_to,
 121         max_length=255)
 122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 123     ebook_formats = constants.EBOOK_FORMATS
 124     formats = ebook_formats + ['html', 'xml']
 125
 126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
 127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
 128
 129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
 130     has_audience = models.BooleanField(default=False)
 131
 132     objects = models.Manager()
 133     tagged = managers.ModelTaggedItemManager(Tag)
 134     tags = managers.TagDescriptor(Tag)
 135     tag_relations = GenericRelation(Tag.intermediary_table_model)
 136
 137     html_built = django.dispatch.Signal()
 138     published = django.dispatch.Signal()
 139
 140     SORT_KEY_SEP = '$'
 141
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta for an existing slug without overwrite."""
        pass
 144
    class Meta:
        # Default ordering: author sort key first, then the book's own sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 150
    def __str__(self):
        """Return the book title as its string representation."""
        return self.title
 153
 154     def get_extra_info_json(self):
 155         return json.loads(self.extra_info or '{}')
 156
 157     def get_initial(self):
 158         try:
 159             return re.search(r'\w', self.title, re.U).group(0)
 160         except AttributeError:
 161             return ''
 162
    def authors(self):
        """Return this book's tags whose category is 'author'."""
        return self.tags.filter(category='author')
 165
    def epochs(self):
        """Return this book's tags whose category is 'epoch'."""
        return self.tags.filter(category='epoch')
 168
    def genres(self):
        """Return this book's tags whose category is 'genre'."""
        return self.tags.filter(category='genre')
 171
    def kinds(self):
        """Return this book's tags whose category is 'kind'."""
        return self.tags.filter(category='kind')
 174
 175     def tag_unicode(self, category):
 176         relations = prefetched_relations(self, category)
 177         if relations:
 178             return ', '.join(rel.tag.name for rel in relations)
 179         else:
 180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 181
    def tags_by_category(self):
        """Return split_tags() of this book's tags, excluding 'set' and 'theme'."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 184
    def author_unicode(self):
        """Return the cached author string (``cached_author``, set in save())."""
        return self.cached_author
 187
    def kind_unicode(self):
        """Return the comma-separated names of this book's 'kind' tags."""
        return self.tag_unicode('kind')
 190
    def epoch_unicode(self):
        """Return the comma-separated names of this book's 'epoch' tags."""
        return self.tag_unicode('epoch')
 193
    def genre_unicode(self):
        """Return the comma-separated names of this book's 'genre' tags."""
        return self.tag_unicode('genre')
 196
 197     def translators(self):
 198         translators = self.get_extra_info_json().get('translators') or []
 199         return [
 200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 201         ]
 202
 203     def translator(self):
 204         translators = self.get_extra_info_json().get('translators')
 205         if not translators:
 206             return None
 207         if len(translators) > 3:
 208             translators = translators[:2]
 209             others = ' i inni'
 210         else:
 211             others = ''
 212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 213
 214     def cover_source(self):
 215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 216
    @property
    def isbn_pdf(self):
        """The 'isbn_pdf' entry of extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_pdf')
 220
    @property
    def isbn_epub(self):
        """The 'isbn_epub' entry of extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_epub')
 224
    @property
    def isbn_mobi(self):
        """The 'isbn_mobi' entry of extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_mobi')
 228
 229     def is_accessible_to(self, user):
 230         if not self.preview:
 231             return True
 232         if not user.is_authenticated:
 233             return False
 234         Membership = apps.get_model('club', 'Membership')
 235         if Membership.is_active_for(user):
 236             return True
 237         Funding = apps.get_model('funding', 'Funding')
 238         if Funding.objects.filter(user=user, offer__book=self):
 239             return True
 240         return False
 241
 242     def save(self, force_insert=False, force_update=False, **kwargs):
 243         from sortify import sortify
 244
 245         self.sort_key = sortify(self.title)[:120]
 246         self.title = str(self.title)  # ???
 247
 248         try:
 249             author = self.authors().first().sort_key
 250         except AttributeError:
 251             author = ''
 252         self.sort_key_author = author
 253
 254         self.cached_author = self.tag_unicode('author')
 255         self.has_audience = 'audience' in self.get_extra_info_json()
 256
 257         if self.preview and not self.preview_key:
 258             self.preview_key = get_random_hash(self.slug)[:32]
 259
 260         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 261
 262         return ret
 263
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' view for this book's slug."""
        return reverse('book_detail', args=[self.slug])
 266
    def gallery_path(self):
        """Return the gallery path for this slug, via catalogue.utils.gallery_path."""
        return gallery_path(self.slug)
 269
    def gallery_url(self):
        """Return the gallery URL for this slug, via catalogue.utils.gallery_url."""
        return gallery_url(self.slug)
 272
 273     def get_first_text(self):
 274         if self.html_file:
 275             return self
 276         child = self.children.all().order_by('parent_number').first()
 277         if child is not None:
 278             return child.get_first_text()
 279
 280     def get_last_text(self):
 281         if self.html_file:
 282             return self
 283         child = self.children.all().order_by('parent_number').last()
 284         if child is not None:
 285             return child.get_last_text()
 286
 287     def get_prev_text(self):
 288         if not self.parent:
 289             return None
 290         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 291         if sibling is not None:
 292             return sibling.get_last_text()
 293
 294         if self.parent.html_file:
 295             return self.parent
 296
 297         return self.parent.get_prev_text()
 298
 299     def get_next_text(self, inside=True):
 300         if inside:
 301             child = self.children.order_by('parent_number').first()
 302             if child is not None:
 303                 return child.get_first_text()
 304
 305         if not self.parent:
 306             return None
 307         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 308         if sibling is not None:
 309             return sibling.get_first_text()
 310         return self.parent.get_next_text(inside=False)
 311
 312     def get_child_audiobook(self):
 313         BookMedia = apps.get_model('catalogue', 'BookMedia')
 314         if not BookMedia.objects.filter(book__ancestor=self).exists():
 315             return None
 316         for child in self.children.order_by('parent_number').all():
 317             if child.has_mp3_file():
 318                 return child
 319             child_sub = child.get_child_audiobook()
 320             if child_sub is not None:
 321                 return child_sub
 322
 323     def get_siblings(self):
 324         if not self.parent:
 325             return []
 326         return self.parent.children.all().order_by('parent_number')
 327
    def get_children(self):
        """Return this book's children ordered by ``parent_number``."""
        return self.children.all().order_by('parent_number')
 330
    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
 334
    def language_code(self):
        """Map ``language`` through constants.LANGUAGES_3TO2, or return it unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
 337
    def language_name(self):
        """Return the settings.LANGUAGES name for language_code(), or ''."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
 340
    def is_foreign(self):
        """Tell whether language_code() differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE
 343
 344     def set_audio_length(self):
 345         length = self.get_audio_length()
 346         if length > 0:
 347             self.audio_length = self.format_audio_length(length)
 348             self.save()
 349
 350     @staticmethod
 351     def format_audio_length(seconds):
 352         """
 353         >>> Book.format_audio_length(1)
 354         '0:01'
 355         >>> Book.format_audio_length(3661)
 356         '1:01:01'
 357         """
 358         if seconds < 60*60:
 359             minutes = seconds // 60
 360             seconds = seconds % 60
 361             return '%d:%02d' % (minutes, seconds)
 362         else:
 363             hours = seconds // 3600
 364             minutes = seconds % 3600 // 60
 365             seconds = seconds % 60
 366             return '%d:%02d:%02d' % (hours, minutes, seconds)
 367
 368     def get_audio_length(self):
 369         total = 0
 370         for media in self.get_mp3() or ():
 371             total += app_settings.GET_MP3_LENGTH(media.file.path)
 372         return int(total)
 373
 374     def has_media(self, type_):
 375         if type_ in Book.formats:
 376             return bool(getattr(self, "%s_file" % type_))
 377         else:
 378             return self.media.filter(type=type_).exists()
 379
    def has_audio(self):
        """Tell whether the book has any 'mp3' media."""
        return self.has_media('mp3')
 382
 383     def get_media(self, type_):
 384         if self.has_media(type_):
 385             if type_ in Book.formats:
 386                 return getattr(self, "%s_file" % type_)
 387             else:
 388                 return self.media.filter(type=type_)
 389         else:
 390             return None
 391
    def get_mp3(self):
        """Return the book's 'mp3' media via get_media(), or None."""
        return self.get_media("mp3")
 394
    def get_odt(self):
        """Return the book's 'odt' media via get_media(), or None."""
        return self.get_media("odt")
 397
    def get_ogg(self):
        """Return the book's 'ogg' media via get_media(), or None."""
        return self.get_media("ogg")
 400
    def get_daisy(self):
        """Return the book's 'daisy' media via get_media(), or None."""
        return self.get_media("daisy")
 403
 404     def media_url(self, format_):
 405         media = self.get_media(format_)
 406         if media:
 407             if self.preview:
 408                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 409             else:
 410                 return media.url
 411         else:
 412             return None
 413
    def html_url(self):
        """Return media_url() for the 'html' format."""
        return self.media_url('html')
 416
    def pdf_url(self):
        """Return media_url() for the 'pdf' format."""
        return self.media_url('pdf')
 419
    def epub_url(self):
        """Return media_url() for the 'epub' format."""
        return self.media_url('epub')
 422
    def mobi_url(self):
        """Return media_url() for the 'mobi' format."""
        return self.media_url('mobi')
 425
    def txt_url(self):
        """Return media_url() for the 'txt' format."""
        return self.media_url('txt')
 428
    def fb2_url(self):
        """Return media_url() for the 'fb2' format."""
        return self.media_url('fb2')
 431
    def xml_url(self):
        """Return media_url() for the 'xml' format."""
        return self.media_url('xml')
 434
    def has_description(self):
        """Tell whether ``description`` is non-empty."""
        return len(self.description) > 0
    # Display metadata attached to the method (label and boolean flag).
    has_description.short_description = _('description')
    has_description.boolean = True
 439
    def has_mp3_file(self):
        """Tell whether the book has any 'mp3' media."""
        return self.has_media("mp3")
    # Display metadata attached to the method (label and boolean flag).
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
 444
    def has_ogg_file(self):
        """Tell whether the book has any 'ogg' media."""
        return self.has_media("ogg")
    # Display metadata attached to the method (label and boolean flag).
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
 449
    def has_daisy_file(self):
        """Tell whether the book has any 'daisy' media."""
        return self.has_media("daisy")
    # Display metadata attached to the method (label and boolean flag).
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
 454
    @property
    def media_daisy(self):
        """The book's 'daisy' media as returned by get_media(), or None."""
        return self.get_media('daisy')
 458
 459     def get_audiobooks(self):
 460         ogg_files = {}
 461         for m in self.media.filter(type='ogg').order_by().iterator():
 462             ogg_files[m.name] = m
 463
 464         audiobooks = []
 465         projects = set()
 466         total_duration = 0
 467         for mp3 in self.media.filter(type='mp3').iterator():
 468             # ogg files are always from the same project
 469             meta = mp3.get_extra_info_json()
 470             project = meta.get('project')
 471             if not project:
 472                 # temporary fallback
 473                 project = 'CzytamySłuchając'
 474
 475             projects.add((project, meta.get('funded_by', '')))
 476             total_duration += mp3.duration or 0
 477
 478             media = {'mp3': mp3}
 479
 480             ogg = ogg_files.get(mp3.name)
 481             if ogg:
 482                 media['ogg'] = ogg
 483             audiobooks.append(media)
 484
 485         projects = sorted(projects)
 486         total_duration = '%d:%02d' % (
 487             total_duration // 60,
 488             total_duration % 60
 489         )
 490         return audiobooks, projects, total_duration
 491
 492     def wldocument(self, parse_dublincore=True, inherit=True):
 493         from catalogue.import_utils import ORMDocProvider
 494         from librarian.parser import WLDocument
 495
 496         if inherit and self.parent:
 497             meta_fallbacks = self.parent.cover_info()
 498         else:
 499             meta_fallbacks = None
 500
 501         return WLDocument.from_file(
 502             self.xml_file.path,
 503             provider=ORMDocProvider(self),
 504             parse_dublincore=parse_dublincore,
 505             meta_fallbacks=meta_fallbacks)
 506
 507     def wldocument2(self):
 508         from catalogue.import_utils import ORMDocProvider
 509         from librarian.document import WLDocument
 510         doc = WLDocument(
 511             self.xml_file.path,
 512             provider=ORMDocProvider(self)
 513         )
 514         doc.meta.update(self.cover_info())
 515         return doc
 516
 517
 518     @staticmethod
 519     def zip_format(format_):
 520         def pretty_file_name(book):
 521             return "%s/%s.%s" % (
 522                 book.get_extra_info_json()['author'],
 523                 book.slug,
 524                 format_)
 525
 526         field_name = "%s_file" % format_
 527         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 528         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 529         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 530
 531     def zip_audiobooks(self, format_):
 532         bm = BookMedia.objects.filter(book=self, type=format_)
 533         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 534         licenses = set()
 535         for m in bm:
 536             license = constants.LICENSES.get(
 537                 m.get_extra_info_json().get('license'), {}
 538             ).get('locative')
 539             if license:
 540                 licenses.add(license)
 541         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 542             'licenses': licenses,
 543         })
 544         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 545
 546     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 547         if not self.findable:
 548             return
 549         if index is None:
 550             from search.index import Index
 551             index = Index()
 552         try:
 553             index.index_book(self, book_info)
 554             if index_tags:
 555                 index.index_tags()
 556             if commit:
 557                 index.index.commit()
 558         except Exception as e:
 559             index.index.rollback()
 560             raise e
 561
 562     # will make problems in conjunction with paid previews
 563     def download_pictures(self, remote_gallery_url):
 564         # This is only needed for legacy relative image paths.
 565         gallery_path = self.gallery_path()
 566         # delete previous files, so we don't include old files in ebooks
 567         if os.path.isdir(gallery_path):
 568             for filename in os.listdir(gallery_path):
 569                 file_path = os.path.join(gallery_path, filename)
 570                 os.unlink(file_path)
 571         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 572         if ilustr_elements:
 573             makedirs(gallery_path)
 574             for ilustr in ilustr_elements:
 575                 ilustr_src = ilustr.get('src')
 576                 if '/' in ilustr_src:
 577                     continue
 578                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 579                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 580
 581     def load_abstract(self):
 582         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 583         if abstract is not None:
 584             self.abstract = transform_abstrakt(abstract)
 585         else:
 586             self.abstract = ''
 587
 588     def load_toc(self):
 589         self.toc = ''
 590         if self.html_file:
 591             parser = html.HTMLParser(encoding='utf-8')
 592             tree = html.parse(self.html_file.path, parser=parser)
 593             toc = tree.find('//div[@id="toc"]/ol')
 594             if toc is None or not len(toc):
 595                 return
 596             html_link = reverse('book_text', args=[self.slug])
 597             for a in toc.findall('.//a'):
 598                 a.attrib['href'] = html_link + a.attrib['href']
 599             self.toc = html.tostring(toc, encoding='unicode')
 600             # div#toc
 601
 602     @classmethod
 603     def from_xml_file(cls, xml_file, **kwargs):
 604         from django.core.files import File
 605         from librarian import dcparser
 606
 607         # use librarian to parse meta-data
 608         book_info = dcparser.parse(xml_file)
 609
 610         if not isinstance(xml_file, File):
 611             xml_file = File(open(xml_file))
 612
 613         try:
 614             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 615         finally:
 616             xml_file.close()
 617
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a Book from a raw XML file and its parsed metadata.

        Args:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, the optional
                ``parts`` and ``to_dict()``.
            overwrite: when false, an existing book with the same slug is
                an error.
            dont_build: names of formats (and 'cover') whose build is
                skipped; merged with app_settings.DONT_BUILD.
            search_index: whether to schedule the search-indexing task.
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus this many days.
            findable: stored on the book; indexing is skipped when false.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag coming from the metadata is flagged for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set: metadata tags plus the previously saved shelves.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in metadata order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # The HTML build is scheduled unconditionally; dont_build is not consulted.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Signal subscribers that the book has been published.
        cls.published.send(sender=cls, instance=book)
        return book
 737
 738     def get_master(self):
 739         master_tags = [
 740             'opowiadanie',
 741             'powiesc',
 742             'dramat_wierszowany_l',
 743             'dramat_wierszowany_lp',
 744             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 745             'wywiad',
 746         ]
 747         from librarian.parser import WLDocument
 748         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 749         root = wld.edoc.getroot()
 750         for master in root.iter():
 751             if master.tag in master_tags:
 752                 return master
 753
 754     def update_references(self):
 755         from references.models import Entity, Reference
 756         master = self.get_master()
 757         if master is None:
 758             master = []
 759         found = set()
 760         for i, sec in enumerate(master):
 761             for ref in sec.findall('.//ref'):
 762                 href = ref.attrib.get('href', '')
 763                 if not href or href in found:
 764                     continue
 765                 found.add(href)
 766                 entity, created = Entity.objects.get_or_create(
 767                     uri=href
 768                 )
 769                 ref, created = Reference.objects.get_or_create(
 770                     book=self,
 771                     entity=entity
 772                 )
 773                 ref.first_section = 'sec%d' % (i + 1)
 774                 entity.populate()
 775                 entity.save()
 776         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 777
    @property
    def references(self):
        """All references of this book, with their entities fetched via select_related."""
        return self.reference_set.all().select_related('entity')
 781
 782     @classmethod
 783     @transaction.atomic
 784     def repopulate_ancestors(cls):
 785         """Fixes the ancestry cache."""
 786         # TODO: table names
 787         cursor = connection.cursor()
 788         if connection.vendor == 'postgres':
 789             cursor.execute("TRUNCATE catalogue_book_ancestor")
 790             cursor.execute("""
 791                 WITH RECURSIVE ancestry AS (
 792                     SELECT book.id, book.parent_id
 793                     FROM catalogue_book AS book
 794                     WHERE book.parent_id IS NOT NULL
 795                     UNION
 796                     SELECT ancestor.id, book.parent_id
 797                     FROM ancestry AS ancestor, catalogue_book AS book
 798                     WHERE ancestor.parent_id = book.id
 799                         AND book.parent_id IS NOT NULL
 800                     )
 801                 INSERT INTO catalogue_book_ancestor
 802                     (from_book_id, to_book_id)
 803                     SELECT id, parent_id
 804                     FROM ancestry
 805                     ORDER BY id;
 806                 """)
 807         else:
 808             cursor.execute("DELETE FROM catalogue_book_ancestor")
 809             for b in cls.objects.exclude(parent=None):
 810                 parent = b.parent
 811                 while parent is not None:
 812                     b.ancestor.add(parent)
 813                     parent = parent.parent
 814
 815     @property
 816     def ancestors(self):
 817         if self.parent:
 818             for anc in self.parent.ancestors:
 819                 yield anc
 820             yield self.parent
 821         else:
 822             return []
 823
 824     def clear_cache(self):
 825         clear_cached_renders(self.mini_box)
 826         clear_cached_renders(self.mini_box_nolink)
 827
 828     def cover_info(self, inherit=True):
 829         """Returns a dictionary to serve as fallback for BookInfo.
 830
 831         For now, the only thing inherited is the cover image.
 832         """
 833         need = False
 834         info = {}
 835         for field in ('cover_url', 'cover_by', 'cover_source'):
 836             val = self.get_extra_info_json().get(field)
 837             if val:
 838                 info[field] = val
 839             else:
 840                 need = True
 841         if inherit and need and self.parent is not None:
 842             parent_info = self.parent.cover_info()
 843             parent_info.update(info)
 844             info = parent_info
 845         return info
 846
 847     def related_themes(self):
 848         return Tag.objects.usage_for_queryset(
 849             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 850             counts=True).filter(category='theme').order_by('-count')
 851
 852     def parent_cover_changed(self):
 853         """Called when parent book's cover image is changed."""
 854         if not self.cover_info(inherit=False):
 855             if 'cover' not in app_settings.DONT_BUILD:
 856                 self.cover.build_delay()
 857                 self.cover_clean.build_delay()
 858                 self.cover_thumb.build_delay()
 859                 self.cover_api_thumb.build_delay()
 860                 self.simple_cover.build_delay()
 861                 self.cover_ebookpoint.build_delay()
 862             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 863                 if format_ not in app_settings.DONT_BUILD:
 864                     getattr(self, '%s_file' % format_).build_delay()
 865             for child in self.children.all():
 866                 child.parent_cover_changed()
 867
 868     def other_versions(self):
 869         """Find other versions (i.e. in other languages) of the book."""
 870         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 871
 872     def parents(self):
 873         books = []
 874         parent = self.parent
 875         while parent is not None:
 876             books.insert(0, parent)
 877             parent = parent.parent
 878         return books
 879
 880     def pretty_title(self, html_links=False):
 881         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 882         books = self.parents() + [self]
 883         names.extend([(b.title, b.get_absolute_url()) for b in books])
 884
 885         if html_links:
 886             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 887         else:
 888             names = [tag[0] for tag in names]
 889         return ', '.join(names)
 890
 891     def publisher(self):
 892         publisher = self.get_extra_info_json()['publisher']
 893         if isinstance(publisher, str):
 894             return publisher
 895         elif isinstance(publisher, list):
 896             return ', '.join(publisher)
 897
 898     @classmethod
 899     def tagged_top_level(cls, tags):
 900         """ Returns top-level books tagged with `tags`.
 901
 902         It only returns those books which don't have ancestors which are
 903         also tagged with those tags.
 904
 905         """
 906         objects = cls.tagged.with_all(tags)
 907         return objects.filter(findable=True).exclude(ancestor__in=objects)
 908
 909     @classmethod
 910     def book_list(cls, book_filter=None):
 911         """Generates a hierarchical listing of all books.
 912
 913         Books are optionally filtered with a test function.
 914
 915         """
 916
 917         books_by_parent = {}
 918         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 919         if book_filter:
 920             books = books.filter(book_filter).distinct()
 921
 922             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 923             for book in books.iterator():
 924                 parent = book.parent_id
 925                 if parent not in book_ids:
 926                     parent = None
 927                 books_by_parent.setdefault(parent, []).append(book)
 928         else:
 929             for book in books.iterator():
 930                 books_by_parent.setdefault(book.parent_id, []).append(book)
 931
 932         orphans = []
 933         books_by_author = OrderedDict()
 934         for tag in Tag.objects.filter(category='author').iterator():
 935             books_by_author[tag] = []
 936
 937         for book in books_by_parent.get(None, ()):
 938             authors = list(book.authors().only('pk'))
 939             if authors:
 940                 for author in authors:
 941                     books_by_author[author].append(book)
 942             else:
 943                 orphans.append(book)
 944
 945         return books_by_author, orphans, books_by_parent
 946
    # Maps an audience code from a book's extra info to a pair of
    # (sort rank, Polish name of the school level).  Several codes share
    # one level; audiences_pl() sorts and de-duplicates on these pairs.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 957
 958     def audiences_pl(self):
 959         audiences = self.get_extra_info_json().get('audiences', [])
 960         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 961         return [a[1] for a in audiences]
 962
 963     def stage_note(self):
 964         stage = self.get_extra_info_json().get('stage')
 965         if stage and stage < '0.4':
 966             return (_('This work needs modernisation'),
 967                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 968         else:
 969             return None, None
 970
 971     def choose_fragments(self, number):
 972         fragments = self.fragments.order_by()
 973         fragments_count = fragments.count()
 974         if not fragments_count and self.children.exists():
 975             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 976             fragments_count = fragments.count()
 977         if fragments_count:
 978             if fragments_count > number:
 979                 offset = randint(0, fragments_count - number)
 980             else:
 981                 offset = 0
 982             return fragments[offset : offset + number]
 983         elif self.parent:
 984             return self.parent.choose_fragments(number)
 985         else:
 986             return []
 987
 988     def choose_fragment(self):
 989         fragments = self.choose_fragments(1)
 990         if fragments:
 991             return fragments[0]
 992         else:
 993             return None
 994
 995     def fragment_data(self):
 996         fragment = self.choose_fragment()
 997         if fragment:
 998             return {
 999                 'title': fragment.book.pretty_title(),
1000                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1001             }
1002         else:
1003             return None
1004
1005     def update_popularity(self):
1006         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1007         try:
1008             pop = self.popularity
1009             pop.count = count
1010             pop.save()
1011         except BookPopularity.DoesNotExist:
1012             BookPopularity.objects.create(book=self, count=count)
1013
1014     def ridero_link(self):
1015         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1016
1017     def like(self, user):
1018         from social.utils import likes, get_set, set_sets
1019         if not likes(user, self):
1020             tag = get_set(user, '')
1021             set_sets(user, self, [tag])
1022
1023     def unlike(self, user):
1024         from social.utils import likes, set_sets
1025         if likes(user, self):
1026             set_sets(user, self, [])
1027
1028     def full_sort_key(self):
1029         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1030
1031     def cover_color(self):
1032         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1033
1034     @cached_render('catalogue/book_mini_box.html')
1035     def mini_box(self):
1036         return {
1037             'book': self
1038         }
1039
1040     @cached_render('catalogue/book_mini_box.html')
1041     def mini_box_nolink(self):
1042         return {
1043             'book': self,
1044             'no_link': True,
1045         }
1046
def add_file_fields():
    """Add a file field to Book for every format listed in ``Book.formats``.

    For each format an EbookField named ``<format>_file`` is attached to
    the Book class.  Every format except 'xml' also gets an indexed,
    non-editable ``<format>_file_etag`` CharField.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # The upload callable gets a unique, format-specific name and is
        # published under that name at module level.
        # The doubled %% leaves a single %s placeholder in the path
        # (presumably filled in later by the upload callable; see
        # _ebook_upload_to).
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            # Companion etag column for every non-XML format.
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')
1065
1066
# Runs at import time, so the per-format file fields are attached to Book
# as soon as this module is loaded.
add_file_fields()
1068
1069
class BookPopularity(models.Model):
    """Popularity counter kept in a separate table, one row per book."""
    # One-to-one link; the row is reachable from a book as `book.popularity`
    # and is deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter, starting at 0.
    count = models.IntegerField(default=0, db_index=True)