src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Shared storage instance; passed as ``storage=`` to the ``cover`` field of Book.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# Upload path builders for the generated cover variants; each template is
# filled with the book slug by UploadToPath.__call__.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
  54 def _ebook_upload_to(upload_path):
  55     return UploadToPath(upload_path)
  56
  57
  58 class Book(models.Model):
  59     """Represents a book imported from WL-XML."""
  60     title = models.CharField(_('title'), max_length=32767)
  61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  62     sort_key_author = models.CharField(
  63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
  64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  67     description = models.TextField(_('description'), blank=True)
  68     abstract = models.TextField(_('abstract'), blank=True)
  69     toc = models.TextField(_('toc'), blank=True)
  70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
  72     parent_number = models.IntegerField(_('parent number'), default=0)
  73     extra_info = models.TextField(_('extra information'), default='{}')
  74     gazeta_link = models.CharField(blank=True, max_length=240)
  75     wiki_link = models.CharField(blank=True, max_length=240)
  76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  77     recommended = models.BooleanField(_('recommended'), default=False)
  78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
  79     preview = models.BooleanField(_('preview'), default=False)
  80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
  81     preview_key = models.CharField(max_length=32, blank=True, null=True)
  82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
  83
  84     # files generated during publication
  85     cover = EbookField(
  86         'cover', _('cover'),
  87         null=True, blank=True,
  88         upload_to=_cover_upload_to,
  89         storage=bofh_storage, max_length=255)
  90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  91     # Cleaner version of cover for thumbs
  92     cover_clean = EbookField(
  93         'cover_clean', _('clean cover'),
  94         null=True, blank=True,
  95         upload_to=_cover_clean_upload_to,
  96         max_length=255
  97     )
  98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  99     cover_thumb = EbookField(
 100         'cover_thumb', _('cover thumbnail'),
 101         null=True, blank=True,
 102         upload_to=_cover_thumb_upload_to,
 103         max_length=255)
 104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 105     cover_api_thumb = EbookField(
 106         'cover_api_thumb', _('cover thumbnail for mobile app'),
 107         null=True, blank=True,
 108         upload_to=_cover_api_thumb_upload_to,
 109         max_length=255)
 110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 111     simple_cover = EbookField(
 112         'simple_cover', _('cover for mobile app'),
 113         null=True, blank=True,
 114         upload_to=_simple_cover_upload_to,
 115         max_length=255)
 116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 117     cover_ebookpoint = EbookField(
 118         'cover_ebookpoint', _('cover for Ebookpoint'),
 119         null=True, blank=True,
 120         upload_to=_cover_ebookpoint_upload_to,
 121         max_length=255)
 122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 123     ebook_formats = constants.EBOOK_FORMATS
 124     formats = ebook_formats + ['html', 'xml']
 125
 126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
 127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
 128
 129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
 130     has_audience = models.BooleanField(default=False)
 131
 132     objects = models.Manager()
 133     tagged = managers.ModelTaggedItemManager(Tag)
 134     tags = managers.TagDescriptor(Tag)
 135     tag_relations = GenericRelation(Tag.intermediary_table_model)
 136
 137     html_built = django.dispatch.Signal()
 138     published = django.dispatch.Signal()
 139
 140     SORT_KEY_SEP = '$'
 141
 142     is_book = True
 143
    class AlreadyExists(Exception):
        # Raised by from_text_and_meta when a book with the given slug
        # already exists and overwriting was not requested.
        pass
 146
    class Meta:
        # Default ordering: by author sort key first, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        # Explicit app label for this model.
        app_label = 'catalogue'
 152
 153     def __str__(self):
 154         return self.title
 155
 156     def get_extra_info_json(self):
 157         return json.loads(self.extra_info or '{}')
 158
 159     def get_initial(self):
 160         try:
 161             return re.search(r'\w', self.title, re.U).group(0)
 162         except AttributeError:
 163             return ''
 164
 165     def authors(self):
 166         return self.tags.filter(category='author')
 167
 168     def epochs(self):
 169         return self.tags.filter(category='epoch')
 170
 171     def genres(self):
 172         return self.tags.filter(category='genre')
 173
 174     def kinds(self):
 175         return self.tags.filter(category='kind')
 176
 177     def tag_unicode(self, category):
 178         relations = prefetched_relations(self, category)
 179         if relations:
 180             return ', '.join(rel.tag.name for rel in relations)
 181         else:
 182             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 183
 184     def tags_by_category(self):
 185         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 186
 187     def author_unicode(self):
 188         return self.cached_author
 189
 190     def kind_unicode(self):
 191         return self.tag_unicode('kind')
 192
 193     def epoch_unicode(self):
 194         return self.tag_unicode('epoch')
 195
 196     def genre_unicode(self):
 197         return self.tag_unicode('genre')
 198
 199     def translators(self):
 200         translators = self.get_extra_info_json().get('translators') or []
 201         return [
 202             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 203         ]
 204
 205     def translator(self):
 206         translators = self.get_extra_info_json().get('translators')
 207         if not translators:
 208             return None
 209         if len(translators) > 3:
 210             translators = translators[:2]
 211             others = ' i inni'
 212         else:
 213             others = ''
 214         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 215
 216     def cover_source(self):
 217         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 218
 219     @property
 220     def isbn_pdf(self):
 221         return self.get_extra_info_json().get('isbn_pdf')
 222
 223     @property
 224     def isbn_epub(self):
 225         return self.get_extra_info_json().get('isbn_epub')
 226
 227     @property
 228     def isbn_mobi(self):
 229         return self.get_extra_info_json().get('isbn_mobi')
 230
 231     def is_accessible_to(self, user):
 232         if not self.preview:
 233             return True
 234         if not user.is_authenticated:
 235             return False
 236         Membership = apps.get_model('club', 'Membership')
 237         if Membership.is_active_for(user):
 238             return True
 239         Funding = apps.get_model('funding', 'Funding')
 240         if Funding.objects.filter(user=user, offer__book=self):
 241             return True
 242         return False
 243
 244     def save(self, force_insert=False, force_update=False, **kwargs):
 245         from sortify import sortify
 246
 247         self.sort_key = sortify(self.title)[:120]
 248         self.title = str(self.title)  # ???
 249
 250         try:
 251             author = self.authors().first().sort_key
 252         except AttributeError:
 253             author = ''
 254         self.sort_key_author = author
 255
 256         self.cached_author = self.tag_unicode('author')
 257         self.has_audience = 'audience' in self.get_extra_info_json()
 258
 259         if self.preview and not self.preview_key:
 260             self.preview_key = get_random_hash(self.slug)[:32]
 261
 262         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 263
 264         return ret
 265
 266     def get_absolute_url(self):
 267         return reverse('book_detail', args=[self.slug])
 268
 269     def gallery_path(self):
 270         return gallery_path(self.slug)
 271
 272     def gallery_url(self):
 273         return gallery_url(self.slug)
 274
 275     def get_first_text(self):
 276         if self.html_file:
 277             return self
 278         child = self.children.all().order_by('parent_number').first()
 279         if child is not None:
 280             return child.get_first_text()
 281
 282     def get_last_text(self):
 283         if self.html_file:
 284             return self
 285         child = self.children.all().order_by('parent_number').last()
 286         if child is not None:
 287             return child.get_last_text()
 288
 289     def get_prev_text(self):
 290         if not self.parent:
 291             return None
 292         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 293         if sibling is not None:
 294             return sibling.get_last_text()
 295
 296         if self.parent.html_file:
 297             return self.parent
 298
 299         return self.parent.get_prev_text()
 300
 301     def get_next_text(self, inside=True):
 302         if inside:
 303             child = self.children.order_by('parent_number').first()
 304             if child is not None:
 305                 return child.get_first_text()
 306
 307         if not self.parent:
 308             return None
 309         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 310         if sibling is not None:
 311             return sibling.get_first_text()
 312         return self.parent.get_next_text(inside=False)
 313
 314     def get_child_audiobook(self):
 315         BookMedia = apps.get_model('catalogue', 'BookMedia')
 316         if not BookMedia.objects.filter(book__ancestor=self).exists():
 317             return None
 318         for child in self.children.order_by('parent_number').all():
 319             if child.has_mp3_file():
 320                 return child
 321             child_sub = child.get_child_audiobook()
 322             if child_sub is not None:
 323                 return child_sub
 324
 325     def get_siblings(self):
 326         if not self.parent:
 327             return []
 328         return self.parent.children.all().order_by('parent_number')
 329
 330     def get_children(self):
 331         return self.children.all().order_by('parent_number')
 332
 333     @property
 334     def name(self):
 335         return self.title
 336
 337     def language_code(self):
 338         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 339
 340     def language_name(self):
 341         return dict(settings.LANGUAGES).get(self.language_code(), "")
 342
 343     def is_foreign(self):
 344         return self.language_code() != settings.LANGUAGE_CODE
 345
 346     def set_audio_length(self):
 347         length = self.get_audio_length()
 348         if length > 0:
 349             self.audio_length = self.format_audio_length(length)
 350             self.save()
 351
 352     @staticmethod
 353     def format_audio_length(seconds):
 354         """
 355         >>> Book.format_audio_length(1)
 356         '0:01'
 357         >>> Book.format_audio_length(3661)
 358         '1:01:01'
 359         """
 360         if seconds < 60*60:
 361             minutes = seconds // 60
 362             seconds = seconds % 60
 363             return '%d:%02d' % (minutes, seconds)
 364         else:
 365             hours = seconds // 3600
 366             minutes = seconds % 3600 // 60
 367             seconds = seconds % 60
 368             return '%d:%02d:%02d' % (hours, minutes, seconds)
 369
 370     def get_audio_length(self):
 371         total = 0
 372         for media in self.get_mp3() or ():
 373             total += app_settings.GET_MP3_LENGTH(media.file.path)
 374         return int(total)
 375
 376     def has_media(self, type_):
 377         if type_ in Book.formats:
 378             return bool(getattr(self, "%s_file" % type_))
 379         else:
 380             return self.media.filter(type=type_).exists()
 381
 382     def has_audio(self):
 383         return self.has_media('mp3')
 384
 385     def get_media(self, type_):
 386         if self.has_media(type_):
 387             if type_ in Book.formats:
 388                 return getattr(self, "%s_file" % type_)
 389             else:
 390                 return self.media.filter(type=type_)
 391         else:
 392             return None
 393
 394     def get_mp3(self):
 395         return self.get_media("mp3")
 396
 397     def get_odt(self):
 398         return self.get_media("odt")
 399
 400     def get_ogg(self):
 401         return self.get_media("ogg")
 402
 403     def get_daisy(self):
 404         return self.get_media("daisy")
 405
 406     def media_url(self, format_):
 407         media = self.get_media(format_)
 408         if media:
 409             if self.preview:
 410                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 411             else:
 412                 return media.url
 413         else:
 414             return None
 415
 416     def html_url(self):
 417         return self.media_url('html')
 418
 419     def pdf_url(self):
 420         return self.media_url('pdf')
 421
 422     def epub_url(self):
 423         return self.media_url('epub')
 424
 425     def mobi_url(self):
 426         return self.media_url('mobi')
 427
 428     def txt_url(self):
 429         return self.media_url('txt')
 430
 431     def fb2_url(self):
 432         return self.media_url('fb2')
 433
 434     def xml_url(self):
 435         return self.media_url('xml')
 436
 437     def has_description(self):
 438         return len(self.description) > 0
 439     has_description.short_description = _('description')
 440     has_description.boolean = True
 441
 442     def has_mp3_file(self):
 443         return self.has_media("mp3")
 444     has_mp3_file.short_description = 'MP3'
 445     has_mp3_file.boolean = True
 446
 447     def has_ogg_file(self):
 448         return self.has_media("ogg")
 449     has_ogg_file.short_description = 'OGG'
 450     has_ogg_file.boolean = True
 451
 452     def has_daisy_file(self):
 453         return self.has_media("daisy")
 454     has_daisy_file.short_description = 'DAISY'
 455     has_daisy_file.boolean = True
 456
 457     @property
 458     def media_daisy(self):
 459         return self.get_media('daisy')
 460
 461     def get_audiobooks(self):
 462         ogg_files = {}
 463         for m in self.media.filter(type='ogg').order_by().iterator():
 464             ogg_files[m.name] = m
 465
 466         audiobooks = []
 467         projects = set()
 468         total_duration = 0
 469         for mp3 in self.media.filter(type='mp3').iterator():
 470             # ogg files are always from the same project
 471             meta = mp3.get_extra_info_json()
 472             project = meta.get('project')
 473             if not project:
 474                 # temporary fallback
 475                 project = 'CzytamySłuchając'
 476
 477             projects.add((project, meta.get('funded_by', '')))
 478             total_duration += mp3.duration or 0
 479
 480             media = {'mp3': mp3}
 481
 482             ogg = ogg_files.get(mp3.name)
 483             if ogg:
 484                 media['ogg'] = ogg
 485             audiobooks.append(media)
 486
 487         projects = sorted(projects)
 488         total_duration = '%d:%02d' % (
 489             total_duration // 60,
 490             total_duration % 60
 491         )
 492         return audiobooks, projects, total_duration
 493
 494     def wldocument(self, parse_dublincore=True, inherit=True):
 495         from catalogue.import_utils import ORMDocProvider
 496         from librarian.parser import WLDocument
 497
 498         if inherit and self.parent:
 499             meta_fallbacks = self.parent.cover_info()
 500         else:
 501             meta_fallbacks = None
 502
 503         return WLDocument.from_file(
 504             self.xml_file.path,
 505             provider=ORMDocProvider(self),
 506             parse_dublincore=parse_dublincore,
 507             meta_fallbacks=meta_fallbacks)
 508
 509     def wldocument2(self):
 510         from catalogue.import_utils import ORMDocProvider
 511         from librarian.document import WLDocument
 512         doc = WLDocument(
 513             self.xml_file.path,
 514             provider=ORMDocProvider(self)
 515         )
 516         doc.meta.update(self.cover_info())
 517         return doc
 518
 519
 520     @staticmethod
 521     def zip_format(format_):
 522         def pretty_file_name(book):
 523             return "%s/%s.%s" % (
 524                 book.get_extra_info_json()['author'],
 525                 book.slug,
 526                 format_)
 527
 528         field_name = "%s_file" % format_
 529         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 530         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 531         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 532
 533     def zip_audiobooks(self, format_):
 534         bm = BookMedia.objects.filter(book=self, type=format_)
 535         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 536         licenses = set()
 537         for m in bm:
 538             license = constants.LICENSES.get(
 539                 m.get_extra_info_json().get('license'), {}
 540             ).get('locative')
 541             if license:
 542                 licenses.add(license)
 543         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 544             'licenses': licenses,
 545         })
 546         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 547
 548     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 549         if not self.findable:
 550             return
 551         if index is None:
 552             from search.index import Index
 553             index = Index()
 554         try:
 555             index.index_book(self, book_info)
 556             if index_tags:
 557                 index.index_tags()
 558             if commit:
 559                 index.index.commit()
 560         except Exception as e:
 561             index.index.rollback()
 562             raise e
 563
 564     # will make problems in conjunction with paid previews
 565     def download_pictures(self, remote_gallery_url):
 566         # This is only needed for legacy relative image paths.
 567         gallery_path = self.gallery_path()
 568         # delete previous files, so we don't include old files in ebooks
 569         if os.path.isdir(gallery_path):
 570             for filename in os.listdir(gallery_path):
 571                 file_path = os.path.join(gallery_path, filename)
 572                 os.unlink(file_path)
 573         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 574         if ilustr_elements:
 575             makedirs(gallery_path)
 576             for ilustr in ilustr_elements:
 577                 ilustr_src = ilustr.get('src')
 578                 if '/' in ilustr_src:
 579                     continue
 580                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 581                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 582
 583     def load_abstract(self):
 584         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 585         if abstract is not None:
 586             self.abstract = transform_abstrakt(abstract)
 587         else:
 588             self.abstract = ''
 589
 590     def load_toc(self):
 591         self.toc = ''
 592         if self.html_file:
 593             parser = html.HTMLParser(encoding='utf-8')
 594             tree = html.parse(self.html_file.path, parser=parser)
 595             toc = tree.find('//div[@id="toc"]/ol')
 596             if toc is None or not len(toc):
 597                 return
 598             html_link = reverse('book_text', args=[self.slug])
 599             for a in toc.findall('.//a'):
 600                 a.attrib['href'] = html_link + a.attrib['href']
 601             self.toc = html.tostring(toc, encoding='unicode')
 602             # div#toc
 603
 604     @classmethod
 605     def from_xml_file(cls, xml_file, **kwargs):
 606         from django.core.files import File
 607         from librarian import dcparser
 608
 609         # use librarian to parse meta-data
 610         book_info = dcparser.parse(xml_file)
 611
 612         if not isinstance(xml_file, File):
 613             xml_file = File(open(xml_file))
 614
 615         try:
 616             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 617         finally:
 618             xml_file.close()
 619
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of formats (or 'cover') not to build; merged
                with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as
                a preview lasting that many days from today.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview settings are applied only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used here is flagged as used by books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; the saved shelves are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order, numbering them from 0.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # These formats are built only for books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Signal that the book has been published.
        cls.published.send(sender=cls, instance=book)
        return book
 739
 740     def get_master(self):
 741         master_tags = [
 742             'opowiadanie',
 743             'powiesc',
 744             'dramat_wierszowany_l',
 745             'dramat_wierszowany_lp',
 746             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 747             'wywiad',
 748         ]
 749         from librarian.parser import WLDocument
 750         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 751         root = wld.edoc.getroot()
 752         for master in root.iter():
 753             if master.tag in master_tags:
 754                 return master
 755
 756     def update_references(self):
 757         from references.models import Entity, Reference
 758         master = self.get_master()
 759         if master is None:
 760             master = []
 761         found = set()
 762         for i, sec in enumerate(master):
 763             for ref in sec.findall('.//ref'):
 764                 href = ref.attrib.get('href', '')
 765                 if not href or href in found:
 766                     continue
 767                 found.add(href)
 768                 entity, created = Entity.objects.get_or_create(
 769                     uri=href
 770                 )
 771                 ref, created = Reference.objects.get_or_create(
 772                     book=self,
 773                     entity=entity
 774                 )
 775                 ref.first_section = 'sec%d' % (i + 1)
 776                 entity.populate()
 777                 entity.save()
 778         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 779
 780     @property
 781     def references(self):
 782         return self.reference_set.all().select_related('entity')
 783
 784     @classmethod
 785     @transaction.atomic
 786     def repopulate_ancestors(cls):
 787         """Fixes the ancestry cache."""
 788         # TODO: table names
 789         cursor = connection.cursor()
 790         if connection.vendor == 'postgres':
 791             cursor.execute("TRUNCATE catalogue_book_ancestor")
 792             cursor.execute("""
 793                 WITH RECURSIVE ancestry AS (
 794                     SELECT book.id, book.parent_id
 795                     FROM catalogue_book AS book
 796                     WHERE book.parent_id IS NOT NULL
 797                     UNION
 798                     SELECT ancestor.id, book.parent_id
 799                     FROM ancestry AS ancestor, catalogue_book AS book
 800                     WHERE ancestor.parent_id = book.id
 801                         AND book.parent_id IS NOT NULL
 802                     )
 803                 INSERT INTO catalogue_book_ancestor
 804                     (from_book_id, to_book_id)
 805                     SELECT id, parent_id
 806                     FROM ancestry
 807                     ORDER BY id;
 808                 """)
 809         else:
 810             cursor.execute("DELETE FROM catalogue_book_ancestor")
 811             for b in cls.objects.exclude(parent=None):
 812                 parent = b.parent
 813                 while parent is not None:
 814                     b.ancestor.add(parent)
 815                     parent = parent.parent
 816
 817     @property
 818     def ancestors(self):
 819         if self.parent:
 820             for anc in self.parent.ancestors:
 821                 yield anc
 822             yield self.parent
 823         else:
 824             return []
 825
 826     def clear_cache(self):
 827         clear_cached_renders(self.mini_box)
 828         clear_cached_renders(self.mini_box_nolink)
 829
 830     def cover_info(self, inherit=True):
 831         """Returns a dictionary to serve as fallback for BookInfo.
 832
 833         For now, the only thing inherited is the cover image.
 834         """
 835         need = False
 836         info = {}
 837         for field in ('cover_url', 'cover_by', 'cover_source'):
 838             val = self.get_extra_info_json().get(field)
 839             if val:
 840                 info[field] = val
 841             else:
 842                 need = True
 843         if inherit and need and self.parent is not None:
 844             parent_info = self.parent.cover_info()
 845             parent_info.update(info)
 846             info = parent_info
 847         return info
 848
 849     def related_themes(self):
 850         return Tag.objects.usage_for_queryset(
 851             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 852             counts=True).filter(category='theme').order_by('-count')
 853
 854     def parent_cover_changed(self):
 855         """Called when parent book's cover image is changed."""
 856         if not self.cover_info(inherit=False):
 857             if 'cover' not in app_settings.DONT_BUILD:
 858                 self.cover.build_delay()
 859                 self.cover_clean.build_delay()
 860                 self.cover_thumb.build_delay()
 861                 self.cover_api_thumb.build_delay()
 862                 self.simple_cover.build_delay()
 863                 self.cover_ebookpoint.build_delay()
 864             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 865                 if format_ not in app_settings.DONT_BUILD:
 866                     getattr(self, '%s_file' % format_).build_delay()
 867             for child in self.children.all():
 868                 child.parent_cover_changed()
 869
 870     def other_versions(self):
 871         """Find other versions (i.e. in other languages) of the book."""
 872         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 873
 874     def parents(self):
 875         books = []
 876         parent = self.parent
 877         while parent is not None:
 878             books.insert(0, parent)
 879             parent = parent.parent
 880         return books
 881
 882     def pretty_title(self, html_links=False):
 883         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 884         books = self.parents() + [self]
 885         names.extend([(b.title, b.get_absolute_url()) for b in books])
 886
 887         if html_links:
 888             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 889         else:
 890             names = [tag[0] for tag in names]
 891         return ', '.join(names)
 892
 893     def publisher(self):
 894         publisher = self.get_extra_info_json()['publisher']
 895         if isinstance(publisher, str):
 896             return publisher
 897         elif isinstance(publisher, list):
 898             return ', '.join(publisher)
 899
 900     @classmethod
 901     def tagged_top_level(cls, tags):
 902         """ Returns top-level books tagged with `tags`.
 903
 904         It only returns those books which don't have ancestors which are
 905         also tagged with those tags.
 906
 907         """
 908         objects = cls.tagged.with_all(tags)
 909         return objects.filter(findable=True).exclude(ancestor__in=objects)
 910
 911     @classmethod
 912     def book_list(cls, book_filter=None):
 913         """Generates a hierarchical listing of all books.
 914
 915         Books are optionally filtered with a test function.
 916
 917         """
 918
 919         books_by_parent = {}
 920         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 921         if book_filter:
 922             books = books.filter(book_filter).distinct()
 923
 924             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 925             for book in books.iterator():
 926                 parent = book.parent_id
 927                 if parent not in book_ids:
 928                     parent = None
 929                 books_by_parent.setdefault(parent, []).append(book)
 930         else:
 931             for book in books.iterator():
 932                 books_by_parent.setdefault(book.parent_id, []).append(book)
 933
 934         orphans = []
 935         books_by_author = OrderedDict()
 936         for tag in Tag.objects.filter(category='author').iterator():
 937             books_by_author[tag] = []
 938
 939         for book in books_by_parent.get(None, ()):
 940             authors = list(book.authors().only('pk'))
 941             if authors:
 942                 for author in authors:
 943                     books_by_author[author].append(book)
 944             else:
 945                 orphans.append(book)
 946
 947         return books_by_author, orphans, books_by_parent
 948
    # Maps an audience code to a (rank, Polish label) pair. audiences_pl()
    # sorts by these pairs and returns only the labels. Several codes share
    # a label: "szkoła podstawowa" (primary school), "gimnazjum" (middle
    # school), "liceum" (high school).
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 959
 960     def audiences_pl(self):
 961         audiences = self.get_extra_info_json().get('audiences', [])
 962         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 963         return [a[1] for a in audiences]
 964
 965     def stage_note(self):
 966         stage = self.get_extra_info_json().get('stage')
 967         if stage and stage < '0.4':
 968             return (_('This work needs modernisation'),
 969                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 970         else:
 971             return None, None
 972
 973     def choose_fragments(self, number):
 974         fragments = self.fragments.order_by()
 975         fragments_count = fragments.count()
 976         if not fragments_count and self.children.exists():
 977             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 978             fragments_count = fragments.count()
 979         if fragments_count:
 980             if fragments_count > number:
 981                 offset = randint(0, fragments_count - number)
 982             else:
 983                 offset = 0
 984             return fragments[offset : offset + number]
 985         elif self.parent:
 986             return self.parent.choose_fragments(number)
 987         else:
 988             return []
 989
 990     def choose_fragment(self):
 991         fragments = self.choose_fragments(1)
 992         if fragments:
 993             return fragments[0]
 994         else:
 995             return None
 996
 997     def fragment_data(self):
 998         fragment = self.choose_fragment()
 999         if fragment:
1000             return {
1001                 'title': fragment.book.pretty_title(),
1002                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1003             }
1004         else:
1005             return None
1006
1007     def update_popularity(self):
1008         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1009         try:
1010             pop = self.popularity
1011             pop.count = count
1012             pop.save()
1013         except BookPopularity.DoesNotExist:
1014             BookPopularity.objects.create(book=self, count=count)
1015
1016     def ridero_link(self):
1017         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1018
1019     def like(self, user):
1020         from social.utils import likes, get_set, set_sets
1021         if not likes(user, self):
1022             tag = get_set(user, '')
1023             set_sets(user, self, [tag])
1024
1025     def unlike(self, user):
1026         from social.utils import likes, set_sets
1027         if likes(user, self):
1028             set_sets(user, self, [])
1029
1030     def full_sort_key(self):
1031         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1032
1033     def cover_color(self):
1034         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1035
1036     @cached_render('catalogue/book_mini_box.html')
1037     def mini_box(self):
1038         return {
1039             'book': self
1040         }
1041
1042     @cached_render('catalogue/book_mini_box.html')
1043     def mini_box_nolink(self):
1044         return {
1045             'book': self,
1046             'no_link': True,
1047         }
1048
def add_file_fields():
    """Attach one file field per ebook format to the Book model.

    For every format in ``Book.formats`` an ``EbookField`` named
    ``<format>_file`` is added to ``Book``. For every format except 'xml',
    an indexed, non-editable ``CharField`` named ``<format>_file_etag`` is
    added as well.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # The doubled %% leaves one '%s' placeholder in the resulting path
        # pattern, e.g. 'book/pdf/%s.pdf' for format 'pdf'.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        # Publish the callable under a per-format module-level name.
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        # NOTE(review): the label is interpolated before being passed to
        # `_`, so the translation lookup key is e.g. "PDF file" rather than
        # the "%s file" template.
        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')


# Run at import time so the fields exist on Book as soon as the module is loaded.
add_file_fields()
1070
1071
1072 class BookPopularity(models.Model):
1073     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1074     count = models.IntegerField(default=0, db_index=True)