src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.conf import settings
  12 from django.db import connection, models, transaction
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.template.loader import render_to_string
  16 from django.urls import reverse
  17 from django.utils.translation import ugettext_lazy as _, get_language
  18 from django.utils.deconstruct import deconstructible
  19 from fnpdjango.storage import BofhFileSystemStorage
  20
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue.fields import EbookField
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from catalogue import tasks
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Module-level storage instance; passed as `storage=` to the `cover` field of Book.
bofh_storage = BofhFileSystemStorage()
  34
  35
  36 @deconstructible
  37 class UploadToPath(object):
  38     def __init__(self, path):
  39         self.path = path
  40
  41     def __call__(self, instance, filename):
  42         return self.path % instance.slug
  43
  44
# Upload-path builders for the cover image variants of Book.
# The `%s` in each template is filled with the book's slug (see UploadToPath.__call__).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  50
  51
  52 def _ebook_upload_to(upload_path):
  53     return UploadToPath(upload_path)
  54
  55
  56 class Book(models.Model):
  57     """Represents a book imported from WL-XML."""
  58     title = models.CharField(_('title'), max_length=32767)
  59     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  60     sort_key_author = models.CharField(
  61         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
  62     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  63     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  64     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  65     description = models.TextField(_('description'), blank=True)
  66     abstract = models.TextField(_('abstract'), blank=True)
  67     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  68     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
  69     parent_number = models.IntegerField(_('parent number'), default=0)
  70     extra_info = models.TextField(_('extra information'), default='{}')
  71     gazeta_link = models.CharField(blank=True, max_length=240)
  72     wiki_link = models.CharField(blank=True, max_length=240)
  73     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  74     recommended = models.BooleanField(_('recommended'), default=False)
  75     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
  76     preview = models.BooleanField(_('preview'), default=False)
  77     preview_until = models.DateField(_('preview until'), blank=True, null=True)
  78     preview_key = models.CharField(max_length=32, blank=True, null=True)
  79     findable = models.BooleanField(_('findable'), default=True, db_index=True)
  80
  81     # files generated during publication
  82     cover = EbookField(
  83         'cover', _('cover'),
  84         null=True, blank=True,
  85         upload_to=_cover_upload_to,
  86         storage=bofh_storage, max_length=255)
  87     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  88     # Cleaner version of cover for thumbs
  89     cover_thumb = EbookField(
  90         'cover_thumb', _('cover thumbnail'),
  91         null=True, blank=True,
  92         upload_to=_cover_thumb_upload_to,
  93         max_length=255)
  94     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  95     cover_api_thumb = EbookField(
  96         'cover_api_thumb', _('cover thumbnail for mobile app'),
  97         null=True, blank=True,
  98         upload_to=_cover_api_thumb_upload_to,
  99         max_length=255)
 100     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 101     simple_cover = EbookField(
 102         'simple_cover', _('cover for mobile app'),
 103         null=True, blank=True,
 104         upload_to=_simple_cover_upload_to,
 105         max_length=255)
 106     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 107     cover_ebookpoint = EbookField(
 108         'cover_ebookpoint', _('cover for Ebookpoint'),
 109         null=True, blank=True,
 110         upload_to=_cover_ebookpoint_upload_to,
 111         max_length=255)
 112     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 113     ebook_formats = constants.EBOOK_FORMATS
 114     formats = ebook_formats + ['html', 'xml']
 115
 116     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
 117     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
 118
 119     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
 120     has_audience = models.BooleanField(default=False)
 121
 122     objects = models.Manager()
 123     tagged = managers.ModelTaggedItemManager(Tag)
 124     tags = managers.TagDescriptor(Tag)
 125     tag_relations = GenericRelation(Tag.intermediary_table_model)
 126
 127     html_built = django.dispatch.Signal()
 128     published = django.dispatch.Signal()
 129
 130     SORT_KEY_SEP = '$'
 131
 132     class AlreadyExists(Exception):
 133         pass
 134
    class Meta:
        # Default ordering: by the author sort key first, then by the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 140
 141     def __str__(self):
 142         return self.title
 143
 144     def get_extra_info_json(self):
 145         return json.loads(self.extra_info or '{}')
 146
 147     def get_initial(self):
 148         try:
 149             return re.search(r'\w', self.title, re.U).group(0)
 150         except AttributeError:
 151             return ''
 152
 153     def authors(self):
 154         return self.tags.filter(category='author')
 155
 156     def epochs(self):
 157         return self.tags.filter(category='epoch')
 158
 159     def genres(self):
 160         return self.tags.filter(category='genre')
 161
 162     def kinds(self):
 163         return self.tags.filter(category='kind')
 164
 165     def tag_unicode(self, category):
 166         relations = prefetched_relations(self, category)
 167         if relations:
 168             return ', '.join(rel.tag.name for rel in relations)
 169         else:
 170             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 171
 172     def tags_by_category(self):
 173         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 174
 175     def author_unicode(self):
 176         return self.cached_author
 177
 178     def kind_unicode(self):
 179         return self.tag_unicode('kind')
 180
 181     def epoch_unicode(self):
 182         return self.tag_unicode('epoch')
 183
 184     def genre_unicode(self):
 185         return self.tag_unicode('genre')
 186
 187     def translators(self):
 188         translators = self.get_extra_info_json().get('translators') or []
 189         return [
 190             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 191         ]
 192
 193     def translator(self):
 194         translators = self.get_extra_info_json().get('translators')
 195         if not translators:
 196             return None
 197         if len(translators) > 3:
 198             translators = translators[:2]
 199             others = ' i inni'
 200         else:
 201             others = ''
 202         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 203
 204     def cover_source(self):
 205         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 206
 207     @property
 208     def isbn_pdf(self):
 209         return self.get_extra_info_json().get('isbn_pdf')
 210
 211     @property
 212     def isbn_epub(self):
 213         return self.get_extra_info_json().get('isbn_epub')
 214
 215     @property
 216     def isbn_mobi(self):
 217         return self.get_extra_info_json().get('isbn_mobi')
 218
 219
 220     def save(self, force_insert=False, force_update=False, **kwargs):
 221         from sortify import sortify
 222
 223         self.sort_key = sortify(self.title)[:120]
 224         self.title = str(self.title)  # ???
 225
 226         try:
 227             author = self.authors().first().sort_key
 228         except AttributeError:
 229             author = ''
 230         self.sort_key_author = author
 231
 232         self.cached_author = self.tag_unicode('author')
 233         self.has_audience = 'audience' in self.get_extra_info_json()
 234
 235         if self.preview and not self.preview_key:
 236             self.preview_key = get_random_hash(self.slug)[:32]
 237
 238         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 239
 240         return ret
 241
 242     def get_absolute_url(self):
 243         return reverse('book_detail', args=[self.slug])
 244
 245     def gallery_path(self):
 246         return gallery_path(self.slug)
 247
 248     def gallery_url(self):
 249         return gallery_url(self.slug)
 250
 251     def get_first_text(self):
 252         if self.html_file:
 253             return self
 254         child = self.children.all().order_by('parent_number').first()
 255         if child is not None:
 256             return child.get_first_text()
 257
 258     def get_last_text(self):
 259         if self.html_file:
 260             return self
 261         child = self.children.all().order_by('parent_number').last()
 262         if child is not None:
 263             return child.get_last_text()
 264
 265     def get_prev_text(self):
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_last_text()
 271         return self.parent.get_prev_text()
 272
 273     def get_next_text(self):
 274         if not self.parent:
 275             return None
 276         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 277         if sibling is not None:
 278             return sibling.get_first_text()
 279         return self.parent.get_next_text()
 280
 281     def get_siblings(self):
 282         if not self.parent:
 283             return []
 284         return self.parent.children.all().order_by('parent_number')
 285
 286     @property
 287     def name(self):
 288         return self.title
 289
 290     def language_code(self):
 291         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 292
 293     def language_name(self):
 294         return dict(settings.LANGUAGES).get(self.language_code(), "")
 295
 296     def is_foreign(self):
 297         return self.language_code() != settings.LANGUAGE_CODE
 298
 299     def set_audio_length(self):
 300         length = self.get_audio_length()
 301         if length > 0:
 302             self.audio_length = self.format_audio_length(length)
 303             self.save()
 304
 305     @staticmethod
 306     def format_audio_length(seconds):
 307         """
 308         >>> Book.format_audio_length(1)
 309         '0:01'
 310         >>> Book.format_audio_length(3661)
 311         '1:01:01'
 312         """
 313         if seconds < 60*60:
 314             minutes = seconds // 60
 315             seconds = seconds % 60
 316             return '%d:%02d' % (minutes, seconds)
 317         else:
 318             hours = seconds // 3600
 319             minutes = seconds % 3600 // 60
 320             seconds = seconds % 60
 321             return '%d:%02d:%02d' % (hours, minutes, seconds)
 322
 323     def get_audio_length(self):
 324         total = 0
 325         for media in self.get_mp3() or ():
 326             total += app_settings.GET_MP3_LENGTH(media.file.path)
 327         return int(total)
 328
 329     def has_media(self, type_):
 330         if type_ in Book.formats:
 331             return bool(getattr(self, "%s_file" % type_))
 332         else:
 333             return self.media.filter(type=type_).exists()
 334
 335     def has_audio(self):
 336         return self.has_media('mp3')
 337
 338     def get_media(self, type_):
 339         if self.has_media(type_):
 340             if type_ in Book.formats:
 341                 return getattr(self, "%s_file" % type_)
 342             else:
 343                 return self.media.filter(type=type_)
 344         else:
 345             return None
 346
 347     def get_mp3(self):
 348         return self.get_media("mp3")
 349
 350     def get_odt(self):
 351         return self.get_media("odt")
 352
 353     def get_ogg(self):
 354         return self.get_media("ogg")
 355
 356     def get_daisy(self):
 357         return self.get_media("daisy")
 358
 359     def media_url(self, format_):
 360         media = self.get_media(format_)
 361         if media:
 362             if self.preview:
 363                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 364             else:
 365                 return media.url
 366         else:
 367             return None
 368
 369     def html_url(self):
 370         return self.media_url('html')
 371
 372     def pdf_url(self):
 373         return self.media_url('pdf')
 374
 375     def epub_url(self):
 376         return self.media_url('epub')
 377
 378     def mobi_url(self):
 379         return self.media_url('mobi')
 380
 381     def txt_url(self):
 382         return self.media_url('txt')
 383
 384     def fb2_url(self):
 385         return self.media_url('fb2')
 386
 387     def xml_url(self):
 388         return self.media_url('xml')
 389
 390     def has_description(self):
 391         return len(self.description) > 0
 392     has_description.short_description = _('description')
 393     has_description.boolean = True
 394
 395     def has_mp3_file(self):
 396         return self.has_media("mp3")
 397     has_mp3_file.short_description = 'MP3'
 398     has_mp3_file.boolean = True
 399
 400     def has_ogg_file(self):
 401         return self.has_media("ogg")
 402     has_ogg_file.short_description = 'OGG'
 403     has_ogg_file.boolean = True
 404
 405     def has_daisy_file(self):
 406         return self.has_media("daisy")
 407     has_daisy_file.short_description = 'DAISY'
 408     has_daisy_file.boolean = True
 409
 410     def get_audiobooks(self):
 411         ogg_files = {}
 412         for m in self.media.filter(type='ogg').order_by().iterator():
 413             ogg_files[m.name] = m
 414
 415         audiobooks = []
 416         projects = set()
 417         for mp3 in self.media.filter(type='mp3').iterator():
 418             # ogg files are always from the same project
 419             meta = mp3.get_extra_info_json()
 420             project = meta.get('project')
 421             if not project:
 422                 # temporary fallback
 423                 project = 'CzytamySłuchając'
 424
 425             projects.add((project, meta.get('funded_by', '')))
 426
 427             media = {'mp3': mp3}
 428
 429             ogg = ogg_files.get(mp3.name)
 430             if ogg:
 431                 media['ogg'] = ogg
 432             audiobooks.append(media)
 433
 434         projects = sorted(projects)
 435         return audiobooks, projects
 436
 437     def wldocument(self, parse_dublincore=True, inherit=True):
 438         from catalogue.import_utils import ORMDocProvider
 439         from librarian.parser import WLDocument
 440
 441         if inherit and self.parent:
 442             meta_fallbacks = self.parent.cover_info()
 443         else:
 444             meta_fallbacks = None
 445
 446         return WLDocument.from_file(
 447             self.xml_file.path,
 448             provider=ORMDocProvider(self),
 449             parse_dublincore=parse_dublincore,
 450             meta_fallbacks=meta_fallbacks)
 451
 452     def wldocument2(self):
 453         from catalogue.import_utils import ORMDocProvider
 454         from librarian.document import WLDocument
 455         doc = WLDocument(
 456             self.xml_file.path,
 457             provider=ORMDocProvider(self)
 458         )
 459         doc.meta.update(self.cover_info())
 460         return doc
 461
 462
 463     @staticmethod
 464     def zip_format(format_):
 465         def pretty_file_name(book):
 466             return "%s/%s.%s" % (
 467                 book.get_extra_info_json()['author'],
 468                 book.slug,
 469                 format_)
 470
 471         field_name = "%s_file" % format_
 472         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 473         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 474         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 475
 476     def zip_audiobooks(self, format_):
 477         bm = BookMedia.objects.filter(book=self, type=format_)
 478         paths = map(lambda bm: (None, bm.file.path), bm)
 479         licenses = set()
 480         for m in bm:
 481             license = constants.LICENSES.get(
 482                 m.get_extra_info_json().get('license'), {}
 483             ).get('locative')
 484             if license:
 485                 licenses.add(license)
 486         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 487             'licenses': licenses,
 488         })
 489         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 490
 491     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 492         if not self.findable:
 493             return
 494         if index is None:
 495             from search.index import Index
 496             index = Index()
 497         try:
 498             index.index_book(self, book_info)
 499             if index_tags:
 500                 index.index_tags()
 501             if commit:
 502                 index.index.commit()
 503         except Exception as e:
 504             index.index.rollback()
 505             raise e
 506
 507     # will make problems in conjunction with paid previews
 508     def download_pictures(self, remote_gallery_url):
 509         gallery_path = self.gallery_path()
 510         # delete previous files, so we don't include old files in ebooks
 511         if os.path.isdir(gallery_path):
 512             for filename in os.listdir(gallery_path):
 513                 file_path = os.path.join(gallery_path, filename)
 514                 os.unlink(file_path)
 515         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 516         if ilustr_elements:
 517             makedirs(gallery_path)
 518             for ilustr in ilustr_elements:
 519                 ilustr_src = ilustr.get('src')
 520                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 521                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 522
 523     def load_abstract(self):
 524         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 525         if abstract is not None:
 526             self.abstract = transform_abstrakt(abstract)
 527         else:
 528             self.abstract = ''
 529
 530     @classmethod
 531     def from_xml_file(cls, xml_file, **kwargs):
 532         from django.core.files import File
 533         from librarian import dcparser
 534
 535         # use librarian to parse meta-data
 536         book_info = dcparser.parse(xml_file)
 537
 538         if not isinstance(xml_file, File):
 539             xml_file = File(open(xml_file))
 540
 541         try:
 542             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 543         finally:
 544             xml_file.close()
 545
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and, when
                present, `parts`.
            overwrite: allow updating an existing book with the same slug.
            dont_build: names to skip when building ('cover' or format
                names); combined with `app_settings.DONT_BUILD`.
            search_index: queue the search-indexing task (also requires
                `findable` and `settings.NO_SEARCH_INDEX` being false).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: when given, illustrations are downloaded
                from it via `download_pictures`.
            days: for a newly created book, a non-zero value marks it as a
                preview until today + `days`.
            findable: stored on the book.

        Returns the saved book. Sends the `published` signal at the end.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the old cover data to detect a cover change below.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Flag every metadata tag as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set with the metadata tags plus the shelves saved above.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position in the list.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats from EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let attached and disowned children react to the cover change.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 663
 664     def get_master(self):
 665         master_tags = [
 666             'opowiadanie',
 667             'powiesc',
 668             'dramat_wierszowany_l',
 669             'dramat_wierszowany_lp',
 670             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 671             'wywiad',
 672         ]
 673         from librarian.parser import WLDocument
 674         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 675         root = wld.edoc.getroot()
 676         for master in root.iter():
 677             if master.tag in master_tags:
 678                 return master
 679
 680     def update_references(self):
 681         from references.models import Entity, Reference
 682         master = self.get_master()
 683         found = set()
 684         for i, sec in enumerate(master):
 685             for ref in sec.findall('.//ref'):
 686                 href = ref.attrib.get('href', '')
 687                 if not href or href in found:
 688                     continue
 689                 found.add(href)
 690                 entity, created = Entity.objects.get_or_create(
 691                     uri=href
 692                 )
 693                 ref, created = Reference.objects.get_or_create(
 694                     book=self,
 695                     entity=entity
 696                 )
 697                 ref.first_section = 'sec%d' % (i + 1)
 698                 entity.populate()
 699                 entity.save()
 700         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 701
 702     @property
 703     def references(self):
 704         return self.reference_set.all().select_related('entity')
 705
 706     @classmethod
 707     @transaction.atomic
 708     def repopulate_ancestors(cls):
 709         """Fixes the ancestry cache."""
 710         # TODO: table names
 711         cursor = connection.cursor()
 712         if connection.vendor == 'postgres':
 713             cursor.execute("TRUNCATE catalogue_book_ancestor")
 714             cursor.execute("""
 715                 WITH RECURSIVE ancestry AS (
 716                     SELECT book.id, book.parent_id
 717                     FROM catalogue_book AS book
 718                     WHERE book.parent_id IS NOT NULL
 719                     UNION
 720                     SELECT ancestor.id, book.parent_id
 721                     FROM ancestry AS ancestor, catalogue_book AS book
 722                     WHERE ancestor.parent_id = book.id
 723                         AND book.parent_id IS NOT NULL
 724                     )
 725                 INSERT INTO catalogue_book_ancestor
 726                     (from_book_id, to_book_id)
 727                     SELECT id, parent_id
 728                     FROM ancestry
 729                     ORDER BY id;
 730                 """)
 731         else:
 732             cursor.execute("DELETE FROM catalogue_book_ancestor")
 733             for b in cls.objects.exclude(parent=None):
 734                 parent = b.parent
 735                 while parent is not None:
 736                     b.ancestor.add(parent)
 737                     parent = parent.parent
 738
 739     def clear_cache(self):
 740         clear_cached_renders(self.mini_box)
 741         clear_cached_renders(self.mini_box_nolink)
 742
 743     def cover_info(self, inherit=True):
 744         """Returns a dictionary to serve as fallback for BookInfo.
 745
 746         For now, the only thing inherited is the cover image.
 747         """
 748         need = False
 749         info = {}
 750         for field in ('cover_url', 'cover_by', 'cover_source'):
 751             val = self.get_extra_info_json().get(field)
 752             if val:
 753                 info[field] = val
 754             else:
 755                 need = True
 756         if inherit and need and self.parent is not None:
 757             parent_info = self.parent.cover_info()
 758             parent_info.update(info)
 759             info = parent_info
 760         return info
 761
 762     def related_themes(self):
 763         return Tag.objects.usage_for_queryset(
 764             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 765             counts=True).filter(category='theme')
 766
 767     def parent_cover_changed(self):
 768         """Called when parent book's cover image is changed."""
 769         if not self.cover_info(inherit=False):
 770             if 'cover' not in app_settings.DONT_BUILD:
 771                 self.cover.build_delay()
 772                 self.cover_thumb.build_delay()
 773                 self.cover_api_thumb.build_delay()
 774                 self.simple_cover.build_delay()
 775             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 776                 if format_ not in app_settings.DONT_BUILD:
 777                     getattr(self, '%s_file' % format_).build_delay()
 778             for child in self.children.all():
 779                 child.parent_cover_changed()
 780
 781     def other_versions(self):
 782         """Find other versions (i.e. in other languages) of the book."""
 783         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 784
 785     def parents(self):
 786         books = []
 787         parent = self.parent
 788         while parent is not None:
 789             books.insert(0, parent)
 790             parent = parent.parent
 791         return books
 792
 793     def pretty_title(self, html_links=False):
 794         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 795         books = self.parents() + [self]
 796         names.extend([(b.title, b.get_absolute_url()) for b in books])
 797
 798         if html_links:
 799             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 800         else:
 801             names = [tag[0] for tag in names]
 802         return ', '.join(names)
 803
 804     def publisher(self):
 805         publisher = self.get_extra_info_json()['publisher']
 806         if isinstance(publisher, str):
 807             return publisher
 808         elif isinstance(publisher, list):
 809             return ', '.join(publisher)
 810
 811     @classmethod
 812     def tagged_top_level(cls, tags):
 813         """ Returns top-level books tagged with `tags`.
 814
 815         It only returns those books which don't have ancestors which are
 816         also tagged with those tags.
 817
 818         """
 819         objects = cls.tagged.with_all(tags)
 820         return objects.filter(findable=True).exclude(ancestor__in=objects)
 821
 822     @classmethod
 823     def book_list(cls, book_filter=None):
 824         """Generates a hierarchical listing of all books.
 825
 826         Books are optionally filtered with a test function.
 827
 828         """
 829
 830         books_by_parent = {}
 831         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 832         if book_filter:
 833             books = books.filter(book_filter).distinct()
 834
 835             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 836             for book in books.iterator():
 837                 parent = book.parent_id
 838                 if parent not in book_ids:
 839                     parent = None
 840                 books_by_parent.setdefault(parent, []).append(book)
 841         else:
 842             for book in books.iterator():
 843                 books_by_parent.setdefault(book.parent_id, []).append(book)
 844
 845         orphans = []
 846         books_by_author = OrderedDict()
 847         for tag in Tag.objects.filter(category='author').iterator():
 848             books_by_author[tag] = []
 849
 850         for book in books_by_parent.get(None, ()):
 851             authors = list(book.authors().only('pk'))
 852             if authors:
 853                 for author in authors:
 854                     books_by_author[author].append(book)
 855             else:
 856                 orphans.append(book)
 857
 858         return books_by_author, orphans, books_by_parent
 859
    # Maps an audience code found in a book's extra info to a
    # (sort rank, Polish display name) pair; consumed by audiences_pl().
    # Several codes share one pair, so they collapse into a single entry there.
    # The labels are Polish school levels: "szkoła podstawowa" is primary
    # school, "gimnazjum" is middle school, "liceum" is high school.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 870
 871     def audiences_pl(self):
 872         audiences = self.get_extra_info_json().get('audiences', [])
 873         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 874         return [a[1] for a in audiences]
 875
 876     def stage_note(self):
 877         stage = self.get_extra_info_json().get('stage')
 878         if stage and stage < '0.4':
 879             return (_('This work needs modernisation'),
 880                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 881         else:
 882             return None, None
 883
 884     def choose_fragment(self):
 885         fragments = self.fragments.order_by()
 886         fragments_count = fragments.count()
 887         if not fragments_count and self.children.exists():
 888             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 889             fragments_count = fragments.count()
 890         if fragments_count:
 891             return fragments[randint(0, fragments_count - 1)]
 892         elif self.parent:
 893             return self.parent.choose_fragment()
 894         else:
 895             return None
 896
 897     def fragment_data(self):
 898         fragment = self.choose_fragment()
 899         if fragment:
 900             return {
 901                 'title': fragment.book.pretty_title(),
 902                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 903             }
 904         else:
 905             return None
 906
 907     def update_popularity(self):
 908         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 909         try:
 910             pop = self.popularity
 911             pop.count = count
 912             pop.save()
 913         except BookPopularity.DoesNotExist:
 914             BookPopularity.objects.create(book=self, count=count)
 915
 916     def ridero_link(self):
 917         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 918
 919     def like(self, user):
 920         from social.utils import likes, get_set, set_sets
 921         if not likes(user, self):
 922             tag = get_set(user, '')
 923             set_sets(user, self, [tag])
 924
 925     def unlike(self, user):
 926         from social.utils import likes, set_sets
 927         if likes(user, self):
 928             set_sets(user, self, [])
 929
 930     def full_sort_key(self):
 931         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
 932
 933     def cover_color(self):
 934         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
 935
 936     @cached_render('catalogue/book_mini_box.html')
 937     def mini_box(self):
 938         return {
 939             'book': self
 940         }
 941
 942     @cached_render('catalogue/book_mini_box.html')
 943     def mini_box_nolink(self):
 944         return {
 945             'book': self,
 946             'no_link': True,
 947         }
 948
 949 def add_file_fields():
 950     for format_ in Book.formats:
 951         field_name = "%s_file" % format_
 952         # This weird globals() assignment makes Django migrations comfortable.
 953         _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
 954         _upload_to.__name__ = '_%s_upload_to' % format_
 955         globals()[_upload_to.__name__] = _upload_to
 956
 957         EbookField(
 958             format_, _("%s file" % format_.upper()),
 959             upload_to=_upload_to,
 960             storage=bofh_storage,
 961             max_length=255,
 962             blank=True,
 963             default=''
 964         ).contribute_to_class(Book, field_name)
 965         if format_ != 'xml':
 966             models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')
 967
 968
# Runs at import time, so the per-format file fields are attached to Book
# as soon as this module is loaded.
add_file_fields()
 970
 971
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one relation with a Book."""
    # Reachable from the book as `book.popularity`; removed together with the book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter, 0 by default; Book.update_popularity() writes it.
    count = models.IntegerField(default=0, db_index=True)