src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.conf import settings
  12 from django.db import connection, models, transaction
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.template.loader import render_to_string
  16 from django.urls import reverse
  17 from django.utils.translation import ugettext_lazy as _, get_language
  18 from django.utils.deconstruct import deconstructible
  19 from fnpdjango.storage import BofhFileSystemStorage
  20
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue.fields import EbookField
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from catalogue import tasks
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Storage instance shared at module level; passed as `storage` to Book.cover.
bofh_storage = BofhFileSystemStorage()
  34
  35
  36 @deconstructible
  37 class UploadToPath(object):
  38     def __init__(self, path):
  39         self.path = path
  40
  41     def __call__(self, instance, filename):
  42         return self.path % instance.slug
  43
  44
# Upload path builders for the cover image fields of Book; each template's
# placeholder is filled with the book's slug by UploadToPath.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  50
  51
  52 def _ebook_upload_to(upload_path):
  53     return UploadToPath(upload_path)
  54
  55
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key, sort_key_author, cached_author and has_audience are derived
    # values, recomputed in save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by the versions of one book; see other_versions().
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of the book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() for preview books that have no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    ebook_formats = constants.EBOOK_FORMATS
    # Formats looked up as "<format>_file" attributes by has_media()/get_media().
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache, rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta().
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 140
 141     def __str__(self):
 142         return self.title
 143
 144     def get_extra_info_json(self):
 145         return json.loads(self.extra_info or '{}')
 146
 147     def get_initial(self):
 148         try:
 149             return re.search(r'\w', self.title, re.U).group(0)
 150         except AttributeError:
 151             return ''
 152
 153     def authors(self):
 154         return self.tags.filter(category='author')
 155
 156     def epochs(self):
 157         return self.tags.filter(category='epoch')
 158
 159     def genres(self):
 160         return self.tags.filter(category='genre')
 161
 162     def kinds(self):
 163         return self.tags.filter(category='kind')
 164
 165     def tag_unicode(self, category):
 166         relations = prefetched_relations(self, category)
 167         if relations:
 168             return ', '.join(rel.tag.name for rel in relations)
 169         else:
 170             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 171
 172     def tags_by_category(self):
 173         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 174
 175     def author_unicode(self):
 176         return self.cached_author
 177
 178     def kind_unicode(self):
 179         return self.tag_unicode('kind')
 180
 181     def epoch_unicode(self):
 182         return self.tag_unicode('epoch')
 183
 184     def genre_unicode(self):
 185         return self.tag_unicode('genre')
 186
 187     def translators(self):
 188         translators = self.get_extra_info_json().get('translators') or []
 189         return [
 190             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 191         ]
 192
 193     def translator(self):
 194         translators = self.get_extra_info_json().get('translators')
 195         if not translators:
 196             return None
 197         if len(translators) > 3:
 198             translators = translators[:2]
 199             others = ' i inni'
 200         else:
 201             others = ''
 202         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 203
 204     def cover_source(self):
 205         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 206
 207     @property
 208     def isbn_pdf(self):
 209         return self.get_extra_info_json().get('isbn_pdf')
 210
 211     @property
 212     def isbn_epub(self):
 213         return self.get_extra_info_json().get('isbn_epub')
 214
 215     @property
 216     def isbn_mobi(self):
 217         return self.get_extra_info_json().get('isbn_mobi')
 218
 219
 220     def save(self, force_insert=False, force_update=False, **kwargs):
 221         from sortify import sortify
 222
 223         self.sort_key = sortify(self.title)[:120]
 224         self.title = str(self.title)  # ???
 225
 226         try:
 227             author = self.authors().first().sort_key
 228         except AttributeError:
 229             author = ''
 230         self.sort_key_author = author
 231
 232         self.cached_author = self.tag_unicode('author')
 233         self.has_audience = 'audience' in self.get_extra_info_json()
 234
 235         if self.preview and not self.preview_key:
 236             self.preview_key = get_random_hash(self.slug)[:32]
 237
 238         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 239
 240         return ret
 241
 242     def get_absolute_url(self):
 243         return reverse('book_detail', args=[self.slug])
 244
 245     def gallery_path(self):
 246         return gallery_path(self.slug)
 247
 248     def gallery_url(self):
 249         return gallery_url(self.slug)
 250
 251     def get_first_text(self):
 252         if self.html_file:
 253             return self
 254         child = self.children.all().order_by('parent_number').first()
 255         if child is not None:
 256             return child.get_first_text()
 257
 258     def get_last_text(self):
 259         if self.html_file:
 260             return self
 261         child = self.children.all().order_by('parent_number').last()
 262         if child is not None:
 263             return child.get_last_text()
 264
 265     def get_prev_text(self):
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_last_text()
 271         return self.parent.get_prev_text()
 272
 273     def get_next_text(self):
 274         if not self.parent:
 275             return None
 276         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 277         if sibling is not None:
 278             return sibling.get_first_text()
 279         return self.parent.get_next_text()
 280
 281     def get_siblings(self):
 282         if not self.parent:
 283             return []
 284         return self.parent.children.all().order_by('parent_number')
 285
 286     @property
 287     def name(self):
 288         return self.title
 289
 290     def language_code(self):
 291         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 292
 293     def language_name(self):
 294         return dict(settings.LANGUAGES).get(self.language_code(), "")
 295
 296     def is_foreign(self):
 297         return self.language_code() != settings.LANGUAGE_CODE
 298
 299     def set_audio_length(self):
 300         length = self.get_audio_length()
 301         if length > 0:
 302             self.audio_length = self.format_audio_length(length)
 303             self.save()
 304
 305     @staticmethod
 306     def format_audio_length(seconds):
 307         """
 308         >>> Book.format_audio_length(1)
 309         '0:01'
 310         >>> Book.format_audio_length(3661)
 311         '1:01:01'
 312         """
 313         if seconds < 60*60:
 314             minutes = seconds // 60
 315             seconds = seconds % 60
 316             return '%d:%02d' % (minutes, seconds)
 317         else:
 318             hours = seconds // 3600
 319             minutes = seconds % 3600 // 60
 320             seconds = seconds % 60
 321             return '%d:%02d:%02d' % (hours, minutes, seconds)
 322
 323     def get_audio_length(self):
 324         total = 0
 325         for media in self.get_mp3() or ():
 326             total += app_settings.GET_MP3_LENGTH(media.file.path)
 327         return int(total)
 328
 329     def has_media(self, type_):
 330         if type_ in Book.formats:
 331             return bool(getattr(self, "%s_file" % type_))
 332         else:
 333             return self.media.filter(type=type_).exists()
 334
 335     def has_audio(self):
 336         return self.has_media('mp3')
 337
 338     def get_media(self, type_):
 339         if self.has_media(type_):
 340             if type_ in Book.formats:
 341                 return getattr(self, "%s_file" % type_)
 342             else:
 343                 return self.media.filter(type=type_)
 344         else:
 345             return None
 346
 347     def get_mp3(self):
 348         return self.get_media("mp3")
 349
 350     def get_odt(self):
 351         return self.get_media("odt")
 352
 353     def get_ogg(self):
 354         return self.get_media("ogg")
 355
 356     def get_daisy(self):
 357         return self.get_media("daisy")
 358
 359     def media_url(self, format_):
 360         media = self.get_media(format_)
 361         if media:
 362             if self.preview:
 363                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 364             else:
 365                 return media.url
 366         else:
 367             return None
 368
 369     def html_url(self):
 370         return self.media_url('html')
 371
 372     def pdf_url(self):
 373         return self.media_url('pdf')
 374
 375     def epub_url(self):
 376         return self.media_url('epub')
 377
 378     def mobi_url(self):
 379         return self.media_url('mobi')
 380
 381     def txt_url(self):
 382         return self.media_url('txt')
 383
 384     def fb2_url(self):
 385         return self.media_url('fb2')
 386
 387     def xml_url(self):
 388         return self.media_url('xml')
 389
 390     def has_description(self):
 391         return len(self.description) > 0
 392     has_description.short_description = _('description')
 393     has_description.boolean = True
 394
 395     def has_mp3_file(self):
 396         return self.has_media("mp3")
 397     has_mp3_file.short_description = 'MP3'
 398     has_mp3_file.boolean = True
 399
 400     def has_ogg_file(self):
 401         return self.has_media("ogg")
 402     has_ogg_file.short_description = 'OGG'
 403     has_ogg_file.boolean = True
 404
 405     def has_daisy_file(self):
 406         return self.has_media("daisy")
 407     has_daisy_file.short_description = 'DAISY'
 408     has_daisy_file.boolean = True
 409
 410     @property
 411     def media_daisy(self):
 412         return self.get_media('daisy')
 413
 414     def get_audiobooks(self):
 415         ogg_files = {}
 416         for m in self.media.filter(type='ogg').order_by().iterator():
 417             ogg_files[m.name] = m
 418
 419         audiobooks = []
 420         projects = set()
 421         for mp3 in self.media.filter(type='mp3').iterator():
 422             # ogg files are always from the same project
 423             meta = mp3.get_extra_info_json()
 424             project = meta.get('project')
 425             if not project:
 426                 # temporary fallback
 427                 project = 'CzytamySłuchając'
 428
 429             projects.add((project, meta.get('funded_by', '')))
 430
 431             media = {'mp3': mp3}
 432
 433             ogg = ogg_files.get(mp3.name)
 434             if ogg:
 435                 media['ogg'] = ogg
 436             audiobooks.append(media)
 437
 438         projects = sorted(projects)
 439         return audiobooks, projects
 440
 441     def wldocument(self, parse_dublincore=True, inherit=True):
 442         from catalogue.import_utils import ORMDocProvider
 443         from librarian.parser import WLDocument
 444
 445         if inherit and self.parent:
 446             meta_fallbacks = self.parent.cover_info()
 447         else:
 448             meta_fallbacks = None
 449
 450         return WLDocument.from_file(
 451             self.xml_file.path,
 452             provider=ORMDocProvider(self),
 453             parse_dublincore=parse_dublincore,
 454             meta_fallbacks=meta_fallbacks)
 455
 456     def wldocument2(self):
 457         from catalogue.import_utils import ORMDocProvider
 458         from librarian.document import WLDocument
 459         doc = WLDocument(
 460             self.xml_file.path,
 461             provider=ORMDocProvider(self)
 462         )
 463         doc.meta.update(self.cover_info())
 464         return doc
 465
 466
 467     @staticmethod
 468     def zip_format(format_):
 469         def pretty_file_name(book):
 470             return "%s/%s.%s" % (
 471                 book.get_extra_info_json()['author'],
 472                 book.slug,
 473                 format_)
 474
 475         field_name = "%s_file" % format_
 476         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 477         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 478         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 479
 480     def zip_audiobooks(self, format_):
 481         bm = BookMedia.objects.filter(book=self, type=format_)
 482         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 483         licenses = set()
 484         for m in bm:
 485             license = constants.LICENSES.get(
 486                 m.get_extra_info_json().get('license'), {}
 487             ).get('locative')
 488             if license:
 489                 licenses.add(license)
 490         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 491             'licenses': licenses,
 492         })
 493         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 494
 495     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 496         if not self.findable:
 497             return
 498         if index is None:
 499             from search.index import Index
 500             index = Index()
 501         try:
 502             index.index_book(self, book_info)
 503             if index_tags:
 504                 index.index_tags()
 505             if commit:
 506                 index.index.commit()
 507         except Exception as e:
 508             index.index.rollback()
 509             raise e
 510
    # NOTE: this will cause problems in conjunction with paid previews.
 512     def download_pictures(self, remote_gallery_url):
 513         gallery_path = self.gallery_path()
 514         # delete previous files, so we don't include old files in ebooks
 515         if os.path.isdir(gallery_path):
 516             for filename in os.listdir(gallery_path):
 517                 file_path = os.path.join(gallery_path, filename)
 518                 os.unlink(file_path)
 519         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 520         if ilustr_elements:
 521             makedirs(gallery_path)
 522             for ilustr in ilustr_elements:
 523                 ilustr_src = ilustr.get('src')
 524                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 525                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 526
 527     def load_abstract(self):
 528         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 529         if abstract is not None:
 530             self.abstract = transform_abstrakt(abstract)
 531         else:
 532             self.abstract = ''
 533
 534     @classmethod
 535     def from_xml_file(cls, xml_file, **kwargs):
 536         from django.core.files import File
 537         from librarian import dcparser
 538
 539         # use librarian to parse meta-data
 540         book_info = dcparser.parse(xml_file)
 541
 542         if not isinstance(xml_file, File):
 543             xml_file = File(open(xml_file))
 544
 545         try:
 546             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 547         finally:
 548             xml_file.close()
 549
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `variant_of`, optional `parts`, and
                `to_dict()`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip, merged with
                `app_settings.DONT_BUILD`.
            search_index: queue search indexing (also requires `findable`
                and `settings.NO_SEARCH_INDEX` being off).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview until today plus that many days.
            findable: stored in the book's `findable` field.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered to detect a cover change after the metadata update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used here is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from metadata replace the old ones; shelves ('set' tags) are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Children whose inherited cover may need rebuilding.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # The HTML build is queued regardless of `dont_build`.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 667
 668     def get_master(self):
 669         master_tags = [
 670             'opowiadanie',
 671             'powiesc',
 672             'dramat_wierszowany_l',
 673             'dramat_wierszowany_lp',
 674             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 675             'wywiad',
 676         ]
 677         from librarian.parser import WLDocument
 678         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 679         root = wld.edoc.getroot()
 680         for master in root.iter():
 681             if master.tag in master_tags:
 682                 return master
 683
 684     def update_references(self):
 685         from references.models import Entity, Reference
 686         master = self.get_master()
 687         found = set()
 688         for i, sec in enumerate(master):
 689             for ref in sec.findall('.//ref'):
 690                 href = ref.attrib.get('href', '')
 691                 if not href or href in found:
 692                     continue
 693                 found.add(href)
 694                 entity, created = Entity.objects.get_or_create(
 695                     uri=href
 696                 )
 697                 ref, created = Reference.objects.get_or_create(
 698                     book=self,
 699                     entity=entity
 700                 )
 701                 ref.first_section = 'sec%d' % (i + 1)
 702                 entity.populate()
 703                 entity.save()
 704         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 705
 706     @property
 707     def references(self):
 708         return self.reference_set.all().select_related('entity')
 709
 710     @classmethod
 711     @transaction.atomic
 712     def repopulate_ancestors(cls):
 713         """Fixes the ancestry cache."""
 714         # TODO: table names
 715         cursor = connection.cursor()
 716         if connection.vendor == 'postgres':
 717             cursor.execute("TRUNCATE catalogue_book_ancestor")
 718             cursor.execute("""
 719                 WITH RECURSIVE ancestry AS (
 720                     SELECT book.id, book.parent_id
 721                     FROM catalogue_book AS book
 722                     WHERE book.parent_id IS NOT NULL
 723                     UNION
 724                     SELECT ancestor.id, book.parent_id
 725                     FROM ancestry AS ancestor, catalogue_book AS book
 726                     WHERE ancestor.parent_id = book.id
 727                         AND book.parent_id IS NOT NULL
 728                     )
 729                 INSERT INTO catalogue_book_ancestor
 730                     (from_book_id, to_book_id)
 731                     SELECT id, parent_id
 732                     FROM ancestry
 733                     ORDER BY id;
 734                 """)
 735         else:
 736             cursor.execute("DELETE FROM catalogue_book_ancestor")
 737             for b in cls.objects.exclude(parent=None):
 738                 parent = b.parent
 739                 while parent is not None:
 740                     b.ancestor.add(parent)
 741                     parent = parent.parent
 742
 743     def clear_cache(self):
 744         clear_cached_renders(self.mini_box)
 745         clear_cached_renders(self.mini_box_nolink)
 746
 747     def cover_info(self, inherit=True):
 748         """Returns a dictionary to serve as fallback for BookInfo.
 749
 750         For now, the only thing inherited is the cover image.
 751         """
 752         need = False
 753         info = {}
 754         for field in ('cover_url', 'cover_by', 'cover_source'):
 755             val = self.get_extra_info_json().get(field)
 756             if val:
 757                 info[field] = val
 758             else:
 759                 need = True
 760         if inherit and need and self.parent is not None:
 761             parent_info = self.parent.cover_info()
 762             parent_info.update(info)
 763             info = parent_info
 764         return info
 765
 766     def related_themes(self):
 767         return Tag.objects.usage_for_queryset(
 768             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 769             counts=True).filter(category='theme')
 770
 771     def parent_cover_changed(self):
 772         """Called when parent book's cover image is changed."""
 773         if not self.cover_info(inherit=False):
 774             if 'cover' not in app_settings.DONT_BUILD:
 775                 self.cover.build_delay()
 776                 self.cover_thumb.build_delay()
 777                 self.cover_api_thumb.build_delay()
 778                 self.simple_cover.build_delay()
 779             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 780                 if format_ not in app_settings.DONT_BUILD:
 781                     getattr(self, '%s_file' % format_).build_delay()
 782             for child in self.children.all():
 783                 child.parent_cover_changed()
 784
 785     def other_versions(self):
 786         """Find other versions (i.e. in other languages) of the book."""
 787         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 788
 789     def parents(self):
 790         books = []
 791         parent = self.parent
 792         while parent is not None:
 793             books.insert(0, parent)
 794             parent = parent.parent
 795         return books
 796
 797     def pretty_title(self, html_links=False):
 798         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 799         books = self.parents() + [self]
 800         names.extend([(b.title, b.get_absolute_url()) for b in books])
 801
 802         if html_links:
 803             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 804         else:
 805             names = [tag[0] for tag in names]
 806         return ', '.join(names)
 807
 808     def publisher(self):
 809         publisher = self.get_extra_info_json()['publisher']
 810         if isinstance(publisher, str):
 811             return publisher
 812         elif isinstance(publisher, list):
 813             return ', '.join(publisher)
 814
 815     @classmethod
 816     def tagged_top_level(cls, tags):
 817         """ Returns top-level books tagged with `tags`.
 818
 819         It only returns those books which don't have ancestors which are
 820         also tagged with those tags.
 821
 822         """
 823         objects = cls.tagged.with_all(tags)
 824         return objects.filter(findable=True).exclude(ancestor__in=objects)
 825
 826     @classmethod
 827     def book_list(cls, book_filter=None):
 828         """Generates a hierarchical listing of all books.
 829
 830         Books are optionally filtered with a test function.
 831
 832         """
 833
 834         books_by_parent = {}
 835         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 836         if book_filter:
 837             books = books.filter(book_filter).distinct()
 838
 839             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 840             for book in books.iterator():
 841                 parent = book.parent_id
 842                 if parent not in book_ids:
 843                     parent = None
 844                 books_by_parent.setdefault(parent, []).append(book)
 845         else:
 846             for book in books.iterator():
 847                 books_by_parent.setdefault(book.parent_id, []).append(book)
 848
 849         orphans = []
 850         books_by_author = OrderedDict()
 851         for tag in Tag.objects.filter(category='author').iterator():
 852             books_by_author[tag] = []
 853
 854         for book in books_by_parent.get(None, ()):
 855             authors = list(book.authors().only('pk'))
 856             if authors:
 857                 for author in authors:
 858                     books_by_author[author].append(book)
 859             else:
 860                 orphans.append(book)
 861
 862         return books_by_author, orphans, books_by_parent
 863
    # Maps an audience code from a book's extra info to a
    # (sort rank, Polish display label) pair.  Several codes share one
    # pair, so audiences_pl() collapses them into a single label.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 874
 875     def audiences_pl(self):
 876         audiences = self.get_extra_info_json().get('audiences', [])
 877         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 878         return [a[1] for a in audiences]
 879
 880     def stage_note(self):
 881         stage = self.get_extra_info_json().get('stage')
 882         if stage and stage < '0.4':
 883             return (_('This work needs modernisation'),
 884                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 885         else:
 886             return None, None
 887
 888     def choose_fragment(self):
 889         fragments = self.fragments.order_by()
 890         fragments_count = fragments.count()
 891         if not fragments_count and self.children.exists():
 892             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 893             fragments_count = fragments.count()
 894         if fragments_count:
 895             return fragments[randint(0, fragments_count - 1)]
 896         elif self.parent:
 897             return self.parent.choose_fragment()
 898         else:
 899             return None
 900
 901     def fragment_data(self):
 902         fragment = self.choose_fragment()
 903         if fragment:
 904             return {
 905                 'title': fragment.book.pretty_title(),
 906                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 907             }
 908         else:
 909             return None
 910
 911     def update_popularity(self):
 912         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 913         try:
 914             pop = self.popularity
 915             pop.count = count
 916             pop.save()
 917         except BookPopularity.DoesNotExist:
 918             BookPopularity.objects.create(book=self, count=count)
 919
 920     def ridero_link(self):
 921         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 922
 923     def like(self, user):
 924         from social.utils import likes, get_set, set_sets
 925         if not likes(user, self):
 926             tag = get_set(user, '')
 927             set_sets(user, self, [tag])
 928
 929     def unlike(self, user):
 930         from social.utils import likes, set_sets
 931         if likes(user, self):
 932             set_sets(user, self, [])
 933
 934     def full_sort_key(self):
 935         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
 936
 937     def cover_color(self):
 938         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
 939
 940     @cached_render('catalogue/book_mini_box.html')
 941     def mini_box(self):
 942         return {
 943             'book': self
 944         }
 945
 946     @cached_render('catalogue/book_mini_box.html')
 947     def mini_box_nolink(self):
 948         return {
 949             'book': self,
 950             'no_link': True,
 951         }
 952
def add_file_fields():
    """Attach a ``<format>_file`` EbookField to Book for every format in ``Book.formats``.

    For every format except 'xml', a ``<format>_file_etag`` CharField is
    added as well.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # The upload_to callable gets a per-format name and is published
        # under that name at module level (presumably so that migrations can
        # refer to it by import path -- confirm).
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            # Companion etag column for every format other than the XML source.
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')


# Called at import time, so the fields are added to Book when this module is loaded.
add_file_fields()
 974
 975
 976 class BookPopularity(models.Model):
 977     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
 978     count = models.IntegerField(default=0, db_index=True)