src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.conf import settings
  12 from django.db import connection, models, transaction
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.template.loader import render_to_string
  16 from django.urls import reverse
  17 from django.utils.translation import ugettext_lazy as _, get_language
  18 from django.utils.deconstruct import deconstructible
  19 from fnpdjango.storage import BofhFileSystemStorage
  20
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue.fields import EbookField
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from catalogue import tasks
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Storage instance shared at module level; passed as ``storage=`` to the
# ``cover`` field of ``Book``.
bofh_storage = BofhFileSystemStorage()
  34
  35
  36 @deconstructible
  37 class UploadToPath(object):
  38     def __init__(self, path):
  39         self.path = path
  40
  41     def __call__(self, instance, filename):
  42         return self.path % instance.slug
  43
  44
# Upload-path callables for the cover image variants of ``Book``; each one
# substitutes the book slug into its template.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  50
  51
  52 def _ebook_upload_to(upload_path):
  53     return UploadToPath(upload_path)
  54
  55
  56 class Book(models.Model):
  57     """Represents a book imported from WL-XML."""
  58     title = models.CharField(_('title'), max_length=32767)
  59     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
  60     sort_key_author = models.CharField(
  61         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
  62     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
  63     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  64     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  65     description = models.TextField(_('description'), blank=True)
  66     abstract = models.TextField(_('abstract'), blank=True)
  67     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  68     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
  69     parent_number = models.IntegerField(_('parent number'), default=0)
  70     extra_info = models.TextField(_('extra information'), default='{}')
  71     gazeta_link = models.CharField(blank=True, max_length=240)
  72     wiki_link = models.CharField(blank=True, max_length=240)
  73     print_on_demand = models.BooleanField(_('print on demand'), default=False)
  74     recommended = models.BooleanField(_('recommended'), default=False)
  75     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
  76     preview = models.BooleanField(_('preview'), default=False)
  77     preview_until = models.DateField(_('preview until'), blank=True, null=True)
  78     preview_key = models.CharField(max_length=32, blank=True, null=True)
  79     findable = models.BooleanField(_('findable'), default=True, db_index=True)
  80
  81     # files generated during publication
  82     cover = EbookField(
  83         'cover', _('cover'),
  84         null=True, blank=True,
  85         upload_to=_cover_upload_to,
  86         storage=bofh_storage, max_length=255)
  87     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  88     # Cleaner version of cover for thumbs
  89     cover_thumb = EbookField(
  90         'cover_thumb', _('cover thumbnail'),
  91         null=True, blank=True,
  92         upload_to=_cover_thumb_upload_to,
  93         max_length=255)
  94     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
  95     cover_api_thumb = EbookField(
  96         'cover_api_thumb', _('cover thumbnail for mobile app'),
  97         null=True, blank=True,
  98         upload_to=_cover_api_thumb_upload_to,
  99         max_length=255)
 100     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 101     simple_cover = EbookField(
 102         'simple_cover', _('cover for mobile app'),
 103         null=True, blank=True,
 104         upload_to=_simple_cover_upload_to,
 105         max_length=255)
 106     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 107     cover_ebookpoint = EbookField(
 108         'cover_ebookpoint', _('cover for Ebookpoint'),
 109         null=True, blank=True,
 110         upload_to=_cover_ebookpoint_upload_to,
 111         max_length=255)
 112     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
 113     ebook_formats = constants.EBOOK_FORMATS
 114     formats = ebook_formats + ['html', 'xml']
 115
 116     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
 117     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
 118
 119     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
 120     has_audience = models.BooleanField(default=False)
 121
 122     objects = models.Manager()
 123     tagged = managers.ModelTaggedItemManager(Tag)
 124     tags = managers.TagDescriptor(Tag)
 125     tag_relations = GenericRelation(Tag.intermediary_table_model)
 126
 127     html_built = django.dispatch.Signal()
 128     published = django.dispatch.Signal()
 129
 130     SORT_KEY_SEP = '$'
 131
    class AlreadyExists(Exception):
        """Raised by ``from_text_and_meta`` when a book with the given slug
        already exists and ``overwrite`` is false."""
        pass
 134
    class Meta:
        # Default ordering: author sort key first, then the title-derived
        # sort key (both are filled in by ``save``).
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 140
 141     def __str__(self):
 142         return self.title
 143
 144     def get_extra_info_json(self):
 145         return json.loads(self.extra_info or '{}')
 146
 147     def get_initial(self):
 148         try:
 149             return re.search(r'\w', self.title, re.U).group(0)
 150         except AttributeError:
 151             return ''
 152
 153     def authors(self):
 154         return self.tags.filter(category='author')
 155
 156     def epochs(self):
 157         return self.tags.filter(category='epoch')
 158
 159     def genres(self):
 160         return self.tags.filter(category='genre')
 161
 162     def kinds(self):
 163         return self.tags.filter(category='kind')
 164
 165     def tag_unicode(self, category):
 166         relations = prefetched_relations(self, category)
 167         if relations:
 168             return ', '.join(rel.tag.name for rel in relations)
 169         else:
 170             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 171
 172     def tags_by_category(self):
 173         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 174
 175     def author_unicode(self):
 176         return self.cached_author
 177
 178     def kind_unicode(self):
 179         return self.tag_unicode('kind')
 180
 181     def epoch_unicode(self):
 182         return self.tag_unicode('epoch')
 183
 184     def genre_unicode(self):
 185         return self.tag_unicode('genre')
 186
 187     def translators(self):
 188         translators = self.get_extra_info_json().get('translators') or []
 189         return [
 190             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 191         ]
 192
 193     def translator(self):
 194         translators = self.get_extra_info_json().get('translators')
 195         if not translators:
 196             return None
 197         if len(translators) > 3:
 198             translators = translators[:2]
 199             others = ' i inni'
 200         else:
 201             others = ''
 202         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 203
 204     def cover_source(self):
 205         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 206
 207     @property
 208     def isbn_pdf(self):
 209         return self.get_extra_info_json().get('isbn_pdf')
 210
 211     @property
 212     def isbn_epub(self):
 213         return self.get_extra_info_json().get('isbn_epub')
 214
 215     @property
 216     def isbn_mobi(self):
 217         return self.get_extra_info_json().get('isbn_mobi')
 218
 219
 220     def save(self, force_insert=False, force_update=False, **kwargs):
 221         from sortify import sortify
 222
 223         self.sort_key = sortify(self.title)[:120]
 224         self.title = str(self.title)  # ???
 225
 226         try:
 227             author = self.authors().first().sort_key
 228         except AttributeError:
 229             author = ''
 230         self.sort_key_author = author
 231
 232         self.cached_author = self.tag_unicode('author')
 233         self.has_audience = 'audience' in self.get_extra_info_json()
 234
 235         if self.preview and not self.preview_key:
 236             self.preview_key = get_random_hash(self.slug)[:32]
 237
 238         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 239
 240         return ret
 241
 242     def get_absolute_url(self):
 243         return reverse('book_detail', args=[self.slug])
 244
 245     def gallery_path(self):
 246         return gallery_path(self.slug)
 247
 248     def gallery_url(self):
 249         return gallery_url(self.slug)
 250
 251     def get_first_text(self):
 252         if self.html_file:
 253             return self
 254         child = self.children.all().order_by('parent_number').first()
 255         if child is not None:
 256             return child.get_first_text()
 257
 258     def get_last_text(self):
 259         if self.html_file:
 260             return self
 261         child = self.children.all().order_by('parent_number').last()
 262         if child is not None:
 263             return child.get_last_text()
 264
 265     def get_prev_text(self):
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_last_text()
 271
 272         if self.parent.html_file:
 273             return self.parent
 274
 275         return self.parent.get_prev_text()
 276
 277     def get_next_text(self):
 278         child = self.children.order_by('parent_number').first()
 279         if child is not None:
 280             return child.get_first_text()
 281
 282         if not self.parent:
 283             return None
 284         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 285         if sibling is not None:
 286             return sibling.get_first_text()
 287         return self.parent.get_next_text()
 288
 289     def get_siblings(self):
 290         if not self.parent:
 291             return []
 292         return self.parent.children.all().order_by('parent_number')
 293
 294     def get_children(self):
 295         return self.children.all().order_by('parent_number')
 296
 297     @property
 298     def name(self):
 299         return self.title
 300
 301     def language_code(self):
 302         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 303
 304     def language_name(self):
 305         return dict(settings.LANGUAGES).get(self.language_code(), "")
 306
 307     def is_foreign(self):
 308         return self.language_code() != settings.LANGUAGE_CODE
 309
 310     def set_audio_length(self):
 311         length = self.get_audio_length()
 312         if length > 0:
 313             self.audio_length = self.format_audio_length(length)
 314             self.save()
 315
 316     @staticmethod
 317     def format_audio_length(seconds):
 318         """
 319         >>> Book.format_audio_length(1)
 320         '0:01'
 321         >>> Book.format_audio_length(3661)
 322         '1:01:01'
 323         """
 324         if seconds < 60*60:
 325             minutes = seconds // 60
 326             seconds = seconds % 60
 327             return '%d:%02d' % (minutes, seconds)
 328         else:
 329             hours = seconds // 3600
 330             minutes = seconds % 3600 // 60
 331             seconds = seconds % 60
 332             return '%d:%02d:%02d' % (hours, minutes, seconds)
 333
 334     def get_audio_length(self):
 335         total = 0
 336         for media in self.get_mp3() or ():
 337             total += app_settings.GET_MP3_LENGTH(media.file.path)
 338         return int(total)
 339
 340     def has_media(self, type_):
 341         if type_ in Book.formats:
 342             return bool(getattr(self, "%s_file" % type_))
 343         else:
 344             return self.media.filter(type=type_).exists()
 345
 346     def has_audio(self):
 347         return self.has_media('mp3')
 348
 349     def get_media(self, type_):
 350         if self.has_media(type_):
 351             if type_ in Book.formats:
 352                 return getattr(self, "%s_file" % type_)
 353             else:
 354                 return self.media.filter(type=type_)
 355         else:
 356             return None
 357
 358     def get_mp3(self):
 359         return self.get_media("mp3")
 360
 361     def get_odt(self):
 362         return self.get_media("odt")
 363
 364     def get_ogg(self):
 365         return self.get_media("ogg")
 366
 367     def get_daisy(self):
 368         return self.get_media("daisy")
 369
 370     def media_url(self, format_):
 371         media = self.get_media(format_)
 372         if media:
 373             if self.preview:
 374                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 375             else:
 376                 return media.url
 377         else:
 378             return None
 379
 380     def html_url(self):
 381         return self.media_url('html')
 382
 383     def pdf_url(self):
 384         return self.media_url('pdf')
 385
 386     def epub_url(self):
 387         return self.media_url('epub')
 388
 389     def mobi_url(self):
 390         return self.media_url('mobi')
 391
 392     def txt_url(self):
 393         return self.media_url('txt')
 394
 395     def fb2_url(self):
 396         return self.media_url('fb2')
 397
 398     def xml_url(self):
 399         return self.media_url('xml')
 400
 401     def has_description(self):
 402         return len(self.description) > 0
 403     has_description.short_description = _('description')
 404     has_description.boolean = True
 405
 406     def has_mp3_file(self):
 407         return self.has_media("mp3")
 408     has_mp3_file.short_description = 'MP3'
 409     has_mp3_file.boolean = True
 410
 411     def has_ogg_file(self):
 412         return self.has_media("ogg")
 413     has_ogg_file.short_description = 'OGG'
 414     has_ogg_file.boolean = True
 415
 416     def has_daisy_file(self):
 417         return self.has_media("daisy")
 418     has_daisy_file.short_description = 'DAISY'
 419     has_daisy_file.boolean = True
 420
 421     @property
 422     def media_daisy(self):
 423         return self.get_media('daisy')
 424
 425     def get_audiobooks(self):
 426         ogg_files = {}
 427         for m in self.media.filter(type='ogg').order_by().iterator():
 428             ogg_files[m.name] = m
 429
 430         audiobooks = []
 431         projects = set()
 432         for mp3 in self.media.filter(type='mp3').iterator():
 433             # ogg files are always from the same project
 434             meta = mp3.get_extra_info_json()
 435             project = meta.get('project')
 436             if not project:
 437                 # temporary fallback
 438                 project = 'CzytamySłuchając'
 439
 440             projects.add((project, meta.get('funded_by', '')))
 441
 442             media = {'mp3': mp3}
 443
 444             ogg = ogg_files.get(mp3.name)
 445             if ogg:
 446                 media['ogg'] = ogg
 447             audiobooks.append(media)
 448
 449         projects = sorted(projects)
 450         return audiobooks, projects
 451
 452     def wldocument(self, parse_dublincore=True, inherit=True):
 453         from catalogue.import_utils import ORMDocProvider
 454         from librarian.parser import WLDocument
 455
 456         if inherit and self.parent:
 457             meta_fallbacks = self.parent.cover_info()
 458         else:
 459             meta_fallbacks = None
 460
 461         return WLDocument.from_file(
 462             self.xml_file.path,
 463             provider=ORMDocProvider(self),
 464             parse_dublincore=parse_dublincore,
 465             meta_fallbacks=meta_fallbacks)
 466
 467     def wldocument2(self):
 468         from catalogue.import_utils import ORMDocProvider
 469         from librarian.document import WLDocument
 470         doc = WLDocument(
 471             self.xml_file.path,
 472             provider=ORMDocProvider(self)
 473         )
 474         doc.meta.update(self.cover_info())
 475         return doc
 476
 477
 478     @staticmethod
 479     def zip_format(format_):
 480         def pretty_file_name(book):
 481             return "%s/%s.%s" % (
 482                 book.get_extra_info_json()['author'],
 483                 book.slug,
 484                 format_)
 485
 486         field_name = "%s_file" % format_
 487         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 488         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 489         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 490
 491     def zip_audiobooks(self, format_):
 492         bm = BookMedia.objects.filter(book=self, type=format_)
 493         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 494         licenses = set()
 495         for m in bm:
 496             license = constants.LICENSES.get(
 497                 m.get_extra_info_json().get('license'), {}
 498             ).get('locative')
 499             if license:
 500                 licenses.add(license)
 501         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 502             'licenses': licenses,
 503         })
 504         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 505
 506     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 507         if not self.findable:
 508             return
 509         if index is None:
 510             from search.index import Index
 511             index = Index()
 512         try:
 513             index.index_book(self, book_info)
 514             if index_tags:
 515                 index.index_tags()
 516             if commit:
 517                 index.index.commit()
 518         except Exception as e:
 519             index.index.rollback()
 520             raise e
 521
 522     # will make problems in conjunction with paid previews
 523     def download_pictures(self, remote_gallery_url):
 524         gallery_path = self.gallery_path()
 525         # delete previous files, so we don't include old files in ebooks
 526         if os.path.isdir(gallery_path):
 527             for filename in os.listdir(gallery_path):
 528                 file_path = os.path.join(gallery_path, filename)
 529                 os.unlink(file_path)
 530         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 531         if ilustr_elements:
 532             makedirs(gallery_path)
 533             for ilustr in ilustr_elements:
 534                 ilustr_src = ilustr.get('src')
 535                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 536                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 537
 538     def load_abstract(self):
 539         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 540         if abstract is not None:
 541             self.abstract = transform_abstrakt(abstract)
 542         else:
 543             self.abstract = ''
 544
 545     @classmethod
 546     def from_xml_file(cls, xml_file, **kwargs):
 547         from django.core.files import File
 548         from librarian import dcparser
 549
 550         # use librarian to parse meta-data
 551         book_info = dcparser.parse(xml_file)
 552
 553         if not isinstance(xml_file, File):
 554             xml_file = File(open(xml_file))
 555
 556         try:
 557             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 558         finally:
 559             xml_file.close()
 560
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's XML file.
        :param book_info: parsed metadata; its ``url.slug`` selects the book.
        :param overwrite: allow updating a book that already exists.
        :param dont_build: names of formats (or 'cover') not to build; merged
            with ``app_settings.DONT_BUILD``.
        :param search_index: schedule search indexing (also requires
            ``findable`` and ``settings.NO_SEARCH_INDEX`` to be false).
        :param search_index_tags: passed to the indexing task as ``index_tags``.
        :param remote_gallery_url: when set, illustrations are downloaded
            from there via ``download_pictures``.
        :param days: for a newly created book, a non-zero value marks it as
            a preview until today plus that many days.
        :param findable: stored in the book's ``findable`` field.
        :return: the saved book.
        :raises Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
        :raises ValueError: the slug contains characters other than a-z, 0-9, '-'.
        :raises Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag taken from the metadata is flagged ``for_books``.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; previously saved shelves are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; ``parent_number`` is the position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 678
 679     def get_master(self):
 680         master_tags = [
 681             'opowiadanie',
 682             'powiesc',
 683             'dramat_wierszowany_l',
 684             'dramat_wierszowany_lp',
 685             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 686             'wywiad',
 687         ]
 688         from librarian.parser import WLDocument
 689         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 690         root = wld.edoc.getroot()
 691         for master in root.iter():
 692             if master.tag in master_tags:
 693                 return master
 694
 695     def update_references(self):
 696         from references.models import Entity, Reference
 697         master = self.get_master()
 698         found = set()
 699         for i, sec in enumerate(master):
 700             for ref in sec.findall('.//ref'):
 701                 href = ref.attrib.get('href', '')
 702                 if not href or href in found:
 703                     continue
 704                 found.add(href)
 705                 entity, created = Entity.objects.get_or_create(
 706                     uri=href
 707                 )
 708                 ref, created = Reference.objects.get_or_create(
 709                     book=self,
 710                     entity=entity
 711                 )
 712                 ref.first_section = 'sec%d' % (i + 1)
 713                 entity.populate()
 714                 entity.save()
 715         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 716
 717     @property
 718     def references(self):
 719         return self.reference_set.all().select_related('entity')
 720
 721     @classmethod
 722     @transaction.atomic
 723     def repopulate_ancestors(cls):
 724         """Fixes the ancestry cache."""
 725         # TODO: table names
 726         cursor = connection.cursor()
 727         if connection.vendor == 'postgres':
 728             cursor.execute("TRUNCATE catalogue_book_ancestor")
 729             cursor.execute("""
 730                 WITH RECURSIVE ancestry AS (
 731                     SELECT book.id, book.parent_id
 732                     FROM catalogue_book AS book
 733                     WHERE book.parent_id IS NOT NULL
 734                     UNION
 735                     SELECT ancestor.id, book.parent_id
 736                     FROM ancestry AS ancestor, catalogue_book AS book
 737                     WHERE ancestor.parent_id = book.id
 738                         AND book.parent_id IS NOT NULL
 739                     )
 740                 INSERT INTO catalogue_book_ancestor
 741                     (from_book_id, to_book_id)
 742                     SELECT id, parent_id
 743                     FROM ancestry
 744                     ORDER BY id;
 745                 """)
 746         else:
 747             cursor.execute("DELETE FROM catalogue_book_ancestor")
 748             for b in cls.objects.exclude(parent=None):
 749                 parent = b.parent
 750                 while parent is not None:
 751                     b.ancestor.add(parent)
 752                     parent = parent.parent
 753
 754     def clear_cache(self):
 755         clear_cached_renders(self.mini_box)
 756         clear_cached_renders(self.mini_box_nolink)
 757
 758     def cover_info(self, inherit=True):
 759         """Returns a dictionary to serve as fallback for BookInfo.
 760
 761         For now, the only thing inherited is the cover image.
 762         """
 763         need = False
 764         info = {}
 765         for field in ('cover_url', 'cover_by', 'cover_source'):
 766             val = self.get_extra_info_json().get(field)
 767             if val:
 768                 info[field] = val
 769             else:
 770                 need = True
 771         if inherit and need and self.parent is not None:
 772             parent_info = self.parent.cover_info()
 773             parent_info.update(info)
 774             info = parent_info
 775         return info
 776
 777     def related_themes(self):
 778         return Tag.objects.usage_for_queryset(
 779             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 780             counts=True).filter(category='theme')
 781
 782     def parent_cover_changed(self):
 783         """Called when parent book's cover image is changed."""
 784         if not self.cover_info(inherit=False):
 785             if 'cover' not in app_settings.DONT_BUILD:
 786                 self.cover.build_delay()
 787                 self.cover_thumb.build_delay()
 788                 self.cover_api_thumb.build_delay()
 789                 self.simple_cover.build_delay()
 790             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 791                 if format_ not in app_settings.DONT_BUILD:
 792                     getattr(self, '%s_file' % format_).build_delay()
 793             for child in self.children.all():
 794                 child.parent_cover_changed()
 795
 796     def other_versions(self):
 797         """Find other versions (i.e. in other languages) of the book."""
 798         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 799
 800     def parents(self):
 801         books = []
 802         parent = self.parent
 803         while parent is not None:
 804             books.insert(0, parent)
 805             parent = parent.parent
 806         return books
 807
 808     def pretty_title(self, html_links=False):
 809         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 810         books = self.parents() + [self]
 811         names.extend([(b.title, b.get_absolute_url()) for b in books])
 812
 813         if html_links:
 814             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 815         else:
 816             names = [tag[0] for tag in names]
 817         return ', '.join(names)
 818
 819     def publisher(self):
 820         publisher = self.get_extra_info_json()['publisher']
 821         if isinstance(publisher, str):
 822             return publisher
 823         elif isinstance(publisher, list):
 824             return ', '.join(publisher)
 825
 826     @classmethod
 827     def tagged_top_level(cls, tags):
 828         """ Returns top-level books tagged with `tags`.
 829
 830         It only returns those books which don't have ancestors which are
 831         also tagged with those tags.
 832
 833         """
 834         objects = cls.tagged.with_all(tags)
 835         return objects.filter(findable=True).exclude(ancestor__in=objects)
 836
 837     @classmethod
 838     def book_list(cls, book_filter=None):
 839         """Generates a hierarchical listing of all books.
 840
 841         Books are optionally filtered with a test function.
 842
 843         """
 844
 845         books_by_parent = {}
 846         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 847         if book_filter:
 848             books = books.filter(book_filter).distinct()
 849
 850             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 851             for book in books.iterator():
 852                 parent = book.parent_id
 853                 if parent not in book_ids:
 854                     parent = None
 855                 books_by_parent.setdefault(parent, []).append(book)
 856         else:
 857             for book in books.iterator():
 858                 books_by_parent.setdefault(book.parent_id, []).append(book)
 859
 860         orphans = []
 861         books_by_author = OrderedDict()
 862         for tag in Tag.objects.filter(category='author').iterator():
 863             books_by_author[tag] = []
 864
 865         for book in books_by_parent.get(None, ()):
 866             authors = list(book.authors().only('pk'))
 867             if authors:
 868                 for author in authors:
 869                     books_by_author[author].append(book)
 870             else:
 871                 orphans.append(book)
 872
 873         return books_by_author, orphans, books_by_parent
 874
    # Maps audience codes to (rank, Polish label) pairs.  audiences_pl()
    # sorts by these pairs, so the rank decides the order of the labels, and
    # codes sharing the same pair collapse into a single label there.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 885
 886     def audiences_pl(self):
 887         audiences = self.get_extra_info_json().get('audiences', [])
 888         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 889         return [a[1] for a in audiences]
 890
 891     def stage_note(self):
 892         stage = self.get_extra_info_json().get('stage')
 893         if stage and stage < '0.4':
 894             return (_('This work needs modernisation'),
 895                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 896         else:
 897             return None, None
 898
 899     def choose_fragment(self):
 900         fragments = self.fragments.order_by()
 901         fragments_count = fragments.count()
 902         if not fragments_count and self.children.exists():
 903             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 904             fragments_count = fragments.count()
 905         if fragments_count:
 906             return fragments[randint(0, fragments_count - 1)]
 907         elif self.parent:
 908             return self.parent.choose_fragment()
 909         else:
 910             return None
 911
 912     def fragment_data(self):
 913         fragment = self.choose_fragment()
 914         if fragment:
 915             return {
 916                 'title': fragment.book.pretty_title(),
 917                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 918             }
 919         else:
 920             return None
 921
 922     def update_popularity(self):
 923         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 924         try:
 925             pop = self.popularity
 926             pop.count = count
 927             pop.save()
 928         except BookPopularity.DoesNotExist:
 929             BookPopularity.objects.create(book=self, count=count)
 930
 931     def ridero_link(self):
 932         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 933
 934     def like(self, user):
 935         from social.utils import likes, get_set, set_sets
 936         if not likes(user, self):
 937             tag = get_set(user, '')
 938             set_sets(user, self, [tag])
 939
 940     def unlike(self, user):
 941         from social.utils import likes, set_sets
 942         if likes(user, self):
 943             set_sets(user, self, [])
 944
 945     def full_sort_key(self):
 946         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
 947
 948     def cover_color(self):
 949         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
 950
 951     @cached_render('catalogue/book_mini_box.html')
 952     def mini_box(self):
 953         return {
 954             'book': self
 955         }
 956
 957     @cached_render('catalogue/book_mini_box.html')
 958     def mini_box_nolink(self):
 959         return {
 960             'book': self,
 961             'no_link': True,
 962         }
 963
 964 def add_file_fields():
 965     for format_ in Book.formats:
 966         field_name = "%s_file" % format_
 967         # This weird globals() assignment makes Django migrations comfortable.
 968         _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
 969         _upload_to.__name__ = '_%s_upload_to' % format_
 970         globals()[_upload_to.__name__] = _upload_to
 971
 972         EbookField(
 973             format_, _("%s file" % format_.upper()),
 974             upload_to=_upload_to,
 975             storage=bofh_storage,
 976             max_length=255,
 977             blank=True,
 978             default=''
 979         ).contribute_to_class(Book, field_name)
 980         if format_ != 'xml':
 981             models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')
 982
 983
# Runs at import time so that the per-format file fields are added to Book
# as soon as this module is loaded.
add_file_fields()
 985
 986
class BookPopularity(models.Model):
    """Popularity counter linked one-to-one with a Book."""
    # The book being counted; available from the Book side as `popularity`.
    # Deleting the book deletes this row as well (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter; Book.update_popularity() stores here the number of
    # distinct users among the book's tags of category 'set'.
    count = models.IntegerField(default=0, db_index=True)