src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.conf import settings
  12 from django.db import connection, models, transaction
  13 import django.dispatch
  14 from django.contrib.contenttypes.fields import GenericRelation
  15 from django.template.loader import render_to_string
  16 from django.urls import reverse
  17 from django.utils.translation import ugettext_lazy as _, get_language
  18 from django.utils.deconstruct import deconstructible
  19 from fnpdjango.storage import BofhFileSystemStorage
  20
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue.fields import EbookField
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from catalogue import tasks
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Shared storage instance; passed as ``storage=`` to the ``Book.cover`` field below.
bofh_storage = BofhFileSystemStorage()
  34
  35
  36 @deconstructible
  37 class UploadToPath(object):
  38     def __init__(self, path):
  39         self.path = path
  40
  41     def __call__(self, instance, filename):
  42         return self.path % instance.slug
  43
  44
# Upload-path builders for the cover image variants; each template's ``%s``
# is filled with the book's slug by ``UploadToPath.__call__``.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  50
  51
  52 def _ebook_upload_to(upload_path):
  53     return UploadToPath(upload_path)
  54
  55
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree through ``parent``/``children``; ``ancestor`` is a
    many-to-many cache of that tree, rebuilt by ``repopulate_ancestors()``.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Shared by the books returned from other_versions().
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded dictionary; decode it with get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Preformatted by format_audio_length(), e.g. '1:01:01'.
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when ``preview`` is set and no key exists yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Formats looked up as ``<format>_file`` attributes by has_media()/get_media().
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is False.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 140
 141     def __str__(self):
 142         return self.title
 143
 144     def get_extra_info_json(self):
 145         return json.loads(self.extra_info or '{}')
 146
 147     def get_initial(self):
 148         try:
 149             return re.search(r'\w', self.title, re.U).group(0)
 150         except AttributeError:
 151             return ''
 152
 153     def authors(self):
 154         return self.tags.filter(category='author')
 155
 156     def epochs(self):
 157         return self.tags.filter(category='epoch')
 158
 159     def genres(self):
 160         return self.tags.filter(category='genre')
 161
 162     def kinds(self):
 163         return self.tags.filter(category='kind')
 164
 165     def tag_unicode(self, category):
 166         relations = prefetched_relations(self, category)
 167         if relations:
 168             return ', '.join(rel.tag.name for rel in relations)
 169         else:
 170             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 171
 172     def tags_by_category(self):
 173         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 174
 175     def author_unicode(self):
 176         return self.cached_author
 177
 178     def kind_unicode(self):
 179         return self.tag_unicode('kind')
 180
 181     def epoch_unicode(self):
 182         return self.tag_unicode('epoch')
 183
 184     def genre_unicode(self):
 185         return self.tag_unicode('genre')
 186
 187     def translators(self):
 188         translators = self.get_extra_info_json().get('translators') or []
 189         return [
 190             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 191         ]
 192
 193     def translator(self):
 194         translators = self.get_extra_info_json().get('translators')
 195         if not translators:
 196             return None
 197         if len(translators) > 3:
 198             translators = translators[:2]
 199             others = ' i inni'
 200         else:
 201             others = ''
 202         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 203
 204     def cover_source(self):
 205         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 206
 207     @property
 208     def isbn_pdf(self):
 209         return self.get_extra_info_json().get('isbn_pdf')
 210
 211     @property
 212     def isbn_epub(self):
 213         return self.get_extra_info_json().get('isbn_epub')
 214
 215     @property
 216     def isbn_mobi(self):
 217         return self.get_extra_info_json().get('isbn_mobi')
 218
 219
 220     def save(self, force_insert=False, force_update=False, **kwargs):
 221         from sortify import sortify
 222
 223         self.sort_key = sortify(self.title)[:120]
 224         self.title = str(self.title)  # ???
 225
 226         try:
 227             author = self.authors().first().sort_key
 228         except AttributeError:
 229             author = ''
 230         self.sort_key_author = author
 231
 232         self.cached_author = self.tag_unicode('author')
 233         self.has_audience = 'audience' in self.get_extra_info_json()
 234
 235         if self.preview and not self.preview_key:
 236             self.preview_key = get_random_hash(self.slug)[:32]
 237
 238         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 239
 240         return ret
 241
 242     def get_absolute_url(self):
 243         return reverse('book_detail', args=[self.slug])
 244
 245     def gallery_path(self):
 246         return gallery_path(self.slug)
 247
 248     def gallery_url(self):
 249         return gallery_url(self.slug)
 250
 251     def get_first_text(self):
 252         if self.html_file:
 253             return self
 254         child = self.children.all().order_by('parent_number').first()
 255         if child is not None:
 256             return child.get_first_text()
 257
 258     def get_last_text(self):
 259         if self.html_file:
 260             return self
 261         child = self.children.all().order_by('parent_number').last()
 262         if child is not None:
 263             return child.get_last_text()
 264
 265     def get_prev_text(self):
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_last_text()
 271
 272         if self.parent.html_file:
 273             return self.parent
 274
 275         return self.parent.get_prev_text()
 276
 277     def get_next_text(self):
 278         child = self.children.order_by('parent_number').first()
 279         if child is not None:
 280             return child.get_first_text()
 281
 282         if not self.parent:
 283             return None
 284         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 285         if sibling is not None:
 286             return sibling.get_first_text()
 287         return self.parent.get_next_text()
 288
 289     def get_siblings(self):
 290         if not self.parent:
 291             return []
 292         return self.parent.children.all().order_by('parent_number')
 293
 294     def get_children(self):
 295         return self.children.all().order_by('parent_number')
 296
 297     @property
 298     def name(self):
 299         return self.title
 300
 301     def language_code(self):
 302         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 303
 304     def language_name(self):
 305         return dict(settings.LANGUAGES).get(self.language_code(), "")
 306
 307     def is_foreign(self):
 308         return self.language_code() != settings.LANGUAGE_CODE
 309
 310     def set_audio_length(self):
 311         length = self.get_audio_length()
 312         if length > 0:
 313             self.audio_length = self.format_audio_length(length)
 314             self.save()
 315
 316     @staticmethod
 317     def format_audio_length(seconds):
 318         """
 319         >>> Book.format_audio_length(1)
 320         '0:01'
 321         >>> Book.format_audio_length(3661)
 322         '1:01:01'
 323         """
 324         if seconds < 60*60:
 325             minutes = seconds // 60
 326             seconds = seconds % 60
 327             return '%d:%02d' % (minutes, seconds)
 328         else:
 329             hours = seconds // 3600
 330             minutes = seconds % 3600 // 60
 331             seconds = seconds % 60
 332             return '%d:%02d:%02d' % (hours, minutes, seconds)
 333
 334     def get_audio_length(self):
 335         total = 0
 336         for media in self.get_mp3() or ():
 337             total += app_settings.GET_MP3_LENGTH(media.file.path)
 338         return int(total)
 339
 340     def has_media(self, type_):
 341         if type_ in Book.formats:
 342             return bool(getattr(self, "%s_file" % type_))
 343         else:
 344             return self.media.filter(type=type_).exists()
 345
 346     def has_audio(self):
 347         return self.has_media('mp3')
 348
 349     def get_media(self, type_):
 350         if self.has_media(type_):
 351             if type_ in Book.formats:
 352                 return getattr(self, "%s_file" % type_)
 353             else:
 354                 return self.media.filter(type=type_)
 355         else:
 356             return None
 357
 358     def get_mp3(self):
 359         return self.get_media("mp3")
 360
 361     def get_odt(self):
 362         return self.get_media("odt")
 363
 364     def get_ogg(self):
 365         return self.get_media("ogg")
 366
 367     def get_daisy(self):
 368         return self.get_media("daisy")
 369
 370     def media_url(self, format_):
 371         media = self.get_media(format_)
 372         if media:
 373             if self.preview:
 374                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 375             else:
 376                 return media.url
 377         else:
 378             return None
 379
 380     def html_url(self):
 381         return self.media_url('html')
 382
 383     def pdf_url(self):
 384         return self.media_url('pdf')
 385
 386     def epub_url(self):
 387         return self.media_url('epub')
 388
 389     def mobi_url(self):
 390         return self.media_url('mobi')
 391
 392     def txt_url(self):
 393         return self.media_url('txt')
 394
 395     def fb2_url(self):
 396         return self.media_url('fb2')
 397
 398     def xml_url(self):
 399         return self.media_url('xml')
 400
 401     def has_description(self):
 402         return len(self.description) > 0
 403     has_description.short_description = _('description')
 404     has_description.boolean = True
 405
 406     def has_mp3_file(self):
 407         return self.has_media("mp3")
 408     has_mp3_file.short_description = 'MP3'
 409     has_mp3_file.boolean = True
 410
 411     def has_ogg_file(self):
 412         return self.has_media("ogg")
 413     has_ogg_file.short_description = 'OGG'
 414     has_ogg_file.boolean = True
 415
 416     def has_daisy_file(self):
 417         return self.has_media("daisy")
 418     has_daisy_file.short_description = 'DAISY'
 419     has_daisy_file.boolean = True
 420
 421     @property
 422     def media_daisy(self):
 423         return self.get_media('daisy')
 424
 425     def get_audiobooks(self):
 426         ogg_files = {}
 427         for m in self.media.filter(type='ogg').order_by().iterator():
 428             ogg_files[m.name] = m
 429
 430         audiobooks = []
 431         projects = set()
 432         for mp3 in self.media.filter(type='mp3').iterator():
 433             # ogg files are always from the same project
 434             meta = mp3.get_extra_info_json()
 435             project = meta.get('project')
 436             if not project:
 437                 # temporary fallback
 438                 project = 'CzytamySłuchając'
 439
 440             projects.add((project, meta.get('funded_by', '')))
 441
 442             media = {'mp3': mp3}
 443
 444             ogg = ogg_files.get(mp3.name)
 445             if ogg:
 446                 media['ogg'] = ogg
 447             audiobooks.append(media)
 448
 449         projects = sorted(projects)
 450         return audiobooks, projects
 451
 452     def wldocument(self, parse_dublincore=True, inherit=True):
 453         from catalogue.import_utils import ORMDocProvider
 454         from librarian.parser import WLDocument
 455
 456         if inherit and self.parent:
 457             meta_fallbacks = self.parent.cover_info()
 458         else:
 459             meta_fallbacks = None
 460
 461         return WLDocument.from_file(
 462             self.xml_file.path,
 463             provider=ORMDocProvider(self),
 464             parse_dublincore=parse_dublincore,
 465             meta_fallbacks=meta_fallbacks)
 466
 467     def wldocument2(self):
 468         from catalogue.import_utils import ORMDocProvider
 469         from librarian.document import WLDocument
 470         doc = WLDocument(
 471             self.xml_file.path,
 472             provider=ORMDocProvider(self)
 473         )
 474         doc.meta.update(self.cover_info())
 475         return doc
 476
 477
 478     @staticmethod
 479     def zip_format(format_):
 480         def pretty_file_name(book):
 481             return "%s/%s.%s" % (
 482                 book.get_extra_info_json()['author'],
 483                 book.slug,
 484                 format_)
 485
 486         field_name = "%s_file" % format_
 487         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 488         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 489         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 490
 491     def zip_audiobooks(self, format_):
 492         bm = BookMedia.objects.filter(book=self, type=format_)
 493         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 494         licenses = set()
 495         for m in bm:
 496             license = constants.LICENSES.get(
 497                 m.get_extra_info_json().get('license'), {}
 498             ).get('locative')
 499             if license:
 500                 licenses.add(license)
 501         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 502             'licenses': licenses,
 503         })
 504         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 505
 506     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 507         if not self.findable:
 508             return
 509         if index is None:
 510             from search.index import Index
 511             index = Index()
 512         try:
 513             index.index_book(self, book_info)
 514             if index_tags:
 515                 index.index_tags()
 516             if commit:
 517                 index.index.commit()
 518         except Exception as e:
 519             index.index.rollback()
 520             raise e
 521
    # NOTE: this will cause problems when combined with paid previews.
 523     def download_pictures(self, remote_gallery_url):
 524         gallery_path = self.gallery_path()
 525         # delete previous files, so we don't include old files in ebooks
 526         if os.path.isdir(gallery_path):
 527             for filename in os.listdir(gallery_path):
 528                 file_path = os.path.join(gallery_path, filename)
 529                 os.unlink(file_path)
 530         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 531         if ilustr_elements:
 532             makedirs(gallery_path)
 533             for ilustr in ilustr_elements:
 534                 ilustr_src = ilustr.get('src')
 535                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 536                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 537
 538     def load_abstract(self):
 539         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 540         if abstract is not None:
 541             self.abstract = transform_abstrakt(abstract)
 542         else:
 543             self.abstract = ''
 544
 545     @classmethod
 546     def from_xml_file(cls, xml_file, **kwargs):
 547         from django.core.files import File
 548         from librarian import dcparser
 549
 550         # use librarian to parse meta-data
 551         book_info = dcparser.parse(xml_file)
 552
 553         if not isinstance(xml_file, File):
 554             xml_file = File(open(xml_file))
 555
 556         try:
 557             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 558         finally:
 559             xml_file.close()
 560
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: metadata object; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: must be true to update an already existing book.
            dont_build: iterable of names to skip when queueing builds; it is
                merged with ``app_settings.DONT_BUILD``.
            search_index: queue a search-index task (only if the book is
                findable and ``settings.NO_SEARCH_INDEX`` is false).
            search_index_tags: passed to the index task as ``index_tags``.
            remote_gallery_url: when truthy, pictures are downloaded from it.
            days: for a newly created book, a truthy value marks it as a
                preview lasting that many days from today.
            findable: stored on the book.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so that a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Flag every metadata tag as being used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set, keeping the shelves ('set' tags) the book already had.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number follows their position in ``parts``.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # The HTML build is queued unconditionally; ``dont_build`` is not consulted for it.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Notify the children collected above (newly attached, affected by a
        # cover change, or disowned) so they can rebuild cover-dependent files.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 678
 679     def get_master(self):
 680         master_tags = [
 681             'opowiadanie',
 682             'powiesc',
 683             'dramat_wierszowany_l',
 684             'dramat_wierszowany_lp',
 685             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 686             'wywiad',
 687         ]
 688         from librarian.parser import WLDocument
 689         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 690         root = wld.edoc.getroot()
 691         for master in root.iter():
 692             if master.tag in master_tags:
 693                 return master
 694
 695     def update_references(self):
 696         from references.models import Entity, Reference
 697         master = self.get_master()
 698         found = set()
 699         for i, sec in enumerate(master):
 700             for ref in sec.findall('.//ref'):
 701                 href = ref.attrib.get('href', '')
 702                 if not href or href in found:
 703                     continue
 704                 found.add(href)
 705                 entity, created = Entity.objects.get_or_create(
 706                     uri=href
 707                 )
 708                 ref, created = Reference.objects.get_or_create(
 709                     book=self,
 710                     entity=entity
 711                 )
 712                 ref.first_section = 'sec%d' % (i + 1)
 713                 entity.populate()
 714                 entity.save()
 715         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 716
 717     @property
 718     def references(self):
 719         return self.reference_set.all().select_related('entity')
 720
 721     @classmethod
 722     @transaction.atomic
 723     def repopulate_ancestors(cls):
 724         """Fixes the ancestry cache."""
 725         # TODO: table names
 726         cursor = connection.cursor()
 727         if connection.vendor == 'postgres':
 728             cursor.execute("TRUNCATE catalogue_book_ancestor")
 729             cursor.execute("""
 730                 WITH RECURSIVE ancestry AS (
 731                     SELECT book.id, book.parent_id
 732                     FROM catalogue_book AS book
 733                     WHERE book.parent_id IS NOT NULL
 734                     UNION
 735                     SELECT ancestor.id, book.parent_id
 736                     FROM ancestry AS ancestor, catalogue_book AS book
 737                     WHERE ancestor.parent_id = book.id
 738                         AND book.parent_id IS NOT NULL
 739                     )
 740                 INSERT INTO catalogue_book_ancestor
 741                     (from_book_id, to_book_id)
 742                     SELECT id, parent_id
 743                     FROM ancestry
 744                     ORDER BY id;
 745                 """)
 746         else:
 747             cursor.execute("DELETE FROM catalogue_book_ancestor")
 748             for b in cls.objects.exclude(parent=None):
 749                 parent = b.parent
 750                 while parent is not None:
 751                     b.ancestor.add(parent)
 752                     parent = parent.parent
 753
 754     @property
 755     def ancestors(self):
 756         if self.parent:
 757             for anc in self.parent.ancestors:
 758                 yield anc
 759             yield self.parent
 760         else:
 761             return []
 762
 763     def clear_cache(self):
 764         clear_cached_renders(self.mini_box)
 765         clear_cached_renders(self.mini_box_nolink)
 766
 767     def cover_info(self, inherit=True):
 768         """Returns a dictionary to serve as fallback for BookInfo.
 769
 770         For now, the only thing inherited is the cover image.
 771         """
 772         need = False
 773         info = {}
 774         for field in ('cover_url', 'cover_by', 'cover_source'):
 775             val = self.get_extra_info_json().get(field)
 776             if val:
 777                 info[field] = val
 778             else:
 779                 need = True
 780         if inherit and need and self.parent is not None:
 781             parent_info = self.parent.cover_info()
 782             parent_info.update(info)
 783             info = parent_info
 784         return info
 785
 786     def related_themes(self):
 787         return Tag.objects.usage_for_queryset(
 788             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 789             counts=True).filter(category='theme')
 790
 791     def parent_cover_changed(self):
 792         """Called when parent book's cover image is changed."""
 793         if not self.cover_info(inherit=False):
 794             if 'cover' not in app_settings.DONT_BUILD:
 795                 self.cover.build_delay()
 796                 self.cover_thumb.build_delay()
 797                 self.cover_api_thumb.build_delay()
 798                 self.simple_cover.build_delay()
 799             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 800                 if format_ not in app_settings.DONT_BUILD:
 801                     getattr(self, '%s_file' % format_).build_delay()
 802             for child in self.children.all():
 803                 child.parent_cover_changed()
 804
 805     def other_versions(self):
 806         """Find other versions (i.e. in other languages) of the book."""
 807         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 808
 809     def parents(self):
 810         books = []
 811         parent = self.parent
 812         while parent is not None:
 813             books.insert(0, parent)
 814             parent = parent.parent
 815         return books
 816
 817     def pretty_title(self, html_links=False):
 818         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 819         books = self.parents() + [self]
 820         names.extend([(b.title, b.get_absolute_url()) for b in books])
 821
 822         if html_links:
 823             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 824         else:
 825             names = [tag[0] for tag in names]
 826         return ', '.join(names)
 827
 828     def publisher(self):
 829         publisher = self.get_extra_info_json()['publisher']
 830         if isinstance(publisher, str):
 831             return publisher
 832         elif isinstance(publisher, list):
 833             return ', '.join(publisher)
 834
 835     @classmethod
 836     def tagged_top_level(cls, tags):
 837         """ Returns top-level books tagged with `tags`.
 838
 839         It only returns those books which don't have ancestors which are
 840         also tagged with those tags.
 841
 842         """
 843         objects = cls.tagged.with_all(tags)
 844         return objects.filter(findable=True).exclude(ancestor__in=objects)
 845
 846     @classmethod
 847     def book_list(cls, book_filter=None):
 848         """Generates a hierarchical listing of all books.
 849
 850         Books are optionally filtered with a test function.
 851
 852         """
 853
 854         books_by_parent = {}
 855         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 856         if book_filter:
 857             books = books.filter(book_filter).distinct()
 858
 859             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 860             for book in books.iterator():
 861                 parent = book.parent_id
 862                 if parent not in book_ids:
 863                     parent = None
 864                 books_by_parent.setdefault(parent, []).append(book)
 865         else:
 866             for book in books.iterator():
 867                 books_by_parent.setdefault(book.parent_id, []).append(book)
 868
 869         orphans = []
 870         books_by_author = OrderedDict()
 871         for tag in Tag.objects.filter(category='author').iterator():
 872             books_by_author[tag] = []
 873
 874         for book in books_by_parent.get(None, ()):
 875             authors = list(book.authors().only('pk'))
 876             if authors:
 877                 for author in authors:
 878                     books_by_author[author].append(book)
 879             else:
 880                 orphans.append(book)
 881
 882         return books_by_author, orphans, books_by_parent
 883
    # Maps the audience codes found under 'audiences' in the book's extra info
    # to (sort rank, Polish display name) pairs; used by audiences_pl().
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 894
 895     def audiences_pl(self):
 896         audiences = self.get_extra_info_json().get('audiences', [])
 897         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 898         return [a[1] for a in audiences]
 899
 900     def stage_note(self):
 901         stage = self.get_extra_info_json().get('stage')
 902         if stage and stage < '0.4':
 903             return (_('This work needs modernisation'),
 904                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 905         else:
 906             return None, None
 907
 908     def choose_fragments(self, number):
 909         fragments = self.fragments.order_by()
 910         fragments_count = fragments.count()
 911         if not fragments_count and self.children.exists():
 912             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 913             fragments_count = fragments.count()
 914         if fragments_count:
 915             offset = randint(0, fragments_count - number)
 916             return fragments[offset : offset + number]
 917         elif self.parent:
 918             return self.parent.choose_fragments(number)
 919         else:
 920             return []
 921
 922     def choose_fragment(self):
 923         fragments = self.choose_fragments(1)
 924         if fragments:
 925             return fragments[0]
 926         else:
 927             return None
 928
 929     def fragment_data(self):
 930         fragment = self.choose_fragment()
 931         if fragment:
 932             return {
 933                 'title': fragment.book.pretty_title(),
 934                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 935             }
 936         else:
 937             return None
 938
 939     def update_popularity(self):
 940         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 941         try:
 942             pop = self.popularity
 943             pop.count = count
 944             pop.save()
 945         except BookPopularity.DoesNotExist:
 946             BookPopularity.objects.create(book=self, count=count)
 947
 948     def ridero_link(self):
 949         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 950
 951     def like(self, user):
 952         from social.utils import likes, get_set, set_sets
 953         if not likes(user, self):
 954             tag = get_set(user, '')
 955             set_sets(user, self, [tag])
 956
 957     def unlike(self, user):
 958         from social.utils import likes, set_sets
 959         if likes(user, self):
 960             set_sets(user, self, [])
 961
 962     def full_sort_key(self):
 963         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
 964
 965     def cover_color(self):
 966         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
 967
 968     @cached_render('catalogue/book_mini_box.html')
 969     def mini_box(self):
 970         return {
 971             'book': self
 972         }
 973
 974     @cached_render('catalogue/book_mini_box.html')
 975     def mini_box_nolink(self):
 976         return {
 977             'book': self,
 978             'no_link': True,
 979         }
 980
 981 def add_file_fields():
 982     for format_ in Book.formats:
 983         field_name = "%s_file" % format_
 984         # This weird globals() assignment makes Django migrations comfortable.
 985         _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
 986         _upload_to.__name__ = '_%s_upload_to' % format_
 987         globals()[_upload_to.__name__] = _upload_to
 988
 989         EbookField(
 990             format_, _("%s file" % format_.upper()),
 991             upload_to=_upload_to,
 992             storage=bofh_storage,
 993             max_length=255,
 994             blank=True,
 995             default=''
 996         ).contribute_to_class(Book, field_name)
 997         if format_ != 'xml':
 998             models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')
 999
1000
# Executed when the module is imported: adds the per-format file fields to Book.
add_file_fields()
1002
1003
1004 class BookPopularity(models.Model):
1005     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1006     count = models.IntegerField(default=0, db_index=True)