src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import gettext_lazy as _, get_language
  19 from fnpdjango.storage import BofhFileSystemStorage
  20 from lxml import html
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue import fields
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  31
# Storage instance shared by the Book file fields declared with storage=bofh_storage.
bofh_storage = BofhFileSystemStorage()
  33
  34
  35 class Book(models.Model):
  36     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed in save(); they drive Meta.ordering.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared between versions of the same book; see other_versions().
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children (used for ordering siblings).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; decode it with get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Filled in by save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Format names that map onto "<format>_file" fields (see has_media/get_media).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 100
 101     class AlreadyExists(Exception):
 102         pass
 103
 104     class Meta:
 105         ordering = ('sort_key_author', 'sort_key')
 106         verbose_name = _('book')
 107         verbose_name_plural = _('books')
 108         app_label = 'catalogue'
 109
 110     def __str__(self):
 111         return self.title
 112
 113     def get_extra_info_json(self):
 114         return json.loads(self.extra_info or '{}')
 115
 116     def get_initial(self):
 117         try:
 118             return re.search(r'\w', self.title, re.U).group(0)
 119         except AttributeError:
 120             return ''
 121
 122     def authors(self):
 123         return self.tags.filter(category='author')
 124
 125     def epochs(self):
 126         return self.tags.filter(category='epoch')
 127
 128     def genres(self):
 129         return self.tags.filter(category='genre')
 130
 131     def kinds(self):
 132         return self.tags.filter(category='kind')
 133
 134     def tag_unicode(self, category):
 135         relations = prefetched_relations(self, category)
 136         if relations:
 137             return ', '.join(rel.tag.name for rel in relations)
 138         else:
 139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 140
 141     def tags_by_category(self):
 142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 143
 144     def author_unicode(self):
 145         return self.cached_author
 146
 147     def kind_unicode(self):
 148         return self.tag_unicode('kind')
 149
 150     def epoch_unicode(self):
 151         return self.tag_unicode('epoch')
 152
 153     def genre_unicode(self):
 154         return self.tag_unicode('genre')
 155
 156     def translators(self):
 157         translators = self.get_extra_info_json().get('translators') or []
 158         return [
 159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 160         ]
 161
 162     def translator(self):
 163         translators = self.get_extra_info_json().get('translators')
 164         if not translators:
 165             return None
 166         if len(translators) > 3:
 167             translators = translators[:2]
 168             others = ' i inni'
 169         else:
 170             others = ''
 171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 172
 173     def cover_source(self):
 174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 175
 176     @property
 177     def isbn_pdf(self):
 178         return self.get_extra_info_json().get('isbn_pdf')
 179
 180     @property
 181     def isbn_epub(self):
 182         return self.get_extra_info_json().get('isbn_epub')
 183
 184     @property
 185     def isbn_mobi(self):
 186         return self.get_extra_info_json().get('isbn_mobi')
 187
 188     def is_accessible_to(self, user):
 189         if not self.preview:
 190             return True
 191         if not user.is_authenticated:
 192             return False
 193         Membership = apps.get_model('club', 'Membership')
 194         if Membership.is_active_for(user):
 195             return True
 196         Funding = apps.get_model('funding', 'Funding')
 197         if Funding.objects.filter(user=user, offer__book=self):
 198             return True
 199         return False
 200
 201     def save(self, force_insert=False, force_update=False, **kwargs):
 202         from sortify import sortify
 203
 204         self.sort_key = sortify(self.title)[:120]
 205         self.title = str(self.title)  # ???
 206
 207         try:
 208             author = self.authors().first().sort_key
 209         except AttributeError:
 210             author = ''
 211         self.sort_key_author = author
 212
 213         self.cached_author = self.tag_unicode('author')
 214         self.has_audience = 'audience' in self.get_extra_info_json()
 215
 216         if self.preview and not self.preview_key:
 217             self.preview_key = get_random_hash(self.slug)[:32]
 218
 219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 220
 221         return ret
 222
 223     def get_absolute_url(self):
 224         return reverse('book_detail', args=[self.slug])
 225
 226     def gallery_path(self):
 227         return gallery_path(self.slug)
 228
 229     def gallery_url(self):
 230         return gallery_url(self.slug)
 231
 232     def get_first_text(self):
 233         if self.html_file:
 234             return self
 235         child = self.children.all().order_by('parent_number').first()
 236         if child is not None:
 237             return child.get_first_text()
 238
 239     def get_last_text(self):
 240         if self.html_file:
 241             return self
 242         child = self.children.all().order_by('parent_number').last()
 243         if child is not None:
 244             return child.get_last_text()
 245
 246     def get_prev_text(self):
 247         if not self.parent:
 248             return None
 249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 250         if sibling is not None:
 251             return sibling.get_last_text()
 252
 253         if self.parent.html_file:
 254             return self.parent
 255
 256         return self.parent.get_prev_text()
 257
 258     def get_next_text(self, inside=True):
 259         if inside:
 260             child = self.children.order_by('parent_number').first()
 261             if child is not None:
 262                 return child.get_first_text()
 263
 264         if not self.parent:
 265             return None
 266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 267         if sibling is not None:
 268             return sibling.get_first_text()
 269         return self.parent.get_next_text(inside=False)
 270
 271     def get_child_audiobook(self):
 272         BookMedia = apps.get_model('catalogue', 'BookMedia')
 273         if not BookMedia.objects.filter(book__ancestor=self).exists():
 274             return None
 275         for child in self.children.order_by('parent_number').all():
 276             if child.has_mp3_file():
 277                 return child
 278             child_sub = child.get_child_audiobook()
 279             if child_sub is not None:
 280                 return child_sub
 281
 282     def get_siblings(self):
 283         if not self.parent:
 284             return []
 285         return self.parent.children.all().order_by('parent_number')
 286
 287     def get_children(self):
 288         return self.children.all().order_by('parent_number')
 289
 290     @property
 291     def name(self):
 292         return self.title
 293
 294     def language_code(self):
 295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 296
 297     def language_name(self):
 298         return dict(settings.LANGUAGES).get(self.language_code(), "")
 299
 300     def is_foreign(self):
 301         return self.language_code() != settings.LANGUAGE_CODE
 302
 303     def set_audio_length(self):
 304         length = self.get_audio_length()
 305         if length > 0:
 306             self.audio_length = self.format_audio_length(length)
 307             self.save()
 308
 309     @staticmethod
 310     def format_audio_length(seconds):
 311         """
 312         >>> Book.format_audio_length(1)
 313         '0:01'
 314         >>> Book.format_audio_length(3661)
 315         '1:01:01'
 316         """
 317         if seconds < 60*60:
 318             minutes = seconds // 60
 319             seconds = seconds % 60
 320             return '%d:%02d' % (minutes, seconds)
 321         else:
 322             hours = seconds // 3600
 323             minutes = seconds % 3600 // 60
 324             seconds = seconds % 60
 325             return '%d:%02d:%02d' % (hours, minutes, seconds)
 326
 327     def get_audio_length(self):
 328         total = 0
 329         for media in self.get_mp3() or ():
 330             total += app_settings.GET_MP3_LENGTH(media.file.path)
 331         return int(total)
 332
 333     def has_media(self, type_):
 334         if type_ in Book.formats:
 335             return bool(getattr(self, "%s_file" % type_))
 336         else:
 337             return self.media.filter(type=type_).exists()
 338
 339     def has_audio(self):
 340         return self.has_media('mp3')
 341
 342     def get_media(self, type_):
 343         if self.has_media(type_):
 344             if type_ in Book.formats:
 345                 return getattr(self, "%s_file" % type_)
 346             else:
 347                 return self.media.filter(type=type_)
 348         else:
 349             return None
 350
 351     def get_mp3(self):
 352         return self.get_media("mp3")
 353
 354     def get_odt(self):
 355         return self.get_media("odt")
 356
 357     def get_ogg(self):
 358         return self.get_media("ogg")
 359
 360     def get_daisy(self):
 361         return self.get_media("daisy")
 362
 363     def get_audio_epub(self):
 364         return self.get_media("audio.epub")
 365
 366     def media_url(self, format_):
 367         media = self.get_media(format_)
 368         if media:
 369             if self.preview:
 370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 371             else:
 372                 return media.url
 373         else:
 374             return None
 375
 376     def html_url(self):
 377         return self.media_url('html')
 378
 379     def pdf_url(self):
 380         return self.media_url('pdf')
 381
 382     def epub_url(self):
 383         return self.media_url('epub')
 384
 385     def mobi_url(self):
 386         return self.media_url('mobi')
 387
 388     def txt_url(self):
 389         return self.media_url('txt')
 390
 391     def fb2_url(self):
 392         return self.media_url('fb2')
 393
 394     def xml_url(self):
 395         return self.media_url('xml')
 396
 397     def has_description(self):
 398         return len(self.description) > 0
 399     has_description.short_description = _('description')
 400     has_description.boolean = True
 401
 402     def has_mp3_file(self):
 403         return self.has_media("mp3")
 404     has_mp3_file.short_description = 'MP3'
 405     has_mp3_file.boolean = True
 406
 407     def has_ogg_file(self):
 408         return self.has_media("ogg")
 409     has_ogg_file.short_description = 'OGG'
 410     has_ogg_file.boolean = True
 411
 412     def has_daisy_file(self):
 413         return self.has_media("daisy")
 414     has_daisy_file.short_description = 'DAISY'
 415     has_daisy_file.boolean = True
 416
 417     def has_audio_epub_file(self):
 418         return self.has_media("audio.epub")
 419
 420     @property
 421     def media_daisy(self):
 422         return self.get_media('daisy')
 423
 424     @property
 425     def media_audio_epub(self):
 426         return self.get_media('audio.epub')
 427
 428     def get_audiobooks(self):
 429         ogg_files = {}
 430         for m in self.media.filter(type='ogg').order_by().iterator():
 431             ogg_files[m.name] = m
 432
 433         audiobooks = []
 434         projects = set()
 435         total_duration = 0
 436         for mp3 in self.media.filter(type='mp3').iterator():
 437             # ogg files are always from the same project
 438             meta = mp3.get_extra_info_json()
 439             project = meta.get('project')
 440             if not project:
 441                 # temporary fallback
 442                 project = 'CzytamySłuchając'
 443
 444             projects.add((project, meta.get('funded_by', '')))
 445             total_duration += mp3.duration or 0
 446
 447             media = {'mp3': mp3}
 448
 449             ogg = ogg_files.get(mp3.name)
 450             if ogg:
 451                 media['ogg'] = ogg
 452             audiobooks.append(media)
 453
 454         projects = sorted(projects)
 455         total_duration = '%d:%02d' % (
 456             total_duration // 60,
 457             total_duration % 60
 458         )
 459         return audiobooks, projects, total_duration
 460
 461     def wldocument(self, parse_dublincore=True, inherit=True):
 462         from catalogue.import_utils import ORMDocProvider
 463         from librarian.parser import WLDocument
 464
 465         if inherit and self.parent:
 466             meta_fallbacks = self.parent.cover_info()
 467         else:
 468             meta_fallbacks = None
 469
 470         return WLDocument.from_file(
 471             self.xml_file.path,
 472             provider=ORMDocProvider(self),
 473             parse_dublincore=parse_dublincore,
 474             meta_fallbacks=meta_fallbacks)
 475
 476     def wldocument2(self):
 477         from catalogue.import_utils import ORMDocProvider
 478         from librarian.document import WLDocument
 479         doc = WLDocument(
 480             self.xml_file.path,
 481             provider=ORMDocProvider(self)
 482         )
 483         doc.meta.update(self.cover_info())
 484         return doc
 485
 486
 487     @staticmethod
 488     def zip_format(format_):
 489         def pretty_file_name(book):
 490             return "%s/%s.%s" % (
 491                 book.get_extra_info_json()['author'],
 492                 book.slug,
 493                 format_)
 494
 495         field_name = "%s_file" % format_
 496         field = getattr(Book, field_name)
 497         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 498         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 499         return create_zip(paths, field.ZIP)
 500
 501     def zip_audiobooks(self, format_):
 502         bm = BookMedia.objects.filter(book=self, type=format_)
 503         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 504         licenses = set()
 505         for m in bm:
 506             license = constants.LICENSES.get(
 507                 m.get_extra_info_json().get('license'), {}
 508             ).get('locative')
 509             if license:
 510                 licenses.add(license)
 511         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 512             'licenses': licenses,
 513             'meta': self.wldocument2().meta,
 514         })
 515         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 516
 517     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 518         if not self.findable:
 519             return
 520         if index is None:
 521             from search.index import Index
 522             index = Index()
 523         try:
 524             index.index_book(self, book_info)
 525             if index_tags:
 526                 index.index_tags()
 527             if commit:
 528                 index.index.commit()
 529         except Exception as e:
 530             index.index.rollback()
 531             raise e
 532
 533     # will make problems in conjunction with paid previews
 534     def download_pictures(self, remote_gallery_url):
 535         # This is only needed for legacy relative image paths.
 536         gallery_path = self.gallery_path()
 537         # delete previous files, so we don't include old files in ebooks
 538         if os.path.isdir(gallery_path):
 539             for filename in os.listdir(gallery_path):
 540                 file_path = os.path.join(gallery_path, filename)
 541                 os.unlink(file_path)
 542         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 543         if ilustr_elements:
 544             makedirs(gallery_path)
 545             for ilustr in ilustr_elements:
 546                 ilustr_src = ilustr.get('src')
 547                 if '/' in ilustr_src:
 548                     continue
 549                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 550                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 551
 552     def load_abstract(self):
 553         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 554         if abstract is not None:
 555             self.abstract = transform_abstrakt(abstract)
 556         else:
 557             self.abstract = ''
 558
 559     def load_toc(self):
 560         self.toc = ''
 561         if self.html_file:
 562             parser = html.HTMLParser(encoding='utf-8')
 563             tree = html.parse(self.html_file.path, parser=parser)
 564             toc = tree.find('//div[@id="toc"]/ol')
 565             if toc is None or not len(toc):
 566                 return
 567             html_link = reverse('book_text', args=[self.slug])
 568             for a in toc.findall('.//a'):
 569                 a.attrib['href'] = html_link + a.attrib['href']
 570             self.toc = html.tostring(toc, encoding='unicode')
 571             # div#toc
 572
 573     @classmethod
 574     def from_xml_file(cls, xml_file, **kwargs):
 575         from django.core.files import File
 576         from librarian import dcparser
 577
 578         # use librarian to parse meta-data
 579         book_info = dcparser.parse(xml_file)
 580
 581         if not isinstance(xml_file, File):
 582             xml_file = File(open(xml_file))
 583
 584         try:
 585             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 586         finally:
 587             xml_file.close()
 588
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its ``url``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of outputs to skip ('cover' or format names);
                merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the old cover data to detect a cover change below.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used here is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from metadata plus the shelves ('set' tags) kept from before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in metadata order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let affected children rebuild whatever depends on an inherited cover.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 710
 711     def get_master(self):
 712         master_tags = [
 713             'opowiadanie',
 714             'powiesc',
 715             'dramat_wierszowany_l',
 716             'dramat_wierszowany_lp',
 717             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 718             'wywiad',
 719         ]
 720         from librarian.parser import WLDocument
 721         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 722         root = wld.edoc.getroot()
 723         for master in root.iter():
 724             if master.tag in master_tags:
 725                 return master
 726
 727     def update_references(self):
 728         from references.models import Entity, Reference
 729         master = self.get_master()
 730         if master is None:
 731             master = []
 732         found = set()
 733         for i, sec in enumerate(master):
 734             for ref in sec.findall('.//ref'):
 735                 href = ref.attrib.get('href', '')
 736                 if not href or href in found:
 737                     continue
 738                 found.add(href)
 739                 entity, created = Entity.objects.get_or_create(
 740                     uri=href
 741                 )
 742                 ref, created = Reference.objects.get_or_create(
 743                     book=self,
 744                     entity=entity
 745                 )
 746                 ref.first_section = 'sec%d' % (i + 1)
 747                 entity.populate()
 748                 entity.save()
 749         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 750
 751     @property
 752     def references(self):
 753         return self.reference_set.all().select_related('entity')
 754
 755     @classmethod
 756     @transaction.atomic
 757     def repopulate_ancestors(cls):
 758         """Fixes the ancestry cache."""
 759         # TODO: table names
 760         cursor = connection.cursor()
 761         if connection.vendor == 'postgres':
 762             cursor.execute("TRUNCATE catalogue_book_ancestor")
 763             cursor.execute("""
 764                 WITH RECURSIVE ancestry AS (
 765                     SELECT book.id, book.parent_id
 766                     FROM catalogue_book AS book
 767                     WHERE book.parent_id IS NOT NULL
 768                     UNION
 769                     SELECT ancestor.id, book.parent_id
 770                     FROM ancestry AS ancestor, catalogue_book AS book
 771                     WHERE ancestor.parent_id = book.id
 772                         AND book.parent_id IS NOT NULL
 773                     )
 774                 INSERT INTO catalogue_book_ancestor
 775                     (from_book_id, to_book_id)
 776                     SELECT id, parent_id
 777                     FROM ancestry
 778                     ORDER BY id;
 779                 """)
 780         else:
 781             cursor.execute("DELETE FROM catalogue_book_ancestor")
 782             for b in cls.objects.exclude(parent=None):
 783                 parent = b.parent
 784                 while parent is not None:
 785                     b.ancestor.add(parent)
 786                     parent = parent.parent
 787
 788     @property
 789     def ancestors(self):
 790         if self.parent:
 791             for anc in self.parent.ancestors:
 792                 yield anc
 793             yield self.parent
 794         else:
 795             return []
 796
 797     def clear_cache(self):
 798         clear_cached_renders(self.mini_box)
 799         clear_cached_renders(self.mini_box_nolink)
 800
 801     def cover_info(self, inherit=True):
 802         """Returns a dictionary to serve as fallback for BookInfo.
 803
 804         For now, the only thing inherited is the cover image.
 805         """
 806         need = False
 807         info = {}
 808         for field in ('cover_url', 'cover_by', 'cover_source'):
 809             val = self.get_extra_info_json().get(field)
 810             if val:
 811                 info[field] = val
 812             else:
 813                 need = True
 814         if inherit and need and self.parent is not None:
 815             parent_info = self.parent.cover_info()
 816             parent_info.update(info)
 817             info = parent_info
 818         return info
 819
 820     def related_themes(self):
 821         return Tag.objects.usage_for_queryset(
 822             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 823             counts=True).filter(category='theme').order_by('-count')
 824
 825     def parent_cover_changed(self):
 826         """Called when parent book's cover image is changed."""
 827         if not self.cover_info(inherit=False):
 828             if 'cover' not in app_settings.DONT_BUILD:
 829                 self.cover.build_delay()
 830                 self.cover_clean.build_delay()
 831                 self.cover_thumb.build_delay()
 832                 self.cover_api_thumb.build_delay()
 833                 self.simple_cover.build_delay()
 834                 self.cover_ebookpoint.build_delay()
 835             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 836                 if format_ not in app_settings.DONT_BUILD:
 837                     getattr(self, '%s_file' % format_).build_delay()
 838             for child in self.children.all():
 839                 child.parent_cover_changed()
 840
 841     def other_versions(self):
 842         """Find other versions (i.e. in other languages) of the book."""
 843         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 844
 845     def parents(self):
 846         books = []
 847         parent = self.parent
 848         while parent is not None:
 849             books.insert(0, parent)
 850             parent = parent.parent
 851         return books
 852
 853     def pretty_title(self, html_links=False):
 854         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 855         books = self.parents() + [self]
 856         names.extend([(b.title, b.get_absolute_url()) for b in books])
 857
 858         if html_links:
 859             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 860         else:
 861             names = [tag[0] for tag in names]
 862         return ', '.join(names)
 863
 864     def publisher(self):
 865         publisher = self.get_extra_info_json()['publisher']
 866         if isinstance(publisher, str):
 867             return publisher
 868         elif isinstance(publisher, list):
 869             return ', '.join(publisher)
 870
 871     @classmethod
 872     def tagged_top_level(cls, tags):
 873         """ Returns top-level books tagged with `tags`.
 874
 875         It only returns those books which don't have ancestors which are
 876         also tagged with those tags.
 877
 878         """
 879         objects = cls.tagged.with_all(tags)
 880         return objects.filter(findable=True).exclude(ancestor__in=objects)
 881
 882     @classmethod
 883     def book_list(cls, book_filter=None):
 884         """Generates a hierarchical listing of all books.
 885
 886         Books are optionally filtered with a test function.
 887
 888         """
 889
 890         books_by_parent = {}
 891         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 892         if book_filter:
 893             books = books.filter(book_filter).distinct()
 894
 895             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 896             for book in books.iterator():
 897                 parent = book.parent_id
 898                 if parent not in book_ids:
 899                     parent = None
 900                 books_by_parent.setdefault(parent, []).append(book)
 901         else:
 902             for book in books.iterator():
 903                 books_by_parent.setdefault(book.parent_id, []).append(book)
 904
 905         orphans = []
 906         books_by_author = OrderedDict()
 907         for tag in Tag.objects.filter(category='author').iterator():
 908             books_by_author[tag] = []
 909
 910         for book in books_by_parent.get(None, ()):
 911             authors = list(book.authors().only('pk'))
 912             if authors:
 913                 for author in authors:
 914                     books_by_author[author].append(book)
 915             else:
 916                 orphans.append(book)
 917
 918         return books_by_author, orphans, books_by_parent
 919
    # Maps the audience codes found in a book's extra info to a
    # (sort rank, Polish display label) pair; consumed by audiences_pl().
    # Labels: "szkoła podstawowa" = primary school, "gimnazjum" = middle
    # school, "liceum" = high school.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 930
 931     def audiences_pl(self):
 932         audiences = self.get_extra_info_json().get('audiences', [])
 933         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 934         return [a[1] for a in audiences]
 935
 936     def stage_note(self):
 937         stage = self.get_extra_info_json().get('stage')
 938         if stage and stage < '0.4':
 939             return (_('This work needs modernisation'),
 940                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 941         else:
 942             return None, None
 943
 944     def choose_fragments(self, number):
 945         fragments = self.fragments.order_by()
 946         fragments_count = fragments.count()
 947         if not fragments_count and self.children.exists():
 948             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 949             fragments_count = fragments.count()
 950         if fragments_count:
 951             if fragments_count > number:
 952                 offset = randint(0, fragments_count - number)
 953             else:
 954                 offset = 0
 955             return fragments[offset : offset + number]
 956         elif self.parent:
 957             return self.parent.choose_fragments(number)
 958         else:
 959             return []
 960
 961     def choose_fragment(self):
 962         fragments = self.choose_fragments(1)
 963         if fragments:
 964             return fragments[0]
 965         else:
 966             return None
 967
 968     def fragment_data(self):
 969         fragment = self.choose_fragment()
 970         if fragment:
 971             return {
 972                 'title': fragment.book.pretty_title(),
 973                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 974             }
 975         else:
 976             return None
 977
 978     def update_popularity(self):
 979         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 980         try:
 981             pop = self.popularity
 982             pop.count = count
 983             pop.save()
 984         except BookPopularity.DoesNotExist:
 985             BookPopularity.objects.create(book=self, count=count)
 986
 987     def ridero_link(self):
 988         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 989
 990     def like(self, user):
 991         from social.utils import likes, get_set, set_sets
 992         if not likes(user, self):
 993             tag = get_set(user, '')
 994             set_sets(user, self, [tag])
 995
 996     def unlike(self, user):
 997         from social.utils import likes, set_sets
 998         if likes(user, self):
 999             set_sets(user, self, [])
1000
1001     def full_sort_key(self):
1002         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1003
1004     def cover_color(self):
1005         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1006
1007     @cached_render('catalogue/book_mini_box.html')
1008     def mini_box(self):
1009         return {
1010             'book': self
1011         }
1012
1013     @cached_render('catalogue/book_mini_box.html')
1014     def mini_box_nolink(self):
1015         return {
1016             'book': self,
1017             'no_link': True,
1018         }
1019
1020
class BookPopularity(models.Model):
    """Popularity counter of a book, reachable as ``book.popularity``.

    Book.update_popularity() creates or refreshes the row.
    """
    # At most one row per book; deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed popularity value; defaults to 0.
    count = models.IntegerField(default=0, db_index=True)