src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import gettext_lazy as _, get_language
  19 from fnpdjango.storage import BofhFileSystemStorage
  20 from lxml import html
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue import fields
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  31
# Shared storage instance passed as ``storage=`` to the generated-file fields of Book.
bofh_storage = BofhFileSystemStorage()
  33
  34
  35 class Book(models.Model):
  36     """Represents a book imported from WL-XML."""
    # --- Basic metadata ---
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # common_slug is what other_versions() matches on.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children (used for ordering).
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON text; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Filled in by save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Format names that has_media()/get_media() resolve to a ``<format>_file`` field;
    # any other type is looked up in the related ``media`` set instead.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values, refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with ``instance=book``.
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 100
 101     class AlreadyExists(Exception):
 102         pass
 103
 104     class Meta:
 105         ordering = ('sort_key_author', 'sort_key')
 106         verbose_name = _('book')
 107         verbose_name_plural = _('books')
 108         app_label = 'catalogue'
 109
 110     def __str__(self):
 111         return self.title
 112
 113     def get_extra_info_json(self):
 114         return json.loads(self.extra_info or '{}')
 115
 116     def get_initial(self):
 117         try:
 118             return re.search(r'\w', self.title, re.U).group(0)
 119         except AttributeError:
 120             return ''
 121
 122     def authors(self):
 123         return self.tags.filter(category='author')
 124
 125     def epochs(self):
 126         return self.tags.filter(category='epoch')
 127
 128     def genres(self):
 129         return self.tags.filter(category='genre')
 130
 131     def kinds(self):
 132         return self.tags.filter(category='kind')
 133
 134     def tag_unicode(self, category):
 135         relations = prefetched_relations(self, category)
 136         if relations:
 137             return ', '.join(rel.tag.name for rel in relations)
 138         else:
 139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 140
 141     def tags_by_category(self):
 142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 143
 144     def author_unicode(self):
 145         return self.cached_author
 146
 147     def kind_unicode(self):
 148         return self.tag_unicode('kind')
 149
 150     def epoch_unicode(self):
 151         return self.tag_unicode('epoch')
 152
 153     def genre_unicode(self):
 154         return self.tag_unicode('genre')
 155
 156     def translators(self):
 157         translators = self.get_extra_info_json().get('translators') or []
 158         return [
 159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 160         ]
 161
 162     def translator(self):
 163         translators = self.get_extra_info_json().get('translators')
 164         if not translators:
 165             return None
 166         if len(translators) > 3:
 167             translators = translators[:2]
 168             others = ' i inni'
 169         else:
 170             others = ''
 171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 172
 173     def cover_source(self):
 174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 175
 176     @property
 177     def isbn_pdf(self):
 178         return self.get_extra_info_json().get('isbn_pdf')
 179
 180     @property
 181     def isbn_epub(self):
 182         return self.get_extra_info_json().get('isbn_epub')
 183
 184     @property
 185     def isbn_mobi(self):
 186         return self.get_extra_info_json().get('isbn_mobi')
 187
 188     def is_accessible_to(self, user):
 189         if not self.preview:
 190             return True
 191         if not user.is_authenticated:
 192             return False
 193         Membership = apps.get_model('club', 'Membership')
 194         if Membership.is_active_for(user):
 195             return True
 196         Funding = apps.get_model('funding', 'Funding')
 197         if Funding.objects.filter(user=user, offer__book=self):
 198             return True
 199         return False
 200
 201     def save(self, force_insert=False, force_update=False, **kwargs):
 202         from sortify import sortify
 203
 204         self.sort_key = sortify(self.title)[:120]
 205         self.title = str(self.title)  # ???
 206
 207         try:
 208             author = self.authors().first().sort_key
 209         except AttributeError:
 210             author = ''
 211         self.sort_key_author = author
 212
 213         self.cached_author = self.tag_unicode('author')
 214         self.has_audience = 'audience' in self.get_extra_info_json()
 215
 216         if self.preview and not self.preview_key:
 217             self.preview_key = get_random_hash(self.slug)[:32]
 218
 219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 220
 221         return ret
 222
 223     def get_absolute_url(self):
 224         return reverse('book_detail', args=[self.slug])
 225
 226     def gallery_path(self):
 227         return gallery_path(self.slug)
 228
 229     def gallery_url(self):
 230         return gallery_url(self.slug)
 231
 232     def get_first_text(self):
 233         if self.html_file:
 234             return self
 235         child = self.children.all().order_by('parent_number').first()
 236         if child is not None:
 237             return child.get_first_text()
 238
 239     def get_last_text(self):
 240         if self.html_file:
 241             return self
 242         child = self.children.all().order_by('parent_number').last()
 243         if child is not None:
 244             return child.get_last_text()
 245
 246     def get_prev_text(self):
 247         if not self.parent:
 248             return None
 249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 250         if sibling is not None:
 251             return sibling.get_last_text()
 252
 253         if self.parent.html_file:
 254             return self.parent
 255
 256         return self.parent.get_prev_text()
 257
 258     def get_next_text(self, inside=True):
 259         if inside:
 260             child = self.children.order_by('parent_number').first()
 261             if child is not None:
 262                 return child.get_first_text()
 263
 264         if not self.parent:
 265             return None
 266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 267         if sibling is not None:
 268             return sibling.get_first_text()
 269         return self.parent.get_next_text(inside=False)
 270
 271     def get_child_audiobook(self):
 272         BookMedia = apps.get_model('catalogue', 'BookMedia')
 273         if not BookMedia.objects.filter(book__ancestor=self).exists():
 274             return None
 275         for child in self.children.order_by('parent_number').all():
 276             if child.has_mp3_file():
 277                 return child
 278             child_sub = child.get_child_audiobook()
 279             if child_sub is not None:
 280                 return child_sub
 281
 282     def get_siblings(self):
 283         if not self.parent:
 284             return []
 285         return self.parent.children.all().order_by('parent_number')
 286
 287     def get_children(self):
 288         return self.children.all().order_by('parent_number')
 289
 290     @property
 291     def name(self):
 292         return self.title
 293
 294     def language_code(self):
 295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 296
 297     def language_name(self):
 298         return dict(settings.LANGUAGES).get(self.language_code(), "")
 299
 300     def is_foreign(self):
 301         return self.language_code() != settings.LANGUAGE_CODE
 302
 303     def set_audio_length(self):
 304         length = self.get_audio_length()
 305         if length > 0:
 306             self.audio_length = self.format_audio_length(length)
 307             self.save()
 308
 309     @staticmethod
 310     def format_audio_length(seconds):
 311         """
 312         >>> Book.format_audio_length(1)
 313         '0:01'
 314         >>> Book.format_audio_length(3661)
 315         '1:01:01'
 316         """
 317         if seconds < 60*60:
 318             minutes = seconds // 60
 319             seconds = seconds % 60
 320             return '%d:%02d' % (minutes, seconds)
 321         else:
 322             hours = seconds // 3600
 323             minutes = seconds % 3600 // 60
 324             seconds = seconds % 60
 325             return '%d:%02d:%02d' % (hours, minutes, seconds)
 326
 327     def get_audio_length(self):
 328         total = 0
 329         for media in self.get_mp3() or ():
 330             total += app_settings.GET_MP3_LENGTH(media.file.path)
 331         return int(total)
 332
 333     def has_media(self, type_):
 334         if type_ in Book.formats:
 335             return bool(getattr(self, "%s_file" % type_))
 336         else:
 337             return self.media.filter(type=type_).exists()
 338
 339     def has_audio(self):
 340         return self.has_media('mp3')
 341
 342     def get_media(self, type_):
 343         if self.has_media(type_):
 344             if type_ in Book.formats:
 345                 return getattr(self, "%s_file" % type_)
 346             else:
 347                 return self.media.filter(type=type_)
 348         else:
 349             return None
 350
 351     def get_mp3(self):
 352         return self.get_media("mp3")
 353
 354     def get_odt(self):
 355         return self.get_media("odt")
 356
 357     def get_ogg(self):
 358         return self.get_media("ogg")
 359
 360     def get_daisy(self):
 361         return self.get_media("daisy")
 362
 363     def get_audio_epub(self):
 364         return self.get_media("audio.epub")
 365
 366     def media_url(self, format_):
 367         media = self.get_media(format_)
 368         if media:
 369             if self.preview:
 370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 371             else:
 372                 return media.url
 373         else:
 374             return None
 375
 376     def html_url(self):
 377         return self.media_url('html')
 378
 379     def pdf_url(self):
 380         return self.media_url('pdf')
 381
 382     def epub_url(self):
 383         return self.media_url('epub')
 384
 385     def mobi_url(self):
 386         return self.media_url('mobi')
 387
 388     def txt_url(self):
 389         return self.media_url('txt')
 390
 391     def fb2_url(self):
 392         return self.media_url('fb2')
 393
 394     def xml_url(self):
 395         return self.media_url('xml')
 396
 397     def has_description(self):
 398         return len(self.description) > 0
 399     has_description.short_description = _('description')
 400     has_description.boolean = True
 401
 402     def has_mp3_file(self):
 403         return self.has_media("mp3")
 404     has_mp3_file.short_description = 'MP3'
 405     has_mp3_file.boolean = True
 406
 407     def has_ogg_file(self):
 408         return self.has_media("ogg")
 409     has_ogg_file.short_description = 'OGG'
 410     has_ogg_file.boolean = True
 411
 412     def has_daisy_file(self):
 413         return self.has_media("daisy")
 414     has_daisy_file.short_description = 'DAISY'
 415     has_daisy_file.boolean = True
 416
 417     def has_audio_epub_file(self):
 418         return self.has_media("audio.epub")
 419
 420     @property
 421     def media_daisy(self):
 422         return self.get_media('daisy')
 423
 424     @property
 425     def media_audio_epub(self):
 426         return self.get_media('audio.epub')
 427
 428     def get_audiobooks(self):
 429         ogg_files = {}
 430         for m in self.media.filter(type='ogg').order_by().iterator():
 431             ogg_files[m.name] = m
 432
 433         audiobooks = []
 434         projects = set()
 435         total_duration = 0
 436         for mp3 in self.media.filter(type='mp3').iterator():
 437             # ogg files are always from the same project
 438             meta = mp3.get_extra_info_json()
 439             project = meta.get('project')
 440             if not project:
 441                 # temporary fallback
 442                 project = 'CzytamySłuchając'
 443
 444             projects.add((project, meta.get('funded_by', '')))
 445             total_duration += mp3.duration or 0
 446
 447             media = {'mp3': mp3}
 448
 449             ogg = ogg_files.get(mp3.name)
 450             if ogg:
 451                 media['ogg'] = ogg
 452             audiobooks.append(media)
 453
 454         projects = sorted(projects)
 455         total_duration = '%d:%02d' % (
 456             total_duration // 60,
 457             total_duration % 60
 458         )
 459         return audiobooks, projects, total_duration
 460
 461     def wldocument(self, parse_dublincore=True, inherit=True):
 462         from catalogue.import_utils import ORMDocProvider
 463         from librarian.parser import WLDocument
 464
 465         if inherit and self.parent:
 466             meta_fallbacks = self.parent.cover_info()
 467         else:
 468             meta_fallbacks = None
 469
 470         return WLDocument.from_file(
 471             self.xml_file.path,
 472             provider=ORMDocProvider(self),
 473             parse_dublincore=parse_dublincore,
 474             meta_fallbacks=meta_fallbacks)
 475
 476     def wldocument2(self):
 477         from catalogue.import_utils import ORMDocProvider
 478         from librarian.document import WLDocument
 479         doc = WLDocument(
 480             self.xml_file.path,
 481             provider=ORMDocProvider(self)
 482         )
 483         doc.meta.update(self.cover_info())
 484         return doc
 485
 486
 487     @staticmethod
 488     def zip_format(format_):
 489         def pretty_file_name(book):
 490             return "%s/%s.%s" % (
 491                 book.get_extra_info_json()['author'],
 492                 book.slug,
 493                 format_)
 494
 495         field_name = "%s_file" % format_
 496         field = getattr(Book, field_name)
 497         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 498         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 499         return create_zip(paths, field.ZIP)
 500
 501     def zip_audiobooks(self, format_):
 502         bm = BookMedia.objects.filter(book=self, type=format_)
 503         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 504         licenses = set()
 505         for m in bm:
 506             license = constants.LICENSES.get(
 507                 m.get_extra_info_json().get('license'), {}
 508             ).get('locative')
 509             if license:
 510                 licenses.add(license)
 511         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 512             'licenses': licenses,
 513         })
 514         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 515
 516     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 517         if not self.findable:
 518             return
 519         if index is None:
 520             from search.index import Index
 521             index = Index()
 522         try:
 523             index.index_book(self, book_info)
 524             if index_tags:
 525                 index.index_tags()
 526             if commit:
 527                 index.index.commit()
 528         except Exception as e:
 529             index.index.rollback()
 530             raise e
 531
 532     # will make problems in conjunction with paid previews
 533     def download_pictures(self, remote_gallery_url):
 534         # This is only needed for legacy relative image paths.
 535         gallery_path = self.gallery_path()
 536         # delete previous files, so we don't include old files in ebooks
 537         if os.path.isdir(gallery_path):
 538             for filename in os.listdir(gallery_path):
 539                 file_path = os.path.join(gallery_path, filename)
 540                 os.unlink(file_path)
 541         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 542         if ilustr_elements:
 543             makedirs(gallery_path)
 544             for ilustr in ilustr_elements:
 545                 ilustr_src = ilustr.get('src')
 546                 if '/' in ilustr_src:
 547                     continue
 548                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 549                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 550
 551     def load_abstract(self):
 552         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 553         if abstract is not None:
 554             self.abstract = transform_abstrakt(abstract)
 555         else:
 556             self.abstract = ''
 557
 558     def load_toc(self):
 559         self.toc = ''
 560         if self.html_file:
 561             parser = html.HTMLParser(encoding='utf-8')
 562             tree = html.parse(self.html_file.path, parser=parser)
 563             toc = tree.find('//div[@id="toc"]/ol')
 564             if toc is None or not len(toc):
 565                 return
 566             html_link = reverse('book_text', args=[self.slug])
 567             for a in toc.findall('.//a'):
 568                 a.attrib['href'] = html_link + a.attrib['href']
 569             self.toc = html.tostring(toc, encoding='unicode')
 570             # div#toc
 571
 572     @classmethod
 573     def from_xml_file(cls, xml_file, **kwargs):
 574         from django.core.files import File
 575         from librarian import dcparser
 576
 577         # use librarian to parse meta-data
 578         book_info = dcparser.parse(xml_file)
 579
 580         if not isinstance(xml_file, File):
 581             xml_file = File(open(xml_file))
 582
 583         try:
 584             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 585         finally:
 586             xml_file.close()
 587
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML content and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                if present, ``parts``.
            overwrite: allow updating a book that already exists.
            dont_build: names of build steps/formats to skip, in addition to
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; shelves ('set' tags) are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 709
 710     def get_master(self):
 711         master_tags = [
 712             'opowiadanie',
 713             'powiesc',
 714             'dramat_wierszowany_l',
 715             'dramat_wierszowany_lp',
 716             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 717             'wywiad',
 718         ]
 719         from librarian.parser import WLDocument
 720         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 721         root = wld.edoc.getroot()
 722         for master in root.iter():
 723             if master.tag in master_tags:
 724                 return master
 725
 726     def update_references(self):
 727         from references.models import Entity, Reference
 728         master = self.get_master()
 729         if master is None:
 730             master = []
 731         found = set()
 732         for i, sec in enumerate(master):
 733             for ref in sec.findall('.//ref'):
 734                 href = ref.attrib.get('href', '')
 735                 if not href or href in found:
 736                     continue
 737                 found.add(href)
 738                 entity, created = Entity.objects.get_or_create(
 739                     uri=href
 740                 )
 741                 ref, created = Reference.objects.get_or_create(
 742                     book=self,
 743                     entity=entity
 744                 )
 745                 ref.first_section = 'sec%d' % (i + 1)
 746                 entity.populate()
 747                 entity.save()
 748         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 749
 750     @property
 751     def references(self):
 752         return self.reference_set.all().select_related('entity')
 753
 754     @classmethod
 755     @transaction.atomic
 756     def repopulate_ancestors(cls):
 757         """Fixes the ancestry cache."""
 758         # TODO: table names
 759         cursor = connection.cursor()
 760         if connection.vendor == 'postgres':
 761             cursor.execute("TRUNCATE catalogue_book_ancestor")
 762             cursor.execute("""
 763                 WITH RECURSIVE ancestry AS (
 764                     SELECT book.id, book.parent_id
 765                     FROM catalogue_book AS book
 766                     WHERE book.parent_id IS NOT NULL
 767                     UNION
 768                     SELECT ancestor.id, book.parent_id
 769                     FROM ancestry AS ancestor, catalogue_book AS book
 770                     WHERE ancestor.parent_id = book.id
 771                         AND book.parent_id IS NOT NULL
 772                     )
 773                 INSERT INTO catalogue_book_ancestor
 774                     (from_book_id, to_book_id)
 775                     SELECT id, parent_id
 776                     FROM ancestry
 777                     ORDER BY id;
 778                 """)
 779         else:
 780             cursor.execute("DELETE FROM catalogue_book_ancestor")
 781             for b in cls.objects.exclude(parent=None):
 782                 parent = b.parent
 783                 while parent is not None:
 784                     b.ancestor.add(parent)
 785                     parent = parent.parent
 786
 787     @property
 788     def ancestors(self):
 789         if self.parent:
 790             for anc in self.parent.ancestors:
 791                 yield anc
 792             yield self.parent
 793         else:
 794             return []
 795
 796     def clear_cache(self):
 797         clear_cached_renders(self.mini_box)
 798         clear_cached_renders(self.mini_box_nolink)
 799
 800     def cover_info(self, inherit=True):
 801         """Returns a dictionary to serve as fallback for BookInfo.
 802
 803         For now, the only thing inherited is the cover image.
 804         """
 805         need = False
 806         info = {}
 807         for field in ('cover_url', 'cover_by', 'cover_source'):
 808             val = self.get_extra_info_json().get(field)
 809             if val:
 810                 info[field] = val
 811             else:
 812                 need = True
 813         if inherit and need and self.parent is not None:
 814             parent_info = self.parent.cover_info()
 815             parent_info.update(info)
 816             info = parent_info
 817         return info
 818
 819     def related_themes(self):
 820         return Tag.objects.usage_for_queryset(
 821             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 822             counts=True).filter(category='theme').order_by('-count')
 823
 824     def parent_cover_changed(self):
 825         """Called when parent book's cover image is changed."""
 826         if not self.cover_info(inherit=False):
 827             if 'cover' not in app_settings.DONT_BUILD:
 828                 self.cover.build_delay()
 829                 self.cover_clean.build_delay()
 830                 self.cover_thumb.build_delay()
 831                 self.cover_api_thumb.build_delay()
 832                 self.simple_cover.build_delay()
 833                 self.cover_ebookpoint.build_delay()
 834             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 835                 if format_ not in app_settings.DONT_BUILD:
 836                     getattr(self, '%s_file' % format_).build_delay()
 837             for child in self.children.all():
 838                 child.parent_cover_changed()
 839
 840     def other_versions(self):
 841         """Find other versions (i.e. in other languages) of the book."""
 842         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 843
 844     def parents(self):
 845         books = []
 846         parent = self.parent
 847         while parent is not None:
 848             books.insert(0, parent)
 849             parent = parent.parent
 850         return books
 851
 852     def pretty_title(self, html_links=False):
 853         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 854         books = self.parents() + [self]
 855         names.extend([(b.title, b.get_absolute_url()) for b in books])
 856
 857         if html_links:
 858             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 859         else:
 860             names = [tag[0] for tag in names]
 861         return ', '.join(names)
 862
 863     def publisher(self):
 864         publisher = self.get_extra_info_json()['publisher']
 865         if isinstance(publisher, str):
 866             return publisher
 867         elif isinstance(publisher, list):
 868             return ', '.join(publisher)
 869
 870     @classmethod
 871     def tagged_top_level(cls, tags):
 872         """ Returns top-level books tagged with `tags`.
 873
 874         It only returns those books which don't have ancestors which are
 875         also tagged with those tags.
 876
 877         """
 878         objects = cls.tagged.with_all(tags)
 879         return objects.filter(findable=True).exclude(ancestor__in=objects)
 880
 881     @classmethod
 882     def book_list(cls, book_filter=None):
 883         """Generates a hierarchical listing of all books.
 884
 885         Books are optionally filtered with a test function.
 886
 887         """
 888
 889         books_by_parent = {}
 890         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 891         if book_filter:
 892             books = books.filter(book_filter).distinct()
 893
 894             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 895             for book in books.iterator():
 896                 parent = book.parent_id
 897                 if parent not in book_ids:
 898                     parent = None
 899                 books_by_parent.setdefault(parent, []).append(book)
 900         else:
 901             for book in books.iterator():
 902                 books_by_parent.setdefault(book.parent_id, []).append(book)
 903
 904         orphans = []
 905         books_by_author = OrderedDict()
 906         for tag in Tag.objects.filter(category='author').iterator():
 907             books_by_author[tag] = []
 908
 909         for book in books_by_parent.get(None, ()):
 910             authors = list(book.authors().only('pk'))
 911             if authors:
 912                 for author in authors:
 913                     books_by_author[author].append(book)
 914             else:
 915                 orphans.append(book)
 916
 917         return books_by_author, orphans, books_by_parent
 918
    # Maps an audience code found in the book's extra info to a
    # (sort rank, Polish label) pair. Several codes share one pair, so that
    # audiences_pl() can deduplicate them and order the labels by rank.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 929
 930     def audiences_pl(self):
 931         audiences = self.get_extra_info_json().get('audiences', [])
 932         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 933         return [a[1] for a in audiences]
 934
 935     def stage_note(self):
 936         stage = self.get_extra_info_json().get('stage')
 937         if stage and stage < '0.4':
 938             return (_('This work needs modernisation'),
 939                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 940         else:
 941             return None, None
 942
 943     def choose_fragments(self, number):
 944         fragments = self.fragments.order_by()
 945         fragments_count = fragments.count()
 946         if not fragments_count and self.children.exists():
 947             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 948             fragments_count = fragments.count()
 949         if fragments_count:
 950             if fragments_count > number:
 951                 offset = randint(0, fragments_count - number)
 952             else:
 953                 offset = 0
 954             return fragments[offset : offset + number]
 955         elif self.parent:
 956             return self.parent.choose_fragments(number)
 957         else:
 958             return []
 959
 960     def choose_fragment(self):
 961         fragments = self.choose_fragments(1)
 962         if fragments:
 963             return fragments[0]
 964         else:
 965             return None
 966
 967     def fragment_data(self):
 968         fragment = self.choose_fragment()
 969         if fragment:
 970             return {
 971                 'title': fragment.book.pretty_title(),
 972                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 973             }
 974         else:
 975             return None
 976
 977     def update_popularity(self):
 978         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 979         try:
 980             pop = self.popularity
 981             pop.count = count
 982             pop.save()
 983         except BookPopularity.DoesNotExist:
 984             BookPopularity.objects.create(book=self, count=count)
 985
 986     def ridero_link(self):
 987         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 988
 989     def like(self, user):
 990         from social.utils import likes, get_set, set_sets
 991         if not likes(user, self):
 992             tag = get_set(user, '')
 993             set_sets(user, self, [tag])
 994
 995     def unlike(self, user):
 996         from social.utils import likes, set_sets
 997         if likes(user, self):
 998             set_sets(user, self, [])
 999
1000     def full_sort_key(self):
1001         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1002
1003     def cover_color(self):
1004         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1005
1006     @cached_render('catalogue/book_mini_box.html')
1007     def mini_box(self):
1008         return {
1009             'book': self
1010         }
1011
1012     @cached_render('catalogue/book_mini_box.html')
1013     def mini_box_nolink(self):
1014         return {
1015             'book': self,
1016             'no_link': True,
1017         }
1018
1019
1020 class BookPopularity(models.Model):
1021     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1022     count = models.IntegerField(default=0, db_index=True)