src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import gettext_lazy as _, get_language
  19 from fnpdjango.storage import BofhFileSystemStorage
  20 from lxml import html
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from librarian.builders import builders
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue import fields
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Single storage instance shared by the file and cover fields declared on Book.
bofh_storage = BofhFileSystemStorage()
  34
  35
  36 class Book(models.Model):
  37     """Represents a book imported from WL-XML."""
    # Descriptive fields. Where given, the first positional argument is the
    # (Polish) verbose name of the field.
    title = models.CharField('tytuł', max_length=32767)
    # Both sort keys are recomputed on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta(): slug of the book this one is a variant of,
    # or the book's own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    # Filled by load_abstract() and load_toc() respectively.
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the children of `parent`.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded text; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Preformatted duration string, written by set_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview (pre-release) state; save() generates preview_key when preview
    # is set and no key exists yet.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
  61
    # Files generated during publication. from_text_and_meta() saves the
    # uploaded XML into xml_file and schedules builds of the other fields.
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    # Cover images; all of them are rebuilt together whenever the cover is built.
    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')
  80
    # Formats stored directly on the model as `<format>_file` fields;
    # has_media()/get_media() look anything else up in the related media.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Book hierarchy: `parent` is the direct parent, `ancestor` is a cache of
    # all transitive parents, rebuilt by repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    # Managers and tag access.
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)
    translators = models.ManyToManyField(Tag, blank=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 102
 103     class AlreadyExists(Exception):
 104         pass
 105
 106     class Meta:
 107         ordering = ('sort_key_author', 'sort_key')
 108         verbose_name = 'książka'
 109         verbose_name_plural = 'książki'
 110         app_label = 'catalogue'
 111
 112     def __str__(self):
 113         return self.title
 114
 115     def get_extra_info_json(self):
 116         return json.loads(self.extra_info or '{}')
 117
 118     def get_initial(self):
 119         try:
 120             return re.search(r'\w', self.title, re.U).group(0)
 121         except AttributeError:
 122             return ''
 123
 124     def authors(self):
 125         return self.tags.filter(category='author')
 126
 127     def epochs(self):
 128         return self.tags.filter(category='epoch')
 129
 130     def genres(self):
 131         return self.tags.filter(category='genre')
 132
 133     def kinds(self):
 134         return self.tags.filter(category='kind')
 135
 136     def tag_unicode(self, category):
 137         relations = prefetched_relations(self, category)
 138         if relations:
 139             return ', '.join(rel.tag.name for rel in relations)
 140         else:
 141             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 142
 143     def tags_by_category(self):
 144         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 145
 146     def author_unicode(self):
 147         return self.cached_author
 148
 149     def kind_unicode(self):
 150         return self.tag_unicode('kind')
 151
 152     def epoch_unicode(self):
 153         return self.tag_unicode('epoch')
 154
 155     def genre_unicode(self):
 156         return self.tag_unicode('genre')
 157
 158     def translator(self):
 159         translators = self.get_extra_info_json().get('translators')
 160         if not translators:
 161             return None
 162         if len(translators) > 3:
 163             translators = translators[:2]
 164             others = ' i inni'
 165         else:
 166             others = ''
 167         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 168
 169     def cover_source(self):
 170         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 171
 172     @property
 173     def isbn_pdf(self):
 174         return self.get_extra_info_json().get('isbn_pdf')
 175
 176     @property
 177     def isbn_epub(self):
 178         return self.get_extra_info_json().get('isbn_epub')
 179
 180     @property
 181     def isbn_mobi(self):
 182         return self.get_extra_info_json().get('isbn_mobi')
 183
 184     def is_accessible_to(self, user):
 185         if not self.preview:
 186             return True
 187         if not user.is_authenticated:
 188             return False
 189         Membership = apps.get_model('club', 'Membership')
 190         if Membership.is_active_for(user):
 191             return True
 192         Funding = apps.get_model('funding', 'Funding')
 193         if Funding.objects.filter(user=user, offer__book=self):
 194             return True
 195         return False
 196
 197     def save(self, force_insert=False, force_update=False, **kwargs):
 198         from sortify import sortify
 199
 200         self.sort_key = sortify(self.title)[:120]
 201         self.title = str(self.title)  # ???
 202
 203         try:
 204             author = self.authors().first().sort_key
 205         except AttributeError:
 206             author = ''
 207         self.sort_key_author = author
 208
 209         self.cached_author = self.tag_unicode('author')
 210         self.has_audience = 'audience' in self.get_extra_info_json()
 211
 212         if self.preview and not self.preview_key:
 213             self.preview_key = get_random_hash(self.slug)[:32]
 214
 215         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 216
 217         return ret
 218
 219     def get_absolute_url(self):
 220         return reverse('book_detail', args=[self.slug])
 221
 222     def gallery_path(self):
 223         return gallery_path(self.slug)
 224
 225     def gallery_url(self):
 226         return gallery_url(self.slug)
 227
 228     def get_first_text(self):
 229         if self.html_file:
 230             return self
 231         child = self.children.all().order_by('parent_number').first()
 232         if child is not None:
 233             return child.get_first_text()
 234
 235     def get_last_text(self):
 236         if self.html_file:
 237             return self
 238         child = self.children.all().order_by('parent_number').last()
 239         if child is not None:
 240             return child.get_last_text()
 241
 242     def get_prev_text(self):
 243         if not self.parent:
 244             return None
 245         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 246         if sibling is not None:
 247             return sibling.get_last_text()
 248
 249         if self.parent.html_file:
 250             return self.parent
 251
 252         return self.parent.get_prev_text()
 253
 254     def get_next_text(self, inside=True):
 255         if inside:
 256             child = self.children.order_by('parent_number').first()
 257             if child is not None:
 258                 return child.get_first_text()
 259
 260         if not self.parent:
 261             return None
 262         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 263         if sibling is not None:
 264             return sibling.get_first_text()
 265         return self.parent.get_next_text(inside=False)
 266
 267     def get_child_audiobook(self):
 268         BookMedia = apps.get_model('catalogue', 'BookMedia')
 269         if not BookMedia.objects.filter(book__ancestor=self).exists():
 270             return None
 271         for child in self.children.order_by('parent_number').all():
 272             if child.has_mp3_file():
 273                 return child
 274             child_sub = child.get_child_audiobook()
 275             if child_sub is not None:
 276                 return child_sub
 277
 278     def get_siblings(self):
 279         if not self.parent:
 280             return []
 281         return self.parent.children.all().order_by('parent_number')
 282
 283     def get_children(self):
 284         return self.children.all().order_by('parent_number')
 285
 286     @property
 287     def name(self):
 288         return self.title
 289
 290     def language_code(self):
 291         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 292
 293     def language_name(self):
 294         return dict(settings.LANGUAGES).get(self.language_code(), "")
 295
 296     def is_foreign(self):
 297         return self.language_code() != settings.LANGUAGE_CODE
 298
 299     def set_audio_length(self):
 300         length = self.get_audio_length()
 301         if length > 0:
 302             self.audio_length = self.format_audio_length(length)
 303             self.save()
 304
 305     @staticmethod
 306     def format_audio_length(seconds):
 307         """
 308         >>> Book.format_audio_length(1)
 309         '0:01'
 310         >>> Book.format_audio_length(3661)
 311         '1:01:01'
 312         """
 313         if seconds < 60*60:
 314             minutes = seconds // 60
 315             seconds = seconds % 60
 316             return '%d:%02d' % (minutes, seconds)
 317         else:
 318             hours = seconds // 3600
 319             minutes = seconds % 3600 // 60
 320             seconds = seconds % 60
 321             return '%d:%02d:%02d' % (hours, minutes, seconds)
 322
 323     def get_audio_length(self):
 324         total = 0
 325         for media in self.get_mp3() or ():
 326             total += app_settings.GET_MP3_LENGTH(media.file.path)
 327         return int(total)
 328
 329     def get_time(self):
 330         return round(self.xml_file.size / 1000 * 40)
 331
 332     def has_media(self, type_):
 333         if type_ in Book.formats:
 334             return bool(getattr(self, "%s_file" % type_))
 335         else:
 336             return self.media.filter(type=type_).exists()
 337
 338     def has_audio(self):
 339         return self.has_media('mp3')
 340
 341     def get_media(self, type_):
 342         if self.has_media(type_):
 343             if type_ in Book.formats:
 344                 return getattr(self, "%s_file" % type_)
 345             else:
 346                 return self.media.filter(type=type_)
 347         else:
 348             return None
 349
 350     def get_mp3(self):
 351         return self.get_media("mp3")
 352
 353     def get_odt(self):
 354         return self.get_media("odt")
 355
 356     def get_ogg(self):
 357         return self.get_media("ogg")
 358
 359     def get_daisy(self):
 360         return self.get_media("daisy")
 361
 362     def get_audio_epub(self):
 363         return self.get_media("audio.epub")
 364
 365     def media_url(self, format_):
 366         media = self.get_media(format_)
 367         if media:
 368             if self.preview:
 369                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 370             else:
 371                 return media.url
 372         else:
 373             return None
 374
 375     def html_url(self):
 376         return self.media_url('html')
 377
 378     def pdf_url(self):
 379         return self.media_url('pdf')
 380
 381     def epub_url(self):
 382         return self.media_url('epub')
 383
 384     def mobi_url(self):
 385         return self.media_url('mobi')
 386
 387     def txt_url(self):
 388         return self.media_url('txt')
 389
 390     def fb2_url(self):
 391         return self.media_url('fb2')
 392
 393     def xml_url(self):
 394         return self.media_url('xml')
 395
 396     def has_description(self):
 397         return len(self.description) > 0
 398     has_description.short_description = 'opis'
 399     has_description.boolean = True
 400
 401     def has_mp3_file(self):
 402         return self.has_media("mp3")
 403     has_mp3_file.short_description = 'MP3'
 404     has_mp3_file.boolean = True
 405
 406     def has_ogg_file(self):
 407         return self.has_media("ogg")
 408     has_ogg_file.short_description = 'OGG'
 409     has_ogg_file.boolean = True
 410
 411     def has_daisy_file(self):
 412         return self.has_media("daisy")
 413     has_daisy_file.short_description = 'DAISY'
 414     has_daisy_file.boolean = True
 415
 416     def has_sync_file(self):
 417         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 418
 419     def get_sync(self):
 420         with self.get_media('sync').first().file.open('r') as f:
 421             sync = f.read().split('\n')
 422         offset = float(sync[0])
 423         items = []
 424         for line in sync[1:]:
 425             if not line:
 426                 continue
 427             start, end, elid = line.split()
 428             items.append([elid, float(start) + offset])
 429         return json.dumps(items)
 430
 431     def has_audio_epub_file(self):
 432         return self.has_media("audio.epub")
 433
 434     @property
 435     def media_daisy(self):
 436         return self.get_media('daisy')
 437
 438     @property
 439     def media_audio_epub(self):
 440         return self.get_media('audio.epub')
 441
 442     def get_audiobooks(self):
 443         ogg_files = {}
 444         for m in self.media.filter(type='ogg').order_by().iterator():
 445             ogg_files[m.name] = m
 446
 447         audiobooks = []
 448         projects = set()
 449         total_duration = 0
 450         for mp3 in self.media.filter(type='mp3').iterator():
 451             # ogg files are always from the same project
 452             meta = mp3.get_extra_info_json()
 453             project = meta.get('project')
 454             if not project:
 455                 # temporary fallback
 456                 project = 'CzytamySłuchając'
 457
 458             projects.add((project, meta.get('funded_by', '')))
 459             total_duration += mp3.duration or 0
 460
 461             media = {'mp3': mp3}
 462
 463             ogg = ogg_files.get(mp3.name)
 464             if ogg:
 465                 media['ogg'] = ogg
 466             audiobooks.append(media)
 467
 468         projects = sorted(projects)
 469         total_duration = '%d:%02d' % (
 470             total_duration // 60,
 471             total_duration % 60
 472         )
 473         return audiobooks, projects, total_duration
 474
 475     def wldocument(self, parse_dublincore=True, inherit=True):
 476         from catalogue.import_utils import ORMDocProvider
 477         from librarian.parser import WLDocument
 478
 479         if inherit and self.parent:
 480             meta_fallbacks = self.parent.cover_info()
 481         else:
 482             meta_fallbacks = None
 483
 484         return WLDocument.from_file(
 485             self.xml_file.path,
 486             provider=ORMDocProvider(self),
 487             parse_dublincore=parse_dublincore,
 488             meta_fallbacks=meta_fallbacks)
 489
 490     def wldocument2(self):
 491         from catalogue.import_utils import ORMDocProvider
 492         from librarian.document import WLDocument
 493         doc = WLDocument(
 494             self.xml_file.path,
 495             provider=ORMDocProvider(self)
 496         )
 497         doc.meta.update(self.cover_info())
 498         return doc
 499
 500
 501     @staticmethod
 502     def zip_format(format_):
 503         def pretty_file_name(book):
 504             return "%s/%s.%s" % (
 505                 book.get_extra_info_json()['author'],
 506                 book.slug,
 507                 format_)
 508
 509         field_name = "%s_file" % format_
 510         field = getattr(Book, field_name)
 511         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 512         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 513         return create_zip(paths, field.ZIP)
 514
 515     def zip_audiobooks(self, format_):
 516         bm = BookMedia.objects.filter(book=self, type=format_)
 517         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 518         licenses = set()
 519         for m in bm:
 520             license = constants.LICENSES.get(
 521                 m.get_extra_info_json().get('license'), {}
 522             ).get('locative')
 523             if license:
 524                 licenses.add(license)
 525         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 526             'licenses': licenses,
 527             'meta': self.wldocument2().meta,
 528         })
 529         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 530
 531     def search_index(self, index=None):
 532         if not self.findable:
 533             return
 534         from search.index import Index
 535         Index.index_book(self)
 536
    # NOTE: this will cause problems in conjunction with paid previews.
 538     def download_pictures(self, remote_gallery_url):
 539         # This is only needed for legacy relative image paths.
 540         gallery_path = self.gallery_path()
 541         # delete previous files, so we don't include old files in ebooks
 542         if os.path.isdir(gallery_path):
 543             for filename in os.listdir(gallery_path):
 544                 file_path = os.path.join(gallery_path, filename)
 545                 os.unlink(file_path)
 546         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 547         if ilustr_elements:
 548             makedirs(gallery_path)
 549             for ilustr in ilustr_elements:
 550                 ilustr_src = ilustr.get('src')
 551                 if '/' in ilustr_src:
 552                     continue
 553                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 554                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 555
 556     def load_abstract(self):
 557         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 558         if abstract is not None:
 559             self.abstract = transform_abstrakt(abstract)
 560         else:
 561             self.abstract = ''
 562
 563     def load_toc(self):
 564         self.toc = ''
 565         if self.html_file:
 566             parser = html.HTMLParser(encoding='utf-8')
 567             tree = html.parse(self.html_file.path, parser=parser)
 568             toc = tree.find('//div[@id="toc"]/ol')
 569             if toc is None or not len(toc):
 570                 return
 571             html_link = reverse('book_text', args=[self.slug])
 572             for a in toc.findall('.//a'):
 573                 a.attrib['href'] = html_link + a.attrib['href']
 574             self.toc = html.tostring(toc, encoding='unicode')
 575             # div#toc
 576
 577     @classmethod
 578     def from_xml_file(cls, xml_file, **kwargs):
 579         from django.core.files import File
 580         from librarian import dcparser
 581
 582         # use librarian to parse meta-data
 583         book_info = dcparser.parse(xml_file)
 584
 585         if not isinstance(xml_file, File):
 586             xml_file = File(open(xml_file))
 587
 588         try:
 589             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 590         finally:
 591             xml_file.close()
 592
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a Book from its XML file and parsed metadata.

        Saves the XML, fills the model from `book_info`, assigns tags and
        translators, re-links child books, rebuilds the ancestry cache and
        schedules the builds of covers, HTML and ebook files.

        Args:
            raw_file: file object saved as the book's `xml_file`.
            book_info: metadata object; this method reads `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and, when
                present, `parts`.
            overwrite: allow updating a book that already exists.
            dont_build: names to skip when scheduling builds ('cover' or
                format names); merged with `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (also requires `findable`
                and `settings.NO_SEARCH_INDEX` to be false).
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today + `days`.
            findable: stored on the book.
            logo, logo_mono, logo_alt: when set, stored in the extra info.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside `[a-z0-9-]`.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered to detect a cover change after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        # (tag, relationship) pairs derived from the metadata.
        meta_tags = Tag.tags_from_info(book_info)

        for tag, relationship in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags without a relationship become the book's tags; the shelves
        # ('set' tags) saved above are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Children to be told that their parent's cover has changed.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 724
 725     def update_references(self):
 726         Entity = apps.get_model('references', 'Entity')
 727         doc = self.wldocument2()
 728         doc._compat_assign_section_ids()
 729         doc._compat_assign_ordered_ids()
 730         refs = {}
 731         for ref_elem in doc.references():
 732             uri = ref_elem.attrib.get('href', '')
 733             if not uri:
 734                 continue
 735             if uri in refs:
 736                 ref = refs[uri]
 737             else:
 738                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 739                 if entity_created:
 740                     try:
 741                         entity.populate()
 742                     except:
 743                         pass
 744                     else:
 745                         entity.save()
 746                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 747                 refs[uri] = ref
 748                 if not ref_created:
 749                     ref.occurence_set.all().delete()
 750             sec = ref_elem.get_link()
 751             m = re.match(r'sec(\d+)', sec)
 752             assert m is not None
 753             sec = int(m.group(1))
 754             snippet = ref_elem.get_snippet()
 755             b = builders['html-snippet']()
 756             for s in snippet:
 757                 s.html_build(b)
 758             html = b.output().get_bytes().decode('utf-8')
 759
 760             ref.occurence_set.create(
 761                 section=sec,
 762                 html=html
 763             )
 764         self.reference_set.exclude(entity__uri__in=refs).delete()
 765
 766     @property
 767     def references(self):
 768         return self.reference_set.all().select_related('entity')
 769
 770     @classmethod
 771     @transaction.atomic
 772     def repopulate_ancestors(cls):
 773         """Fixes the ancestry cache."""
 774         # TODO: table names
 775         cursor = connection.cursor()
 776         if connection.vendor == 'postgres':
 777             cursor.execute("TRUNCATE catalogue_book_ancestor")
 778             cursor.execute("""
 779                 WITH RECURSIVE ancestry AS (
 780                     SELECT book.id, book.parent_id
 781                     FROM catalogue_book AS book
 782                     WHERE book.parent_id IS NOT NULL
 783                     UNION
 784                     SELECT ancestor.id, book.parent_id
 785                     FROM ancestry AS ancestor, catalogue_book AS book
 786                     WHERE ancestor.parent_id = book.id
 787                         AND book.parent_id IS NOT NULL
 788                     )
 789                 INSERT INTO catalogue_book_ancestor
 790                     (from_book_id, to_book_id)
 791                     SELECT id, parent_id
 792                     FROM ancestry
 793                     ORDER BY id;
 794                 """)
 795         else:
 796             cursor.execute("DELETE FROM catalogue_book_ancestor")
 797             for b in cls.objects.exclude(parent=None):
 798                 parent = b.parent
 799                 while parent is not None:
 800                     b.ancestor.add(parent)
 801                     parent = parent.parent
 802
 803     @property
 804     def ancestors(self):
 805         if self.parent:
 806             for anc in self.parent.ancestors:
 807                 yield anc
 808             yield self.parent
 809         else:
 810             return []
 811
 812     def clear_cache(self):
 813         clear_cached_renders(self.mini_box)
 814         clear_cached_renders(self.mini_box_nolink)
 815
 816     def cover_info(self, inherit=True):
 817         """Returns a dictionary to serve as fallback for BookInfo.
 818
 819         For now, the only thing inherited is the cover image.
 820         """
 821         need = False
 822         info = {}
 823         for field in ('cover_url', 'cover_by', 'cover_source'):
 824             val = self.get_extra_info_json().get(field)
 825             if val:
 826                 info[field] = val
 827             else:
 828                 need = True
 829         if inherit and need and self.parent is not None:
 830             parent_info = self.parent.cover_info()
 831             parent_info.update(info)
 832             info = parent_info
 833         return info
 834
 835     def related_themes(self):
 836         return Tag.objects.usage_for_queryset(
 837             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 838             counts=True).filter(category='theme').order_by('-count')
 839
 840     def parent_cover_changed(self):
 841         """Called when parent book's cover image is changed."""
 842         if not self.cover_info(inherit=False):
 843             if 'cover' not in app_settings.DONT_BUILD:
 844                 self.cover.build_delay()
 845                 self.cover_clean.build_delay()
 846                 self.cover_thumb.build_delay()
 847                 self.cover_api_thumb.build_delay()
 848                 self.simple_cover.build_delay()
 849                 self.cover_ebookpoint.build_delay()
 850             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 851                 if format_ not in app_settings.DONT_BUILD:
 852                     getattr(self, '%s_file' % format_).build_delay()
 853             for child in self.children.all():
 854                 child.parent_cover_changed()
 855
 856     def other_versions(self):
 857         """Find other versions (i.e. in other languages) of the book."""
 858         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 859
 860     def parents(self):
 861         books = []
 862         parent = self.parent
 863         while parent is not None:
 864             books.insert(0, parent)
 865             parent = parent.parent
 866         return books
 867
 868     def pretty_title(self, html_links=False):
 869         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 870         books = self.parents() + [self]
 871         names.extend([(b.title, b.get_absolute_url()) for b in books])
 872
 873         if html_links:
 874             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 875         else:
 876             names = [tag[0] for tag in names]
 877         return ', '.join(names)
 878
 879     def publisher(self):
 880         publisher = self.get_extra_info_json()['publisher']
 881         if isinstance(publisher, str):
 882             return publisher
 883         elif isinstance(publisher, list):
 884             return ', '.join(publisher)
 885
 886     @classmethod
 887     def tagged_top_level(cls, tags):
 888         """ Returns top-level books tagged with `tags`.
 889
 890         It only returns those books which don't have ancestors which are
 891         also tagged with those tags.
 892
 893         """
 894         objects = cls.tagged.with_all(tags)
 895         return objects.filter(findable=True).exclude(ancestor__in=objects)
 896
 897     @classmethod
 898     def book_list(cls, book_filter=None):
 899         """Generates a hierarchical listing of all books.
 900
 901         Books are optionally filtered with a test function.
 902
 903         """
 904
 905         books_by_parent = {}
 906         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 907         if book_filter:
 908             books = books.filter(book_filter).distinct()
 909
 910             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 911             for book in books.iterator():
 912                 parent = book.parent_id
 913                 if parent not in book_ids:
 914                     parent = None
 915                 books_by_parent.setdefault(parent, []).append(book)
 916         else:
 917             for book in books.iterator():
 918                 books_by_parent.setdefault(book.parent_id, []).append(book)
 919
 920         orphans = []
 921         books_by_author = OrderedDict()
 922         for tag in Tag.objects.filter(category='author').iterator():
 923             books_by_author[tag] = []
 924
 925         for book in books_by_parent.get(None, ()):
 926             authors = list(book.authors().only('pk'))
 927             if authors:
 928                 for author in authors:
 929                     books_by_author[author].append(book)
 930             else:
 931                 orphans.append(book)
 932
 933         return books_by_author, orphans, books_by_parent
 934
    # Maps an audience code to a pair of (sort rank, Polish display name).
    # Several codes share the same pair.  audiences_pl() looks the codes of
    # a book up here, sorts the pairs and returns just the names.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 945
 946     def audiences_pl(self):
 947         audiences = self.get_extra_info_json().get('audiences', [])
 948         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 949         return [a[1] for a in audiences]
 950
 951     def stage_note(self):
 952         stage = self.get_extra_info_json().get('stage')
 953         if stage and stage < '0.4':
 954             return (_('Ten utwór wymaga uwspółcześnienia'),
 955                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 956         else:
 957             return None, None
 958
 959     def choose_fragments(self, number):
 960         fragments = self.fragments.order_by()
 961         fragments_count = fragments.count()
 962         if not fragments_count and self.children.exists():
 963             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 964             fragments_count = fragments.count()
 965         if fragments_count:
 966             if fragments_count > number:
 967                 offset = randint(0, fragments_count - number)
 968             else:
 969                 offset = 0
 970             return fragments[offset : offset + number]
 971         elif self.parent:
 972             return self.parent.choose_fragments(number)
 973         else:
 974             return []
 975
 976     def choose_fragment(self):
 977         fragments = self.choose_fragments(1)
 978         if fragments:
 979             return fragments[0]
 980         else:
 981             return None
 982
 983     def fragment_data(self):
 984         fragment = self.choose_fragment()
 985         if fragment:
 986             return {
 987                 'title': fragment.book.pretty_title(),
 988                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 989             }
 990         else:
 991             return None
 992
 993     def update_popularity(self):
 994         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 995         try:
 996             pop = self.popularity
 997             pop.count = count
 998             pop.save()
 999         except BookPopularity.DoesNotExist:
1000             BookPopularity.objects.create(book=self, count=count)
1001
1002     def ridero_link(self):
1003         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1004
1005     def like(self, user):
1006         from social.utils import likes, get_set, set_sets
1007         if not likes(user, self):
1008             tag = get_set(user, '')
1009             set_sets(user, self, [tag])
1010
1011     def unlike(self, user):
1012         from social.utils import likes, set_sets
1013         if likes(user, self):
1014             set_sets(user, self, [])
1015
1016     def full_sort_key(self):
1017         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1018
1019     def cover_color(self):
1020         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1021
1022     @cached_render('catalogue/book_mini_box.html')
1023     def mini_box(self):
1024         return {
1025             'book': self
1026         }
1027
1028     @cached_render('catalogue/book_mini_box.html')
1029     def mini_box_nolink(self):
1030         return {
1031             'book': self,
1032             'no_link': True,
1033         }
1034
1035
class BookPopularity(models.Model):
    """Stored popularity count of a single book.

    Book.update_popularity() writes the count here.
    """
    # One record per book, reachable from the book as ``book.popularity``;
    # deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # The popularity value itself; indexed in the database (db_index=True).
    count = models.IntegerField(default=0, db_index=True)