src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import gettext_lazy as _, get_language
  19 from fnpdjango.storage import BofhFileSystemStorage
  20 from lxml import html
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from newtagging import managers
  24 from catalogue import constants
  25 from catalogue import fields
  26 from catalogue.models import Tag, Fragment, BookMedia
  27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  28 from catalogue.models.tag import prefetched_relations
  29 from catalogue import app_settings
  30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  31
  32 bofh_storage = BofhFileSystemStorage()
  33
  34
  35 class Book(models.Model):
  36     """Represents a book imported from WL-XML."""
    # --- Basic metadata -------------------------------------------------
    title = models.CharField(_('title'), max_length=32767)
    # sort_key / sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # NOTE(review): common_slug reuses the verbose name 'slug'; from_text_and_meta()
    # sets it to the `variant_of` slug when there is one, otherwise to `slug`.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # abstract and toc are filled by load_abstract() / load_toc().
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded text; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Preformatted duration string, written by set_audio_length().
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview state; save() generates preview_key when preview is on and the key is empty.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Format names that map to a "<format>_file" attribute on the book
    # (see has_media() / get_media()); anything else is looked up in `media`.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent plus the transitive-ancestor cache, which is rebuilt
    # wholesale by repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 100
 101     class AlreadyExists(Exception):
 102         pass
 103
 104     class Meta:
 105         ordering = ('sort_key_author', 'sort_key')
 106         verbose_name = _('book')
 107         verbose_name_plural = _('books')
 108         app_label = 'catalogue'
 109
 110     def __str__(self):
 111         return self.title
 112
 113     def get_extra_info_json(self):
 114         return json.loads(self.extra_info or '{}')
 115
 116     def get_initial(self):
 117         try:
 118             return re.search(r'\w', self.title, re.U).group(0)
 119         except AttributeError:
 120             return ''
 121
 122     def authors(self):
 123         return self.tags.filter(category='author')
 124
 125     def epochs(self):
 126         return self.tags.filter(category='epoch')
 127
 128     def genres(self):
 129         return self.tags.filter(category='genre')
 130
 131     def kinds(self):
 132         return self.tags.filter(category='kind')
 133
 134     def tag_unicode(self, category):
 135         relations = prefetched_relations(self, category)
 136         if relations:
 137             return ', '.join(rel.tag.name for rel in relations)
 138         else:
 139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 140
 141     def tags_by_category(self):
 142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 143
 144     def author_unicode(self):
 145         return self.cached_author
 146
 147     def kind_unicode(self):
 148         return self.tag_unicode('kind')
 149
 150     def epoch_unicode(self):
 151         return self.tag_unicode('epoch')
 152
 153     def genre_unicode(self):
 154         return self.tag_unicode('genre')
 155
 156     def translators(self):
 157         translators = self.get_extra_info_json().get('translators') or []
 158         return [
 159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 160         ]
 161
 162     def translator(self):
 163         translators = self.get_extra_info_json().get('translators')
 164         if not translators:
 165             return None
 166         if len(translators) > 3:
 167             translators = translators[:2]
 168             others = ' i inni'
 169         else:
 170             others = ''
 171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 172
 173     def cover_source(self):
 174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 175
 176     @property
 177     def isbn_pdf(self):
 178         return self.get_extra_info_json().get('isbn_pdf')
 179
 180     @property
 181     def isbn_epub(self):
 182         return self.get_extra_info_json().get('isbn_epub')
 183
 184     @property
 185     def isbn_mobi(self):
 186         return self.get_extra_info_json().get('isbn_mobi')
 187
 188     def is_accessible_to(self, user):
 189         if not self.preview:
 190             return True
 191         if not user.is_authenticated:
 192             return False
 193         Membership = apps.get_model('club', 'Membership')
 194         if Membership.is_active_for(user):
 195             return True
 196         Funding = apps.get_model('funding', 'Funding')
 197         if Funding.objects.filter(user=user, offer__book=self):
 198             return True
 199         return False
 200
 201     def save(self, force_insert=False, force_update=False, **kwargs):
 202         from sortify import sortify
 203
 204         self.sort_key = sortify(self.title)[:120]
 205         self.title = str(self.title)  # ???
 206
 207         try:
 208             author = self.authors().first().sort_key
 209         except AttributeError:
 210             author = ''
 211         self.sort_key_author = author
 212
 213         self.cached_author = self.tag_unicode('author')
 214         self.has_audience = 'audience' in self.get_extra_info_json()
 215
 216         if self.preview and not self.preview_key:
 217             self.preview_key = get_random_hash(self.slug)[:32]
 218
 219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 220
 221         return ret
 222
 223     def get_absolute_url(self):
 224         return reverse('book_detail', args=[self.slug])
 225
 226     def gallery_path(self):
 227         return gallery_path(self.slug)
 228
 229     def gallery_url(self):
 230         return gallery_url(self.slug)
 231
 232     def get_first_text(self):
 233         if self.html_file:
 234             return self
 235         child = self.children.all().order_by('parent_number').first()
 236         if child is not None:
 237             return child.get_first_text()
 238
 239     def get_last_text(self):
 240         if self.html_file:
 241             return self
 242         child = self.children.all().order_by('parent_number').last()
 243         if child is not None:
 244             return child.get_last_text()
 245
 246     def get_prev_text(self):
 247         if not self.parent:
 248             return None
 249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 250         if sibling is not None:
 251             return sibling.get_last_text()
 252
 253         if self.parent.html_file:
 254             return self.parent
 255
 256         return self.parent.get_prev_text()
 257
 258     def get_next_text(self, inside=True):
 259         if inside:
 260             child = self.children.order_by('parent_number').first()
 261             if child is not None:
 262                 return child.get_first_text()
 263
 264         if not self.parent:
 265             return None
 266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 267         if sibling is not None:
 268             return sibling.get_first_text()
 269         return self.parent.get_next_text(inside=False)
 270
 271     def get_child_audiobook(self):
 272         BookMedia = apps.get_model('catalogue', 'BookMedia')
 273         if not BookMedia.objects.filter(book__ancestor=self).exists():
 274             return None
 275         for child in self.children.order_by('parent_number').all():
 276             if child.has_mp3_file():
 277                 return child
 278             child_sub = child.get_child_audiobook()
 279             if child_sub is not None:
 280                 return child_sub
 281
 282     def get_siblings(self):
 283         if not self.parent:
 284             return []
 285         return self.parent.children.all().order_by('parent_number')
 286
 287     def get_children(self):
 288         return self.children.all().order_by('parent_number')
 289
 290     @property
 291     def name(self):
 292         return self.title
 293
 294     def language_code(self):
 295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 296
 297     def language_name(self):
 298         return dict(settings.LANGUAGES).get(self.language_code(), "")
 299
 300     def is_foreign(self):
 301         return self.language_code() != settings.LANGUAGE_CODE
 302
 303     def set_audio_length(self):
 304         length = self.get_audio_length()
 305         if length > 0:
 306             self.audio_length = self.format_audio_length(length)
 307             self.save()
 308
 309     @staticmethod
 310     def format_audio_length(seconds):
 311         """
 312         >>> Book.format_audio_length(1)
 313         '0:01'
 314         >>> Book.format_audio_length(3661)
 315         '1:01:01'
 316         """
 317         if seconds < 60*60:
 318             minutes = seconds // 60
 319             seconds = seconds % 60
 320             return '%d:%02d' % (minutes, seconds)
 321         else:
 322             hours = seconds // 3600
 323             minutes = seconds % 3600 // 60
 324             seconds = seconds % 60
 325             return '%d:%02d:%02d' % (hours, minutes, seconds)
 326
 327     def get_audio_length(self):
 328         total = 0
 329         for media in self.get_mp3() or ():
 330             total += app_settings.GET_MP3_LENGTH(media.file.path)
 331         return int(total)
 332
 333     def has_media(self, type_):
 334         if type_ in Book.formats:
 335             return bool(getattr(self, "%s_file" % type_))
 336         else:
 337             return self.media.filter(type=type_).exists()
 338
 339     def has_audio(self):
 340         return self.has_media('mp3')
 341
 342     def get_media(self, type_):
 343         if self.has_media(type_):
 344             if type_ in Book.formats:
 345                 return getattr(self, "%s_file" % type_)
 346             else:
 347                 return self.media.filter(type=type_)
 348         else:
 349             return None
 350
 351     def get_mp3(self):
 352         return self.get_media("mp3")
 353
 354     def get_odt(self):
 355         return self.get_media("odt")
 356
 357     def get_ogg(self):
 358         return self.get_media("ogg")
 359
 360     def get_daisy(self):
 361         return self.get_media("daisy")
 362
 363     def get_audio_epub(self):
 364         return self.get_media("audio.epub")
 365
 366     def media_url(self, format_):
 367         media = self.get_media(format_)
 368         if media:
 369             if self.preview:
 370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 371             else:
 372                 return media.url
 373         else:
 374             return None
 375
 376     def html_url(self):
 377         return self.media_url('html')
 378
 379     def pdf_url(self):
 380         return self.media_url('pdf')
 381
 382     def epub_url(self):
 383         return self.media_url('epub')
 384
 385     def mobi_url(self):
 386         return self.media_url('mobi')
 387
 388     def txt_url(self):
 389         return self.media_url('txt')
 390
 391     def fb2_url(self):
 392         return self.media_url('fb2')
 393
 394     def xml_url(self):
 395         return self.media_url('xml')
 396
 397     def has_description(self):
 398         return len(self.description) > 0
 399     has_description.short_description = _('description')
 400     has_description.boolean = True
 401
 402     def has_mp3_file(self):
 403         return self.has_media("mp3")
 404     has_mp3_file.short_description = 'MP3'
 405     has_mp3_file.boolean = True
 406
 407     def has_ogg_file(self):
 408         return self.has_media("ogg")
 409     has_ogg_file.short_description = 'OGG'
 410     has_ogg_file.boolean = True
 411
 412     def has_daisy_file(self):
 413         return self.has_media("daisy")
 414     has_daisy_file.short_description = 'DAISY'
 415     has_daisy_file.boolean = True
 416
 417     def has_sync_file(self):
 418         return self.has_media("sync")
 419
 420     def get_sync(self):
 421         with self.get_media('sync').first().file.open('r') as f:
 422             sync = f.read().split('\n')
 423         offset = float(sync[0])
 424         items = []
 425         for line in sync[1:]:
 426             if not line:
 427                 continue
 428             start, end, elid = line.split()
 429             items.append([elid, float(start) + offset])
 430         return json.dumps(items)
 431
 432     def has_audio_epub_file(self):
 433         return self.has_media("audio.epub")
 434
 435     @property
 436     def media_daisy(self):
 437         return self.get_media('daisy')
 438
 439     @property
 440     def media_audio_epub(self):
 441         return self.get_media('audio.epub')
 442
 443     def get_audiobooks(self):
 444         ogg_files = {}
 445         for m in self.media.filter(type='ogg').order_by().iterator():
 446             ogg_files[m.name] = m
 447
 448         audiobooks = []
 449         projects = set()
 450         total_duration = 0
 451         for mp3 in self.media.filter(type='mp3').iterator():
 452             # ogg files are always from the same project
 453             meta = mp3.get_extra_info_json()
 454             project = meta.get('project')
 455             if not project:
 456                 # temporary fallback
 457                 project = 'CzytamySłuchając'
 458
 459             projects.add((project, meta.get('funded_by', '')))
 460             total_duration += mp3.duration or 0
 461
 462             media = {'mp3': mp3}
 463
 464             ogg = ogg_files.get(mp3.name)
 465             if ogg:
 466                 media['ogg'] = ogg
 467             audiobooks.append(media)
 468
 469         projects = sorted(projects)
 470         total_duration = '%d:%02d' % (
 471             total_duration // 60,
 472             total_duration % 60
 473         )
 474         return audiobooks, projects, total_duration
 475
 476     def wldocument(self, parse_dublincore=True, inherit=True):
 477         from catalogue.import_utils import ORMDocProvider
 478         from librarian.parser import WLDocument
 479
 480         if inherit and self.parent:
 481             meta_fallbacks = self.parent.cover_info()
 482         else:
 483             meta_fallbacks = None
 484
 485         return WLDocument.from_file(
 486             self.xml_file.path,
 487             provider=ORMDocProvider(self),
 488             parse_dublincore=parse_dublincore,
 489             meta_fallbacks=meta_fallbacks)
 490
 491     def wldocument2(self):
 492         from catalogue.import_utils import ORMDocProvider
 493         from librarian.document import WLDocument
 494         doc = WLDocument(
 495             self.xml_file.path,
 496             provider=ORMDocProvider(self)
 497         )
 498         doc.meta.update(self.cover_info())
 499         return doc
 500
 501
 502     @staticmethod
 503     def zip_format(format_):
 504         def pretty_file_name(book):
 505             return "%s/%s.%s" % (
 506                 book.get_extra_info_json()['author'],
 507                 book.slug,
 508                 format_)
 509
 510         field_name = "%s_file" % format_
 511         field = getattr(Book, field_name)
 512         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 513         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 514         return create_zip(paths, field.ZIP)
 515
 516     def zip_audiobooks(self, format_):
 517         bm = BookMedia.objects.filter(book=self, type=format_)
 518         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 519         licenses = set()
 520         for m in bm:
 521             license = constants.LICENSES.get(
 522                 m.get_extra_info_json().get('license'), {}
 523             ).get('locative')
 524             if license:
 525                 licenses.add(license)
 526         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 527             'licenses': licenses,
 528             'meta': self.wldocument2().meta,
 529         })
 530         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 531
 532     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 533         if not self.findable:
 534             return
 535         if index is None:
 536             from search.index import Index
 537             index = Index()
 538         try:
 539             index.index_book(self, book_info)
 540             if index_tags:
 541                 index.index_tags()
 542             if commit:
 543                 index.index.commit()
 544         except Exception as e:
 545             index.index.rollback()
 546             raise e
 547
 548     # will make problems in conjunction with paid previews
 549     def download_pictures(self, remote_gallery_url):
 550         # This is only needed for legacy relative image paths.
 551         gallery_path = self.gallery_path()
 552         # delete previous files, so we don't include old files in ebooks
 553         if os.path.isdir(gallery_path):
 554             for filename in os.listdir(gallery_path):
 555                 file_path = os.path.join(gallery_path, filename)
 556                 os.unlink(file_path)
 557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 558         if ilustr_elements:
 559             makedirs(gallery_path)
 560             for ilustr in ilustr_elements:
 561                 ilustr_src = ilustr.get('src')
 562                 if '/' in ilustr_src:
 563                     continue
 564                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 565                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 566
 567     def load_abstract(self):
 568         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 569         if abstract is not None:
 570             self.abstract = transform_abstrakt(abstract)
 571         else:
 572             self.abstract = ''
 573
 574     def load_toc(self):
 575         self.toc = ''
 576         if self.html_file:
 577             parser = html.HTMLParser(encoding='utf-8')
 578             tree = html.parse(self.html_file.path, parser=parser)
 579             toc = tree.find('//div[@id="toc"]/ol')
 580             if toc is None or not len(toc):
 581                 return
 582             html_link = reverse('book_text', args=[self.slug])
 583             for a in toc.findall('.//a'):
 584                 a.attrib['href'] = html_link + a.attrib['href']
 585             self.toc = html.tostring(toc, encoding='unicode')
 586             # div#toc
 587
 588     @classmethod
 589     def from_xml_file(cls, xml_file, **kwargs):
 590         from django.core.files import File
 591         from librarian import dcparser
 592
 593         # use librarian to parse meta-data
 594         book_info = dcparser.parse(xml_file)
 595
 596         if not isinstance(xml_file, File):
 597             xml_file = File(open(xml_file))
 598
 599         try:
 600             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 601         finally:
 602             xml_file.close()
 603
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        The book is identified by ``book_info.url.slug``.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: allow updating a book that already exists.
            dont_build: iterable of build names to skip ('cover' or a format
                name); merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.

        Returns the created or updated book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover info to detect a cover change further down.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used by this book is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from the metadata plus the shelves the book was on before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in listing order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Children that gained this parent, or whose parent's cover changed.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 725
 726     def get_master(self):
 727         master_tags = [
 728             'opowiadanie',
 729             'powiesc',
 730             'dramat_wierszowany_l',
 731             'dramat_wierszowany_lp',
 732             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 733             'wywiad',
 734         ]
 735         from librarian.parser import WLDocument
 736         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 737         root = wld.edoc.getroot()
 738         for master in root.iter():
 739             if master.tag in master_tags:
 740                 return master
 741
 742     def update_references(self):
 743         from references.models import Entity, Reference
 744         master = self.get_master()
 745         if master is None:
 746             master = []
 747         found = set()
 748         for i, sec in enumerate(master):
 749             for ref in sec.findall('.//ref'):
 750                 href = ref.attrib.get('href', '')
 751                 if not href or href in found:
 752                     continue
 753                 found.add(href)
 754                 entity, created = Entity.objects.get_or_create(
 755                     uri=href
 756                 )
 757                 ref, created = Reference.objects.get_or_create(
 758                     book=self,
 759                     entity=entity
 760                 )
 761                 ref.first_section = 'sec%d' % (i + 1)
 762                 entity.populate()
 763                 entity.save()
 764         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 765
 766     @property
 767     def references(self):
 768         return self.reference_set.all().select_related('entity')
 769
 770     @classmethod
 771     @transaction.atomic
 772     def repopulate_ancestors(cls):
 773         """Fixes the ancestry cache."""
 774         # TODO: table names
 775         cursor = connection.cursor()
 776         if connection.vendor == 'postgres':
 777             cursor.execute("TRUNCATE catalogue_book_ancestor")
 778             cursor.execute("""
 779                 WITH RECURSIVE ancestry AS (
 780                     SELECT book.id, book.parent_id
 781                     FROM catalogue_book AS book
 782                     WHERE book.parent_id IS NOT NULL
 783                     UNION
 784                     SELECT ancestor.id, book.parent_id
 785                     FROM ancestry AS ancestor, catalogue_book AS book
 786                     WHERE ancestor.parent_id = book.id
 787                         AND book.parent_id IS NOT NULL
 788                     )
 789                 INSERT INTO catalogue_book_ancestor
 790                     (from_book_id, to_book_id)
 791                     SELECT id, parent_id
 792                     FROM ancestry
 793                     ORDER BY id;
 794                 """)
 795         else:
 796             cursor.execute("DELETE FROM catalogue_book_ancestor")
 797             for b in cls.objects.exclude(parent=None):
 798                 parent = b.parent
 799                 while parent is not None:
 800                     b.ancestor.add(parent)
 801                     parent = parent.parent
 802
 803     @property
 804     def ancestors(self):
 805         if self.parent:
 806             for anc in self.parent.ancestors:
 807                 yield anc
 808             yield self.parent
 809         else:
 810             return []
 811
 812     def clear_cache(self):
 813         clear_cached_renders(self.mini_box)
 814         clear_cached_renders(self.mini_box_nolink)
 815
 816     def cover_info(self, inherit=True):
 817         """Returns a dictionary to serve as fallback for BookInfo.
 818
 819         For now, the only thing inherited is the cover image.
 820         """
 821         need = False
 822         info = {}
 823         for field in ('cover_url', 'cover_by', 'cover_source'):
 824             val = self.get_extra_info_json().get(field)
 825             if val:
 826                 info[field] = val
 827             else:
 828                 need = True
 829         if inherit and need and self.parent is not None:
 830             parent_info = self.parent.cover_info()
 831             parent_info.update(info)
 832             info = parent_info
 833         return info
 834
 835     def related_themes(self):
 836         return Tag.objects.usage_for_queryset(
 837             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 838             counts=True).filter(category='theme').order_by('-count')
 839
 840     def parent_cover_changed(self):
 841         """Called when parent book's cover image is changed."""
 842         if not self.cover_info(inherit=False):
 843             if 'cover' not in app_settings.DONT_BUILD:
 844                 self.cover.build_delay()
 845                 self.cover_clean.build_delay()
 846                 self.cover_thumb.build_delay()
 847                 self.cover_api_thumb.build_delay()
 848                 self.simple_cover.build_delay()
 849                 self.cover_ebookpoint.build_delay()
 850             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 851                 if format_ not in app_settings.DONT_BUILD:
 852                     getattr(self, '%s_file' % format_).build_delay()
 853             for child in self.children.all():
 854                 child.parent_cover_changed()
 855
 856     def other_versions(self):
 857         """Find other versions (i.e. in other languages) of the book."""
 858         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 859
 860     def parents(self):
 861         books = []
 862         parent = self.parent
 863         while parent is not None:
 864             books.insert(0, parent)
 865             parent = parent.parent
 866         return books
 867
 868     def pretty_title(self, html_links=False):
 869         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 870         books = self.parents() + [self]
 871         names.extend([(b.title, b.get_absolute_url()) for b in books])
 872
 873         if html_links:
 874             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 875         else:
 876             names = [tag[0] for tag in names]
 877         return ', '.join(names)
 878
 879     def publisher(self):
 880         publisher = self.get_extra_info_json()['publisher']
 881         if isinstance(publisher, str):
 882             return publisher
 883         elif isinstance(publisher, list):
 884             return ', '.join(publisher)
 885
 886     @classmethod
 887     def tagged_top_level(cls, tags):
 888         """ Returns top-level books tagged with `tags`.
 889
 890         It only returns those books which don't have ancestors which are
 891         also tagged with those tags.
 892
 893         """
 894         objects = cls.tagged.with_all(tags)
 895         return objects.filter(findable=True).exclude(ancestor__in=objects)
 896
 897     @classmethod
 898     def book_list(cls, book_filter=None):
 899         """Generates a hierarchical listing of all books.
 900
 901         Books are optionally filtered with a test function.
 902
 903         """
 904
 905         books_by_parent = {}
 906         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 907         if book_filter:
 908             books = books.filter(book_filter).distinct()
 909
 910             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 911             for book in books.iterator():
 912                 parent = book.parent_id
 913                 if parent not in book_ids:
 914                     parent = None
 915                 books_by_parent.setdefault(parent, []).append(book)
 916         else:
 917             for book in books.iterator():
 918                 books_by_parent.setdefault(book.parent_id, []).append(book)
 919
 920         orphans = []
 921         books_by_author = OrderedDict()
 922         for tag in Tag.objects.filter(category='author').iterator():
 923             books_by_author[tag] = []
 924
 925         for book in books_by_parent.get(None, ()):
 926             authors = list(book.authors().only('pk'))
 927             if authors:
 928                 for author in authors:
 929                     books_by_author[author].append(book)
 930             else:
 931                 orphans.append(book)
 932
 933         return books_by_author, orphans, books_by_parent
 934
    # Maps an audience code from the book's extra info to a pair of
    # (sort rank, Polish label). audiences_pl() sorts by these pairs and
    # returns the labels, so several codes sharing one pair collapse into
    # a single entry.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 945
 946     def audiences_pl(self):
 947         audiences = self.get_extra_info_json().get('audiences', [])
 948         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 949         return [a[1] for a in audiences]
 950
 951     def stage_note(self):
 952         stage = self.get_extra_info_json().get('stage')
 953         if stage and stage < '0.4':
 954             return (_('This work needs modernisation'),
 955                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 956         else:
 957             return None, None
 958
 959     def choose_fragments(self, number):
 960         fragments = self.fragments.order_by()
 961         fragments_count = fragments.count()
 962         if not fragments_count and self.children.exists():
 963             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 964             fragments_count = fragments.count()
 965         if fragments_count:
 966             if fragments_count > number:
 967                 offset = randint(0, fragments_count - number)
 968             else:
 969                 offset = 0
 970             return fragments[offset : offset + number]
 971         elif self.parent:
 972             return self.parent.choose_fragments(number)
 973         else:
 974             return []
 975
 976     def choose_fragment(self):
 977         fragments = self.choose_fragments(1)
 978         if fragments:
 979             return fragments[0]
 980         else:
 981             return None
 982
 983     def fragment_data(self):
 984         fragment = self.choose_fragment()
 985         if fragment:
 986             return {
 987                 'title': fragment.book.pretty_title(),
 988                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 989             }
 990         else:
 991             return None
 992
 993     def update_popularity(self):
 994         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 995         try:
 996             pop = self.popularity
 997             pop.count = count
 998             pop.save()
 999         except BookPopularity.DoesNotExist:
1000             BookPopularity.objects.create(book=self, count=count)
1001
1002     def ridero_link(self):
1003         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1004
1005     def like(self, user):
1006         from social.utils import likes, get_set, set_sets
1007         if not likes(user, self):
1008             tag = get_set(user, '')
1009             set_sets(user, self, [tag])
1010
1011     def unlike(self, user):
1012         from social.utils import likes, set_sets
1013         if likes(user, self):
1014             set_sets(user, self, [])
1015
1016     def full_sort_key(self):
1017         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1018
1019     def cover_color(self):
1020         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1021
1022     @cached_render('catalogue/book_mini_box.html')
1023     def mini_box(self):
1024         return {
1025             'book': self
1026         }
1027
1028     @cached_render('catalogue/book_mini_box.html')
1029     def mini_box_nolink(self):
1030         return {
1031             'book': self,
1032             'no_link': True,
1033         }
1034
1035
1036 class BookPopularity(models.Model):
1037     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1038     count = models.IntegerField(default=0, db_index=True)