src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
# Single storage instance shared by the generated-file fields declared on Book
# (xml/html/ebook files and the main cover).
bofh_storage = BofhFileSystemStorage()
  36
  37
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Holds catalogue metadata, the files generated during publication
    (text formats and covers), the parent/child hierarchy and tag relations.
    """
    title = models.CharField('tytuł', max_length=32767)
    # sort_key and sort_key_author are recomputed in save() from the title
    # and from the first author tag, respectively.
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded text; decode it with get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Text produced by format_audio_length(), e.g. '1:01:01'.
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    # Filled in by save() for preview books that do not have a key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # has_media()/get_media() resolve any name listed in `formats` to the
    # `<name>_file` attribute; other names are looked up in self.media.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with the book as `instance`.
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug is taken and
        # overwriting was not requested.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
 117
 118     def __str__(self):
 119         return self.title
 120
 121     def get_extra_info_json(self):
 122         return json.loads(self.extra_info or '{}')
 123
 124     def get_initial(self):
 125         try:
 126             return re.search(r'\w', self.title, re.U).group(0)
 127         except AttributeError:
 128             return ''
 129
 130     def authors(self):
 131         return self.tags.filter(category='author')
 132
 133     def epochs(self):
 134         return self.tags.filter(category='epoch')
 135
 136     def genres(self):
 137         return self.tags.filter(category='genre')
 138
 139     def kinds(self):
 140         return self.tags.filter(category='kind')
 141
 142     def tag_unicode(self, category):
 143         relations = prefetched_relations(self, category)
 144         if relations:
 145             return ', '.join(rel.tag.name for rel in relations)
 146         else:
 147             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 148
 149     def tags_by_category(self):
 150         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 151
 152     def author_unicode(self):
 153         return self.cached_author
 154
 155     def kind_unicode(self):
 156         return self.tag_unicode('kind')
 157
 158     def epoch_unicode(self):
 159         return self.tag_unicode('epoch')
 160
 161     def genre_unicode(self):
 162         return self.tag_unicode('genre')
 163
 164     def translator(self):
 165         translators = self.get_extra_info_json().get('translators')
 166         if not translators:
 167             return None
 168         if len(translators) > 3:
 169             translators = translators[:2]
 170             others = ' i inni'
 171         else:
 172             others = ''
 173         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 174
 175     def cover_source(self):
 176         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 177
 178     @property
 179     def isbn_pdf(self):
 180         return self.get_extra_info_json().get('isbn_pdf')
 181
 182     @property
 183     def isbn_epub(self):
 184         return self.get_extra_info_json().get('isbn_epub')
 185
 186     @property
 187     def isbn_mobi(self):
 188         return self.get_extra_info_json().get('isbn_mobi')
 189
 190     def is_accessible_to(self, user):
 191         if not self.preview:
 192             return True
 193         if not user.is_authenticated:
 194             return False
 195         Membership = apps.get_model('club', 'Membership')
 196         if Membership.is_active_for(user):
 197             return True
 198         Funding = apps.get_model('funding', 'Funding')
 199         if Funding.objects.filter(user=user, offer__book=self):
 200             return True
 201         return False
 202
 203     def save(self, force_insert=False, force_update=False, **kwargs):
 204         from sortify import sortify
 205
 206         self.sort_key = sortify(self.title)[:120]
 207         self.title = str(self.title)  # ???
 208
 209         try:
 210             author = self.authors().first().sort_key
 211         except AttributeError:
 212             author = ''
 213         self.sort_key_author = author
 214
 215         self.cached_author = self.tag_unicode('author')
 216         self.has_audience = 'audience' in self.get_extra_info_json()
 217
 218         if self.preview and not self.preview_key:
 219             self.preview_key = get_random_hash(self.slug)[:32]
 220
 221         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 222
 223         return ret
 224
 225     def get_absolute_url(self):
 226         return reverse('book_detail', args=[self.slug])
 227
 228     def gallery_path(self):
 229         return gallery_path(self.slug)
 230
 231     def gallery_url(self):
 232         return gallery_url(self.slug)
 233
 234     def get_first_text(self):
 235         if self.html_file:
 236             return self
 237         child = self.children.all().order_by('parent_number').first()
 238         if child is not None:
 239             return child.get_first_text()
 240
 241     def get_last_text(self):
 242         if self.html_file:
 243             return self
 244         child = self.children.all().order_by('parent_number').last()
 245         if child is not None:
 246             return child.get_last_text()
 247
 248     def get_prev_text(self):
 249         if not self.parent:
 250             return None
 251         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 252         if sibling is not None:
 253             return sibling.get_last_text()
 254
 255         if self.parent.html_file:
 256             return self.parent
 257
 258         return self.parent.get_prev_text()
 259
 260     def get_next_text(self, inside=True):
 261         if inside:
 262             child = self.children.order_by('parent_number').first()
 263             if child is not None:
 264                 return child.get_first_text()
 265
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_first_text()
 271         return self.parent.get_next_text(inside=False)
 272
 273     def get_siblings(self):
 274         if not self.parent:
 275             return []
 276         return self.parent.children.all().order_by('parent_number')
 277
 278     def get_children(self):
 279         return self.children.all().order_by('parent_number')
 280
 281     @property
 282     def name(self):
 283         return self.title
 284
 285     def language_code(self):
 286         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 287
 288     def language_name(self):
 289         return dict(settings.LANGUAGES).get(self.language_code(), "")
 290
 291     def is_foreign(self):
 292         return self.language_code() != settings.LANGUAGE_CODE
 293
 294     def set_audio_length(self):
 295         length = self.get_audio_length()
 296         if length > 0:
 297             self.audio_length = self.format_audio_length(length)
 298             self.save()
 299
 300     @staticmethod
 301     def format_audio_length(seconds):
 302         """
 303         >>> Book.format_audio_length(1)
 304         '0:01'
 305         >>> Book.format_audio_length(3661)
 306         '1:01:01'
 307         """
 308         if seconds < 60*60:
 309             minutes = seconds // 60
 310             seconds = seconds % 60
 311             return '%d:%02d' % (minutes, seconds)
 312         else:
 313             hours = seconds // 3600
 314             minutes = seconds % 3600 // 60
 315             seconds = seconds % 60
 316             return '%d:%02d:%02d' % (hours, minutes, seconds)
 317
 318     def get_audio_length(self):
 319         total = 0
 320         for media in self.get_mp3() or ():
 321             total += app_settings.GET_MP3_LENGTH(media.file.path)
 322         return int(total)
 323
 324     def get_time(self):
 325         return round(self.xml_file.size / 1000 * 40)
 326
 327     def has_media(self, type_):
 328         if type_ in Book.formats:
 329             return bool(getattr(self, "%s_file" % type_))
 330         else:
 331             return self.media.filter(type=type_).exists()
 332
 333     def get_media(self, type_):
 334         if self.has_media(type_):
 335             if type_ in Book.formats:
 336                 return getattr(self, "%s_file" % type_)
 337             else:
 338                 return self.media.filter(type=type_)
 339         else:
 340             return None
 341
 342     def get_mp3(self):
 343         return self.get_media("mp3")
 344
 345     def get_odt(self):
 346         return self.get_media("odt")
 347
 348     def get_ogg(self):
 349         return self.get_media("ogg")
 350
 351     def get_daisy(self):
 352         return self.get_media("daisy")
 353
 354     def get_audio_epub(self):
 355         return self.get_media("audio.epub")
 356
 357     def media_url(self, format_):
 358         media = self.get_media(format_)
 359         if media:
 360             if self.preview:
 361                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 362             else:
 363                 return media.url
 364         else:
 365             return None
 366
 367     def html_url(self):
 368         return self.media_url('html')
 369
 370     def html_nonotes_url(self):
 371         return self.media_url('html_nonotes')
 372
 373     def pdf_url(self):
 374         return self.media_url('pdf')
 375
 376     def epub_url(self):
 377         return self.media_url('epub')
 378
 379     def mobi_url(self):
 380         return self.media_url('mobi')
 381
 382     def txt_url(self):
 383         return self.media_url('txt')
 384
 385     def fb2_url(self):
 386         return self.media_url('fb2')
 387
 388     def xml_url(self):
 389         return self.media_url('xml')
 390
 391     def has_description(self):
 392         return len(self.description) > 0
 393     has_description.short_description = 'opis'
 394     has_description.boolean = True
 395
 396     def has_mp3_file(self):
 397         return self.has_media("mp3")
 398     has_mp3_file.short_description = 'MP3'
 399     has_mp3_file.boolean = True
 400
 401     def has_ogg_file(self):
 402         return self.has_media("ogg")
 403     has_ogg_file.short_description = 'OGG'
 404     has_ogg_file.boolean = True
 405
 406     def has_daisy_file(self):
 407         return self.has_media("daisy")
 408     has_daisy_file.short_description = 'DAISY'
 409     has_daisy_file.boolean = True
 410
 411     def has_sync_file(self):
 412         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 413
 414     def build_sync_file(self):
 415         from lxml import html
 416         from django.core.files.base import ContentFile
 417         with self.html_file.open('rb') as f:
 418             h = html.fragment_fromstring(f.read().decode('utf-8'))
 419
 420         durations = [
 421             m['mp3'].duration
 422             for m in self.get_audiobooks()[0]
 423         ]
 424         if settings.MOCK_DURATIONS:
 425             durations = settings.MOCK_DURATIONS
 426
 427         sync = []
 428         ts = None
 429         sid = 1
 430         dirty = False
 431         for elem in h.iter():
 432             if elem.get('data-audio-ts'):
 433                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 434                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 435                 # check if inside verse
 436                 p = elem.getparent()
 437                 while p is not None:
 438                     # Workaround for missing ids.
 439                     if 'verse' in p.get('class', ''):
 440                         if not p.get('id'):
 441                             p.set('id', f'syn{sid}')
 442                             dirty = True
 443                             sid += 1
 444                         sync.append((ts, p.get('id')))
 445                         ts = None
 446                         break
 447                     p = p.getparent()
 448             elif ts:
 449                 cls = elem.get('class', '')
 450                 # Workaround for missing ids.
 451                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 452                     if not elem.get('id'):
 453                         elem.set('id', f'syn{sid}')
 454                         dirty = True
 455                         sid += 1
 456                     sync.append((ts, elem.get('id')))
 457                     ts = None
 458         if dirty:
 459             htext = html.tostring(h, encoding='utf-8')
 460             with open(self.html_file.path, 'wb') as f:
 461                 f.write(htext)
 462         try:
 463             bm = self.media.get(type='sync')
 464         except:
 465             bm = BookMedia(book=self, type='sync')
 466         sync = (
 467             '27\n' + '\n'.join(
 468                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 469             )).encode('latin1')
 470         bm.file.save(
 471             None, ContentFile(sync)
 472             )
 473
 474     def get_sync(self):
 475         if not self.has_sync_file():
 476             return []
 477         with self.get_media('sync').first().file.open('r') as f:
 478             sync = f.read().split('\n')
 479         offset = float(sync[0])
 480         items = []
 481         for line in sync[1:]:
 482             if not line:
 483                 continue
 484             start, end, elid = line.split()
 485             items.append([elid, float(start) + offset])
 486         return items
 487
 488     def sync_ts(self, ts):
 489         elid = None
 490         for cur_id, t in self.get_sync():
 491             if ts >= t:
 492                 elid = cur_id
 493             else:
 494                 break
 495         return elid
 496
 497     def sync_elid(self, elid):
 498         for cur_id, t in self.get_sync():
 499             if cur_id == elid:
 500                 return t
 501
 502     def has_audio_epub_file(self):
 503         return self.has_media("audio.epub")
 504
 505     @property
 506     def media_daisy(self):
 507         return self.get_media('daisy')
 508
 509     @property
 510     def media_audio_epub(self):
 511         return self.get_media('audio.epub')
 512
 513     def get_audiobooks(self, with_children=False, processing=False):
 514         ogg_files = {}
 515         for m in self.media.filter(type='ogg').order_by().iterator():
 516             ogg_files[m.name] = m
 517
 518         audiobooks = []
 519         projects = set()
 520         total_duration = 0
 521         for mp3 in self.media.filter(type='mp3').iterator():
 522             # ogg files are always from the same project
 523             meta = mp3.get_extra_info_json()
 524             project = meta.get('project')
 525             if not project:
 526                 # temporary fallback
 527                 project = 'CzytamySłuchając'
 528
 529             projects.add((project, meta.get('funded_by', '')))
 530             total_duration += mp3.duration or 0
 531
 532             media = {'mp3': mp3}
 533
 534             ogg = ogg_files.get(mp3.name)
 535             if ogg:
 536                 media['ogg'] = ogg
 537             audiobooks.append(media)
 538
 539         if with_children:
 540             for child in self.get_children():
 541                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 542                     with_children=True, processing=True)
 543                 audiobooks.append({'part': child})
 544                 audiobooks += ch_audiobooks
 545                 projects.update(ch_projects)
 546                 total_duration += ch_duration
 547
 548         if not processing:
 549             projects = sorted(projects)
 550             total_duration = '%d:%02d' % (
 551                 total_duration // 60,
 552                 total_duration % 60
 553             )
 554
 555         return audiobooks, projects, total_duration
 556
 557     def get_audiobooks_with_children(self):
 558         return self.get_audiobooks(with_children=True)
 559
 560     def wldocument(self, parse_dublincore=True, inherit=True):
 561         from catalogue.import_utils import ORMDocProvider
 562         from librarian.parser import WLDocument
 563
 564         if inherit and self.parent:
 565             meta_fallbacks = self.parent.cover_info()
 566         else:
 567             meta_fallbacks = None
 568
 569         return WLDocument.from_file(
 570             self.xml_file.path,
 571             provider=ORMDocProvider(self),
 572             parse_dublincore=parse_dublincore,
 573             meta_fallbacks=meta_fallbacks)
 574
 575     def wldocument2(self):
 576         from catalogue.import_utils import ORMDocProvider
 577         from librarian.document import WLDocument
 578         doc = WLDocument(
 579             self.xml_file.path,
 580             provider=ORMDocProvider(self)
 581         )
 582         doc.meta.update(self.cover_info())
 583         return doc
 584
 585
 586     @staticmethod
 587     def zip_format(format_):
 588         def pretty_file_name(book):
 589             return "%s/%s.%s" % (
 590                 book.get_extra_info_json()['author'],
 591                 book.slug,
 592                 format_)
 593
 594         field_name = "%s_file" % format_
 595         field = getattr(Book, field_name)
 596         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 597         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 598         return create_zip(paths, field.ZIP)
 599
 600     def zip_audiobooks(self, format_):
 601         bm = BookMedia.objects.filter(book=self, type=format_)
 602         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 603         licenses = set()
 604         for m in bm:
 605             license = constants.LICENSES.get(
 606                 m.get_extra_info_json().get('license'), {}
 607             ).get('locative')
 608             if license:
 609                 licenses.add(license)
 610         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 611             'licenses': licenses,
 612             'meta': self.wldocument2().meta,
 613         })
 614         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 615
 616     def search_index(self, index=None):
 617         if not self.findable:
 618             return
 619         from search.index import Index
 620         Index.index_book(self)
 621
 622     # will make problems in conjunction with paid previews
 623     def download_pictures(self, remote_gallery_url):
 624         # This is only needed for legacy relative image paths.
 625         gallery_path = self.gallery_path()
 626         # delete previous files, so we don't include old files in ebooks
 627         if os.path.isdir(gallery_path):
 628             for filename in os.listdir(gallery_path):
 629                 file_path = os.path.join(gallery_path, filename)
 630                 os.unlink(file_path)
 631         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 632         if ilustr_elements:
 633             makedirs(gallery_path)
 634             for ilustr in ilustr_elements:
 635                 ilustr_src = ilustr.get('src')
 636                 if '/' in ilustr_src:
 637                     continue
 638                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 639                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 640
 641     def load_abstract(self):
 642         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 643         if abstract is not None:
 644             self.abstract = transform_abstrakt(abstract)
 645         else:
 646             self.abstract = ''
 647
 648     def load_toc(self):
 649         self.toc = ''
 650         if self.html_file:
 651             parser = html.HTMLParser(encoding='utf-8')
 652             tree = html.parse(self.html_file.path, parser=parser)
 653             toc = tree.find('//div[@id="toc"]/ol')
 654             if toc is None or not len(toc):
 655                 return
 656             html_link = reverse('book_text', args=[self.slug])
 657             for a in toc.findall('.//a'):
 658                 a.attrib['href'] = html_link + a.attrib['href']
 659             self.toc = html.tostring(toc, encoding='unicode')
 660             # div#toc
 661
 662     @classmethod
 663     def from_xml_file(cls, xml_file, **kwargs):
 664         from django.core.files import File
 665         from librarian import dcparser
 666
 667         # use librarian to parse meta-data
 668         book_info = dcparser.parse(xml_file)
 669
 670         if not isinstance(xml_file, File):
 671             xml_file = File(open(xml_file))
 672
 673         try:
 674             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 675         finally:
 676             xml_file.close()
 677
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Publish a book: store its XML and metadata and schedule file builds.

        Args:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                optional ``parts`` and ``to_dict()``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip ('cover' or format names);
                combined with ``app_settings.DONT_BUILD``. The HTML files are
                built regardless.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview lasting that many days from today.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in ``extra_info`` when truthy.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name are plain tags; the shelves ('set'
        # tags) saved above are kept alongside them.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats from EBOOK_FORMATS_WITHOUT_CHILDREN are built only for
        # books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 806
 807     def update_references(self):
 808         Entity = apps.get_model('references', 'Entity')
 809         doc = self.wldocument2()
 810         doc._compat_assign_section_ids()
 811         doc._compat_assign_ordered_ids()
 812         refs = {}
 813         for ref_elem in doc.references():
 814             uri = ref_elem.attrib.get('href', '')
 815             if not uri:
 816                 continue
 817             if uri in refs:
 818                 ref = refs[uri]
 819             else:
 820                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 821                 if entity_created:
 822                     try:
 823                         entity.populate()
 824                     except:
 825                         pass
 826                     else:
 827                         entity.save()
 828                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 829                 refs[uri] = ref
 830                 if not ref_created:
 831                     ref.occurence_set.all().delete()
 832             sec = ref_elem.get_link()
 833             m = re.match(r'sec(\d+)', sec)
 834             assert m is not None
 835             sec = int(m.group(1))
 836             snippet = ref_elem.get_snippet()
 837             b = builders['html-snippet']()
 838             for s in snippet:
 839                 s.html_build(b)
 840             html = b.output().get_bytes().decode('utf-8')
 841
 842             ref.occurence_set.create(
 843                 section=sec,
 844                 html=html
 845             )
 846         self.reference_set.exclude(entity__uri__in=refs).delete()
 847
 848     @property
 849     def references(self):
 850         return self.reference_set.all().select_related('entity')
 851
 852     def update_has_audio(self):
 853         self.has_audio = False
 854         if self.media.filter(type='mp3').exists():
 855             self.has_audio = True
 856         if self.descendant.filter(has_audio=True).exists():
 857             self.has_audio = True
 858         self.save(update_fields=['has_audio'])
 859         if self.parent is not None:
 860             self.parent.update_has_audio()
 861
 862     def update_narrators(self):
 863         narrator_names = set()
 864         for bm in self.media.filter(type='mp3'):
 865             narrator_names.update(set(
 866                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 867             ))
 868         narrators = []
 869
 870         for name in narrator_names:
 871             if not name: continue
 872             slug = slugify(name)
 873             try:
 874                 t = Tag.objects.get(category='author', slug=slug)
 875             except Tag.DoesNotExist:
 876                 sort_key = sortify(
 877                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 878                 )
 879                 t = Tag.objects.create(
 880                     category='author',
 881                     name_pl=name,
 882                     slug=slug,
 883                     sort_key=sort_key,
 884                 )
 885             narrators.append(t)
 886         self.narrators.set(narrators)
 887
 888     @classmethod
 889     @transaction.atomic
 890     def repopulate_ancestors(cls):
 891         """Fixes the ancestry cache."""
 892         # TODO: table names
 893         cursor = connection.cursor()
 894         if connection.vendor == 'postgres':
 895             cursor.execute("TRUNCATE catalogue_book_ancestor")
 896             cursor.execute("""
 897                 WITH RECURSIVE ancestry AS (
 898                     SELECT book.id, book.parent_id
 899                     FROM catalogue_book AS book
 900                     WHERE book.parent_id IS NOT NULL
 901                     UNION
 902                     SELECT ancestor.id, book.parent_id
 903                     FROM ancestry AS ancestor, catalogue_book AS book
 904                     WHERE ancestor.parent_id = book.id
 905                         AND book.parent_id IS NOT NULL
 906                     )
 907                 INSERT INTO catalogue_book_ancestor
 908                     (from_book_id, to_book_id)
 909                     SELECT id, parent_id
 910                     FROM ancestry
 911                     ORDER BY id;
 912                 """)
 913         else:
 914             cursor.execute("DELETE FROM catalogue_book_ancestor")
 915             for b in cls.objects.exclude(parent=None):
 916                 parent = b.parent
 917                 while parent is not None:
 918                     b.ancestor.add(parent)
 919                     parent = parent.parent
 920
 921     @property
 922     def ancestors(self):
 923         if self.parent:
 924             for anc in self.parent.ancestors:
 925                 yield anc
 926             yield self.parent
 927         else:
 928             return []
 929
 930     def clear_cache(self):
 931         clear_cached_renders(self.mini_box)
 932         clear_cached_renders(self.mini_box_nolink)
 933
 934     def cover_info(self, inherit=True):
 935         """Returns a dictionary to serve as fallback for BookInfo.
 936
 937         For now, the only thing inherited is the cover image.
 938         """
 939         need = False
 940         info = {}
 941         for field in ('cover_url', 'cover_by', 'cover_source'):
 942             val = self.get_extra_info_json().get(field)
 943             if val:
 944                 info[field] = val
 945             else:
 946                 need = True
 947         if inherit and need and self.parent is not None:
 948             parent_info = self.parent.cover_info()
 949             parent_info.update(info)
 950             info = parent_info
 951         return info
 952
 953     def related_themes(self):
 954         return Tag.objects.usage_for_queryset(
 955             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 956             counts=True).filter(category='theme').order_by('-count')
 957
 958     def parent_cover_changed(self):
 959         """Called when parent book's cover image is changed."""
 960         if not self.cover_info(inherit=False):
 961             if 'cover' not in app_settings.DONT_BUILD:
 962                 self.cover.build_delay()
 963                 self.cover_clean.build_delay()
 964                 self.cover_thumb.build_delay()
 965                 self.cover_api_thumb.build_delay()
 966                 self.simple_cover.build_delay()
 967                 self.cover_ebookpoint.build_delay()
 968             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 969                 if format_ not in app_settings.DONT_BUILD:
 970                     getattr(self, '%s_file' % format_).build_delay()
 971             for child in self.children.all():
 972                 child.parent_cover_changed()
 973
 974     def other_versions(self):
 975         """Find other versions (i.e. in other languages) of the book."""
 976         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 977
 978     def parents(self):
 979         books = []
 980         parent = self.parent
 981         while parent is not None:
 982             books.insert(0, parent)
 983             parent = parent.parent
 984         return books
 985
 986     def pretty_title(self, html_links=False):
 987         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 988         books = self.parents() + [self]
 989         names.extend([(b.title, b.get_absolute_url()) for b in books])
 990
 991         if html_links:
 992             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 993         else:
 994             names = [tag[0] for tag in names]
 995         return ', '.join(names)
 996
 997     def publisher(self):
 998         publisher = self.get_extra_info_json()['publisher']
 999         if isinstance(publisher, str):
1000             return publisher
1001         elif isinstance(publisher, list):
1002             return ', '.join(publisher)
1003
1004     @classmethod
1005     def tagged_top_level(cls, tags):
1006         """ Returns top-level books tagged with `tags`.
1007
1008         It only returns those books which don't have ancestors which are
1009         also tagged with those tags.
1010
1011         """
1012         objects = cls.tagged.with_all(tags)
1013         return objects.filter(findable=True).exclude(ancestor__in=objects)
1014
1015     @classmethod
1016     def book_list(cls, book_filter=None):
1017         """Generates a hierarchical listing of all books.
1018
1019         Books are optionally filtered with a test function.
1020
1021         """
1022
1023         books_by_parent = {}
1024         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1025         if book_filter:
1026             books = books.filter(book_filter).distinct()
1027
1028             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1029             for book in books.iterator():
1030                 parent = book.parent_id
1031                 if parent not in book_ids:
1032                     parent = None
1033                 books_by_parent.setdefault(parent, []).append(book)
1034         else:
1035             for book in books.iterator():
1036                 books_by_parent.setdefault(book.parent_id, []).append(book)
1037
1038         orphans = []
1039         books_by_author = OrderedDict()
1040         for tag in Tag.objects.filter(category='author').iterator():
1041             books_by_author[tag] = []
1042
1043         for book in books_by_parent.get(None, ()):
1044             authors = list(book.authors().only('pk'))
1045             if authors:
1046                 for author in authors:
1047                     books_by_author[author].append(book)
1048             else:
1049                 orphans.append(book)
1050
1051         return books_by_author, orphans, books_by_parent
1052
    # Maps audience codes stored in a book's extra info ('audiences') to a
    # (sort rank, Polish display label) pair.  audiences_pl() orders entries
    # by this pair and shows only the label, so codes sharing a pair collapse
    # into a single entry.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1063
1064     def audiences_pl(self):
1065         audiences = self.get_extra_info_json().get('audiences', [])
1066         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1067         return [a[1] for a in audiences]
1068
1069     def stage_note(self):
1070         stage = self.get_extra_info_json().get('stage')
1071         if stage and stage < '0.4':
1072             return (_('Ten utwór wymaga uwspółcześnienia'),
1073                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1074         else:
1075             return None, None
1076
1077     def choose_fragments(self, number):
1078         fragments = self.fragments.order_by()
1079         fragments_count = fragments.count()
1080         if not fragments_count and self.children.exists():
1081             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1082             fragments_count = fragments.count()
1083         if fragments_count:
1084             if fragments_count > number:
1085                 offset = randint(0, fragments_count - number)
1086             else:
1087                 offset = 0
1088             return fragments[offset : offset + number]
1089         elif self.parent:
1090             return self.parent.choose_fragments(number)
1091         else:
1092             return []
1093
1094     def choose_fragment(self):
1095         fragments = self.choose_fragments(1)
1096         if fragments:
1097             return fragments[0]
1098         else:
1099             return None
1100
1101     def fragment_data(self):
1102         fragment = self.choose_fragment()
1103         if fragment:
1104             return {
1105                 'title': fragment.book.pretty_title(),
1106                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1107             }
1108         else:
1109             return None
1110
1111     def update_popularity(self):
1112         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1113         try:
1114             pop = self.popularity
1115             pop.count = count
1116             pop.save()
1117         except BookPopularity.DoesNotExist:
1118             BookPopularity.objects.create(book=self, count=count)
1119
1120     def ridero_link(self):
1121         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1122
1123     def elevenreader_link(self):
1124         first_text = self.get_first_text()
1125         if first_text is None:
1126             return None
1127         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1128
1129     def content_warnings(self):
1130         warnings_def = {
1131             'wulgaryzmy': _('wulgaryzmy'),
1132         }
1133         warnings = self.get_extra_info_json().get('content_warnings', [])
1134         warnings = [
1135             warnings_def.get(w, w)
1136             for w in warnings
1137         ]
1138         warnings.sort()
1139         return warnings
1140
1141     def full_sort_key(self):
1142         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1143
1144     def cover_color(self):
1145         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1146
1147     @cached_render('catalogue/book_mini_box.html')
1148     def mini_box(self):
1149         return {
1150             'book': self
1151         }
1152
1153     @cached_render('catalogue/book_mini_box.html')
1154     def mini_box_nolink(self):
1155         return {
1156             'book': self,
1157             'no_link': True,
1158         }
1159
1160
1161 class BookPopularity(models.Model):
1162     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1163     count = models.IntegerField(default=0, db_index=True)