src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 import requests
  11 from slugify import slugify
  12 from sortify import sortify
  13 from urllib.request import urlretrieve
  14 from django.apps import apps
  15 from django.conf import settings
  16 from django.db import connection, models, transaction
  17 import django.dispatch
  18 from django.contrib.contenttypes.fields import GenericRelation
  19 from django.template.loader import render_to_string
  20 from django.urls import reverse
  21 from django.utils.translation import gettext_lazy as _, get_language
  22 from fnpdjango.storage import BofhFileSystemStorage
  23 from lxml import html
  24 from librarian.cover import WLCover
  25 from librarian.html import transform_abstrakt
  26 from librarian.builders import builders
  27 from newtagging import managers
  28 from catalogue import constants
  29 from catalogue import fields
  30 from catalogue.models import Tag, Fragment, BookMedia
  31 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  32 from catalogue.models.tag import prefetched_relations
  33 from catalogue import app_settings
  34 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  35
# Storage instance passed as ``storage=`` to the XML, text, e-book and cover
# file fields declared on Book.
bofh_storage = BofhFileSystemStorage()
  37
  38
class Book(models.Model):
    """A catalogue book imported from a WL-XML source file.

    Stores the descriptive metadata, the files generated during publication,
    the cover images, and the parent/child links between books.
    """
    title = models.CharField('tytuł', max_length=32767)
    # Sorting keys; both are recomputed on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded dictionary of additional metadata; read via get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview settings; preview_key is generated in save() when it is missing.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
    can_sell = models.BooleanField('do sprzedaży', default=True)
    can_sell_mp3 = models.BooleanField('do sprzedaży mp3', default=True)
    isbn_mp3 = models.CharField('ISBN audiobooka', max_length=32, blank=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Names in `formats` are looked up as "<name>_file" attributes by
    # has_media() and get_media(); any other media type goes through `media`.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: direct parent plus a many-to-many relation to ancestors.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Values derived in save(): author names and presence of 'audience' in extra_info.
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 114
 115     class AlreadyExists(Exception):
 116         pass
 117
 118     class Meta:
 119         ordering = ('sort_key_author', 'sort_key')
 120         verbose_name = 'książka'
 121         verbose_name_plural = 'książki'
 122         app_label = 'catalogue'
 123
 124     def __str__(self):
 125         return self.title
 126
 127     def get_extra_info_json(self):
 128         return json.loads(self.extra_info or '{}')
 129
 130     def get_initial(self):
 131         try:
 132             return re.search(r'\w', self.title, re.U).group(0)
 133         except AttributeError:
 134             return ''
 135
 136     def authors(self):
 137         return self.tags.filter(category='author')
 138
 139     def epochs(self):
 140         return self.tags.filter(category='epoch')
 141
 142     def genres(self):
 143         return self.tags.filter(category='genre')
 144
 145     def kinds(self):
 146         return self.tags.filter(category='kind')
 147
 148     def tag_unicode(self, category):
 149         relations = prefetched_relations(self, category)
 150         if relations:
 151             return ', '.join(rel.tag.name for rel in relations)
 152         else:
 153             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 154
 155     def tags_by_category(self):
 156         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 157
 158     def author_unicode(self):
 159         return self.cached_author
 160
 161     def kind_unicode(self):
 162         return self.tag_unicode('kind')
 163
 164     def epoch_unicode(self):
 165         return self.tag_unicode('epoch')
 166
 167     def genre_unicode(self):
 168         return self.tag_unicode('genre')
 169
 170     def translator(self):
 171         translators = self.get_extra_info_json().get('translators')
 172         if not translators:
 173             return None
 174         if len(translators) > 3:
 175             translators = translators[:2]
 176             others = ' i inni'
 177         else:
 178             others = ''
 179         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 180
 181     def cover_source(self):
 182         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 183
 184     @property
 185     def isbn_pdf(self):
 186         return self.get_extra_info_json().get('isbn_pdf')
 187
 188     @property
 189     def isbn_epub(self):
 190         return self.get_extra_info_json().get('isbn_epub')
 191
 192     @property
 193     def isbn_mobi(self):
 194         return self.get_extra_info_json().get('isbn_mobi')
 195
 196     @property
 197     def redakcja(self):
 198         return self.get_extra_info_json().get('about')
 199
 200     def is_accessible_to(self, user):
 201         if not self.preview:
 202             return True
 203         if not user.is_authenticated:
 204             return False
 205         Membership = apps.get_model('club', 'Membership')
 206         if Membership.is_active_for(user):
 207             return True
 208         Funding = apps.get_model('funding', 'Funding')
 209         if Funding.objects.filter(user=user, offer__book=self):
 210             return True
 211         return False
 212
 213     def save(self, force_insert=False, force_update=False, **kwargs):
 214         from sortify import sortify
 215
 216         self.sort_key = sortify(self.title)[:120]
 217         self.title = str(self.title)  # ???
 218
 219         try:
 220             author = self.authors().first().sort_key
 221         except AttributeError:
 222             author = ''
 223         self.sort_key_author = author
 224
 225         self.cached_author = self.tag_unicode('author')
 226         self.has_audience = 'audience' in self.get_extra_info_json()
 227
 228         if self.preview and not self.preview_key:
 229             self.preview_key = get_random_hash(self.slug)[:32]
 230
 231         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 232
 233         return ret
 234
 235     def get_absolute_url(self):
 236         return reverse('book_detail', args=[self.slug])
 237
 238     def gallery_path(self):
 239         return gallery_path(self.slug)
 240
 241     def gallery_url(self):
 242         return gallery_url(self.slug)
 243
 244     def get_first_text(self):
 245         if self.html_file:
 246             return self
 247         child = self.children.all().order_by('parent_number').first()
 248         if child is not None:
 249             return child.get_first_text()
 250
 251     def get_last_text(self):
 252         if self.html_file:
 253             return self
 254         child = self.children.all().order_by('parent_number').last()
 255         if child is not None:
 256             return child.get_last_text()
 257
 258     def get_prev_text(self):
 259         if not self.parent:
 260             return None
 261         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 262         if sibling is not None:
 263             return sibling.get_last_text()
 264
 265         if self.parent.html_file:
 266             return self.parent
 267
 268         return self.parent.get_prev_text()
 269
 270     def get_next_text(self, inside=True):
 271         if inside:
 272             child = self.children.order_by('parent_number').first()
 273             if child is not None:
 274                 return child.get_first_text()
 275
 276         if not self.parent:
 277             return None
 278         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 279         if sibling is not None:
 280             return sibling.get_first_text()
 281         return self.parent.get_next_text(inside=False)
 282
 283     def get_siblings(self):
 284         if not self.parent:
 285             return []
 286         return self.parent.children.all().order_by('parent_number')
 287
 288     def get_children(self):
 289         return self.children.all().order_by('parent_number')
 290
 291     @property
 292     def name(self):
 293         return self.title
 294
 295     def language_code(self):
 296         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 297
 298     def language_name(self):
 299         return dict(settings.LANGUAGES).get(self.language_code(), "")
 300
 301     def is_foreign(self):
 302         return self.language_code() != settings.LANGUAGE_CODE
 303
 304     def set_audio_length(self):
 305         length = self.get_audio_length()
 306         if length > 0:
 307             self.audio_length = self.format_audio_length(length)
 308             self.save()
 309
 310     @staticmethod
 311     def format_audio_length(seconds):
 312         """
 313         >>> Book.format_audio_length(1)
 314         '0:01'
 315         >>> Book.format_audio_length(3661)
 316         '1:01:01'
 317         """
 318         if seconds < 60*60:
 319             minutes = seconds // 60
 320             seconds = seconds % 60
 321             return '%d:%02d' % (minutes, seconds)
 322         else:
 323             hours = seconds // 3600
 324             minutes = seconds % 3600 // 60
 325             seconds = seconds % 60
 326             return '%d:%02d:%02d' % (hours, minutes, seconds)
 327
 328     def get_audio_length(self):
 329         total = 0
 330         for media in self.get_mp3() or ():
 331             total += app_settings.GET_MP3_LENGTH(media.file.path)
 332         return int(total)
 333
 334     def get_time(self):
 335         return round(self.xml_file.size / 1000 * 40)
 336
 337     def has_media(self, type_):
 338         if type_ in Book.formats:
 339             return bool(getattr(self, "%s_file" % type_))
 340         else:
 341             return self.media.filter(type=type_).exists()
 342
 343     def get_media(self, type_):
 344         if self.has_media(type_):
 345             if type_ in Book.formats:
 346                 return getattr(self, "%s_file" % type_)
 347             else:
 348                 return self.media.filter(type=type_)
 349         else:
 350             return None
 351
 352     def get_mp3(self):
 353         return self.get_media("mp3")
 354
 355     def get_odt(self):
 356         return self.get_media("odt")
 357
 358     def get_ogg(self):
 359         return self.get_media("ogg")
 360
 361     def get_daisy(self):
 362         return self.get_media("daisy")
 363
 364     def get_audio_epub(self):
 365         return self.get_media("audio.epub")
 366
 367     def media_url(self, format_):
 368         media = self.get_media(format_)
 369         if media:
 370             if self.preview:
 371                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 372             else:
 373                 return media.url
 374         else:
 375             return None
 376
 377     def html_url(self):
 378         return self.media_url('html')
 379
 380     def html_nonotes_url(self):
 381         return self.media_url('html_nonotes')
 382
 383     def pdf_url(self):
 384         return self.media_url('pdf')
 385
 386     def epub_url(self):
 387         return self.media_url('epub')
 388
 389     def mobi_url(self):
 390         return self.media_url('mobi')
 391
 392     def txt_url(self):
 393         return self.media_url('txt')
 394
 395     def fb2_url(self):
 396         return self.media_url('fb2')
 397
 398     def xml_url(self):
 399         return self.media_url('xml')
 400
 401     def has_description(self):
 402         return len(self.description) > 0
 403     has_description.short_description = 'opis'
 404     has_description.boolean = True
 405
 406     def has_mp3_file(self):
 407         return self.has_media("mp3")
 408     has_mp3_file.short_description = 'MP3'
 409     has_mp3_file.boolean = True
 410
 411     def has_ogg_file(self):
 412         return self.has_media("ogg")
 413     has_ogg_file.short_description = 'OGG'
 414     has_ogg_file.boolean = True
 415
 416     def has_daisy_file(self):
 417         return self.has_media("daisy")
 418     has_daisy_file.short_description = 'DAISY'
 419     has_daisy_file.boolean = True
 420
 421     def has_sync_file(self):
 422         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 423
 424     def build_sync_file(self):
 425         from lxml import html
 426         from django.core.files.base import ContentFile
 427         with self.html_file.open('rb') as f:
 428             h = html.fragment_fromstring(f.read().decode('utf-8'))
 429
 430         durations = [
 431             m['mp3'].duration
 432             for m in self.get_audiobooks()[0]
 433         ]
 434         if settings.MOCK_DURATIONS:
 435             durations = settings.MOCK_DURATIONS
 436
 437         sync = []
 438         ts = None
 439         sid = 1
 440         dirty = False
 441         for elem in h.iter():
 442             if elem.get('data-audio-ts'):
 443                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 444                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 445                 # check if inside verse
 446                 p = elem.getparent()
 447                 while p is not None:
 448                     # Workaround for missing ids.
 449                     if 'verse' in p.get('class', ''):
 450                         if not p.get('id'):
 451                             p.set('id', f'syn{sid}')
 452                             dirty = True
 453                             sid += 1
 454                         sync.append((ts, p.get('id')))
 455                         ts = None
 456                         break
 457                     p = p.getparent()
 458             elif ts:
 459                 cls = elem.get('class', '')
 460                 # Workaround for missing ids.
 461                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 462                     if not elem.get('id'):
 463                         elem.set('id', f'syn{sid}')
 464                         dirty = True
 465                         sid += 1
 466                     sync.append((ts, elem.get('id')))
 467                     ts = None
 468         if dirty:
 469             htext = html.tostring(h, encoding='utf-8')
 470             with open(self.html_file.path, 'wb') as f:
 471                 f.write(htext)
 472         try:
 473             bm = self.media.get(type='sync')
 474         except:
 475             bm = BookMedia(book=self, type='sync')
 476         sync = (
 477             '27\n' + '\n'.join(
 478                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 479             )).encode('latin1')
 480         bm.file.save(
 481             None, ContentFile(sync)
 482             )
 483
 484     def get_sync(self):
 485         if not self.has_sync_file():
 486             return []
 487         with self.get_media('sync').first().file.open('r') as f:
 488             sync = f.read().split('\n')
 489         offset = float(sync[0])
 490         items = []
 491         for line in sync[1:]:
 492             if not line:
 493                 continue
 494             start, end, elid = line.split()
 495             items.append([elid, float(start) + offset])
 496         return items
 497
 498     def sync_ts(self, ts):
 499         elid = None
 500         for cur_id, t in self.get_sync():
 501             if ts >= t:
 502                 elid = cur_id
 503             else:
 504                 break
 505         return elid
 506
 507     def sync_elid(self, elid):
 508         for cur_id, t in self.get_sync():
 509             if cur_id == elid:
 510                 return t
 511
 512     def has_audio_epub_file(self):
 513         return self.has_media("audio.epub")
 514
 515     @property
 516     def media_daisy(self):
 517         return self.get_media('daisy')
 518
 519     @property
 520     def media_audio_epub(self):
 521         return self.get_media('audio.epub')
 522
 523     def get_audiobooks(self, with_children=False, processing=False):
 524         ogg_files = {}
 525         for m in self.media.filter(type='ogg').order_by().iterator():
 526             ogg_files[m.name] = m
 527
 528         audiobooks = []
 529         projects = set()
 530         total_duration = 0
 531         for mp3 in self.media.filter(type='mp3').iterator():
 532             # ogg files are always from the same project
 533             meta = mp3.get_extra_info_json()
 534             project = meta.get('project')
 535             if not project:
 536                 # temporary fallback
 537                 project = 'CzytamySłuchając'
 538
 539             projects.add((project, meta.get('funded_by', '')))
 540             total_duration += mp3.duration or 0
 541
 542             media = {'mp3': mp3}
 543
 544             ogg = ogg_files.get(mp3.name)
 545             if ogg:
 546                 media['ogg'] = ogg
 547             audiobooks.append(media)
 548
 549         if with_children:
 550             for child in self.get_children():
 551                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 552                     with_children=True, processing=True)
 553                 audiobooks.append({'part': child})
 554                 audiobooks += ch_audiobooks
 555                 projects.update(ch_projects)
 556                 total_duration += ch_duration
 557
 558         if not processing:
 559             projects = sorted(projects)
 560             total_duration = '%d:%02d' % (
 561                 total_duration // 60,
 562                 total_duration % 60
 563             )
 564
 565         return audiobooks, projects, total_duration
 566
 567     def get_audiobooks_with_children(self):
 568         return self.get_audiobooks(with_children=True)
 569
 570     def wldocument(self, parse_dublincore=True, inherit=True):
 571         from catalogue.import_utils import ORMDocProvider
 572         from librarian.parser import WLDocument
 573
 574         if inherit and self.parent:
 575             meta_fallbacks = self.parent.cover_info()
 576         else:
 577             meta_fallbacks = None
 578
 579         return WLDocument.from_file(
 580             self.xml_file.path,
 581             provider=ORMDocProvider(self),
 582             parse_dublincore=parse_dublincore,
 583             meta_fallbacks=meta_fallbacks)
 584
 585     def wldocument2(self):
 586         from catalogue.import_utils import ORMDocProvider
 587         from librarian.document import WLDocument
 588         doc = WLDocument(
 589             self.xml_file.path,
 590             provider=ORMDocProvider(self)
 591         )
 592         doc.meta.update(self.cover_info())
 593         return doc
 594
 595
 596     @staticmethod
 597     def zip_format(format_):
 598         def pretty_file_name(book):
 599             return "%s/%s.%s" % (
 600                 book.get_extra_info_json()['author'],
 601                 book.slug,
 602                 format_)
 603
 604         field_name = "%s_file" % format_
 605         field = getattr(Book, field_name)
 606         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 607         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 608         return create_zip(paths, field.ZIP)
 609
 610     def zip_audiobooks(self, format_):
 611         bm = BookMedia.objects.filter(book=self, type=format_)
 612         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 613         licenses = set()
 614         for m in bm:
 615             license = constants.LICENSES.get(
 616                 m.get_extra_info_json().get('license'), {}
 617             ).get('locative')
 618             if license:
 619                 licenses.add(license)
 620         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 621             'licenses': licenses,
 622             'meta': self.wldocument2().meta,
 623         })
 624         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 625
 626     def search_index(self, index=None):
 627         if not self.findable:
 628             return
 629         from search.index import Index
 630         Index.index_book(self)
 631
 632     # will make problems in conjunction with paid previews
 633     def download_pictures(self, remote_gallery_url):
 634         # This is only needed for legacy relative image paths.
 635         gallery_path = self.gallery_path()
 636         # delete previous files, so we don't include old files in ebooks
 637         if os.path.isdir(gallery_path):
 638             for filename in os.listdir(gallery_path):
 639                 file_path = os.path.join(gallery_path, filename)
 640                 os.unlink(file_path)
 641         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 642         if ilustr_elements:
 643             makedirs(gallery_path)
 644             for ilustr in ilustr_elements:
 645                 ilustr_src = ilustr.get('src')
 646                 if '/' in ilustr_src:
 647                     continue
 648                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 649                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 650
 651     def load_abstract(self):
 652         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 653         if abstract is not None:
 654             self.abstract = transform_abstrakt(abstract)
 655         else:
 656             self.abstract = ''
 657
 658     def load_toc(self):
 659         self.toc = ''
 660         if self.html_file:
 661             parser = html.HTMLParser(encoding='utf-8')
 662             tree = html.parse(self.html_file.path, parser=parser)
 663             toc = tree.find('//div[@id="toc"]/ol')
 664             if toc is None or not len(toc):
 665                 return
 666             html_link = reverse('book_text', args=[self.slug])
 667             for a in toc.findall('.//a'):
 668                 a.attrib['href'] = html_link + a.attrib['href']
 669             self.toc = html.tostring(toc, encoding='unicode')
 670             # div#toc
 671
 672     @classmethod
 673     def from_xml_file(cls, xml_file, **kwargs):
 674         from django.core.files import File
 675         from librarian import dcparser
 676
 677         # use librarian to parse meta-data
 678         book_info = dcparser.parse(xml_file)
 679
 680         if not isinstance(xml_file, File):
 681             xml_file = File(open(xml_file))
 682
 683         try:
 684             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 685         finally:
 686             xml_file.close()
 687
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None, isbn_mp3=None):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                ``to_dict()`` and, when present, ``parts``.
            overwrite: allow updating a book that already exists.
            dont_build: names of formats not to build ('cover' skips all
                covers); merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` being off).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview lasting that many days from today.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in ``extra_info`` when truthy.
            can_sell, isbn_mp3: stored on the book when not None.

        Returns:
            The created or updated book.

        Raises:
            Book.DoesNotExist: a book listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        # extra_info is the metadata dictionary plus the optional logo entries.
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        if isbn_mp3 is not None:
            book.isbn_mp3 = isbn_mp3
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        # Tags come as (tag, relation) pairs; pairs without a relation become
        # the book's tags, together with the shelves saved above.
        meta_tags = Tag.tags_from_info(book_info)

        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in the listed order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Announce the publication to listeners of the `published` signal.
        cls.published.send(sender=cls, instance=book)
        return book
 822
 823     def update_stats(self):
 824         stats = self.wldocument2().get_statistics()['total']
 825         self.pages = round(
 826             stats.get('verses_with_fn', 0) / 30 +
 827             stats.get('chars_out_verse_with_fn', 0) / 1800)
 828         self.read_time = round(self.get_time())
 829         self.save(update_fields=['pages', 'read_time'])
 830         if self.parent is not None:
 831             self.parent.update_stats()
 832
 833     def update_references(self):
 834         Entity = apps.get_model('references', 'Entity')
 835         doc = self.wldocument2()
 836         doc._compat_assign_section_ids()
 837         doc._compat_assign_ordered_ids()
 838         refs = {}
 839         for ref_elem in doc.references():
 840             uri = ref_elem.attrib.get('href', '')
 841             if not uri:
 842                 continue
 843             if uri in refs:
 844                 ref = refs[uri]
 845             else:
 846                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 847                 if entity_created:
 848                     try:
 849                         entity.populate()
 850                     except:
 851                         pass
 852                     else:
 853                         entity.save()
 854                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 855                 refs[uri] = ref
 856                 if not ref_created:
 857                     ref.occurence_set.all().delete()
 858             sec = ref_elem.get_link()
 859             m = re.match(r'sec(\d+)', sec)
 860             assert m is not None
 861             sec = int(m.group(1))
 862             snippet = ref_elem.get_snippet()
 863             b = builders['html-snippet']()
 864             for s in snippet:
 865                 s.html_build(b)
 866             html = b.output().get_bytes().decode('utf-8')
 867
 868             ref.occurence_set.create(
 869                 section=sec,
 870                 html=html
 871             )
 872         self.reference_set.exclude(entity__uri__in=refs).delete()
 873
 874     @property
 875     def references(self):
 876         return self.reference_set.all().select_related('entity')
 877
 878     def update_has_audio(self):
 879         self.has_audio = False
 880         if self.media.filter(type='mp3').exists():
 881             self.has_audio = True
 882         if self.descendant.filter(has_audio=True).exists():
 883             self.has_audio = True
 884         self.save(update_fields=['has_audio'])
 885         if self.parent is not None:
 886             self.parent.update_has_audio()
 887
 888     def update_narrators(self):
 889         narrator_names = set()
 890         for bm in self.media.filter(type='mp3'):
 891             narrator_names.update(set(
 892                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 893             ))
 894         narrators = []
 895
 896         for name in narrator_names:
 897             if not name: continue
 898             slug = slugify(name)
 899             try:
 900                 t = Tag.objects.get(category='author', slug=slug)
 901             except Tag.DoesNotExist:
 902                 sort_key = sortify(
 903                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 904                 )
 905                 t = Tag.objects.create(
 906                     category='author',
 907                     name_pl=name,
 908                     slug=slug,
 909                     sort_key=sort_key,
 910                 )
 911             narrators.append(t)
 912         self.narrators.set(narrators)
 913
 914     def update_can_sell_mp3(self):
 915         ret = True
 916         for child in self.get_children():
 917             child.update_can_sell_mp3()
 918             if not child.can_sell_mp3:
 919                 ret = False
 920         if self.has_mp3_file():
 921             audio_items = requests.get(f'https://audio.wolnelektury.pl/archive/book/{self.slug}.json').json()['items']
 922             if not all(x['project']['can_sell'] for x in audio_items):
 923                 ret = False
 924         self.can_sell_audio = ret
 925
 926     @classmethod
 927     @transaction.atomic
 928     def repopulate_ancestors(cls):
 929         """Fixes the ancestry cache."""
 930         # TODO: table names
 931         cursor = connection.cursor()
 932         if connection.vendor == 'postgres':
 933             cursor.execute("TRUNCATE catalogue_book_ancestor")
 934             cursor.execute("""
 935                 WITH RECURSIVE ancestry AS (
 936                     SELECT book.id, book.parent_id
 937                     FROM catalogue_book AS book
 938                     WHERE book.parent_id IS NOT NULL
 939                     UNION
 940                     SELECT ancestor.id, book.parent_id
 941                     FROM ancestry AS ancestor, catalogue_book AS book
 942                     WHERE ancestor.parent_id = book.id
 943                         AND book.parent_id IS NOT NULL
 944                     )
 945                 INSERT INTO catalogue_book_ancestor
 946                     (from_book_id, to_book_id)
 947                     SELECT id, parent_id
 948                     FROM ancestry
 949                     ORDER BY id;
 950                 """)
 951         else:
 952             cursor.execute("DELETE FROM catalogue_book_ancestor")
 953             for b in cls.objects.exclude(parent=None):
 954                 parent = b.parent
 955                 while parent is not None:
 956                     b.ancestor.add(parent)
 957                     parent = parent.parent
 958
 959     @property
 960     def ancestors(self):
 961         if self.parent:
 962             for anc in self.parent.ancestors:
 963                 yield anc
 964             yield self.parent
 965         else:
 966             return []
 967
 968     def clear_cache(self):
 969         clear_cached_renders(self.mini_box)
 970         clear_cached_renders(self.mini_box_nolink)
 971
 972     def cover_info(self, inherit=True):
 973         """Returns a dictionary to serve as fallback for BookInfo.
 974
 975         For now, the only thing inherited is the cover image.
 976         """
 977         need = False
 978         info = {}
 979         for field in ('cover_url', 'cover_by', 'cover_source'):
 980             val = self.get_extra_info_json().get(field)
 981             if val:
 982                 info[field] = val
 983             else:
 984                 need = True
 985         if inherit and need and self.parent is not None:
 986             parent_info = self.parent.cover_info()
 987             parent_info.update(info)
 988             info = parent_info
 989         return info
 990
 991     def related_themes(self):
 992         return Tag.objects.usage_for_queryset(
 993             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 994             counts=True).filter(category='theme').order_by('-count')
 995
 996     def parent_cover_changed(self):
 997         """Called when parent book's cover image is changed."""
 998         if not self.cover_info(inherit=False):
 999             if 'cover' not in app_settings.DONT_BUILD:
1000                 self.cover.build_delay()
1001                 self.cover_clean.build_delay()
1002                 self.cover_thumb.build_delay()
1003                 self.cover_api_thumb.build_delay()
1004                 self.simple_cover.build_delay()
1005                 self.cover_ebookpoint.build_delay()
1006             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
1007                 if format_ not in app_settings.DONT_BUILD:
1008                     getattr(self, '%s_file' % format_).build_delay()
1009             for child in self.children.all():
1010                 child.parent_cover_changed()
1011
1012     def other_versions(self):
1013         """Find other versions (i.e. in other languages) of the book."""
1014         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
1015
1016     def parents(self):
1017         books = []
1018         parent = self.parent
1019         while parent is not None:
1020             books.insert(0, parent)
1021             parent = parent.parent
1022         return books
1023
1024     def pretty_title(self, html_links=False):
1025         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1026         books = self.parents() + [self]
1027         names.extend([(b.title, b.get_absolute_url()) for b in books])
1028
1029         if html_links:
1030             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1031         else:
1032             names = [tag[0] for tag in names]
1033         return ', '.join(names)
1034
1035     def publisher(self):
1036         publisher = self.get_extra_info_json()['publisher']
1037         if isinstance(publisher, str):
1038             return publisher
1039         elif isinstance(publisher, list):
1040             return ', '.join(publisher)
1041
1042     def get_recommended(self, limit=4):
1043         books_qs = type(self).objects.filter(findable=True)
1044         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1045         books = type(self).tagged.related_to(self, books_qs)[:limit]
1046         return books
1047
1048     @classmethod
1049     def tagged_top_level(cls, tags):
1050         """ Returns top-level books tagged with `tags`.
1051
1052         It only returns those books which don't have ancestors which are
1053         also tagged with those tags.
1054
1055         """
1056         objects = cls.tagged.with_all(tags)
1057         return objects.filter(findable=True).exclude(ancestor__in=objects)
1058
1059     @classmethod
1060     def book_list(cls, book_filter=None):
1061         """Generates a hierarchical listing of all books.
1062
1063         Books are optionally filtered with a test function.
1064
1065         """
1066
1067         books_by_parent = {}
1068         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1069         if book_filter:
1070             books = books.filter(book_filter).distinct()
1071
1072             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1073             for book in books.iterator():
1074                 parent = book.parent_id
1075                 if parent not in book_ids:
1076                     parent = None
1077                 books_by_parent.setdefault(parent, []).append(book)
1078         else:
1079             for book in books.iterator():
1080                 books_by_parent.setdefault(book.parent_id, []).append(book)
1081
1082         orphans = []
1083         books_by_author = OrderedDict()
1084         for tag in Tag.objects.filter(category='author').iterator():
1085             books_by_author[tag] = []
1086
1087         for book in books_by_parent.get(None, ()):
1088             authors = list(book.authors().only('pk'))
1089             if authors:
1090                 for author in authors:
1091                     books_by_author[author].append(book)
1092             else:
1093                 orphans.append(book)
1094
1095         return books_by_author, orphans, books_by_parent
1096
    # Maps audience codes found in the extra info to (sort rank, Polish label)
    # pairs.  ``audiences_pl`` sorts by these pairs and returns the labels;
    # several codes deliberately map to the same pair.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1107
1108     def audiences_pl(self):
1109         audiences = self.get_extra_info_json().get('audiences', [])
1110         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1111         return [a[1] for a in audiences]
1112
1113     def stage_note(self):
1114         stage = self.get_extra_info_json().get('stage')
1115         if stage and stage < '0.4':
1116             return (_('Ten utwór wymaga uwspółcześnienia'),
1117                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1118         else:
1119             return None, None
1120
1121     def choose_fragments(self, number):
1122         fragments = self.fragments.order_by()
1123         fragments_count = fragments.count()
1124         if not fragments_count and self.children.exists():
1125             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1126             fragments_count = fragments.count()
1127         if fragments_count:
1128             if fragments_count > number:
1129                 offset = randint(0, fragments_count - number)
1130             else:
1131                 offset = 0
1132             return fragments[offset : offset + number]
1133         elif self.parent:
1134             return self.parent.choose_fragments(number)
1135         else:
1136             return []
1137
1138     def choose_fragment(self):
1139         fragments = self.choose_fragments(1)
1140         if fragments:
1141             return fragments[0]
1142         else:
1143             return None
1144
1145     def fragment_data(self):
1146         fragment = self.choose_fragment()
1147         if fragment:
1148             return {
1149                 'title': fragment.book.pretty_title(),
1150                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1151             }
1152         else:
1153             return None
1154
1155     def update_popularity(self):
1156         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1157         try:
1158             pop = self.popularity
1159             pop.count = count
1160             pop.save()
1161         except BookPopularity.DoesNotExist:
1162             BookPopularity.objects.create(book=self, count=count)
1163
1164     def ridero_link(self):
1165         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1166
1167     def elevenreader_link(self):
1168         first_text = self.get_first_text()
1169         if first_text is None:
1170             return None
1171         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1172
1173     def content_warnings(self):
1174         warnings_def = {
1175             'wulgaryzmy': _('wulgaryzmy'),
1176         }
1177         warnings = self.get_extra_info_json().get('content_warnings', [])
1178         warnings = [
1179             warnings_def.get(w, w)
1180             for w in warnings
1181         ]
1182         warnings.sort()
1183         return warnings
1184
1185     def full_sort_key(self):
1186         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1187
1188     def cover_color(self):
1189         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1190
1191     @cached_render('catalogue/book_mini_box.html')
1192     def mini_box(self):
1193         return {
1194             'book': self
1195         }
1196
1197     @cached_render('catalogue/book_mini_box.html')
1198     def mini_box_nolink(self):
1199         return {
1200             'book': self,
1201             'no_link': True,
1202         }
1203
1204
class BookPopularity(models.Model):
    """Popularity counter kept in a separate row, at most one per book."""
    # Reachable from a book as ``book.popularity``; deleted together with it.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter; ``Book.update_popularity`` writes the number of
    # distinct users who have the book on a list.
    count = models.IntegerField(default=0, db_index=True)