src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
# Single storage instance shared by the file fields declared on Book
# (passed as ``storage=`` to the XML/HTML/ebook/cover fields).
bofh_storage = BofhFileSystemStorage()
  36
  37
  38 class Book(models.Model):
  39     """Represents a book imported from WL-XML."""
  40     title = models.CharField('tytuł', max_length=32767)
  41     sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
  42     sort_key_author = models.CharField(
  43         'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
  44     slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
  45     common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
  46     language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
  47     description = models.TextField('opis', blank=True)
  48     license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
  49     abstract = models.TextField('abstrakt', blank=True)
  50     toc = models.TextField('spis treści', blank=True)
  51     created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
  52     changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
  53     parent_number = models.IntegerField('numer w ramach rodzica', default=0)
  54     extra_info = models.TextField('dodatkowe informacje', default='{}')
  55     gazeta_link = models.CharField(blank=True, max_length=240)
  56     wiki_link = models.CharField(blank=True, max_length=240)
  57     print_on_demand = models.BooleanField('druk na żądanie', default=False)
  58     recommended = models.BooleanField('polecane', default=False)
  59     audio_length = models.CharField('długość audio', blank=True, max_length=8)
  60     preview = models.BooleanField('prapremiera', default=False)
  61     preview_until = models.DateField('prapremiera do', blank=True, null=True)
  62     preview_key = models.CharField(max_length=32, blank=True, null=True)
  63     findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
  64
  65     # files generated during publication
  66     xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
  67     html_file = fields.HtmlField(storage=bofh_storage)
  68     html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
  69     fb2_file = fields.Fb2Field(storage=bofh_storage)
  70     txt_file = fields.TxtField(storage=bofh_storage)
  71     epub_file = fields.EpubField(storage=bofh_storage)
  72     mobi_file = fields.MobiField(storage=bofh_storage)
  73     pdf_file = fields.PdfField(storage=bofh_storage)
  74
  75     cover = fields.CoverField('okładka', storage=bofh_storage)
  76     # Cleaner version of cover for thumbs
  77     cover_clean = fields.CoverCleanField('czysta okładka')
  78     cover_thumb = fields.CoverThumbField('miniatura okładki')
  79     cover_api_thumb = fields.CoverApiThumbField(
  80         'mniaturka okładki dla aplikacji')
  81     simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
  82     cover_ebookpoint = fields.CoverEbookpointField(
  83         'okładka dla Ebookpoint')
  84
  85     ebook_formats = constants.EBOOK_FORMATS
  86     formats = ebook_formats + ['html', 'xml', 'html_nonotes']
  87
  88     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
  89     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
  90
  91     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
  92     has_audience = models.BooleanField(default=False)
  93
  94     objects = models.Manager()
  95     tagged = managers.ModelTaggedItemManager(Tag)
  96     tags = managers.TagDescriptor(Tag)
  97     tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
  98     translators = models.ManyToManyField(Tag, blank=True)
  99     narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
 100     has_audio = models.BooleanField(default=False)
 101
 102     html_built = django.dispatch.Signal()
 103     published = django.dispatch.Signal()
 104
 105     SORT_KEY_SEP = '$'
 106
 107     is_book = True
 108
 109     class AlreadyExists(Exception):
 110         pass
 111
    class Meta:
        # Default ordering: author sort key first, then the book's own sort key.
        ordering = ('sort_key_author', 'sort_key')
        # Polish display names for the model ("book" / "books").
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        # Explicit app label for this model.
        app_label = 'catalogue'
 117
 118     def __str__(self):
 119         return self.title
 120
 121     def get_extra_info_json(self):
 122         return json.loads(self.extra_info or '{}')
 123
 124     def get_initial(self):
 125         try:
 126             return re.search(r'\w', self.title, re.U).group(0)
 127         except AttributeError:
 128             return ''
 129
 130     def authors(self):
 131         return self.tags.filter(category='author')
 132
 133     def epochs(self):
 134         return self.tags.filter(category='epoch')
 135
 136     def genres(self):
 137         return self.tags.filter(category='genre')
 138
 139     def kinds(self):
 140         return self.tags.filter(category='kind')
 141
 142     def tag_unicode(self, category):
 143         relations = prefetched_relations(self, category)
 144         if relations:
 145             return ', '.join(rel.tag.name for rel in relations)
 146         else:
 147             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 148
 149     def tags_by_category(self):
 150         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 151
 152     def author_unicode(self):
 153         return self.cached_author
 154
 155     def kind_unicode(self):
 156         return self.tag_unicode('kind')
 157
 158     def epoch_unicode(self):
 159         return self.tag_unicode('epoch')
 160
 161     def genre_unicode(self):
 162         return self.tag_unicode('genre')
 163
 164     def translator(self):
 165         translators = self.get_extra_info_json().get('translators')
 166         if not translators:
 167             return None
 168         if len(translators) > 3:
 169             translators = translators[:2]
 170             others = ' i inni'
 171         else:
 172             others = ''
 173         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 174
 175     def cover_source(self):
 176         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 177
 178     @property
 179     def isbn_pdf(self):
 180         return self.get_extra_info_json().get('isbn_pdf')
 181
 182     @property
 183     def isbn_epub(self):
 184         return self.get_extra_info_json().get('isbn_epub')
 185
 186     @property
 187     def isbn_mobi(self):
 188         return self.get_extra_info_json().get('isbn_mobi')
 189
 190     def is_accessible_to(self, user):
 191         if not self.preview:
 192             return True
 193         if not user.is_authenticated:
 194             return False
 195         Membership = apps.get_model('club', 'Membership')
 196         if Membership.is_active_for(user):
 197             return True
 198         Funding = apps.get_model('funding', 'Funding')
 199         if Funding.objects.filter(user=user, offer__book=self):
 200             return True
 201         return False
 202
 203     def save(self, force_insert=False, force_update=False, **kwargs):
 204         from sortify import sortify
 205
 206         self.sort_key = sortify(self.title)[:120]
 207         self.title = str(self.title)  # ???
 208
 209         try:
 210             author = self.authors().first().sort_key
 211         except AttributeError:
 212             author = ''
 213         self.sort_key_author = author
 214
 215         self.cached_author = self.tag_unicode('author')
 216         self.has_audience = 'audience' in self.get_extra_info_json()
 217
 218         if self.preview and not self.preview_key:
 219             self.preview_key = get_random_hash(self.slug)[:32]
 220
 221         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 222
 223         return ret
 224
 225     def get_absolute_url(self):
 226         return reverse('book_detail', args=[self.slug])
 227
 228     def gallery_path(self):
 229         return gallery_path(self.slug)
 230
 231     def gallery_url(self):
 232         return gallery_url(self.slug)
 233
 234     def get_first_text(self):
 235         if self.html_file:
 236             return self
 237         child = self.children.all().order_by('parent_number').first()
 238         if child is not None:
 239             return child.get_first_text()
 240
 241     def get_last_text(self):
 242         if self.html_file:
 243             return self
 244         child = self.children.all().order_by('parent_number').last()
 245         if child is not None:
 246             return child.get_last_text()
 247
 248     def get_prev_text(self):
 249         if not self.parent:
 250             return None
 251         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 252         if sibling is not None:
 253             return sibling.get_last_text()
 254
 255         if self.parent.html_file:
 256             return self.parent
 257
 258         return self.parent.get_prev_text()
 259
 260     def get_next_text(self, inside=True):
 261         if inside:
 262             child = self.children.order_by('parent_number').first()
 263             if child is not None:
 264                 return child.get_first_text()
 265
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_first_text()
 271         return self.parent.get_next_text(inside=False)
 272
 273     def get_siblings(self):
 274         if not self.parent:
 275             return []
 276         return self.parent.children.all().order_by('parent_number')
 277
 278     def get_children(self):
 279         return self.children.all().order_by('parent_number')
 280
 281     @property
 282     def name(self):
 283         return self.title
 284
 285     def language_code(self):
 286         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 287
 288     def language_name(self):
 289         return dict(settings.LANGUAGES).get(self.language_code(), "")
 290
 291     def is_foreign(self):
 292         return self.language_code() != settings.LANGUAGE_CODE
 293
 294     def set_audio_length(self):
 295         length = self.get_audio_length()
 296         if length > 0:
 297             self.audio_length = self.format_audio_length(length)
 298             self.save()
 299
 300     @staticmethod
 301     def format_audio_length(seconds):
 302         """
 303         >>> Book.format_audio_length(1)
 304         '0:01'
 305         >>> Book.format_audio_length(3661)
 306         '1:01:01'
 307         """
 308         if seconds < 60*60:
 309             minutes = seconds // 60
 310             seconds = seconds % 60
 311             return '%d:%02d' % (minutes, seconds)
 312         else:
 313             hours = seconds // 3600
 314             minutes = seconds % 3600 // 60
 315             seconds = seconds % 60
 316             return '%d:%02d:%02d' % (hours, minutes, seconds)
 317
 318     def get_audio_length(self):
 319         total = 0
 320         for media in self.get_mp3() or ():
 321             total += app_settings.GET_MP3_LENGTH(media.file.path)
 322         return int(total)
 323
 324     def get_time(self):
 325         return round(self.xml_file.size / 1000 * 40)
 326
 327     def has_media(self, type_):
 328         if type_ in Book.formats:
 329             return bool(getattr(self, "%s_file" % type_))
 330         else:
 331             return self.media.filter(type=type_).exists()
 332
 333     def get_media(self, type_):
 334         if self.has_media(type_):
 335             if type_ in Book.formats:
 336                 return getattr(self, "%s_file" % type_)
 337             else:
 338                 return self.media.filter(type=type_)
 339         else:
 340             return None
 341
 342     def get_mp3(self):
 343         return self.get_media("mp3")
 344
 345     def get_odt(self):
 346         return self.get_media("odt")
 347
 348     def get_ogg(self):
 349         return self.get_media("ogg")
 350
 351     def get_daisy(self):
 352         return self.get_media("daisy")
 353
 354     def get_audio_epub(self):
 355         return self.get_media("audio.epub")
 356
 357     def media_url(self, format_):
 358         media = self.get_media(format_)
 359         if media:
 360             if self.preview:
 361                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 362             else:
 363                 return media.url
 364         else:
 365             return None
 366
 367     def html_url(self):
 368         return self.media_url('html')
 369
 370     def html_nonotes_url(self):
 371         return self.media_url('html_nonotes')
 372
 373     def pdf_url(self):
 374         return self.media_url('pdf')
 375
 376     def epub_url(self):
 377         return self.media_url('epub')
 378
 379     def mobi_url(self):
 380         return self.media_url('mobi')
 381
 382     def txt_url(self):
 383         return self.media_url('txt')
 384
 385     def fb2_url(self):
 386         return self.media_url('fb2')
 387
 388     def xml_url(self):
 389         return self.media_url('xml')
 390
 391     def has_description(self):
 392         return len(self.description) > 0
 393     has_description.short_description = 'opis'
 394     has_description.boolean = True
 395
 396     def has_mp3_file(self):
 397         return self.has_media("mp3")
 398     has_mp3_file.short_description = 'MP3'
 399     has_mp3_file.boolean = True
 400
 401     def has_ogg_file(self):
 402         return self.has_media("ogg")
 403     has_ogg_file.short_description = 'OGG'
 404     has_ogg_file.boolean = True
 405
 406     def has_daisy_file(self):
 407         return self.has_media("daisy")
 408     has_daisy_file.short_description = 'DAISY'
 409     has_daisy_file.boolean = True
 410
 411     def has_sync_file(self):
 412         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 413
 414     def build_sync_file(self):
 415         from lxml import html
 416         from django.core.files.base import ContentFile
 417         with self.html_file.open('rb') as f:
 418             h = html.fragment_fromstring(f.read().decode('utf-8'))
 419
 420         durations = [
 421             m['mp3'].duration
 422             for m in self.get_audiobooks()[0]
 423         ]
 424         if settings.MOCK_DURATIONS:
 425             durations = settings.MOCK_DURATIONS
 426
 427         sync = []
 428         ts = None
 429         sid = 1
 430         dirty = False
 431         for elem in h.iter():
 432             if elem.get('data-audio-ts'):
 433                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 434                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 435                 # check if inside verse
 436                 p = elem.getparent()
 437                 while p is not None:
 438                     # Workaround for missing ids.
 439                     if 'verse' in p.get('class', ''):
 440                         if not p.get('id'):
 441                             p.set('id', f'syn{sid}')
 442                             dirty = True
 443                             sid += 1
 444                         sync.append((ts, p.get('id')))
 445                         ts = None
 446                         break
 447                     p = p.getparent()
 448             elif ts:
 449                 cls = elem.get('class', '')
 450                 # Workaround for missing ids.
 451                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 452                     if not elem.get('id'):
 453                         elem.set('id', f'syn{sid}')
 454                         dirty = True
 455                         sid += 1
 456                     sync.append((ts, elem.get('id')))
 457                     ts = None
 458         if dirty:
 459             htext = html.tostring(h, encoding='utf-8')
 460             with open(self.html_file.path, 'wb') as f:
 461                 f.write(htext)
 462         try:
 463             bm = self.media.get(type='sync')
 464         except:
 465             bm = BookMedia(book=self, type='sync')
 466         sync = (
 467             '27\n' + '\n'.join(
 468                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 469             )).encode('latin1')
 470         bm.file.save(
 471             None, ContentFile(sync)
 472             )
 473
 474
 475     def get_sync(self):
 476         if not self.has_sync_file():
 477             return '[]'
 478         with self.get_media('sync').first().file.open('r') as f:
 479             sync = f.read().split('\n')
 480         offset = float(sync[0])
 481         items = []
 482         for line in sync[1:]:
 483             if not line:
 484                 continue
 485             start, end, elid = line.split()
 486             items.append([elid, float(start) + offset])
 487         return json.dumps(items)
 488
 489     def has_audio_epub_file(self):
 490         return self.has_media("audio.epub")
 491
 492     @property
 493     def media_daisy(self):
 494         return self.get_media('daisy')
 495
 496     @property
 497     def media_audio_epub(self):
 498         return self.get_media('audio.epub')
 499
 500     def get_audiobooks(self, with_children=False, processing=False):
 501         ogg_files = {}
 502         for m in self.media.filter(type='ogg').order_by().iterator():
 503             ogg_files[m.name] = m
 504
 505         audiobooks = []
 506         projects = set()
 507         total_duration = 0
 508         for mp3 in self.media.filter(type='mp3').iterator():
 509             # ogg files are always from the same project
 510             meta = mp3.get_extra_info_json()
 511             project = meta.get('project')
 512             if not project:
 513                 # temporary fallback
 514                 project = 'CzytamySłuchając'
 515
 516             projects.add((project, meta.get('funded_by', '')))
 517             total_duration += mp3.duration or 0
 518
 519             media = {'mp3': mp3}
 520
 521             ogg = ogg_files.get(mp3.name)
 522             if ogg:
 523                 media['ogg'] = ogg
 524             audiobooks.append(media)
 525
 526         if with_children:
 527             for child in self.get_children():
 528                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 529                     with_children=True, processing=True)
 530                 audiobooks.append({'part': child})
 531                 audiobooks += ch_audiobooks
 532                 projects.update(ch_projects)
 533                 total_duration += ch_duration
 534
 535         if not processing:
 536             projects = sorted(projects)
 537             total_duration = '%d:%02d' % (
 538                 total_duration // 60,
 539                 total_duration % 60
 540             )
 541
 542         return audiobooks, projects, total_duration
 543
 544     def get_audiobooks_with_children(self):
 545         return self.get_audiobooks(with_children=True)
 546
 547     def wldocument(self, parse_dublincore=True, inherit=True):
 548         from catalogue.import_utils import ORMDocProvider
 549         from librarian.parser import WLDocument
 550
 551         if inherit and self.parent:
 552             meta_fallbacks = self.parent.cover_info()
 553         else:
 554             meta_fallbacks = None
 555
 556         return WLDocument.from_file(
 557             self.xml_file.path,
 558             provider=ORMDocProvider(self),
 559             parse_dublincore=parse_dublincore,
 560             meta_fallbacks=meta_fallbacks)
 561
 562     def wldocument2(self):
 563         from catalogue.import_utils import ORMDocProvider
 564         from librarian.document import WLDocument
 565         doc = WLDocument(
 566             self.xml_file.path,
 567             provider=ORMDocProvider(self)
 568         )
 569         doc.meta.update(self.cover_info())
 570         return doc
 571
 572
 573     @staticmethod
 574     def zip_format(format_):
 575         def pretty_file_name(book):
 576             return "%s/%s.%s" % (
 577                 book.get_extra_info_json()['author'],
 578                 book.slug,
 579                 format_)
 580
 581         field_name = "%s_file" % format_
 582         field = getattr(Book, field_name)
 583         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 584         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 585         return create_zip(paths, field.ZIP)
 586
 587     def zip_audiobooks(self, format_):
 588         bm = BookMedia.objects.filter(book=self, type=format_)
 589         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 590         licenses = set()
 591         for m in bm:
 592             license = constants.LICENSES.get(
 593                 m.get_extra_info_json().get('license'), {}
 594             ).get('locative')
 595             if license:
 596                 licenses.add(license)
 597         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 598             'licenses': licenses,
 599             'meta': self.wldocument2().meta,
 600         })
 601         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 602
 603     def search_index(self, index=None):
 604         if not self.findable:
 605             return
 606         from search.index import Index
 607         Index.index_book(self)
 608
 609     # will make problems in conjunction with paid previews
 610     def download_pictures(self, remote_gallery_url):
 611         # This is only needed for legacy relative image paths.
 612         gallery_path = self.gallery_path()
 613         # delete previous files, so we don't include old files in ebooks
 614         if os.path.isdir(gallery_path):
 615             for filename in os.listdir(gallery_path):
 616                 file_path = os.path.join(gallery_path, filename)
 617                 os.unlink(file_path)
 618         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 619         if ilustr_elements:
 620             makedirs(gallery_path)
 621             for ilustr in ilustr_elements:
 622                 ilustr_src = ilustr.get('src')
 623                 if '/' in ilustr_src:
 624                     continue
 625                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 626                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 627
 628     def load_abstract(self):
 629         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 630         if abstract is not None:
 631             self.abstract = transform_abstrakt(abstract)
 632         else:
 633             self.abstract = ''
 634
 635     def load_toc(self):
 636         self.toc = ''
 637         if self.html_file:
 638             parser = html.HTMLParser(encoding='utf-8')
 639             tree = html.parse(self.html_file.path, parser=parser)
 640             toc = tree.find('//div[@id="toc"]/ol')
 641             if toc is None or not len(toc):
 642                 return
 643             html_link = reverse('book_text', args=[self.slug])
 644             for a in toc.findall('.//a'):
 645                 a.attrib['href'] = html_link + a.attrib['href']
 646             self.toc = html.tostring(toc, encoding='unicode')
 647             # div#toc
 648
 649     @classmethod
 650     def from_xml_file(cls, xml_file, **kwargs):
 651         from django.core.files import File
 652         from librarian import dcparser
 653
 654         # use librarian to parse meta-data
 655         book_info = dcparser.parse(xml_file)
 656
 657         if not isinstance(xml_file, File):
 658             xml_file = File(open(xml_file))
 659
 660         try:
 661             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 662         finally:
 663             xml_file.close()
 664
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata.

        Arguments:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata (slug, title, language, license, parts, ...).
            overwrite: allow updating a book whose slug already exists.
            dont_build: formats to skip when building; merged with
                ``app_settings.DONT_BUILD``. 'cover' skips all cover variants.
            search_index: schedule search indexing (only if the book is findable
                and ``settings.NO_SEARCH_INDEX`` is not set).
            remote_gallery_url: if given, passed to ``download_pictures``.
            days: for a newly created book, a truthy value marks it as a preview
                lasting that many days from today.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in ``extra_info`` when truthy.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is not in the database.
            ValueError: the slug contains characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover info to detect a change after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        # Tags come as (tag, relation) pairs; pairs without a relation become
        # the book's plain tags, kept together with the previously saved shelves.
        meta_tags = Tag.tags_from_info(book_info)

        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Announce the publication through the class-level ``published`` signal.
        cls.published.send(sender=cls, instance=book)
        return book
 793
 794     def update_references(self):
 795         Entity = apps.get_model('references', 'Entity')
 796         doc = self.wldocument2()
 797         doc._compat_assign_section_ids()
 798         doc._compat_assign_ordered_ids()
 799         refs = {}
 800         for ref_elem in doc.references():
 801             uri = ref_elem.attrib.get('href', '')
 802             if not uri:
 803                 continue
 804             if uri in refs:
 805                 ref = refs[uri]
 806             else:
 807                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 808                 if entity_created:
 809                     try:
 810                         entity.populate()
 811                     except:
 812                         pass
 813                     else:
 814                         entity.save()
 815                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 816                 refs[uri] = ref
 817                 if not ref_created:
 818                     ref.occurence_set.all().delete()
 819             sec = ref_elem.get_link()
 820             m = re.match(r'sec(\d+)', sec)
 821             assert m is not None
 822             sec = int(m.group(1))
 823             snippet = ref_elem.get_snippet()
 824             b = builders['html-snippet']()
 825             for s in snippet:
 826                 s.html_build(b)
 827             html = b.output().get_bytes().decode('utf-8')
 828
 829             ref.occurence_set.create(
 830                 section=sec,
 831                 html=html
 832             )
 833         self.reference_set.exclude(entity__uri__in=refs).delete()
 834
 835     @property
 836     def references(self):
 837         return self.reference_set.all().select_related('entity')
 838
 839     def update_has_audio(self):
 840         self.has_audio = False
 841         if self.media.filter(type='mp3').exists():
 842             self.has_audio = True
 843         if self.descendant.filter(has_audio=True).exists():
 844             self.has_audio = True
 845         self.save(update_fields=['has_audio'])
 846         if self.parent is not None:
 847             self.parent.update_has_audio()
 848
 849     def update_narrators(self):
 850         narrator_names = set()
 851         for bm in self.media.filter(type='mp3'):
 852             narrator_names.update(set(
 853                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 854             ))
 855         narrators = []
 856
 857         for name in narrator_names:
 858             if not name: continue
 859             slug = slugify(name)
 860             try:
 861                 t = Tag.objects.get(category='author', slug=slug)
 862             except Tag.DoesNotExist:
 863                 sort_key = sortify(
 864                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 865                 )
 866                 t = Tag.objects.create(
 867                     category='author',
 868                     name_pl=name,
 869                     slug=slug,
 870                     sort_key=sort_key,
 871                 )
 872             narrators.append(t)
 873         self.narrators.set(narrators)
 874
 875     @classmethod
 876     @transaction.atomic
 877     def repopulate_ancestors(cls):
 878         """Fixes the ancestry cache."""
 879         # TODO: table names
 880         cursor = connection.cursor()
 881         if connection.vendor == 'postgres':
 882             cursor.execute("TRUNCATE catalogue_book_ancestor")
 883             cursor.execute("""
 884                 WITH RECURSIVE ancestry AS (
 885                     SELECT book.id, book.parent_id
 886                     FROM catalogue_book AS book
 887                     WHERE book.parent_id IS NOT NULL
 888                     UNION
 889                     SELECT ancestor.id, book.parent_id
 890                     FROM ancestry AS ancestor, catalogue_book AS book
 891                     WHERE ancestor.parent_id = book.id
 892                         AND book.parent_id IS NOT NULL
 893                     )
 894                 INSERT INTO catalogue_book_ancestor
 895                     (from_book_id, to_book_id)
 896                     SELECT id, parent_id
 897                     FROM ancestry
 898                     ORDER BY id;
 899                 """)
 900         else:
 901             cursor.execute("DELETE FROM catalogue_book_ancestor")
 902             for b in cls.objects.exclude(parent=None):
 903                 parent = b.parent
 904                 while parent is not None:
 905                     b.ancestor.add(parent)
 906                     parent = parent.parent
 907
 908     @property
 909     def ancestors(self):
 910         if self.parent:
 911             for anc in self.parent.ancestors:
 912                 yield anc
 913             yield self.parent
 914         else:
 915             return []
 916
 917     def clear_cache(self):
 918         clear_cached_renders(self.mini_box)
 919         clear_cached_renders(self.mini_box_nolink)
 920
 921     def cover_info(self, inherit=True):
 922         """Returns a dictionary to serve as fallback for BookInfo.
 923
 924         For now, the only thing inherited is the cover image.
 925         """
 926         need = False
 927         info = {}
 928         for field in ('cover_url', 'cover_by', 'cover_source'):
 929             val = self.get_extra_info_json().get(field)
 930             if val:
 931                 info[field] = val
 932             else:
 933                 need = True
 934         if inherit and need and self.parent is not None:
 935             parent_info = self.parent.cover_info()
 936             parent_info.update(info)
 937             info = parent_info
 938         return info
 939
 940     def related_themes(self):
 941         return Tag.objects.usage_for_queryset(
 942             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 943             counts=True).filter(category='theme').order_by('-count')
 944
 945     def parent_cover_changed(self):
 946         """Called when parent book's cover image is changed."""
 947         if not self.cover_info(inherit=False):
 948             if 'cover' not in app_settings.DONT_BUILD:
 949                 self.cover.build_delay()
 950                 self.cover_clean.build_delay()
 951                 self.cover_thumb.build_delay()
 952                 self.cover_api_thumb.build_delay()
 953                 self.simple_cover.build_delay()
 954                 self.cover_ebookpoint.build_delay()
 955             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 956                 if format_ not in app_settings.DONT_BUILD:
 957                     getattr(self, '%s_file' % format_).build_delay()
 958             for child in self.children.all():
 959                 child.parent_cover_changed()
 960
 961     def other_versions(self):
 962         """Find other versions (i.e. in other languages) of the book."""
 963         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 964
 965     def parents(self):
 966         books = []
 967         parent = self.parent
 968         while parent is not None:
 969             books.insert(0, parent)
 970             parent = parent.parent
 971         return books
 972
 973     def pretty_title(self, html_links=False):
 974         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 975         books = self.parents() + [self]
 976         names.extend([(b.title, b.get_absolute_url()) for b in books])
 977
 978         if html_links:
 979             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 980         else:
 981             names = [tag[0] for tag in names]
 982         return ', '.join(names)
 983
 984     def publisher(self):
 985         publisher = self.get_extra_info_json()['publisher']
 986         if isinstance(publisher, str):
 987             return publisher
 988         elif isinstance(publisher, list):
 989             return ', '.join(publisher)
 990
 991     @classmethod
 992     def tagged_top_level(cls, tags):
 993         """ Returns top-level books tagged with `tags`.
 994
 995         It only returns those books which don't have ancestors which are
 996         also tagged with those tags.
 997
 998         """
 999         objects = cls.tagged.with_all(tags)
1000         return objects.filter(findable=True).exclude(ancestor__in=objects)
1001
1002     @classmethod
1003     def book_list(cls, book_filter=None):
1004         """Generates a hierarchical listing of all books.
1005
1006         Books are optionally filtered with a test function.
1007
1008         """
1009
1010         books_by_parent = {}
1011         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1012         if book_filter:
1013             books = books.filter(book_filter).distinct()
1014
1015             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1016             for book in books.iterator():
1017                 parent = book.parent_id
1018                 if parent not in book_ids:
1019                     parent = None
1020                 books_by_parent.setdefault(parent, []).append(book)
1021         else:
1022             for book in books.iterator():
1023                 books_by_parent.setdefault(book.parent_id, []).append(book)
1024
1025         orphans = []
1026         books_by_author = OrderedDict()
1027         for tag in Tag.objects.filter(category='author').iterator():
1028             books_by_author[tag] = []
1029
1030         for book in books_by_parent.get(None, ()):
1031             authors = list(book.authors().only('pk'))
1032             if authors:
1033                 for author in authors:
1034                     books_by_author[author].append(book)
1035             else:
1036                 orphans.append(book)
1037
1038         return books_by_author, orphans, books_by_parent
1039
    # Maps an audience code from a book's extra info to a
    # (sort rank, Polish display label) pair.  audiences_pl() sorts by this
    # pair and returns only the labels; a code missing from this table is
    # given rank 99 there and is shown as-is.  Several codes share one pair,
    # so they collapse into a single label.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1050
1051     def audiences_pl(self):
1052         audiences = self.get_extra_info_json().get('audiences', [])
1053         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1054         return [a[1] for a in audiences]
1055
1056     def stage_note(self):
1057         stage = self.get_extra_info_json().get('stage')
1058         if stage and stage < '0.4':
1059             return (_('Ten utwór wymaga uwspółcześnienia'),
1060                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1061         else:
1062             return None, None
1063
1064     def choose_fragments(self, number):
1065         fragments = self.fragments.order_by()
1066         fragments_count = fragments.count()
1067         if not fragments_count and self.children.exists():
1068             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1069             fragments_count = fragments.count()
1070         if fragments_count:
1071             if fragments_count > number:
1072                 offset = randint(0, fragments_count - number)
1073             else:
1074                 offset = 0
1075             return fragments[offset : offset + number]
1076         elif self.parent:
1077             return self.parent.choose_fragments(number)
1078         else:
1079             return []
1080
1081     def choose_fragment(self):
1082         fragments = self.choose_fragments(1)
1083         if fragments:
1084             return fragments[0]
1085         else:
1086             return None
1087
1088     def fragment_data(self):
1089         fragment = self.choose_fragment()
1090         if fragment:
1091             return {
1092                 'title': fragment.book.pretty_title(),
1093                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1094             }
1095         else:
1096             return None
1097
1098     def update_popularity(self):
1099         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1100         try:
1101             pop = self.popularity
1102             pop.count = count
1103             pop.save()
1104         except BookPopularity.DoesNotExist:
1105             BookPopularity.objects.create(book=self, count=count)
1106
1107     def ridero_link(self):
1108         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1109
1110     def elevenreader_link(self):
1111         return 'https://elevenreader.io/audiobooks/wolnelektury:' + self.get_first_text().slug
1112
1113     def content_warnings(self):
1114         warnings_def = {
1115             'wulgaryzmy': _('wulgaryzmy'),
1116         }
1117         warnings = self.get_extra_info_json().get('content_warnings', [])
1118         warnings = [
1119             warnings_def.get(w, w)
1120             for w in warnings
1121         ]
1122         warnings.sort()
1123         return warnings
1124
1125     def full_sort_key(self):
1126         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1127
1128     def cover_color(self):
1129         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1130
1131     @cached_render('catalogue/book_mini_box.html')
1132     def mini_box(self):
1133         return {
1134             'book': self
1135         }
1136
1137     @cached_render('catalogue/book_mini_box.html')
1138     def mini_box_nolink(self):
1139         return {
1140             'book': self,
1141             'no_link': True,
1142         }
1143
1144
class BookPopularity(models.Model):
    """Popularity counter of a single book, reachable as ``book.popularity``."""
    # One row per book at most; the row is deleted together with its book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Written by Book.update_popularity(): the number of distinct
    # ``list__user`` values among the book's user list items.
    count = models.IntegerField(default=0, db_index=True)