src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
# Shared storage instance passed as ``storage=`` to the file fields of Book below.
bofh_storage = BofhFileSystemStorage()
  36
  37
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    # Basic metadata. Both sort keys are non-editable; save() recomputes them.
    title = models.CharField('tytuł', max_length=32767)
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON document stored as text; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state; save() fills in preview_key when preview is set and the key is empty.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
    can_sell = models.BooleanField('do sprzedaży', default=True)
    isbn_mp3 = models.CharField('ISBN audiobooka', max_length=32, blank=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Formats kept directly on the model as FORMAT_file fields (see has_media()).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: the direct parent and a many-to-many relation to ancestors.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Signals exposed by the model; from_text_and_meta() sends ``published``.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 112
 113     class AlreadyExists(Exception):
 114         pass
 115
    class Meta:
        # Default ordering: by author sort key first, then by the book's own sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        # Set explicitly because the model lives in a submodule of the app's models package.
        app_label = 'catalogue'
 121
 122     def __str__(self):
 123         return self.title
 124
 125     def get_extra_info_json(self):
 126         return json.loads(self.extra_info or '{}')
 127
 128     def get_initial(self):
 129         try:
 130             return re.search(r'\w', self.title, re.U).group(0)
 131         except AttributeError:
 132             return ''
 133
 134     def authors(self):
 135         return self.tags.filter(category='author')
 136
 137     def epochs(self):
 138         return self.tags.filter(category='epoch')
 139
 140     def genres(self):
 141         return self.tags.filter(category='genre')
 142
 143     def kinds(self):
 144         return self.tags.filter(category='kind')
 145
 146     def tag_unicode(self, category):
 147         relations = prefetched_relations(self, category)
 148         if relations:
 149             return ', '.join(rel.tag.name for rel in relations)
 150         else:
 151             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 152
 153     def tags_by_category(self):
 154         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 155
 156     def author_unicode(self):
 157         return self.cached_author
 158
 159     def kind_unicode(self):
 160         return self.tag_unicode('kind')
 161
 162     def epoch_unicode(self):
 163         return self.tag_unicode('epoch')
 164
 165     def genre_unicode(self):
 166         return self.tag_unicode('genre')
 167
 168     def translator(self):
 169         translators = self.get_extra_info_json().get('translators')
 170         if not translators:
 171             return None
 172         if len(translators) > 3:
 173             translators = translators[:2]
 174             others = ' i inni'
 175         else:
 176             others = ''
 177         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 178
 179     def cover_source(self):
 180         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 181
 182     @property
 183     def isbn_pdf(self):
 184         return self.get_extra_info_json().get('isbn_pdf')
 185
 186     @property
 187     def isbn_epub(self):
 188         return self.get_extra_info_json().get('isbn_epub')
 189
 190     @property
 191     def isbn_mobi(self):
 192         return self.get_extra_info_json().get('isbn_mobi')
 193
 194     @property
 195     def redakcja(self):
 196         return self.get_extra_info_json().get('about')
 197
 198     def is_accessible_to(self, user):
 199         if not self.preview:
 200             return True
 201         if not user.is_authenticated:
 202             return False
 203         Membership = apps.get_model('club', 'Membership')
 204         if Membership.is_active_for(user):
 205             return True
 206         Funding = apps.get_model('funding', 'Funding')
 207         if Funding.objects.filter(user=user, offer__book=self):
 208             return True
 209         return False
 210
 211     def save(self, force_insert=False, force_update=False, **kwargs):
 212         from sortify import sortify
 213
 214         self.sort_key = sortify(self.title)[:120]
 215         self.title = str(self.title)  # ???
 216
 217         try:
 218             author = self.authors().first().sort_key
 219         except AttributeError:
 220             author = ''
 221         self.sort_key_author = author
 222
 223         self.cached_author = self.tag_unicode('author')
 224         self.has_audience = 'audience' in self.get_extra_info_json()
 225
 226         if self.preview and not self.preview_key:
 227             self.preview_key = get_random_hash(self.slug)[:32]
 228
 229         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 230
 231         return ret
 232
 233     def get_absolute_url(self):
 234         return reverse('book_detail', args=[self.slug])
 235
 236     def gallery_path(self):
 237         return gallery_path(self.slug)
 238
 239     def gallery_url(self):
 240         return gallery_url(self.slug)
 241
 242     def get_first_text(self):
 243         if self.html_file:
 244             return self
 245         child = self.children.all().order_by('parent_number').first()
 246         if child is not None:
 247             return child.get_first_text()
 248
 249     def get_last_text(self):
 250         if self.html_file:
 251             return self
 252         child = self.children.all().order_by('parent_number').last()
 253         if child is not None:
 254             return child.get_last_text()
 255
 256     def get_prev_text(self):
 257         if not self.parent:
 258             return None
 259         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 260         if sibling is not None:
 261             return sibling.get_last_text()
 262
 263         if self.parent.html_file:
 264             return self.parent
 265
 266         return self.parent.get_prev_text()
 267
 268     def get_next_text(self, inside=True):
 269         if inside:
 270             child = self.children.order_by('parent_number').first()
 271             if child is not None:
 272                 return child.get_first_text()
 273
 274         if not self.parent:
 275             return None
 276         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 277         if sibling is not None:
 278             return sibling.get_first_text()
 279         return self.parent.get_next_text(inside=False)
 280
 281     def get_siblings(self):
 282         if not self.parent:
 283             return []
 284         return self.parent.children.all().order_by('parent_number')
 285
 286     def get_children(self):
 287         return self.children.all().order_by('parent_number')
 288
 289     @property
 290     def name(self):
 291         return self.title
 292
 293     def language_code(self):
 294         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 295
 296     def language_name(self):
 297         return dict(settings.LANGUAGES).get(self.language_code(), "")
 298
 299     def is_foreign(self):
 300         return self.language_code() != settings.LANGUAGE_CODE
 301
 302     def set_audio_length(self):
 303         length = self.get_audio_length()
 304         if length > 0:
 305             self.audio_length = self.format_audio_length(length)
 306             self.save()
 307
 308     @staticmethod
 309     def format_audio_length(seconds):
 310         """
 311         >>> Book.format_audio_length(1)
 312         '0:01'
 313         >>> Book.format_audio_length(3661)
 314         '1:01:01'
 315         """
 316         if seconds < 60*60:
 317             minutes = seconds // 60
 318             seconds = seconds % 60
 319             return '%d:%02d' % (minutes, seconds)
 320         else:
 321             hours = seconds // 3600
 322             minutes = seconds % 3600 // 60
 323             seconds = seconds % 60
 324             return '%d:%02d:%02d' % (hours, minutes, seconds)
 325
 326     def get_audio_length(self):
 327         total = 0
 328         for media in self.get_mp3() or ():
 329             total += app_settings.GET_MP3_LENGTH(media.file.path)
 330         return int(total)
 331
 332     def get_time(self):
 333         return round(self.xml_file.size / 1000 * 40)
 334
 335     def has_media(self, type_):
 336         if type_ in Book.formats:
 337             return bool(getattr(self, "%s_file" % type_))
 338         else:
 339             return self.media.filter(type=type_).exists()
 340
 341     def get_media(self, type_):
 342         if self.has_media(type_):
 343             if type_ in Book.formats:
 344                 return getattr(self, "%s_file" % type_)
 345             else:
 346                 return self.media.filter(type=type_)
 347         else:
 348             return None
 349
 350     def get_mp3(self):
 351         return self.get_media("mp3")
 352
 353     def get_odt(self):
 354         return self.get_media("odt")
 355
 356     def get_ogg(self):
 357         return self.get_media("ogg")
 358
 359     def get_daisy(self):
 360         return self.get_media("daisy")
 361
 362     def get_audio_epub(self):
 363         return self.get_media("audio.epub")
 364
 365     def media_url(self, format_):
 366         media = self.get_media(format_)
 367         if media:
 368             if self.preview:
 369                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 370             else:
 371                 return media.url
 372         else:
 373             return None
 374
 375     def html_url(self):
 376         return self.media_url('html')
 377
 378     def html_nonotes_url(self):
 379         return self.media_url('html_nonotes')
 380
 381     def pdf_url(self):
 382         return self.media_url('pdf')
 383
 384     def epub_url(self):
 385         return self.media_url('epub')
 386
 387     def mobi_url(self):
 388         return self.media_url('mobi')
 389
 390     def txt_url(self):
 391         return self.media_url('txt')
 392
 393     def fb2_url(self):
 394         return self.media_url('fb2')
 395
 396     def xml_url(self):
 397         return self.media_url('xml')
 398
 399     def has_description(self):
 400         return len(self.description) > 0
 401     has_description.short_description = 'opis'
 402     has_description.boolean = True
 403
 404     def has_mp3_file(self):
 405         return self.has_media("mp3")
 406     has_mp3_file.short_description = 'MP3'
 407     has_mp3_file.boolean = True
 408
 409     def has_ogg_file(self):
 410         return self.has_media("ogg")
 411     has_ogg_file.short_description = 'OGG'
 412     has_ogg_file.boolean = True
 413
 414     def has_daisy_file(self):
 415         return self.has_media("daisy")
 416     has_daisy_file.short_description = 'DAISY'
 417     has_daisy_file.boolean = True
 418
 419     def has_sync_file(self):
 420         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 421
 422     def build_sync_file(self):
 423         from lxml import html
 424         from django.core.files.base import ContentFile
 425         with self.html_file.open('rb') as f:
 426             h = html.fragment_fromstring(f.read().decode('utf-8'))
 427
 428         durations = [
 429             m['mp3'].duration
 430             for m in self.get_audiobooks()[0]
 431         ]
 432         if settings.MOCK_DURATIONS:
 433             durations = settings.MOCK_DURATIONS
 434
 435         sync = []
 436         ts = None
 437         sid = 1
 438         dirty = False
 439         for elem in h.iter():
 440             if elem.get('data-audio-ts'):
 441                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 442                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 443                 # check if inside verse
 444                 p = elem.getparent()
 445                 while p is not None:
 446                     # Workaround for missing ids.
 447                     if 'verse' in p.get('class', ''):
 448                         if not p.get('id'):
 449                             p.set('id', f'syn{sid}')
 450                             dirty = True
 451                             sid += 1
 452                         sync.append((ts, p.get('id')))
 453                         ts = None
 454                         break
 455                     p = p.getparent()
 456             elif ts:
 457                 cls = elem.get('class', '')
 458                 # Workaround for missing ids.
 459                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 460                     if not elem.get('id'):
 461                         elem.set('id', f'syn{sid}')
 462                         dirty = True
 463                         sid += 1
 464                     sync.append((ts, elem.get('id')))
 465                     ts = None
 466         if dirty:
 467             htext = html.tostring(h, encoding='utf-8')
 468             with open(self.html_file.path, 'wb') as f:
 469                 f.write(htext)
 470         try:
 471             bm = self.media.get(type='sync')
 472         except:
 473             bm = BookMedia(book=self, type='sync')
 474         sync = (
 475             '27\n' + '\n'.join(
 476                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 477             )).encode('latin1')
 478         bm.file.save(
 479             None, ContentFile(sync)
 480             )
 481
 482     def get_sync(self):
 483         if not self.has_sync_file():
 484             return []
 485         with self.get_media('sync').first().file.open('r') as f:
 486             sync = f.read().split('\n')
 487         offset = float(sync[0])
 488         items = []
 489         for line in sync[1:]:
 490             if not line:
 491                 continue
 492             start, end, elid = line.split()
 493             items.append([elid, float(start) + offset])
 494         return items
 495
 496     def sync_ts(self, ts):
 497         elid = None
 498         for cur_id, t in self.get_sync():
 499             if ts >= t:
 500                 elid = cur_id
 501             else:
 502                 break
 503         return elid
 504
 505     def sync_elid(self, elid):
 506         for cur_id, t in self.get_sync():
 507             if cur_id == elid:
 508                 return t
 509
 510     def has_audio_epub_file(self):
 511         return self.has_media("audio.epub")
 512
 513     @property
 514     def media_daisy(self):
 515         return self.get_media('daisy')
 516
 517     @property
 518     def media_audio_epub(self):
 519         return self.get_media('audio.epub')
 520
 521     def get_audiobooks(self, with_children=False, processing=False):
 522         ogg_files = {}
 523         for m in self.media.filter(type='ogg').order_by().iterator():
 524             ogg_files[m.name] = m
 525
 526         audiobooks = []
 527         projects = set()
 528         total_duration = 0
 529         for mp3 in self.media.filter(type='mp3').iterator():
 530             # ogg files are always from the same project
 531             meta = mp3.get_extra_info_json()
 532             project = meta.get('project')
 533             if not project:
 534                 # temporary fallback
 535                 project = 'CzytamySłuchając'
 536
 537             projects.add((project, meta.get('funded_by', '')))
 538             total_duration += mp3.duration or 0
 539
 540             media = {'mp3': mp3}
 541
 542             ogg = ogg_files.get(mp3.name)
 543             if ogg:
 544                 media['ogg'] = ogg
 545             audiobooks.append(media)
 546
 547         if with_children:
 548             for child in self.get_children():
 549                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 550                     with_children=True, processing=True)
 551                 audiobooks.append({'part': child})
 552                 audiobooks += ch_audiobooks
 553                 projects.update(ch_projects)
 554                 total_duration += ch_duration
 555
 556         if not processing:
 557             projects = sorted(projects)
 558             total_duration = '%d:%02d' % (
 559                 total_duration // 60,
 560                 total_duration % 60
 561             )
 562
 563         return audiobooks, projects, total_duration
 564
 565     def get_audiobooks_with_children(self):
 566         return self.get_audiobooks(with_children=True)
 567
 568     def wldocument(self, parse_dublincore=True, inherit=True):
 569         from catalogue.import_utils import ORMDocProvider
 570         from librarian.parser import WLDocument
 571
 572         if inherit and self.parent:
 573             meta_fallbacks = self.parent.cover_info()
 574         else:
 575             meta_fallbacks = None
 576
 577         return WLDocument.from_file(
 578             self.xml_file.path,
 579             provider=ORMDocProvider(self),
 580             parse_dublincore=parse_dublincore,
 581             meta_fallbacks=meta_fallbacks)
 582
 583     def wldocument2(self):
 584         from catalogue.import_utils import ORMDocProvider
 585         from librarian.document import WLDocument
 586         doc = WLDocument(
 587             self.xml_file.path,
 588             provider=ORMDocProvider(self)
 589         )
 590         doc.meta.update(self.cover_info())
 591         return doc
 592
 593
 594     @staticmethod
 595     def zip_format(format_):
 596         def pretty_file_name(book):
 597             return "%s/%s.%s" % (
 598                 book.get_extra_info_json()['author'],
 599                 book.slug,
 600                 format_)
 601
 602         field_name = "%s_file" % format_
 603         field = getattr(Book, field_name)
 604         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 605         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 606         return create_zip(paths, field.ZIP)
 607
 608     def zip_audiobooks(self, format_):
 609         bm = BookMedia.objects.filter(book=self, type=format_)
 610         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 611         licenses = set()
 612         for m in bm:
 613             license = constants.LICENSES.get(
 614                 m.get_extra_info_json().get('license'), {}
 615             ).get('locative')
 616             if license:
 617                 licenses.add(license)
 618         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 619             'licenses': licenses,
 620             'meta': self.wldocument2().meta,
 621         })
 622         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 623
 624     def search_index(self, index=None):
 625         if not self.findable:
 626             return
 627         from search.index import Index
 628         Index.index_book(self)
 629
 630     # will make problems in conjunction with paid previews
 631     def download_pictures(self, remote_gallery_url):
 632         # This is only needed for legacy relative image paths.
 633         gallery_path = self.gallery_path()
 634         # delete previous files, so we don't include old files in ebooks
 635         if os.path.isdir(gallery_path):
 636             for filename in os.listdir(gallery_path):
 637                 file_path = os.path.join(gallery_path, filename)
 638                 os.unlink(file_path)
 639         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 640         if ilustr_elements:
 641             makedirs(gallery_path)
 642             for ilustr in ilustr_elements:
 643                 ilustr_src = ilustr.get('src')
 644                 if '/' in ilustr_src:
 645                     continue
 646                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 647                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 648
 649     def load_abstract(self):
 650         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 651         if abstract is not None:
 652             self.abstract = transform_abstrakt(abstract)
 653         else:
 654             self.abstract = ''
 655
 656     def load_toc(self):
 657         self.toc = ''
 658         if self.html_file:
 659             parser = html.HTMLParser(encoding='utf-8')
 660             tree = html.parse(self.html_file.path, parser=parser)
 661             toc = tree.find('//div[@id="toc"]/ol')
 662             if toc is None or not len(toc):
 663                 return
 664             html_link = reverse('book_text', args=[self.slug])
 665             for a in toc.findall('.//a'):
 666                 a.attrib['href'] = html_link + a.attrib['href']
 667             self.toc = html.tostring(toc, encoding='unicode')
 668             # div#toc
 669
 670     @classmethod
 671     def from_xml_file(cls, xml_file, **kwargs):
 672         from django.core.files import File
 673         from librarian import dcparser
 674
 675         # use librarian to parse meta-data
 676         book_info = dcparser.parse(xml_file)
 677
 678         if not isinstance(xml_file, File):
 679             xml_file = File(open(xml_file))
 680
 681         try:
 682             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 683         finally:
 684             xml_file.close()
 685
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None, isbn_mp3=None):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object stored as the book's XML file.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of`` and
                optional ``parts`` attributes and calls ``to_dict()``.
            overwrite: allow updating a book that already exists.
            dont_build: names of formats (or 'cover') that should not be
                built; combined with ``app_settings.DONT_BUILD``.
            search_index: queue a search indexing task, provided indexing
                is enabled in settings and the book is findable.
            remote_gallery_url: if set, pictures are downloaded from there.
            days: for a newly created book, a non-zero value marks it as a
                preview lasting that many days from today.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in the extra info when truthy.
            can_sell, isbn_mp3: stored on the book when not None.

        Returns:
            The saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # The preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        if isbn_mp3 is not None:
            book.isbn_mp3 = isbn_mp3
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name become the book's tags; shelves saved above are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts of this book.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books with no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 820
 821     def update_stats(self):
 822         stats = self.wldocument2().get_statistics()['total']
 823         self.pages = round(
 824             stats.get('verses_with_fn', 0) / 30 +
 825             stats.get('chars_out_verse_with_fn', 0) / 1800)
 826         self.read_time = round(self.get_time())
 827         self.save(update_fields=['pages', 'read_time'])
 828         if self.parent is not None:
 829             self.parent.update_stats()
 830
 831     def update_references(self):
 832         Entity = apps.get_model('references', 'Entity')
 833         doc = self.wldocument2()
 834         doc._compat_assign_section_ids()
 835         doc._compat_assign_ordered_ids()
 836         refs = {}
 837         for ref_elem in doc.references():
 838             uri = ref_elem.attrib.get('href', '')
 839             if not uri:
 840                 continue
 841             if uri in refs:
 842                 ref = refs[uri]
 843             else:
 844                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 845                 if entity_created:
 846                     try:
 847                         entity.populate()
 848                     except:
 849                         pass
 850                     else:
 851                         entity.save()
 852                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 853                 refs[uri] = ref
 854                 if not ref_created:
 855                     ref.occurence_set.all().delete()
 856             sec = ref_elem.get_link()
 857             m = re.match(r'sec(\d+)', sec)
 858             assert m is not None
 859             sec = int(m.group(1))
 860             snippet = ref_elem.get_snippet()
 861             b = builders['html-snippet']()
 862             for s in snippet:
 863                 s.html_build(b)
 864             html = b.output().get_bytes().decode('utf-8')
 865
 866             ref.occurence_set.create(
 867                 section=sec,
 868                 html=html
 869             )
 870         self.reference_set.exclude(entity__uri__in=refs).delete()
 871
 872     @property
 873     def references(self):
 874         return self.reference_set.all().select_related('entity')
 875
 876     def update_has_audio(self):
 877         self.has_audio = False
 878         if self.media.filter(type='mp3').exists():
 879             self.has_audio = True
 880         if self.descendant.filter(has_audio=True).exists():
 881             self.has_audio = True
 882         self.save(update_fields=['has_audio'])
 883         if self.parent is not None:
 884             self.parent.update_has_audio()
 885
 886     def update_narrators(self):
 887         narrator_names = set()
 888         for bm in self.media.filter(type='mp3'):
 889             narrator_names.update(set(
 890                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 891             ))
 892         narrators = []
 893
 894         for name in narrator_names:
 895             if not name: continue
 896             slug = slugify(name)
 897             try:
 898                 t = Tag.objects.get(category='author', slug=slug)
 899             except Tag.DoesNotExist:
 900                 sort_key = sortify(
 901                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 902                 )
 903                 t = Tag.objects.create(
 904                     category='author',
 905                     name_pl=name,
 906                     slug=slug,
 907                     sort_key=sort_key,
 908                 )
 909             narrators.append(t)
 910         self.narrators.set(narrators)
 911
 912     @classmethod
 913     @transaction.atomic
 914     def repopulate_ancestors(cls):
 915         """Fixes the ancestry cache."""
 916         # TODO: table names
 917         cursor = connection.cursor()
 918         if connection.vendor == 'postgres':
 919             cursor.execute("TRUNCATE catalogue_book_ancestor")
 920             cursor.execute("""
 921                 WITH RECURSIVE ancestry AS (
 922                     SELECT book.id, book.parent_id
 923                     FROM catalogue_book AS book
 924                     WHERE book.parent_id IS NOT NULL
 925                     UNION
 926                     SELECT ancestor.id, book.parent_id
 927                     FROM ancestry AS ancestor, catalogue_book AS book
 928                     WHERE ancestor.parent_id = book.id
 929                         AND book.parent_id IS NOT NULL
 930                     )
 931                 INSERT INTO catalogue_book_ancestor
 932                     (from_book_id, to_book_id)
 933                     SELECT id, parent_id
 934                     FROM ancestry
 935                     ORDER BY id;
 936                 """)
 937         else:
 938             cursor.execute("DELETE FROM catalogue_book_ancestor")
 939             for b in cls.objects.exclude(parent=None):
 940                 parent = b.parent
 941                 while parent is not None:
 942                     b.ancestor.add(parent)
 943                     parent = parent.parent
 944
 945     @property
 946     def ancestors(self):
 947         if self.parent:
 948             for anc in self.parent.ancestors:
 949                 yield anc
 950             yield self.parent
 951         else:
 952             return []
 953
 954     def clear_cache(self):
 955         clear_cached_renders(self.mini_box)
 956         clear_cached_renders(self.mini_box_nolink)
 957
 958     def cover_info(self, inherit=True):
 959         """Returns a dictionary to serve as fallback for BookInfo.
 960
 961         For now, the only thing inherited is the cover image.
 962         """
 963         need = False
 964         info = {}
 965         for field in ('cover_url', 'cover_by', 'cover_source'):
 966             val = self.get_extra_info_json().get(field)
 967             if val:
 968                 info[field] = val
 969             else:
 970                 need = True
 971         if inherit and need and self.parent is not None:
 972             parent_info = self.parent.cover_info()
 973             parent_info.update(info)
 974             info = parent_info
 975         return info
 976
 977     def related_themes(self):
 978         return Tag.objects.usage_for_queryset(
 979             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 980             counts=True).filter(category='theme').order_by('-count')
 981
 982     def parent_cover_changed(self):
 983         """Called when parent book's cover image is changed."""
 984         if not self.cover_info(inherit=False):
 985             if 'cover' not in app_settings.DONT_BUILD:
 986                 self.cover.build_delay()
 987                 self.cover_clean.build_delay()
 988                 self.cover_thumb.build_delay()
 989                 self.cover_api_thumb.build_delay()
 990                 self.simple_cover.build_delay()
 991                 self.cover_ebookpoint.build_delay()
 992             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 993                 if format_ not in app_settings.DONT_BUILD:
 994                     getattr(self, '%s_file' % format_).build_delay()
 995             for child in self.children.all():
 996                 child.parent_cover_changed()
 997
 998     def other_versions(self):
 999         """Find other versions (i.e. in other languages) of the book."""
1000         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
1001
1002     def parents(self):
1003         books = []
1004         parent = self.parent
1005         while parent is not None:
1006             books.insert(0, parent)
1007             parent = parent.parent
1008         return books
1009
1010     def pretty_title(self, html_links=False):
1011         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1012         books = self.parents() + [self]
1013         names.extend([(b.title, b.get_absolute_url()) for b in books])
1014
1015         if html_links:
1016             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1017         else:
1018             names = [tag[0] for tag in names]
1019         return ', '.join(names)
1020
1021     def publisher(self):
1022         publisher = self.get_extra_info_json()['publisher']
1023         if isinstance(publisher, str):
1024             return publisher
1025         elif isinstance(publisher, list):
1026             return ', '.join(publisher)
1027
1028     def get_recommended(self, limit=4):
1029         books_qs = type(self).objects.filter(findable=True)
1030         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1031         books = type(self).tagged.related_to(self, books_qs)[:limit]
1032         return books
1033
1034     @classmethod
1035     def tagged_top_level(cls, tags):
1036         """ Returns top-level books tagged with `tags`.
1037
1038         It only returns those books which don't have ancestors which are
1039         also tagged with those tags.
1040
1041         """
1042         objects = cls.tagged.with_all(tags)
1043         return objects.filter(findable=True).exclude(ancestor__in=objects)
1044
1045     @classmethod
1046     def book_list(cls, book_filter=None):
1047         """Generates a hierarchical listing of all books.
1048
1049         Books are optionally filtered with a test function.
1050
1051         """
1052
1053         books_by_parent = {}
1054         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1055         if book_filter:
1056             books = books.filter(book_filter).distinct()
1057
1058             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1059             for book in books.iterator():
1060                 parent = book.parent_id
1061                 if parent not in book_ids:
1062                     parent = None
1063                 books_by_parent.setdefault(parent, []).append(book)
1064         else:
1065             for book in books.iterator():
1066                 books_by_parent.setdefault(book.parent_id, []).append(book)
1067
1068         orphans = []
1069         books_by_author = OrderedDict()
1070         for tag in Tag.objects.filter(category='author').iterator():
1071             books_by_author[tag] = []
1072
1073         for book in books_by_parent.get(None, ()):
1074             authors = list(book.authors().only('pk'))
1075             if authors:
1076                 for author in authors:
1077                     books_by_author[author].append(book)
1078             else:
1079                 orphans.append(book)
1080
1081         return books_by_author, orphans, books_by_parent
1082
    # Maps an audience code found in the book's extra info to a pair of
    # (sort rank, Polish label).  audiences_pl() sorts by these pairs and
    # returns the labels, so several codes sharing one pair collapse into
    # a single entry.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1093
1094     def audiences_pl(self):
1095         audiences = self.get_extra_info_json().get('audiences', [])
1096         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1097         return [a[1] for a in audiences]
1098
1099     def stage_note(self):
1100         stage = self.get_extra_info_json().get('stage')
1101         if stage and stage < '0.4':
1102             return (_('Ten utwór wymaga uwspółcześnienia'),
1103                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1104         else:
1105             return None, None
1106
1107     def choose_fragments(self, number):
1108         fragments = self.fragments.order_by()
1109         fragments_count = fragments.count()
1110         if not fragments_count and self.children.exists():
1111             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1112             fragments_count = fragments.count()
1113         if fragments_count:
1114             if fragments_count > number:
1115                 offset = randint(0, fragments_count - number)
1116             else:
1117                 offset = 0
1118             return fragments[offset : offset + number]
1119         elif self.parent:
1120             return self.parent.choose_fragments(number)
1121         else:
1122             return []
1123
1124     def choose_fragment(self):
1125         fragments = self.choose_fragments(1)
1126         if fragments:
1127             return fragments[0]
1128         else:
1129             return None
1130
1131     def fragment_data(self):
1132         fragment = self.choose_fragment()
1133         if fragment:
1134             return {
1135                 'title': fragment.book.pretty_title(),
1136                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1137             }
1138         else:
1139             return None
1140
1141     def update_popularity(self):
1142         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1143         try:
1144             pop = self.popularity
1145             pop.count = count
1146             pop.save()
1147         except BookPopularity.DoesNotExist:
1148             BookPopularity.objects.create(book=self, count=count)
1149
1150     def ridero_link(self):
1151         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1152
1153     def elevenreader_link(self):
1154         first_text = self.get_first_text()
1155         if first_text is None:
1156             return None
1157         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1158
1159     def content_warnings(self):
1160         warnings_def = {
1161             'wulgaryzmy': _('wulgaryzmy'),
1162         }
1163         warnings = self.get_extra_info_json().get('content_warnings', [])
1164         warnings = [
1165             warnings_def.get(w, w)
1166             for w in warnings
1167         ]
1168         warnings.sort()
1169         return warnings
1170
1171     def full_sort_key(self):
1172         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1173
1174     def cover_color(self):
1175         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1176
1177     @cached_render('catalogue/book_mini_box.html')
1178     def mini_box(self):
1179         return {
1180             'book': self
1181         }
1182
1183     @cached_render('catalogue/book_mini_box.html')
1184     def mini_box_nolink(self):
1185         return {
1186             'book': self,
1187             'no_link': True,
1188         }
1189
1190
class BookPopularity(models.Model):
    """Popularity counter kept for a single book."""
    # The counted book; the record is reachable from it as ``book.popularity``
    # and is deleted together with the book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter; Book.update_popularity() stores here the number of
    # distinct users found through the book's user list items.
    count = models.IntegerField(default=0, db_index=True)