src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
  35 bofh_storage = BofhFileSystemStorage()
  36
  37
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Holds the catalogue metadata, the generated files (HTML, ebooks, covers),
    the parent/child hierarchy and a few denormalized values that are
    refreshed in ``save()``.
    """
    # --- Basic metadata -----------------------------------------------------
    title = models.CharField('tytuł', max_length=32767)
    # Sort keys are not editable; both are recomputed on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set on import to the slug of the book this one is a variant of, or to its own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    # Language code; translated through constants.LANGUAGES_3TO2 in language_code().
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    # Filled by load_abstract() and load_toc() respectively.
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded dict; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Preformatted duration string, written by set_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview ("prapremiera") state; access is decided in is_accessible_to().
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    # Generated in save() when the book is a preview and no key is set yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
    can_sell = models.BooleanField('do sprzedaży', default=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Formats stored in "<format>_file" fields on the book itself;
    # has_media()/get_media() look up any other type in the related media.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # --- Hierarchy ----------------------------------------------------------
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # NOTE(review): presumably maintained by repopulate_ancestors(), which
    # from_text_and_meta() calls after re-parenting children — confirm.
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    # --- Managers and tag relations -----------------------------------------
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    # Both are computed in update_stats().
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 111
 112     class AlreadyExists(Exception):
 113         pass
 114
 115     class Meta:
 116         ordering = ('sort_key_author', 'sort_key')
 117         verbose_name = 'książka'
 118         verbose_name_plural = 'książki'
 119         app_label = 'catalogue'
 120
 121     def __str__(self):
 122         return self.title
 123
 124     def get_extra_info_json(self):
 125         return json.loads(self.extra_info or '{}')
 126
 127     def get_initial(self):
 128         try:
 129             return re.search(r'\w', self.title, re.U).group(0)
 130         except AttributeError:
 131             return ''
 132
 133     def authors(self):
 134         return self.tags.filter(category='author')
 135
 136     def epochs(self):
 137         return self.tags.filter(category='epoch')
 138
 139     def genres(self):
 140         return self.tags.filter(category='genre')
 141
 142     def kinds(self):
 143         return self.tags.filter(category='kind')
 144
 145     def tag_unicode(self, category):
 146         relations = prefetched_relations(self, category)
 147         if relations:
 148             return ', '.join(rel.tag.name for rel in relations)
 149         else:
 150             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 151
 152     def tags_by_category(self):
 153         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 154
 155     def author_unicode(self):
 156         return self.cached_author
 157
 158     def kind_unicode(self):
 159         return self.tag_unicode('kind')
 160
 161     def epoch_unicode(self):
 162         return self.tag_unicode('epoch')
 163
 164     def genre_unicode(self):
 165         return self.tag_unicode('genre')
 166
 167     def translator(self):
 168         translators = self.get_extra_info_json().get('translators')
 169         if not translators:
 170             return None
 171         if len(translators) > 3:
 172             translators = translators[:2]
 173             others = ' i inni'
 174         else:
 175             others = ''
 176         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 177
 178     def cover_source(self):
 179         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 180
 181     @property
 182     def isbn_pdf(self):
 183         return self.get_extra_info_json().get('isbn_pdf')
 184
 185     @property
 186     def isbn_epub(self):
 187         return self.get_extra_info_json().get('isbn_epub')
 188
 189     @property
 190     def isbn_mobi(self):
 191         return self.get_extra_info_json().get('isbn_mobi')
 192
 193     @property
 194     def redakcja(self):
 195         return self.get_extra_info_json().get('about')
 196
 197     def is_accessible_to(self, user):
 198         if not self.preview:
 199             return True
 200         if not user.is_authenticated:
 201             return False
 202         Membership = apps.get_model('club', 'Membership')
 203         if Membership.is_active_for(user):
 204             return True
 205         Funding = apps.get_model('funding', 'Funding')
 206         if Funding.objects.filter(user=user, offer__book=self):
 207             return True
 208         return False
 209
 210     def save(self, force_insert=False, force_update=False, **kwargs):
 211         from sortify import sortify
 212
 213         self.sort_key = sortify(self.title)[:120]
 214         self.title = str(self.title)  # ???
 215
 216         try:
 217             author = self.authors().first().sort_key
 218         except AttributeError:
 219             author = ''
 220         self.sort_key_author = author
 221
 222         self.cached_author = self.tag_unicode('author')
 223         self.has_audience = 'audience' in self.get_extra_info_json()
 224
 225         if self.preview and not self.preview_key:
 226             self.preview_key = get_random_hash(self.slug)[:32]
 227
 228         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 229
 230         return ret
 231
 232     def get_absolute_url(self):
 233         return reverse('book_detail', args=[self.slug])
 234
 235     def gallery_path(self):
 236         return gallery_path(self.slug)
 237
 238     def gallery_url(self):
 239         return gallery_url(self.slug)
 240
 241     def get_first_text(self):
 242         if self.html_file:
 243             return self
 244         child = self.children.all().order_by('parent_number').first()
 245         if child is not None:
 246             return child.get_first_text()
 247
 248     def get_last_text(self):
 249         if self.html_file:
 250             return self
 251         child = self.children.all().order_by('parent_number').last()
 252         if child is not None:
 253             return child.get_last_text()
 254
 255     def get_prev_text(self):
 256         if not self.parent:
 257             return None
 258         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 259         if sibling is not None:
 260             return sibling.get_last_text()
 261
 262         if self.parent.html_file:
 263             return self.parent
 264
 265         return self.parent.get_prev_text()
 266
 267     def get_next_text(self, inside=True):
 268         if inside:
 269             child = self.children.order_by('parent_number').first()
 270             if child is not None:
 271                 return child.get_first_text()
 272
 273         if not self.parent:
 274             return None
 275         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 276         if sibling is not None:
 277             return sibling.get_first_text()
 278         return self.parent.get_next_text(inside=False)
 279
 280     def get_siblings(self):
 281         if not self.parent:
 282             return []
 283         return self.parent.children.all().order_by('parent_number')
 284
 285     def get_children(self):
 286         return self.children.all().order_by('parent_number')
 287
 288     @property
 289     def name(self):
 290         return self.title
 291
 292     def language_code(self):
 293         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 294
 295     def language_name(self):
 296         return dict(settings.LANGUAGES).get(self.language_code(), "")
 297
 298     def is_foreign(self):
 299         return self.language_code() != settings.LANGUAGE_CODE
 300
 301     def set_audio_length(self):
 302         length = self.get_audio_length()
 303         if length > 0:
 304             self.audio_length = self.format_audio_length(length)
 305             self.save()
 306
 307     @staticmethod
 308     def format_audio_length(seconds):
 309         """
 310         >>> Book.format_audio_length(1)
 311         '0:01'
 312         >>> Book.format_audio_length(3661)
 313         '1:01:01'
 314         """
 315         if seconds < 60*60:
 316             minutes = seconds // 60
 317             seconds = seconds % 60
 318             return '%d:%02d' % (minutes, seconds)
 319         else:
 320             hours = seconds // 3600
 321             minutes = seconds % 3600 // 60
 322             seconds = seconds % 60
 323             return '%d:%02d:%02d' % (hours, minutes, seconds)
 324
 325     def get_audio_length(self):
 326         total = 0
 327         for media in self.get_mp3() or ():
 328             total += app_settings.GET_MP3_LENGTH(media.file.path)
 329         return int(total)
 330
 331     def get_time(self):
 332         return round(self.xml_file.size / 1000 * 40)
 333
 334     def has_media(self, type_):
 335         if type_ in Book.formats:
 336             return bool(getattr(self, "%s_file" % type_))
 337         else:
 338             return self.media.filter(type=type_).exists()
 339
 340     def get_media(self, type_):
 341         if self.has_media(type_):
 342             if type_ in Book.formats:
 343                 return getattr(self, "%s_file" % type_)
 344             else:
 345                 return self.media.filter(type=type_)
 346         else:
 347             return None
 348
 349     def get_mp3(self):
 350         return self.get_media("mp3")
 351
 352     def get_odt(self):
 353         return self.get_media("odt")
 354
 355     def get_ogg(self):
 356         return self.get_media("ogg")
 357
 358     def get_daisy(self):
 359         return self.get_media("daisy")
 360
 361     def get_audio_epub(self):
 362         return self.get_media("audio.epub")
 363
 364     def media_url(self, format_):
 365         media = self.get_media(format_)
 366         if media:
 367             if self.preview:
 368                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 369             else:
 370                 return media.url
 371         else:
 372             return None
 373
 374     def html_url(self):
 375         return self.media_url('html')
 376
 377     def html_nonotes_url(self):
 378         return self.media_url('html_nonotes')
 379
 380     def pdf_url(self):
 381         return self.media_url('pdf')
 382
 383     def epub_url(self):
 384         return self.media_url('epub')
 385
 386     def mobi_url(self):
 387         return self.media_url('mobi')
 388
 389     def txt_url(self):
 390         return self.media_url('txt')
 391
 392     def fb2_url(self):
 393         return self.media_url('fb2')
 394
 395     def xml_url(self):
 396         return self.media_url('xml')
 397
 398     def has_description(self):
 399         return len(self.description) > 0
 400     has_description.short_description = 'opis'
 401     has_description.boolean = True
 402
 403     def has_mp3_file(self):
 404         return self.has_media("mp3")
 405     has_mp3_file.short_description = 'MP3'
 406     has_mp3_file.boolean = True
 407
 408     def has_ogg_file(self):
 409         return self.has_media("ogg")
 410     has_ogg_file.short_description = 'OGG'
 411     has_ogg_file.boolean = True
 412
 413     def has_daisy_file(self):
 414         return self.has_media("daisy")
 415     has_daisy_file.short_description = 'DAISY'
 416     has_daisy_file.boolean = True
 417
 418     def has_sync_file(self):
 419         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 420
 421     def build_sync_file(self):
 422         from lxml import html
 423         from django.core.files.base import ContentFile
 424         with self.html_file.open('rb') as f:
 425             h = html.fragment_fromstring(f.read().decode('utf-8'))
 426
 427         durations = [
 428             m['mp3'].duration
 429             for m in self.get_audiobooks()[0]
 430         ]
 431         if settings.MOCK_DURATIONS:
 432             durations = settings.MOCK_DURATIONS
 433
 434         sync = []
 435         ts = None
 436         sid = 1
 437         dirty = False
 438         for elem in h.iter():
 439             if elem.get('data-audio-ts'):
 440                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 441                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 442                 # check if inside verse
 443                 p = elem.getparent()
 444                 while p is not None:
 445                     # Workaround for missing ids.
 446                     if 'verse' in p.get('class', ''):
 447                         if not p.get('id'):
 448                             p.set('id', f'syn{sid}')
 449                             dirty = True
 450                             sid += 1
 451                         sync.append((ts, p.get('id')))
 452                         ts = None
 453                         break
 454                     p = p.getparent()
 455             elif ts:
 456                 cls = elem.get('class', '')
 457                 # Workaround for missing ids.
 458                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 459                     if not elem.get('id'):
 460                         elem.set('id', f'syn{sid}')
 461                         dirty = True
 462                         sid += 1
 463                     sync.append((ts, elem.get('id')))
 464                     ts = None
 465         if dirty:
 466             htext = html.tostring(h, encoding='utf-8')
 467             with open(self.html_file.path, 'wb') as f:
 468                 f.write(htext)
 469         try:
 470             bm = self.media.get(type='sync')
 471         except:
 472             bm = BookMedia(book=self, type='sync')
 473         sync = (
 474             '27\n' + '\n'.join(
 475                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 476             )).encode('latin1')
 477         bm.file.save(
 478             None, ContentFile(sync)
 479             )
 480
 481     def get_sync(self):
 482         if not self.has_sync_file():
 483             return []
 484         with self.get_media('sync').first().file.open('r') as f:
 485             sync = f.read().split('\n')
 486         offset = float(sync[0])
 487         items = []
 488         for line in sync[1:]:
 489             if not line:
 490                 continue
 491             start, end, elid = line.split()
 492             items.append([elid, float(start) + offset])
 493         return items
 494
 495     def sync_ts(self, ts):
 496         elid = None
 497         for cur_id, t in self.get_sync():
 498             if ts >= t:
 499                 elid = cur_id
 500             else:
 501                 break
 502         return elid
 503
 504     def sync_elid(self, elid):
 505         for cur_id, t in self.get_sync():
 506             if cur_id == elid:
 507                 return t
 508
 509     def has_audio_epub_file(self):
 510         return self.has_media("audio.epub")
 511
 512     @property
 513     def media_daisy(self):
 514         return self.get_media('daisy')
 515
 516     @property
 517     def media_audio_epub(self):
 518         return self.get_media('audio.epub')
 519
 520     def get_audiobooks(self, with_children=False, processing=False):
 521         ogg_files = {}
 522         for m in self.media.filter(type='ogg').order_by().iterator():
 523             ogg_files[m.name] = m
 524
 525         audiobooks = []
 526         projects = set()
 527         total_duration = 0
 528         for mp3 in self.media.filter(type='mp3').iterator():
 529             # ogg files are always from the same project
 530             meta = mp3.get_extra_info_json()
 531             project = meta.get('project')
 532             if not project:
 533                 # temporary fallback
 534                 project = 'CzytamySłuchając'
 535
 536             projects.add((project, meta.get('funded_by', '')))
 537             total_duration += mp3.duration or 0
 538
 539             media = {'mp3': mp3}
 540
 541             ogg = ogg_files.get(mp3.name)
 542             if ogg:
 543                 media['ogg'] = ogg
 544             audiobooks.append(media)
 545
 546         if with_children:
 547             for child in self.get_children():
 548                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 549                     with_children=True, processing=True)
 550                 audiobooks.append({'part': child})
 551                 audiobooks += ch_audiobooks
 552                 projects.update(ch_projects)
 553                 total_duration += ch_duration
 554
 555         if not processing:
 556             projects = sorted(projects)
 557             total_duration = '%d:%02d' % (
 558                 total_duration // 60,
 559                 total_duration % 60
 560             )
 561
 562         return audiobooks, projects, total_duration
 563
 564     def get_audiobooks_with_children(self):
 565         return self.get_audiobooks(with_children=True)
 566
 567     def wldocument(self, parse_dublincore=True, inherit=True):
 568         from catalogue.import_utils import ORMDocProvider
 569         from librarian.parser import WLDocument
 570
 571         if inherit and self.parent:
 572             meta_fallbacks = self.parent.cover_info()
 573         else:
 574             meta_fallbacks = None
 575
 576         return WLDocument.from_file(
 577             self.xml_file.path,
 578             provider=ORMDocProvider(self),
 579             parse_dublincore=parse_dublincore,
 580             meta_fallbacks=meta_fallbacks)
 581
 582     def wldocument2(self):
 583         from catalogue.import_utils import ORMDocProvider
 584         from librarian.document import WLDocument
 585         doc = WLDocument(
 586             self.xml_file.path,
 587             provider=ORMDocProvider(self)
 588         )
 589         doc.meta.update(self.cover_info())
 590         return doc
 591
 592
 593     @staticmethod
 594     def zip_format(format_):
 595         def pretty_file_name(book):
 596             return "%s/%s.%s" % (
 597                 book.get_extra_info_json()['author'],
 598                 book.slug,
 599                 format_)
 600
 601         field_name = "%s_file" % format_
 602         field = getattr(Book, field_name)
 603         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 604         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 605         return create_zip(paths, field.ZIP)
 606
 607     def zip_audiobooks(self, format_):
 608         bm = BookMedia.objects.filter(book=self, type=format_)
 609         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 610         licenses = set()
 611         for m in bm:
 612             license = constants.LICENSES.get(
 613                 m.get_extra_info_json().get('license'), {}
 614             ).get('locative')
 615             if license:
 616                 licenses.add(license)
 617         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 618             'licenses': licenses,
 619             'meta': self.wldocument2().meta,
 620         })
 621         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 622
 623     def search_index(self, index=None):
 624         if not self.findable:
 625             return
 626         from search.index import Index
 627         Index.index_book(self)
 628
 629     # will make problems in conjunction with paid previews
 630     def download_pictures(self, remote_gallery_url):
 631         # This is only needed for legacy relative image paths.
 632         gallery_path = self.gallery_path()
 633         # delete previous files, so we don't include old files in ebooks
 634         if os.path.isdir(gallery_path):
 635             for filename in os.listdir(gallery_path):
 636                 file_path = os.path.join(gallery_path, filename)
 637                 os.unlink(file_path)
 638         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 639         if ilustr_elements:
 640             makedirs(gallery_path)
 641             for ilustr in ilustr_elements:
 642                 ilustr_src = ilustr.get('src')
 643                 if '/' in ilustr_src:
 644                     continue
 645                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 646                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 647
 648     def load_abstract(self):
 649         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 650         if abstract is not None:
 651             self.abstract = transform_abstrakt(abstract)
 652         else:
 653             self.abstract = ''
 654
 655     def load_toc(self):
 656         self.toc = ''
 657         if self.html_file:
 658             parser = html.HTMLParser(encoding='utf-8')
 659             tree = html.parse(self.html_file.path, parser=parser)
 660             toc = tree.find('//div[@id="toc"]/ol')
 661             if toc is None or not len(toc):
 662                 return
 663             html_link = reverse('book_text', args=[self.slug])
 664             for a in toc.findall('.//a'):
 665                 a.attrib['href'] = html_link + a.attrib['href']
 666             self.toc = html.tostring(toc, encoding='unicode')
 667             # div#toc
 668
 669     @classmethod
 670     def from_xml_file(cls, xml_file, **kwargs):
 671         from django.core.files import File
 672         from librarian import dcparser
 673
 674         # use librarian to parse meta-data
 675         book_info = dcparser.parse(xml_file)
 676
 677         if not isinstance(xml_file, File):
 678             xml_file = File(open(xml_file))
 679
 680         try:
 681             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 682         finally:
 683             xml_file.close()
 684
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; the code reads its ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                optional ``parts`` and ``to_dict()``.
            overwrite: allow updating a book that already exists.
            dont_build: iterable of names ('cover' or ebook formats) to skip
                when scheduling builds; merged with
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (only if the book is
                findable and ``settings.NO_SEARCH_INDEX`` is not set).
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview until today plus that many days.
            findable: stored on the book as ``findable``.
            logo, logo_mono, logo_alt: when truthy, stored in ``extra_info``.
            can_sell: when not None, stored on the book as ``can_sell``.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is
                missing.
            ValueError: the slug has characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so that a cover change can be detected below.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name are assigned directly; the shelves
        # ('set' tags) the book had before are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Re-attach the listed parts, numbering them in the listed order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # The HTML builds are scheduled regardless of dont_build.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Announce the publication to signal receivers.
        cls.published.send(sender=cls, instance=book)
        return book
 817
 818     def update_stats(self):
 819         stats = self.wldocument2().get_statistics()['total']
 820         self.pages = round(
 821             stats.get('verses_with_fn', 0) / 30 +
 822             stats.get('chars_out_verse_with_fn', 0) / 1800)
 823         self.read_time = round(self.get_time())
 824         self.save(update_fields=['pages', 'read_time'])
 825         if self.parent is not None:
 826             self.parent.update_stats()
 827
 828     def update_references(self):
 829         Entity = apps.get_model('references', 'Entity')
 830         doc = self.wldocument2()
 831         doc._compat_assign_section_ids()
 832         doc._compat_assign_ordered_ids()
 833         refs = {}
 834         for ref_elem in doc.references():
 835             uri = ref_elem.attrib.get('href', '')
 836             if not uri:
 837                 continue
 838             if uri in refs:
 839                 ref = refs[uri]
 840             else:
 841                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 842                 if entity_created:
 843                     try:
 844                         entity.populate()
 845                     except:
 846                         pass
 847                     else:
 848                         entity.save()
 849                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 850                 refs[uri] = ref
 851                 if not ref_created:
 852                     ref.occurence_set.all().delete()
 853             sec = ref_elem.get_link()
 854             m = re.match(r'sec(\d+)', sec)
 855             assert m is not None
 856             sec = int(m.group(1))
 857             snippet = ref_elem.get_snippet()
 858             b = builders['html-snippet']()
 859             for s in snippet:
 860                 s.html_build(b)
 861             html = b.output().get_bytes().decode('utf-8')
 862
 863             ref.occurence_set.create(
 864                 section=sec,
 865                 html=html
 866             )
 867         self.reference_set.exclude(entity__uri__in=refs).delete()
 868
 869     @property
 870     def references(self):
 871         return self.reference_set.all().select_related('entity')
 872
 873     def update_has_audio(self):
 874         self.has_audio = False
 875         if self.media.filter(type='mp3').exists():
 876             self.has_audio = True
 877         if self.descendant.filter(has_audio=True).exists():
 878             self.has_audio = True
 879         self.save(update_fields=['has_audio'])
 880         if self.parent is not None:
 881             self.parent.update_has_audio()
 882
 883     def update_narrators(self):
 884         narrator_names = set()
 885         for bm in self.media.filter(type='mp3'):
 886             narrator_names.update(set(
 887                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 888             ))
 889         narrators = []
 890
 891         for name in narrator_names:
 892             if not name: continue
 893             slug = slugify(name)
 894             try:
 895                 t = Tag.objects.get(category='author', slug=slug)
 896             except Tag.DoesNotExist:
 897                 sort_key = sortify(
 898                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 899                 )
 900                 t = Tag.objects.create(
 901                     category='author',
 902                     name_pl=name,
 903                     slug=slug,
 904                     sort_key=sort_key,
 905                 )
 906             narrators.append(t)
 907         self.narrators.set(narrators)
 908
 909     @classmethod
 910     @transaction.atomic
 911     def repopulate_ancestors(cls):
 912         """Fixes the ancestry cache."""
 913         # TODO: table names
 914         cursor = connection.cursor()
 915         if connection.vendor == 'postgres':
 916             cursor.execute("TRUNCATE catalogue_book_ancestor")
 917             cursor.execute("""
 918                 WITH RECURSIVE ancestry AS (
 919                     SELECT book.id, book.parent_id
 920                     FROM catalogue_book AS book
 921                     WHERE book.parent_id IS NOT NULL
 922                     UNION
 923                     SELECT ancestor.id, book.parent_id
 924                     FROM ancestry AS ancestor, catalogue_book AS book
 925                     WHERE ancestor.parent_id = book.id
 926                         AND book.parent_id IS NOT NULL
 927                     )
 928                 INSERT INTO catalogue_book_ancestor
 929                     (from_book_id, to_book_id)
 930                     SELECT id, parent_id
 931                     FROM ancestry
 932                     ORDER BY id;
 933                 """)
 934         else:
 935             cursor.execute("DELETE FROM catalogue_book_ancestor")
 936             for b in cls.objects.exclude(parent=None):
 937                 parent = b.parent
 938                 while parent is not None:
 939                     b.ancestor.add(parent)
 940                     parent = parent.parent
 941
 942     @property
 943     def ancestors(self):
 944         if self.parent:
 945             for anc in self.parent.ancestors:
 946                 yield anc
 947             yield self.parent
 948         else:
 949             return []
 950
 951     def clear_cache(self):
 952         clear_cached_renders(self.mini_box)
 953         clear_cached_renders(self.mini_box_nolink)
 954
 955     def cover_info(self, inherit=True):
 956         """Returns a dictionary to serve as fallback for BookInfo.
 957
 958         For now, the only thing inherited is the cover image.
 959         """
 960         need = False
 961         info = {}
 962         for field in ('cover_url', 'cover_by', 'cover_source'):
 963             val = self.get_extra_info_json().get(field)
 964             if val:
 965                 info[field] = val
 966             else:
 967                 need = True
 968         if inherit and need and self.parent is not None:
 969             parent_info = self.parent.cover_info()
 970             parent_info.update(info)
 971             info = parent_info
 972         return info
 973
 974     def related_themes(self):
 975         return Tag.objects.usage_for_queryset(
 976             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 977             counts=True).filter(category='theme').order_by('-count')
 978
 979     def parent_cover_changed(self):
 980         """Called when parent book's cover image is changed."""
 981         if not self.cover_info(inherit=False):
 982             if 'cover' not in app_settings.DONT_BUILD:
 983                 self.cover.build_delay()
 984                 self.cover_clean.build_delay()
 985                 self.cover_thumb.build_delay()
 986                 self.cover_api_thumb.build_delay()
 987                 self.simple_cover.build_delay()
 988                 self.cover_ebookpoint.build_delay()
 989             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 990                 if format_ not in app_settings.DONT_BUILD:
 991                     getattr(self, '%s_file' % format_).build_delay()
 992             for child in self.children.all():
 993                 child.parent_cover_changed()
 994
 995     def other_versions(self):
 996         """Find other versions (i.e. in other languages) of the book."""
 997         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 998
 999     def parents(self):
1000         books = []
1001         parent = self.parent
1002         while parent is not None:
1003             books.insert(0, parent)
1004             parent = parent.parent
1005         return books
1006
1007     def pretty_title(self, html_links=False):
1008         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1009         books = self.parents() + [self]
1010         names.extend([(b.title, b.get_absolute_url()) for b in books])
1011
1012         if html_links:
1013             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1014         else:
1015             names = [tag[0] for tag in names]
1016         return ', '.join(names)
1017
1018     def publisher(self):
1019         publisher = self.get_extra_info_json()['publisher']
1020         if isinstance(publisher, str):
1021             return publisher
1022         elif isinstance(publisher, list):
1023             return ', '.join(publisher)
1024
1025     def get_recommended(self, limit=4):
1026         books_qs = type(self).objects.filter(findable=True)
1027         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1028         books = type(self).tagged.related_to(self, books_qs)[:limit]
1029         return books
1030
1031     @classmethod
1032     def tagged_top_level(cls, tags):
1033         """ Returns top-level books tagged with `tags`.
1034
1035         It only returns those books which don't have ancestors which are
1036         also tagged with those tags.
1037
1038         """
1039         objects = cls.tagged.with_all(tags)
1040         return objects.filter(findable=True).exclude(ancestor__in=objects)
1041
1042     @classmethod
1043     def book_list(cls, book_filter=None):
1044         """Generates a hierarchical listing of all books.
1045
1046         Books are optionally filtered with a test function.
1047
1048         """
1049
1050         books_by_parent = {}
1051         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1052         if book_filter:
1053             books = books.filter(book_filter).distinct()
1054
1055             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1056             for book in books.iterator():
1057                 parent = book.parent_id
1058                 if parent not in book_ids:
1059                     parent = None
1060                 books_by_parent.setdefault(parent, []).append(book)
1061         else:
1062             for book in books.iterator():
1063                 books_by_parent.setdefault(book.parent_id, []).append(book)
1064
1065         orphans = []
1066         books_by_author = OrderedDict()
1067         for tag in Tag.objects.filter(category='author').iterator():
1068             books_by_author[tag] = []
1069
1070         for book in books_by_parent.get(None, ()):
1071             authors = list(book.authors().only('pk'))
1072             if authors:
1073                 for author in authors:
1074                     books_by_author[author].append(book)
1075             else:
1076                 orphans.append(book)
1077
1078         return books_by_author, orphans, books_by_parent
1079
    # Audience code -> (sort rank, Polish display label).
    # Looked up by audiences_pl(); several codes share one label, and the
    # rank orders the labels: "szkoła podstawowa" (primary school),
    # "gimnazjum", "liceum".
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1090
1091     def audiences_pl(self):
1092         audiences = self.get_extra_info_json().get('audiences', [])
1093         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1094         return [a[1] for a in audiences]
1095
1096     def stage_note(self):
1097         stage = self.get_extra_info_json().get('stage')
1098         if stage and stage < '0.4':
1099             return (_('Ten utwór wymaga uwspółcześnienia'),
1100                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1101         else:
1102             return None, None
1103
1104     def choose_fragments(self, number):
1105         fragments = self.fragments.order_by()
1106         fragments_count = fragments.count()
1107         if not fragments_count and self.children.exists():
1108             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1109             fragments_count = fragments.count()
1110         if fragments_count:
1111             if fragments_count > number:
1112                 offset = randint(0, fragments_count - number)
1113             else:
1114                 offset = 0
1115             return fragments[offset : offset + number]
1116         elif self.parent:
1117             return self.parent.choose_fragments(number)
1118         else:
1119             return []
1120
1121     def choose_fragment(self):
1122         fragments = self.choose_fragments(1)
1123         if fragments:
1124             return fragments[0]
1125         else:
1126             return None
1127
1128     def fragment_data(self):
1129         fragment = self.choose_fragment()
1130         if fragment:
1131             return {
1132                 'title': fragment.book.pretty_title(),
1133                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1134             }
1135         else:
1136             return None
1137
1138     def update_popularity(self):
1139         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1140         try:
1141             pop = self.popularity
1142             pop.count = count
1143             pop.save()
1144         except BookPopularity.DoesNotExist:
1145             BookPopularity.objects.create(book=self, count=count)
1146
1147     def ridero_link(self):
1148         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1149
1150     def elevenreader_link(self):
1151         first_text = self.get_first_text()
1152         if first_text is None:
1153             return None
1154         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1155
1156     def content_warnings(self):
1157         warnings_def = {
1158             'wulgaryzmy': _('wulgaryzmy'),
1159         }
1160         warnings = self.get_extra_info_json().get('content_warnings', [])
1161         warnings = [
1162             warnings_def.get(w, w)
1163             for w in warnings
1164         ]
1165         warnings.sort()
1166         return warnings
1167
1168     def full_sort_key(self):
1169         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1170
1171     def cover_color(self):
1172         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1173
1174     @cached_render('catalogue/book_mini_box.html')
1175     def mini_box(self):
1176         return {
1177             'book': self
1178         }
1179
1180     @cached_render('catalogue/book_mini_box.html')
1181     def mini_box_nolink(self):
1182         return {
1183             'book': self,
1184             'no_link': True,
1185         }
1186
1187
1188 class BookPopularity(models.Model):
1189     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1190     count = models.IntegerField(default=0, db_index=True)