src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
# Module-level storage instance, passed as `storage=` to file fields of Book.
bofh_storage = BofhFileSystemStorage()
  36
  37
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    # Descriptive metadata. The first positional argument of most fields is
    # the user-facing (Polish) label.
    title = models.CharField('tytuł', max_length=32767)
    # Both sort keys are recomputed in save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta() to the `variant_of` slug, or to the book's own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children; children are ordered by this field.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON document stored as text; decode it with get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state; save() generates preview_key when preview is set and
    # no key exists yet.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names; each name in `formats` has a matching `<name>_file` field,
    # which has_media()/get_media() look up by that pattern.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: direct parent plus a many-to-many relation to ancestors.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Values derived from tags / extra_info; both are refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    # Both values are filled in by update_stats().
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        # Default ordering: by author sort key, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
 119
 120     def __str__(self):
 121         return self.title
 122
 123     def get_extra_info_json(self):
 124         return json.loads(self.extra_info or '{}')
 125
 126     def get_initial(self):
 127         try:
 128             return re.search(r'\w', self.title, re.U).group(0)
 129         except AttributeError:
 130             return ''
 131
 132     def authors(self):
 133         return self.tags.filter(category='author')
 134
 135     def epochs(self):
 136         return self.tags.filter(category='epoch')
 137
 138     def genres(self):
 139         return self.tags.filter(category='genre')
 140
 141     def kinds(self):
 142         return self.tags.filter(category='kind')
 143
 144     def tag_unicode(self, category):
 145         relations = prefetched_relations(self, category)
 146         if relations:
 147             return ', '.join(rel.tag.name for rel in relations)
 148         else:
 149             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 150
 151     def tags_by_category(self):
 152         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 153
 154     def author_unicode(self):
 155         return self.cached_author
 156
 157     def kind_unicode(self):
 158         return self.tag_unicode('kind')
 159
 160     def epoch_unicode(self):
 161         return self.tag_unicode('epoch')
 162
 163     def genre_unicode(self):
 164         return self.tag_unicode('genre')
 165
 166     def translator(self):
 167         translators = self.get_extra_info_json().get('translators')
 168         if not translators:
 169             return None
 170         if len(translators) > 3:
 171             translators = translators[:2]
 172             others = ' i inni'
 173         else:
 174             others = ''
 175         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 176
 177     def cover_source(self):
 178         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 179
 180     @property
 181     def isbn_pdf(self):
 182         return self.get_extra_info_json().get('isbn_pdf')
 183
 184     @property
 185     def isbn_epub(self):
 186         return self.get_extra_info_json().get('isbn_epub')
 187
 188     @property
 189     def isbn_mobi(self):
 190         return self.get_extra_info_json().get('isbn_mobi')
 191
 192     @property
 193     def redakcja(self):
 194         return self.get_extra_info_json().get('about')
 195
 196     def is_accessible_to(self, user):
 197         if not self.preview:
 198             return True
 199         if not user.is_authenticated:
 200             return False
 201         Membership = apps.get_model('club', 'Membership')
 202         if Membership.is_active_for(user):
 203             return True
 204         Funding = apps.get_model('funding', 'Funding')
 205         if Funding.objects.filter(user=user, offer__book=self):
 206             return True
 207         return False
 208
 209     def save(self, force_insert=False, force_update=False, **kwargs):
 210         from sortify import sortify
 211
 212         self.sort_key = sortify(self.title)[:120]
 213         self.title = str(self.title)  # ???
 214
 215         try:
 216             author = self.authors().first().sort_key
 217         except AttributeError:
 218             author = ''
 219         self.sort_key_author = author
 220
 221         self.cached_author = self.tag_unicode('author')
 222         self.has_audience = 'audience' in self.get_extra_info_json()
 223
 224         if self.preview and not self.preview_key:
 225             self.preview_key = get_random_hash(self.slug)[:32]
 226
 227         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 228
 229         return ret
 230
 231     def get_absolute_url(self):
 232         return reverse('book_detail', args=[self.slug])
 233
 234     def gallery_path(self):
 235         return gallery_path(self.slug)
 236
 237     def gallery_url(self):
 238         return gallery_url(self.slug)
 239
 240     def get_first_text(self):
 241         if self.html_file:
 242             return self
 243         child = self.children.all().order_by('parent_number').first()
 244         if child is not None:
 245             return child.get_first_text()
 246
 247     def get_last_text(self):
 248         if self.html_file:
 249             return self
 250         child = self.children.all().order_by('parent_number').last()
 251         if child is not None:
 252             return child.get_last_text()
 253
 254     def get_prev_text(self):
 255         if not self.parent:
 256             return None
 257         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 258         if sibling is not None:
 259             return sibling.get_last_text()
 260
 261         if self.parent.html_file:
 262             return self.parent
 263
 264         return self.parent.get_prev_text()
 265
 266     def get_next_text(self, inside=True):
 267         if inside:
 268             child = self.children.order_by('parent_number').first()
 269             if child is not None:
 270                 return child.get_first_text()
 271
 272         if not self.parent:
 273             return None
 274         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 275         if sibling is not None:
 276             return sibling.get_first_text()
 277         return self.parent.get_next_text(inside=False)
 278
 279     def get_siblings(self):
 280         if not self.parent:
 281             return []
 282         return self.parent.children.all().order_by('parent_number')
 283
 284     def get_children(self):
 285         return self.children.all().order_by('parent_number')
 286
 287     @property
 288     def name(self):
 289         return self.title
 290
 291     def language_code(self):
 292         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 293
 294     def language_name(self):
 295         return dict(settings.LANGUAGES).get(self.language_code(), "")
 296
 297     def is_foreign(self):
 298         return self.language_code() != settings.LANGUAGE_CODE
 299
 300     def set_audio_length(self):
 301         length = self.get_audio_length()
 302         if length > 0:
 303             self.audio_length = self.format_audio_length(length)
 304             self.save()
 305
 306     @staticmethod
 307     def format_audio_length(seconds):
 308         """
 309         >>> Book.format_audio_length(1)
 310         '0:01'
 311         >>> Book.format_audio_length(3661)
 312         '1:01:01'
 313         """
 314         if seconds < 60*60:
 315             minutes = seconds // 60
 316             seconds = seconds % 60
 317             return '%d:%02d' % (minutes, seconds)
 318         else:
 319             hours = seconds // 3600
 320             minutes = seconds % 3600 // 60
 321             seconds = seconds % 60
 322             return '%d:%02d:%02d' % (hours, minutes, seconds)
 323
 324     def get_audio_length(self):
 325         total = 0
 326         for media in self.get_mp3() or ():
 327             total += app_settings.GET_MP3_LENGTH(media.file.path)
 328         return int(total)
 329
 330     def get_time(self):
 331         return round(self.xml_file.size / 1000 * 40)
 332
 333     def has_media(self, type_):
 334         if type_ in Book.formats:
 335             return bool(getattr(self, "%s_file" % type_))
 336         else:
 337             return self.media.filter(type=type_).exists()
 338
 339     def get_media(self, type_):
 340         if self.has_media(type_):
 341             if type_ in Book.formats:
 342                 return getattr(self, "%s_file" % type_)
 343             else:
 344                 return self.media.filter(type=type_)
 345         else:
 346             return None
 347
 348     def get_mp3(self):
 349         return self.get_media("mp3")
 350
 351     def get_odt(self):
 352         return self.get_media("odt")
 353
 354     def get_ogg(self):
 355         return self.get_media("ogg")
 356
 357     def get_daisy(self):
 358         return self.get_media("daisy")
 359
 360     def get_audio_epub(self):
 361         return self.get_media("audio.epub")
 362
 363     def media_url(self, format_):
 364         media = self.get_media(format_)
 365         if media:
 366             if self.preview:
 367                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 368             else:
 369                 return media.url
 370         else:
 371             return None
 372
 373     def html_url(self):
 374         return self.media_url('html')
 375
 376     def html_nonotes_url(self):
 377         return self.media_url('html_nonotes')
 378
 379     def pdf_url(self):
 380         return self.media_url('pdf')
 381
 382     def epub_url(self):
 383         return self.media_url('epub')
 384
 385     def mobi_url(self):
 386         return self.media_url('mobi')
 387
 388     def txt_url(self):
 389         return self.media_url('txt')
 390
 391     def fb2_url(self):
 392         return self.media_url('fb2')
 393
 394     def xml_url(self):
 395         return self.media_url('xml')
 396
 397     def has_description(self):
 398         return len(self.description) > 0
 399     has_description.short_description = 'opis'
 400     has_description.boolean = True
 401
 402     def has_mp3_file(self):
 403         return self.has_media("mp3")
 404     has_mp3_file.short_description = 'MP3'
 405     has_mp3_file.boolean = True
 406
 407     def has_ogg_file(self):
 408         return self.has_media("ogg")
 409     has_ogg_file.short_description = 'OGG'
 410     has_ogg_file.boolean = True
 411
 412     def has_daisy_file(self):
 413         return self.has_media("daisy")
 414     has_daisy_file.short_description = 'DAISY'
 415     has_daisy_file.boolean = True
 416
 417     def has_sync_file(self):
 418         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 419
 420     def build_sync_file(self):
 421         from lxml import html
 422         from django.core.files.base import ContentFile
 423         with self.html_file.open('rb') as f:
 424             h = html.fragment_fromstring(f.read().decode('utf-8'))
 425
 426         durations = [
 427             m['mp3'].duration
 428             for m in self.get_audiobooks()[0]
 429         ]
 430         if settings.MOCK_DURATIONS:
 431             durations = settings.MOCK_DURATIONS
 432
 433         sync = []
 434         ts = None
 435         sid = 1
 436         dirty = False
 437         for elem in h.iter():
 438             if elem.get('data-audio-ts'):
 439                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 440                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 441                 # check if inside verse
 442                 p = elem.getparent()
 443                 while p is not None:
 444                     # Workaround for missing ids.
 445                     if 'verse' in p.get('class', ''):
 446                         if not p.get('id'):
 447                             p.set('id', f'syn{sid}')
 448                             dirty = True
 449                             sid += 1
 450                         sync.append((ts, p.get('id')))
 451                         ts = None
 452                         break
 453                     p = p.getparent()
 454             elif ts:
 455                 cls = elem.get('class', '')
 456                 # Workaround for missing ids.
 457                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 458                     if not elem.get('id'):
 459                         elem.set('id', f'syn{sid}')
 460                         dirty = True
 461                         sid += 1
 462                     sync.append((ts, elem.get('id')))
 463                     ts = None
 464         if dirty:
 465             htext = html.tostring(h, encoding='utf-8')
 466             with open(self.html_file.path, 'wb') as f:
 467                 f.write(htext)
 468         try:
 469             bm = self.media.get(type='sync')
 470         except:
 471             bm = BookMedia(book=self, type='sync')
 472         sync = (
 473             '27\n' + '\n'.join(
 474                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 475             )).encode('latin1')
 476         bm.file.save(
 477             None, ContentFile(sync)
 478             )
 479
 480     def get_sync(self):
 481         if not self.has_sync_file():
 482             return []
 483         with self.get_media('sync').first().file.open('r') as f:
 484             sync = f.read().split('\n')
 485         offset = float(sync[0])
 486         items = []
 487         for line in sync[1:]:
 488             if not line:
 489                 continue
 490             start, end, elid = line.split()
 491             items.append([elid, float(start) + offset])
 492         return items
 493
 494     def sync_ts(self, ts):
 495         elid = None
 496         for cur_id, t in self.get_sync():
 497             if ts >= t:
 498                 elid = cur_id
 499             else:
 500                 break
 501         return elid
 502
 503     def sync_elid(self, elid):
 504         for cur_id, t in self.get_sync():
 505             if cur_id == elid:
 506                 return t
 507
 508     def has_audio_epub_file(self):
 509         return self.has_media("audio.epub")
 510
 511     @property
 512     def media_daisy(self):
 513         return self.get_media('daisy')
 514
 515     @property
 516     def media_audio_epub(self):
 517         return self.get_media('audio.epub')
 518
 519     def get_audiobooks(self, with_children=False, processing=False):
 520         ogg_files = {}
 521         for m in self.media.filter(type='ogg').order_by().iterator():
 522             ogg_files[m.name] = m
 523
 524         audiobooks = []
 525         projects = set()
 526         total_duration = 0
 527         for mp3 in self.media.filter(type='mp3').iterator():
 528             # ogg files are always from the same project
 529             meta = mp3.get_extra_info_json()
 530             project = meta.get('project')
 531             if not project:
 532                 # temporary fallback
 533                 project = 'CzytamySłuchając'
 534
 535             projects.add((project, meta.get('funded_by', '')))
 536             total_duration += mp3.duration or 0
 537
 538             media = {'mp3': mp3}
 539
 540             ogg = ogg_files.get(mp3.name)
 541             if ogg:
 542                 media['ogg'] = ogg
 543             audiobooks.append(media)
 544
 545         if with_children:
 546             for child in self.get_children():
 547                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 548                     with_children=True, processing=True)
 549                 audiobooks.append({'part': child})
 550                 audiobooks += ch_audiobooks
 551                 projects.update(ch_projects)
 552                 total_duration += ch_duration
 553
 554         if not processing:
 555             projects = sorted(projects)
 556             total_duration = '%d:%02d' % (
 557                 total_duration // 60,
 558                 total_duration % 60
 559             )
 560
 561         return audiobooks, projects, total_duration
 562
 563     def get_audiobooks_with_children(self):
 564         return self.get_audiobooks(with_children=True)
 565
 566     def wldocument(self, parse_dublincore=True, inherit=True):
 567         from catalogue.import_utils import ORMDocProvider
 568         from librarian.parser import WLDocument
 569
 570         if inherit and self.parent:
 571             meta_fallbacks = self.parent.cover_info()
 572         else:
 573             meta_fallbacks = None
 574
 575         return WLDocument.from_file(
 576             self.xml_file.path,
 577             provider=ORMDocProvider(self),
 578             parse_dublincore=parse_dublincore,
 579             meta_fallbacks=meta_fallbacks)
 580
 581     def wldocument2(self):
 582         from catalogue.import_utils import ORMDocProvider
 583         from librarian.document import WLDocument
 584         doc = WLDocument(
 585             self.xml_file.path,
 586             provider=ORMDocProvider(self)
 587         )
 588         doc.meta.update(self.cover_info())
 589         return doc
 590
 591
 592     @staticmethod
 593     def zip_format(format_):
 594         def pretty_file_name(book):
 595             return "%s/%s.%s" % (
 596                 book.get_extra_info_json()['author'],
 597                 book.slug,
 598                 format_)
 599
 600         field_name = "%s_file" % format_
 601         field = getattr(Book, field_name)
 602         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 603         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 604         return create_zip(paths, field.ZIP)
 605
 606     def zip_audiobooks(self, format_):
 607         bm = BookMedia.objects.filter(book=self, type=format_)
 608         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 609         licenses = set()
 610         for m in bm:
 611             license = constants.LICENSES.get(
 612                 m.get_extra_info_json().get('license'), {}
 613             ).get('locative')
 614             if license:
 615                 licenses.add(license)
 616         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 617             'licenses': licenses,
 618             'meta': self.wldocument2().meta,
 619         })
 620         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 621
 622     def search_index(self, index=None):
 623         if not self.findable:
 624             return
 625         from search.index import Index
 626         Index.index_book(self)
 627
    # NOTE: this will cause problems in conjunction with paid previews.
 629     def download_pictures(self, remote_gallery_url):
 630         # This is only needed for legacy relative image paths.
 631         gallery_path = self.gallery_path()
 632         # delete previous files, so we don't include old files in ebooks
 633         if os.path.isdir(gallery_path):
 634             for filename in os.listdir(gallery_path):
 635                 file_path = os.path.join(gallery_path, filename)
 636                 os.unlink(file_path)
 637         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 638         if ilustr_elements:
 639             makedirs(gallery_path)
 640             for ilustr in ilustr_elements:
 641                 ilustr_src = ilustr.get('src')
 642                 if '/' in ilustr_src:
 643                     continue
 644                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 645                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 646
 647     def load_abstract(self):
 648         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 649         if abstract is not None:
 650             self.abstract = transform_abstrakt(abstract)
 651         else:
 652             self.abstract = ''
 653
 654     def load_toc(self):
 655         self.toc = ''
 656         if self.html_file:
 657             parser = html.HTMLParser(encoding='utf-8')
 658             tree = html.parse(self.html_file.path, parser=parser)
 659             toc = tree.find('//div[@id="toc"]/ol')
 660             if toc is None or not len(toc):
 661                 return
 662             html_link = reverse('book_text', args=[self.slug])
 663             for a in toc.findall('.//a'):
 664                 a.attrib['href'] = html_link + a.attrib['href']
 665             self.toc = html.tostring(toc, encoding='unicode')
 666             # div#toc
 667
 668     @classmethod
 669     def from_xml_file(cls, xml_file, **kwargs):
 670         from django.core.files import File
 671         from librarian import dcparser
 672
 673         # use librarian to parse meta-data
 674         book_info = dcparser.parse(xml_file)
 675
 676         if not isinstance(xml_file, File):
 677             xml_file = File(open(xml_file))
 678
 679         try:
 680             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 681         finally:
 682             xml_file.close()
 683
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata.

        Arguments:
            raw_file: file object saved as the book's `xml_file`.
            book_info: parsed metadata; supplies the slug, title, language,
                license, optional `parts` and `variant_of`.
            overwrite: allow updating a book that already exists.
            dont_build: names of outputs ('cover' or format names) not to
                build; merged with `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (also requires `findable`
                and `settings.NO_SEARCH_INDEX` being off).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: when non-zero and the book is newly created, the book
                becomes a preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono, logo_alt: when given, stored in `extra_info`.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name are plain tags; shelves ('set' tags)
        # saved above are re-attached alongside them.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number follows list position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats from EBOOK_FORMATS_WITHOUT_CHILDREN are only built for
        # books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Announce the publication to listeners of the `published` signal.
        cls.published.send(sender=cls, instance=book)
        return book
 814
 815     def update_stats(self):
 816         stats = self.wldocument2().get_statistics()['total']
 817         self.pages = round(
 818             stats.get('verses_with_fn', 0) / 30 +
 819             stats.get('chars_out_verse_with_fn', 0) / 1800)
 820         self.read_time = round(self.get_time())
 821         self.save(update_fields=['pages', 'read_time'])
 822         if self.parent is not None:
 823             self.parent.update_stats()
 824
 825     def update_references(self):
 826         Entity = apps.get_model('references', 'Entity')
 827         doc = self.wldocument2()
 828         doc._compat_assign_section_ids()
 829         doc._compat_assign_ordered_ids()
 830         refs = {}
 831         for ref_elem in doc.references():
 832             uri = ref_elem.attrib.get('href', '')
 833             if not uri:
 834                 continue
 835             if uri in refs:
 836                 ref = refs[uri]
 837             else:
 838                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 839                 if entity_created:
 840                     try:
 841                         entity.populate()
 842                     except:
 843                         pass
 844                     else:
 845                         entity.save()
 846                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 847                 refs[uri] = ref
 848                 if not ref_created:
 849                     ref.occurence_set.all().delete()
 850             sec = ref_elem.get_link()
 851             m = re.match(r'sec(\d+)', sec)
 852             assert m is not None
 853             sec = int(m.group(1))
 854             snippet = ref_elem.get_snippet()
 855             b = builders['html-snippet']()
 856             for s in snippet:
 857                 s.html_build(b)
 858             html = b.output().get_bytes().decode('utf-8')
 859
 860             ref.occurence_set.create(
 861                 section=sec,
 862                 html=html
 863             )
 864         self.reference_set.exclude(entity__uri__in=refs).delete()
 865
 866     @property
 867     def references(self):
 868         return self.reference_set.all().select_related('entity')
 869
 870     def update_has_audio(self):
 871         self.has_audio = False
 872         if self.media.filter(type='mp3').exists():
 873             self.has_audio = True
 874         if self.descendant.filter(has_audio=True).exists():
 875             self.has_audio = True
 876         self.save(update_fields=['has_audio'])
 877         if self.parent is not None:
 878             self.parent.update_has_audio()
 879
 880     def update_narrators(self):
 881         narrator_names = set()
 882         for bm in self.media.filter(type='mp3'):
 883             narrator_names.update(set(
 884                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 885             ))
 886         narrators = []
 887
 888         for name in narrator_names:
 889             if not name: continue
 890             slug = slugify(name)
 891             try:
 892                 t = Tag.objects.get(category='author', slug=slug)
 893             except Tag.DoesNotExist:
 894                 sort_key = sortify(
 895                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 896                 )
 897                 t = Tag.objects.create(
 898                     category='author',
 899                     name_pl=name,
 900                     slug=slug,
 901                     sort_key=sort_key,
 902                 )
 903             narrators.append(t)
 904         self.narrators.set(narrators)
 905
 906     @classmethod
 907     @transaction.atomic
 908     def repopulate_ancestors(cls):
 909         """Fixes the ancestry cache."""
 910         # TODO: table names
 911         cursor = connection.cursor()
 912         if connection.vendor == 'postgres':
 913             cursor.execute("TRUNCATE catalogue_book_ancestor")
 914             cursor.execute("""
 915                 WITH RECURSIVE ancestry AS (
 916                     SELECT book.id, book.parent_id
 917                     FROM catalogue_book AS book
 918                     WHERE book.parent_id IS NOT NULL
 919                     UNION
 920                     SELECT ancestor.id, book.parent_id
 921                     FROM ancestry AS ancestor, catalogue_book AS book
 922                     WHERE ancestor.parent_id = book.id
 923                         AND book.parent_id IS NOT NULL
 924                     )
 925                 INSERT INTO catalogue_book_ancestor
 926                     (from_book_id, to_book_id)
 927                     SELECT id, parent_id
 928                     FROM ancestry
 929                     ORDER BY id;
 930                 """)
 931         else:
 932             cursor.execute("DELETE FROM catalogue_book_ancestor")
 933             for b in cls.objects.exclude(parent=None):
 934                 parent = b.parent
 935                 while parent is not None:
 936                     b.ancestor.add(parent)
 937                     parent = parent.parent
 938
 939     @property
 940     def ancestors(self):
 941         if self.parent:
 942             for anc in self.parent.ancestors:
 943                 yield anc
 944             yield self.parent
 945         else:
 946             return []
 947
 948     def clear_cache(self):
 949         clear_cached_renders(self.mini_box)
 950         clear_cached_renders(self.mini_box_nolink)
 951
 952     def cover_info(self, inherit=True):
 953         """Returns a dictionary to serve as fallback for BookInfo.
 954
 955         For now, the only thing inherited is the cover image.
 956         """
 957         need = False
 958         info = {}
 959         for field in ('cover_url', 'cover_by', 'cover_source'):
 960             val = self.get_extra_info_json().get(field)
 961             if val:
 962                 info[field] = val
 963             else:
 964                 need = True
 965         if inherit and need and self.parent is not None:
 966             parent_info = self.parent.cover_info()
 967             parent_info.update(info)
 968             info = parent_info
 969         return info
 970
 971     def related_themes(self):
 972         return Tag.objects.usage_for_queryset(
 973             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 974             counts=True).filter(category='theme').order_by('-count')
 975
 976     def parent_cover_changed(self):
 977         """Called when parent book's cover image is changed."""
 978         if not self.cover_info(inherit=False):
 979             if 'cover' not in app_settings.DONT_BUILD:
 980                 self.cover.build_delay()
 981                 self.cover_clean.build_delay()
 982                 self.cover_thumb.build_delay()
 983                 self.cover_api_thumb.build_delay()
 984                 self.simple_cover.build_delay()
 985                 self.cover_ebookpoint.build_delay()
 986             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 987                 if format_ not in app_settings.DONT_BUILD:
 988                     getattr(self, '%s_file' % format_).build_delay()
 989             for child in self.children.all():
 990                 child.parent_cover_changed()
 991
 992     def other_versions(self):
 993         """Find other versions (i.e. in other languages) of the book."""
 994         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 995
 996     def parents(self):
 997         books = []
 998         parent = self.parent
 999         while parent is not None:
1000             books.insert(0, parent)
1001             parent = parent.parent
1002         return books
1003
1004     def pretty_title(self, html_links=False):
1005         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1006         books = self.parents() + [self]
1007         names.extend([(b.title, b.get_absolute_url()) for b in books])
1008
1009         if html_links:
1010             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1011         else:
1012             names = [tag[0] for tag in names]
1013         return ', '.join(names)
1014
1015     def publisher(self):
1016         publisher = self.get_extra_info_json()['publisher']
1017         if isinstance(publisher, str):
1018             return publisher
1019         elif isinstance(publisher, list):
1020             return ', '.join(publisher)
1021
1022     def get_recommended(self, limit=4):
1023         books_qs = type(self).objects.filter(findable=True)
1024         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1025         books = type(self).tagged.related_to(self, books_qs)[:limit]
1026         return books
1027
1028     @classmethod
1029     def tagged_top_level(cls, tags):
1030         """ Returns top-level books tagged with `tags`.
1031
1032         It only returns those books which don't have ancestors which are
1033         also tagged with those tags.
1034
1035         """
1036         objects = cls.tagged.with_all(tags)
1037         return objects.filter(findable=True).exclude(ancestor__in=objects)
1038
1039     @classmethod
1040     def book_list(cls, book_filter=None):
1041         """Generates a hierarchical listing of all books.
1042
1043         Books are optionally filtered with a test function.
1044
1045         """
1046
1047         books_by_parent = {}
1048         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1049         if book_filter:
1050             books = books.filter(book_filter).distinct()
1051
1052             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1053             for book in books.iterator():
1054                 parent = book.parent_id
1055                 if parent not in book_ids:
1056                     parent = None
1057                 books_by_parent.setdefault(parent, []).append(book)
1058         else:
1059             for book in books.iterator():
1060                 books_by_parent.setdefault(book.parent_id, []).append(book)
1061
1062         orphans = []
1063         books_by_author = OrderedDict()
1064         for tag in Tag.objects.filter(category='author').iterator():
1065             books_by_author[tag] = []
1066
1067         for book in books_by_parent.get(None, ()):
1068             authors = list(book.authors().only('pk'))
1069             if authors:
1070                 for author in authors:
1071                     books_by_author[author].append(book)
1072             else:
1073                 orphans.append(book)
1074
1075         return books_by_author, orphans, books_by_parent
1076
    # Maps audience codes found in a book's extra info to (rank, label) pairs.
    # audiences_pl() sorts by these pairs and returns only the Polish labels;
    # codes missing from this table are ranked 99 there.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1087
1088     def audiences_pl(self):
1089         audiences = self.get_extra_info_json().get('audiences', [])
1090         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1091         return [a[1] for a in audiences]
1092
1093     def stage_note(self):
1094         stage = self.get_extra_info_json().get('stage')
1095         if stage and stage < '0.4':
1096             return (_('Ten utwór wymaga uwspółcześnienia'),
1097                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1098         else:
1099             return None, None
1100
1101     def choose_fragments(self, number):
1102         fragments = self.fragments.order_by()
1103         fragments_count = fragments.count()
1104         if not fragments_count and self.children.exists():
1105             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1106             fragments_count = fragments.count()
1107         if fragments_count:
1108             if fragments_count > number:
1109                 offset = randint(0, fragments_count - number)
1110             else:
1111                 offset = 0
1112             return fragments[offset : offset + number]
1113         elif self.parent:
1114             return self.parent.choose_fragments(number)
1115         else:
1116             return []
1117
1118     def choose_fragment(self):
1119         fragments = self.choose_fragments(1)
1120         if fragments:
1121             return fragments[0]
1122         else:
1123             return None
1124
1125     def fragment_data(self):
1126         fragment = self.choose_fragment()
1127         if fragment:
1128             return {
1129                 'title': fragment.book.pretty_title(),
1130                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1131             }
1132         else:
1133             return None
1134
1135     def update_popularity(self):
1136         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1137         try:
1138             pop = self.popularity
1139             pop.count = count
1140             pop.save()
1141         except BookPopularity.DoesNotExist:
1142             BookPopularity.objects.create(book=self, count=count)
1143
1144     def ridero_link(self):
1145         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1146
1147     def elevenreader_link(self):
1148         first_text = self.get_first_text()
1149         if first_text is None:
1150             return None
1151         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1152
1153     def content_warnings(self):
1154         warnings_def = {
1155             'wulgaryzmy': _('wulgaryzmy'),
1156         }
1157         warnings = self.get_extra_info_json().get('content_warnings', [])
1158         warnings = [
1159             warnings_def.get(w, w)
1160             for w in warnings
1161         ]
1162         warnings.sort()
1163         return warnings
1164
1165     def full_sort_key(self):
1166         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1167
1168     def cover_color(self):
1169         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1170
1171     @cached_render('catalogue/book_mini_box.html')
1172     def mini_box(self):
1173         return {
1174             'book': self
1175         }
1176
1177     @cached_render('catalogue/book_mini_box.html')
1178     def mini_box_nolink(self):
1179         return {
1180             'book': self,
1181             'no_link': True,
1182         }
1183
1184
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one relation with a ``Book``."""
    # Reachable from the book as ``book.popularity``; CASCADE removes the row
    # together with its book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter, written by ``Book.update_popularity()``.
    count = models.IntegerField(default=0, db_index=True)