src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import gettext_lazy as _, get_language
  19 from fnpdjango.storage import BofhFileSystemStorage
  20 from lxml import html
  21 from librarian.cover import WLCover
  22 from librarian.html import transform_abstrakt
  23 from librarian.builders import builders
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue import fields
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  32
# Shared storage instance passed as `storage=` to the file fields declared on Book below.
bofh_storage = BofhFileSystemStorage()
  34
  35
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField('tytuł', max_length=32767)
    # Both sort keys are non-editable; save() recomputes them on every save.
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # from_text_and_meta() sets this to the slug of the work this book is a variant of,
    # or to the book's own slug when it is not a variant.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    # NOTE(review): the verbose name below looks like a typo for 'modyfikacji'; it is a
    # runtime string, so correcting it would need a migration.
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children (used for ordering by the navigation helpers).
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON document stored as text; read through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state: access is checked by is_accessible_to(); save() generates
    # preview_key when preview is set and no key exists yet.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    # NOTE(review): 'mniaturka' below looks like a typo for 'miniaturka' (runtime string).
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names backed by a `<format>_file` field on this model; has_media() and
    # get_media() treat any other type name as a related media record.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Direct parent plus a many-to-many table of all ancestors
    # (the latter is rebuilt by repopulate_ancestors()).
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values recomputed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)
    translators = models.ManyToManyField(Tag, blank=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 104
 105     class AlreadyExists(Exception):
 106         pass
 107
 108     class Meta:
 109         ordering = ('sort_key_author', 'sort_key')
 110         verbose_name = 'książka'
 111         verbose_name_plural = 'książki'
 112         app_label = 'catalogue'
 113
 114     def __str__(self):
 115         return self.title
 116
 117     def get_extra_info_json(self):
 118         return json.loads(self.extra_info or '{}')
 119
 120     def get_initial(self):
 121         try:
 122             return re.search(r'\w', self.title, re.U).group(0)
 123         except AttributeError:
 124             return ''
 125
 126     def authors(self):
 127         return self.tags.filter(category='author')
 128
 129     def epochs(self):
 130         return self.tags.filter(category='epoch')
 131
 132     def genres(self):
 133         return self.tags.filter(category='genre')
 134
 135     def kinds(self):
 136         return self.tags.filter(category='kind')
 137
 138     def tag_unicode(self, category):
 139         relations = prefetched_relations(self, category)
 140         if relations:
 141             return ', '.join(rel.tag.name for rel in relations)
 142         else:
 143             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 144
 145     def tags_by_category(self):
 146         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 147
 148     def author_unicode(self):
 149         return self.cached_author
 150
 151     def kind_unicode(self):
 152         return self.tag_unicode('kind')
 153
 154     def epoch_unicode(self):
 155         return self.tag_unicode('epoch')
 156
 157     def genre_unicode(self):
 158         return self.tag_unicode('genre')
 159
 160     def translator(self):
 161         translators = self.get_extra_info_json().get('translators')
 162         if not translators:
 163             return None
 164         if len(translators) > 3:
 165             translators = translators[:2]
 166             others = ' i inni'
 167         else:
 168             others = ''
 169         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 170
 171     def cover_source(self):
 172         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 173
 174     @property
 175     def isbn_pdf(self):
 176         return self.get_extra_info_json().get('isbn_pdf')
 177
 178     @property
 179     def isbn_epub(self):
 180         return self.get_extra_info_json().get('isbn_epub')
 181
 182     @property
 183     def isbn_mobi(self):
 184         return self.get_extra_info_json().get('isbn_mobi')
 185
 186     def is_accessible_to(self, user):
 187         if not self.preview:
 188             return True
 189         if not user.is_authenticated:
 190             return False
 191         Membership = apps.get_model('club', 'Membership')
 192         if Membership.is_active_for(user):
 193             return True
 194         Funding = apps.get_model('funding', 'Funding')
 195         if Funding.objects.filter(user=user, offer__book=self):
 196             return True
 197         return False
 198
 199     def save(self, force_insert=False, force_update=False, **kwargs):
 200         from sortify import sortify
 201
 202         self.sort_key = sortify(self.title)[:120]
 203         self.title = str(self.title)  # ???
 204
 205         try:
 206             author = self.authors().first().sort_key
 207         except AttributeError:
 208             author = ''
 209         self.sort_key_author = author
 210
 211         self.cached_author = self.tag_unicode('author')
 212         self.has_audience = 'audience' in self.get_extra_info_json()
 213
 214         if self.preview and not self.preview_key:
 215             self.preview_key = get_random_hash(self.slug)[:32]
 216
 217         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 218
 219         return ret
 220
 221     def get_absolute_url(self):
 222         return reverse('book_detail', args=[self.slug])
 223
 224     def gallery_path(self):
 225         return gallery_path(self.slug)
 226
 227     def gallery_url(self):
 228         return gallery_url(self.slug)
 229
 230     def get_first_text(self):
 231         if self.html_file:
 232             return self
 233         child = self.children.all().order_by('parent_number').first()
 234         if child is not None:
 235             return child.get_first_text()
 236
 237     def get_last_text(self):
 238         if self.html_file:
 239             return self
 240         child = self.children.all().order_by('parent_number').last()
 241         if child is not None:
 242             return child.get_last_text()
 243
 244     def get_prev_text(self):
 245         if not self.parent:
 246             return None
 247         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 248         if sibling is not None:
 249             return sibling.get_last_text()
 250
 251         if self.parent.html_file:
 252             return self.parent
 253
 254         return self.parent.get_prev_text()
 255
 256     def get_next_text(self, inside=True):
 257         if inside:
 258             child = self.children.order_by('parent_number').first()
 259             if child is not None:
 260                 return child.get_first_text()
 261
 262         if not self.parent:
 263             return None
 264         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 265         if sibling is not None:
 266             return sibling.get_first_text()
 267         return self.parent.get_next_text(inside=False)
 268
 269     def get_child_audiobook(self):
 270         BookMedia = apps.get_model('catalogue', 'BookMedia')
 271         if not BookMedia.objects.filter(book__ancestor=self).exists():
 272             return None
 273         for child in self.children.order_by('parent_number').all():
 274             if child.has_mp3_file():
 275                 return child
 276             child_sub = child.get_child_audiobook()
 277             if child_sub is not None:
 278                 return child_sub
 279
 280     def get_siblings(self):
 281         if not self.parent:
 282             return []
 283         return self.parent.children.all().order_by('parent_number')
 284
 285     def get_children(self):
 286         return self.children.all().order_by('parent_number')
 287
 288     @property
 289     def name(self):
 290         return self.title
 291
 292     def language_code(self):
 293         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 294
 295     def language_name(self):
 296         return dict(settings.LANGUAGES).get(self.language_code(), "")
 297
 298     def is_foreign(self):
 299         return self.language_code() != settings.LANGUAGE_CODE
 300
 301     def set_audio_length(self):
 302         length = self.get_audio_length()
 303         if length > 0:
 304             self.audio_length = self.format_audio_length(length)
 305             self.save()
 306
 307     @staticmethod
 308     def format_audio_length(seconds):
 309         """
 310         >>> Book.format_audio_length(1)
 311         '0:01'
 312         >>> Book.format_audio_length(3661)
 313         '1:01:01'
 314         """
 315         if seconds < 60*60:
 316             minutes = seconds // 60
 317             seconds = seconds % 60
 318             return '%d:%02d' % (minutes, seconds)
 319         else:
 320             hours = seconds // 3600
 321             minutes = seconds % 3600 // 60
 322             seconds = seconds % 60
 323             return '%d:%02d:%02d' % (hours, minutes, seconds)
 324
 325     def get_audio_length(self):
 326         total = 0
 327         for media in self.get_mp3() or ():
 328             total += app_settings.GET_MP3_LENGTH(media.file.path)
 329         return int(total)
 330
 331     def get_time(self):
 332         return round(self.xml_file.size / 1000 * 40)
 333
 334     def has_media(self, type_):
 335         if type_ in Book.formats:
 336             return bool(getattr(self, "%s_file" % type_))
 337         else:
 338             return self.media.filter(type=type_).exists()
 339
 340     def has_audio(self):
 341         return self.has_media('mp3')
 342
 343     def get_media(self, type_):
 344         if self.has_media(type_):
 345             if type_ in Book.formats:
 346                 return getattr(self, "%s_file" % type_)
 347             else:
 348                 return self.media.filter(type=type_)
 349         else:
 350             return None
 351
 352     def get_mp3(self):
 353         return self.get_media("mp3")
 354
 355     def get_odt(self):
 356         return self.get_media("odt")
 357
 358     def get_ogg(self):
 359         return self.get_media("ogg")
 360
 361     def get_daisy(self):
 362         return self.get_media("daisy")
 363
 364     def get_audio_epub(self):
 365         return self.get_media("audio.epub")
 366
 367     def media_url(self, format_):
 368         media = self.get_media(format_)
 369         if media:
 370             if self.preview:
 371                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 372             else:
 373                 return media.url
 374         else:
 375             return None
 376
 377     def html_url(self):
 378         return self.media_url('html')
 379
 380     def html_nonotes_url(self):
 381         return self.media_url('html_nonotes')
 382
 383     def pdf_url(self):
 384         return self.media_url('pdf')
 385
 386     def epub_url(self):
 387         return self.media_url('epub')
 388
 389     def mobi_url(self):
 390         return self.media_url('mobi')
 391
 392     def txt_url(self):
 393         return self.media_url('txt')
 394
 395     def fb2_url(self):
 396         return self.media_url('fb2')
 397
 398     def xml_url(self):
 399         return self.media_url('xml')
 400
 401     def has_description(self):
 402         return len(self.description) > 0
 403     has_description.short_description = 'opis'
 404     has_description.boolean = True
 405
 406     def has_mp3_file(self):
 407         return self.has_media("mp3")
 408     has_mp3_file.short_description = 'MP3'
 409     has_mp3_file.boolean = True
 410
 411     def has_ogg_file(self):
 412         return self.has_media("ogg")
 413     has_ogg_file.short_description = 'OGG'
 414     has_ogg_file.boolean = True
 415
 416     def has_daisy_file(self):
 417         return self.has_media("daisy")
 418     has_daisy_file.short_description = 'DAISY'
 419     has_daisy_file.boolean = True
 420
 421     def has_sync_file(self):
 422         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 423
 424     def build_sync_file(self):
 425         from lxml import html
 426         from django.core.files.base import ContentFile
 427         with self.html_file.open('rb') as f:
 428             h = html.fragment_fromstring(f.read().decode('utf-8'))
 429
 430         durations = [
 431             m['mp3'].duration
 432             for m in self.get_audiobooks()[0]
 433         ]
 434         if settings.MOCK_DURATIONS:
 435             durations = settings.MOCK_DURATIONS
 436
 437         sync = []
 438         ts = None
 439         sid = 1
 440         dirty = False
 441         for elem in h.iter():
 442             if elem.get('data-audio-ts'):
 443                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 444                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 445                 # check if inside verse
 446                 p = elem.getparent()
 447                 while p is not None:
 448                     # Workaround for missing ids.
 449                     if 'verse' in p.get('class', ''):
 450                         if not p.get('id'):
 451                             p.set('id', f'syn{sid}')
 452                             dirty = True
 453                             sid += 1
 454                         sync.append((ts, p.get('id')))
 455                         ts = None
 456                         break
 457                     p = p.getparent()
 458             elif ts:
 459                 cls = elem.get('class', '')
 460                 # Workaround for missing ids.
 461                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 462                     if not elem.get('id'):
 463                         elem.set('id', f'syn{sid}')
 464                         dirty = True
 465                         sid += 1
 466                     sync.append((ts, elem.get('id')))
 467                     ts = None
 468         if dirty:
 469             htext = html.tostring(h, encoding='utf-8')
 470             with open(self.html_file.path, 'wb') as f:
 471                 f.write(htext)
 472         try:
 473             bm = self.media.get(type='sync')
 474         except:
 475             bm = BookMedia(book=self, type='sync')
 476         sync = (
 477             '27\n' + '\n'.join(
 478                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 479             )).encode('latin1')
 480         bm.file.save(
 481             None, ContentFile(sync)
 482             )
 483
 484
 485     def get_sync(self):
 486         with self.get_media('sync').first().file.open('r') as f:
 487             sync = f.read().split('\n')
 488         offset = float(sync[0])
 489         items = []
 490         for line in sync[1:]:
 491             if not line:
 492                 continue
 493             start, end, elid = line.split()
 494             items.append([elid, float(start) + offset])
 495         return json.dumps(items)
 496
 497     def has_audio_epub_file(self):
 498         return self.has_media("audio.epub")
 499
 500     @property
 501     def media_daisy(self):
 502         return self.get_media('daisy')
 503
 504     @property
 505     def media_audio_epub(self):
 506         return self.get_media('audio.epub')
 507
 508     def get_audiobooks(self):
 509         ogg_files = {}
 510         for m in self.media.filter(type='ogg').order_by().iterator():
 511             ogg_files[m.name] = m
 512
 513         audiobooks = []
 514         projects = set()
 515         total_duration = 0
 516         for mp3 in self.media.filter(type='mp3').iterator():
 517             # ogg files are always from the same project
 518             meta = mp3.get_extra_info_json()
 519             project = meta.get('project')
 520             if not project:
 521                 # temporary fallback
 522                 project = 'CzytamySłuchając'
 523
 524             projects.add((project, meta.get('funded_by', '')))
 525             total_duration += mp3.duration or 0
 526
 527             media = {'mp3': mp3}
 528
 529             ogg = ogg_files.get(mp3.name)
 530             if ogg:
 531                 media['ogg'] = ogg
 532             audiobooks.append(media)
 533
 534         projects = sorted(projects)
 535         total_duration = '%d:%02d' % (
 536             total_duration // 60,
 537             total_duration % 60
 538         )
 539         return audiobooks, projects, total_duration
 540
 541     def wldocument(self, parse_dublincore=True, inherit=True):
 542         from catalogue.import_utils import ORMDocProvider
 543         from librarian.parser import WLDocument
 544
 545         if inherit and self.parent:
 546             meta_fallbacks = self.parent.cover_info()
 547         else:
 548             meta_fallbacks = None
 549
 550         return WLDocument.from_file(
 551             self.xml_file.path,
 552             provider=ORMDocProvider(self),
 553             parse_dublincore=parse_dublincore,
 554             meta_fallbacks=meta_fallbacks)
 555
 556     def wldocument2(self):
 557         from catalogue.import_utils import ORMDocProvider
 558         from librarian.document import WLDocument
 559         doc = WLDocument(
 560             self.xml_file.path,
 561             provider=ORMDocProvider(self)
 562         )
 563         doc.meta.update(self.cover_info())
 564         return doc
 565
 566
 567     @staticmethod
 568     def zip_format(format_):
 569         def pretty_file_name(book):
 570             return "%s/%s.%s" % (
 571                 book.get_extra_info_json()['author'],
 572                 book.slug,
 573                 format_)
 574
 575         field_name = "%s_file" % format_
 576         field = getattr(Book, field_name)
 577         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 578         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 579         return create_zip(paths, field.ZIP)
 580
 581     def zip_audiobooks(self, format_):
 582         bm = BookMedia.objects.filter(book=self, type=format_)
 583         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 584         licenses = set()
 585         for m in bm:
 586             license = constants.LICENSES.get(
 587                 m.get_extra_info_json().get('license'), {}
 588             ).get('locative')
 589             if license:
 590                 licenses.add(license)
 591         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 592             'licenses': licenses,
 593             'meta': self.wldocument2().meta,
 594         })
 595         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 596
 597     def search_index(self, index=None):
 598         if not self.findable:
 599             return
 600         from search.index import Index
 601         Index.index_book(self)
 602
 603     # will make problems in conjunction with paid previews
 604     def download_pictures(self, remote_gallery_url):
 605         # This is only needed for legacy relative image paths.
 606         gallery_path = self.gallery_path()
 607         # delete previous files, so we don't include old files in ebooks
 608         if os.path.isdir(gallery_path):
 609             for filename in os.listdir(gallery_path):
 610                 file_path = os.path.join(gallery_path, filename)
 611                 os.unlink(file_path)
 612         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 613         if ilustr_elements:
 614             makedirs(gallery_path)
 615             for ilustr in ilustr_elements:
 616                 ilustr_src = ilustr.get('src')
 617                 if '/' in ilustr_src:
 618                     continue
 619                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 620                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 621
 622     def load_abstract(self):
 623         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 624         if abstract is not None:
 625             self.abstract = transform_abstrakt(abstract)
 626         else:
 627             self.abstract = ''
 628
 629     def load_toc(self):
 630         self.toc = ''
 631         if self.html_file:
 632             parser = html.HTMLParser(encoding='utf-8')
 633             tree = html.parse(self.html_file.path, parser=parser)
 634             toc = tree.find('//div[@id="toc"]/ol')
 635             if toc is None or not len(toc):
 636                 return
 637             html_link = reverse('book_text', args=[self.slug])
 638             for a in toc.findall('.//a'):
 639                 a.attrib['href'] = html_link + a.attrib['href']
 640             self.toc = html.tostring(toc, encoding='unicode')
 641             # div#toc
 642
 643     @classmethod
 644     def from_xml_file(cls, xml_file, **kwargs):
 645         from django.core.files import File
 646         from librarian import dcparser
 647
 648         # use librarian to parse meta-data
 649         book_info = dcparser.parse(xml_file)
 650
 651         if not isinstance(xml_file, File):
 652             xml_file = File(open(xml_file))
 653
 654         try:
 655             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 656         finally:
 657             xml_file.close()
 658
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata, then schedule builds.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads `url.slug`, `language`,
                `title`, `license`, `variant_of`, `to_dict()` and, when present,
                `parts` (each part exposing `slug`).
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of names to skip when building ('cover' or a
                format name); merged with app_settings.DONT_BUILD.
            search_index: schedule search indexing (also requires `findable`
                and settings.NO_SEARCH_INDEX being falsy).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a preview
                lasting that many days from today.
            findable: stored on the book.
            logo, logo_mono, logo_alt: when truthy, stored in extra_info under
                the same names.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is not in the database.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        # Tags.tags_from_info() yields (tag, relation) pairs; pairs with a falsy
        # relation become the book's tags, 'translator' pairs its translators.
        meta_tags = Tag.tags_from_info(book_info)

        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Re-attach the listed parts in order; parent_number is the position in `parts`.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 787
 788     def update_references(self):
 789         Entity = apps.get_model('references', 'Entity')
 790         doc = self.wldocument2()
 791         doc._compat_assign_section_ids()
 792         doc._compat_assign_ordered_ids()
 793         refs = {}
 794         for ref_elem in doc.references():
 795             uri = ref_elem.attrib.get('href', '')
 796             if not uri:
 797                 continue
 798             if uri in refs:
 799                 ref = refs[uri]
 800             else:
 801                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 802                 if entity_created:
 803                     try:
 804                         entity.populate()
 805                     except:
 806                         pass
 807                     else:
 808                         entity.save()
 809                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 810                 refs[uri] = ref
 811                 if not ref_created:
 812                     ref.occurence_set.all().delete()
 813             sec = ref_elem.get_link()
 814             m = re.match(r'sec(\d+)', sec)
 815             assert m is not None
 816             sec = int(m.group(1))
 817             snippet = ref_elem.get_snippet()
 818             b = builders['html-snippet']()
 819             for s in snippet:
 820                 s.html_build(b)
 821             html = b.output().get_bytes().decode('utf-8')
 822
 823             ref.occurence_set.create(
 824                 section=sec,
 825                 html=html
 826             )
 827         self.reference_set.exclude(entity__uri__in=refs).delete()
 828
 829     @property
 830     def references(self):
 831         return self.reference_set.all().select_related('entity')
 832
 833     @classmethod
 834     @transaction.atomic
 835     def repopulate_ancestors(cls):
 836         """Fixes the ancestry cache."""
 837         # TODO: table names
 838         cursor = connection.cursor()
 839         if connection.vendor == 'postgres':
 840             cursor.execute("TRUNCATE catalogue_book_ancestor")
 841             cursor.execute("""
 842                 WITH RECURSIVE ancestry AS (
 843                     SELECT book.id, book.parent_id
 844                     FROM catalogue_book AS book
 845                     WHERE book.parent_id IS NOT NULL
 846                     UNION
 847                     SELECT ancestor.id, book.parent_id
 848                     FROM ancestry AS ancestor, catalogue_book AS book
 849                     WHERE ancestor.parent_id = book.id
 850                         AND book.parent_id IS NOT NULL
 851                     )
 852                 INSERT INTO catalogue_book_ancestor
 853                     (from_book_id, to_book_id)
 854                     SELECT id, parent_id
 855                     FROM ancestry
 856                     ORDER BY id;
 857                 """)
 858         else:
 859             cursor.execute("DELETE FROM catalogue_book_ancestor")
 860             for b in cls.objects.exclude(parent=None):
 861                 parent = b.parent
 862                 while parent is not None:
 863                     b.ancestor.add(parent)
 864                     parent = parent.parent
 865
 866     @property
 867     def ancestors(self):
 868         if self.parent:
 869             for anc in self.parent.ancestors:
 870                 yield anc
 871             yield self.parent
 872         else:
 873             return []
 874
 875     def clear_cache(self):
 876         clear_cached_renders(self.mini_box)
 877         clear_cached_renders(self.mini_box_nolink)
 878
 879     def cover_info(self, inherit=True):
 880         """Returns a dictionary to serve as fallback for BookInfo.
 881
 882         For now, the only thing inherited is the cover image.
 883         """
 884         need = False
 885         info = {}
 886         for field in ('cover_url', 'cover_by', 'cover_source'):
 887             val = self.get_extra_info_json().get(field)
 888             if val:
 889                 info[field] = val
 890             else:
 891                 need = True
 892         if inherit and need and self.parent is not None:
 893             parent_info = self.parent.cover_info()
 894             parent_info.update(info)
 895             info = parent_info
 896         return info
 897
 898     def related_themes(self):
 899         return Tag.objects.usage_for_queryset(
 900             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 901             counts=True).filter(category='theme').order_by('-count')
 902
 903     def parent_cover_changed(self):
 904         """Called when parent book's cover image is changed."""
 905         if not self.cover_info(inherit=False):
 906             if 'cover' not in app_settings.DONT_BUILD:
 907                 self.cover.build_delay()
 908                 self.cover_clean.build_delay()
 909                 self.cover_thumb.build_delay()
 910                 self.cover_api_thumb.build_delay()
 911                 self.simple_cover.build_delay()
 912                 self.cover_ebookpoint.build_delay()
 913             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 914                 if format_ not in app_settings.DONT_BUILD:
 915                     getattr(self, '%s_file' % format_).build_delay()
 916             for child in self.children.all():
 917                 child.parent_cover_changed()
 918
 919     def other_versions(self):
 920         """Find other versions (i.e. in other languages) of the book."""
 921         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 922
 923     def parents(self):
 924         books = []
 925         parent = self.parent
 926         while parent is not None:
 927             books.insert(0, parent)
 928             parent = parent.parent
 929         return books
 930
 931     def pretty_title(self, html_links=False):
 932         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 933         books = self.parents() + [self]
 934         names.extend([(b.title, b.get_absolute_url()) for b in books])
 935
 936         if html_links:
 937             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 938         else:
 939             names = [tag[0] for tag in names]
 940         return ', '.join(names)
 941
 942     def publisher(self):
 943         publisher = self.get_extra_info_json()['publisher']
 944         if isinstance(publisher, str):
 945             return publisher
 946         elif isinstance(publisher, list):
 947             return ', '.join(publisher)
 948
 949     @classmethod
 950     def tagged_top_level(cls, tags):
 951         """ Returns top-level books tagged with `tags`.
 952
 953         It only returns those books which don't have ancestors which are
 954         also tagged with those tags.
 955
 956         """
 957         objects = cls.tagged.with_all(tags)
 958         return objects.filter(findable=True).exclude(ancestor__in=objects)
 959
 960     @classmethod
 961     def book_list(cls, book_filter=None):
 962         """Generates a hierarchical listing of all books.
 963
 964         Books are optionally filtered with a test function.
 965
 966         """
 967
 968         books_by_parent = {}
 969         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 970         if book_filter:
 971             books = books.filter(book_filter).distinct()
 972
 973             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 974             for book in books.iterator():
 975                 parent = book.parent_id
 976                 if parent not in book_ids:
 977                     parent = None
 978                 books_by_parent.setdefault(parent, []).append(book)
 979         else:
 980             for book in books.iterator():
 981                 books_by_parent.setdefault(book.parent_id, []).append(book)
 982
 983         orphans = []
 984         books_by_author = OrderedDict()
 985         for tag in Tag.objects.filter(category='author').iterator():
 986             books_by_author[tag] = []
 987
 988         for book in books_by_parent.get(None, ()):
 989             authors = list(book.authors().only('pk'))
 990             if authors:
 991                 for author in authors:
 992                     books_by_author[author].append(book)
 993             else:
 994                 orphans.append(book)
 995
 996         return books_by_author, orphans, books_by_parent
 997
 998     _audiences_pl = {
 999         "SP": (1, "szkoła podstawowa"),
1000         "SP1": (1, "szkoła podstawowa"),
1001         "SP2": (1, "szkoła podstawowa"),
1002         "SP3": (1, "szkoła podstawowa"),
1003         "P": (1, "szkoła podstawowa"),
1004         "G": (2, "gimnazjum"),
1005         "L": (3, "liceum"),
1006         "LP": (3, "liceum"),
1007     }
1008
1009     def audiences_pl(self):
1010         audiences = self.get_extra_info_json().get('audiences', [])
1011         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1012         return [a[1] for a in audiences]
1013
1014     def stage_note(self):
1015         stage = self.get_extra_info_json().get('stage')
1016         if stage and stage < '0.4':
1017             return (_('Ten utwór wymaga uwspółcześnienia'),
1018                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1019         else:
1020             return None, None
1021
1022     def choose_fragments(self, number):
1023         fragments = self.fragments.order_by()
1024         fragments_count = fragments.count()
1025         if not fragments_count and self.children.exists():
1026             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1027             fragments_count = fragments.count()
1028         if fragments_count:
1029             if fragments_count > number:
1030                 offset = randint(0, fragments_count - number)
1031             else:
1032                 offset = 0
1033             return fragments[offset : offset + number]
1034         elif self.parent:
1035             return self.parent.choose_fragments(number)
1036         else:
1037             return []
1038
1039     def choose_fragment(self):
1040         fragments = self.choose_fragments(1)
1041         if fragments:
1042             return fragments[0]
1043         else:
1044             return None
1045
1046     def fragment_data(self):
1047         fragment = self.choose_fragment()
1048         if fragment:
1049             return {
1050                 'title': fragment.book.pretty_title(),
1051                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1052             }
1053         else:
1054             return None
1055
1056     def update_popularity(self):
1057         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1058         try:
1059             pop = self.popularity
1060             pop.count = count
1061             pop.save()
1062         except BookPopularity.DoesNotExist:
1063             BookPopularity.objects.create(book=self, count=count)
1064
1065     def ridero_link(self):
1066         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1067
1068     def like(self, user):
1069         from social.utils import likes, get_set, set_sets
1070         if not likes(user, self):
1071             tag = get_set(user, '')
1072             set_sets(user, self, [tag])
1073
1074     def unlike(self, user):
1075         from social.utils import likes, set_sets
1076         if likes(user, self):
1077             set_sets(user, self, [])
1078
1079     def full_sort_key(self):
1080         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1081
1082     def cover_color(self):
1083         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1084
1085     @cached_render('catalogue/book_mini_box.html')
1086     def mini_box(self):
1087         return {
1088             'book': self
1089         }
1090
1091     @cached_render('catalogue/book_mini_box.html')
1092     def mini_box_nolink(self):
1093         return {
1094             'book': self,
1095             'no_link': True,
1096         }
1097
1098
1099 class BookPopularity(models.Model):
1100     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1101     count = models.IntegerField(default=0, db_index=True)