src/catalogue/models/book.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from slugify import slugify
  11 from sortify import sortify
  12 from urllib.request import urlretrieve
  13 from django.apps import apps
  14 from django.conf import settings
  15 from django.db import connection, models, transaction
  16 import django.dispatch
  17 from django.contrib.contenttypes.fields import GenericRelation
  18 from django.template.loader import render_to_string
  19 from django.urls import reverse
  20 from django.utils.translation import gettext_lazy as _, get_language
  21 from fnpdjango.storage import BofhFileSystemStorage
  22 from lxml import html
  23 from librarian.cover import WLCover
  24 from librarian.html import transform_abstrakt
  25 from librarian.builders import builders
  26 from newtagging import managers
  27 from catalogue import constants
  28 from catalogue import fields
  29 from catalogue.models import Tag, Fragment, BookMedia
  30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  31 from catalogue.models.tag import prefetched_relations
  32 from catalogue import app_settings
  33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  34
  35 bofh_storage = BofhFileSystemStorage()
  36
  37
  38 class Book(models.Model):
  39     """Represents a book imported from WL-XML."""
    # Catalogue metadata. The first positional argument of most fields is the
    # (Polish) verbose name displayed to users.
    title = models.CharField('tytuł', max_length=32767)
    # Both sort keys are recomputed on every save() and are not editable.
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta() to the slug of `variant_of`, or to the book's own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children; used for ordering siblings.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded metadata; read through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Formatted duration string written by set_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview ("prapremiera") state; see is_accessible_to() and media_url().
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    # Generated in save() when `preview` is set and no key exists yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names that map to a `<format>_file` field on the model
    # (see has_media() / get_media()).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: `parent` is the direct parent; `ancestor` is a separate
    # many-to-many relation rebuilt via repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalised values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    # Maintained by update_has_audio().
    has_audio = models.BooleanField(default=False)

    # Signals declared on the model. `published` is sent at the end of
    # from_text_and_meta(); `html_built` is not sent anywhere in this part of
    # the file.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
 108
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and `overwrite` is off."""
        pass
 111
    class Meta:
        # Default ordering: by the author sort key, then by the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
 117
    def __str__(self):
        """Return the book's title."""
        return self.title
 120
 121     def get_extra_info_json(self):
 122         return json.loads(self.extra_info or '{}')
 123
 124     def get_initial(self):
 125         try:
 126             return re.search(r'\w', self.title, re.U).group(0)
 127         except AttributeError:
 128             return ''
 129
 130     def authors(self):
 131         return self.tags.filter(category='author')
 132
 133     def epochs(self):
 134         return self.tags.filter(category='epoch')
 135
 136     def genres(self):
 137         return self.tags.filter(category='genre')
 138
 139     def kinds(self):
 140         return self.tags.filter(category='kind')
 141
 142     def tag_unicode(self, category):
 143         relations = prefetched_relations(self, category)
 144         if relations:
 145             return ', '.join(rel.tag.name for rel in relations)
 146         else:
 147             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 148
 149     def tags_by_category(self):
 150         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 151
    def author_unicode(self):
        """Return the author string cached on the model by save()."""
        return self.cached_author
 154
 155     def kind_unicode(self):
 156         return self.tag_unicode('kind')
 157
 158     def epoch_unicode(self):
 159         return self.tag_unicode('epoch')
 160
 161     def genre_unicode(self):
 162         return self.tag_unicode('genre')
 163
 164     def translator(self):
 165         translators = self.get_extra_info_json().get('translators')
 166         if not translators:
 167             return None
 168         if len(translators) > 3:
 169             translators = translators[:2]
 170             others = ' i inni'
 171         else:
 172             others = ''
 173         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 174
 175     def cover_source(self):
 176         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 177
 178     @property
 179     def isbn_pdf(self):
 180         return self.get_extra_info_json().get('isbn_pdf')
 181
 182     @property
 183     def isbn_epub(self):
 184         return self.get_extra_info_json().get('isbn_epub')
 185
 186     @property
 187     def isbn_mobi(self):
 188         return self.get_extra_info_json().get('isbn_mobi')
 189
 190     def is_accessible_to(self, user):
 191         if not self.preview:
 192             return True
 193         if not user.is_authenticated:
 194             return False
 195         Membership = apps.get_model('club', 'Membership')
 196         if Membership.is_active_for(user):
 197             return True
 198         Funding = apps.get_model('funding', 'Funding')
 199         if Funding.objects.filter(user=user, offer__book=self):
 200             return True
 201         return False
 202
 203     def save(self, force_insert=False, force_update=False, **kwargs):
 204         from sortify import sortify
 205
 206         self.sort_key = sortify(self.title)[:120]
 207         self.title = str(self.title)  # ???
 208
 209         try:
 210             author = self.authors().first().sort_key
 211         except AttributeError:
 212             author = ''
 213         self.sort_key_author = author
 214
 215         self.cached_author = self.tag_unicode('author')
 216         self.has_audience = 'audience' in self.get_extra_info_json()
 217
 218         if self.preview and not self.preview_key:
 219             self.preview_key = get_random_hash(self.slug)[:32]
 220
 221         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 222
 223         return ret
 224
 225     def get_absolute_url(self):
 226         return reverse('book_detail', args=[self.slug])
 227
 228     def gallery_path(self):
 229         return gallery_path(self.slug)
 230
 231     def gallery_url(self):
 232         return gallery_url(self.slug)
 233
 234     def get_first_text(self):
 235         if self.html_file:
 236             return self
 237         child = self.children.all().order_by('parent_number').first()
 238         if child is not None:
 239             return child.get_first_text()
 240
 241     def get_last_text(self):
 242         if self.html_file:
 243             return self
 244         child = self.children.all().order_by('parent_number').last()
 245         if child is not None:
 246             return child.get_last_text()
 247
 248     def get_prev_text(self):
 249         if not self.parent:
 250             return None
 251         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 252         if sibling is not None:
 253             return sibling.get_last_text()
 254
 255         if self.parent.html_file:
 256             return self.parent
 257
 258         return self.parent.get_prev_text()
 259
 260     def get_next_text(self, inside=True):
 261         if inside:
 262             child = self.children.order_by('parent_number').first()
 263             if child is not None:
 264                 return child.get_first_text()
 265
 266         if not self.parent:
 267             return None
 268         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 269         if sibling is not None:
 270             return sibling.get_first_text()
 271         return self.parent.get_next_text(inside=False)
 272
 273     def get_siblings(self):
 274         if not self.parent:
 275             return []
 276         return self.parent.children.all().order_by('parent_number')
 277
 278     def get_children(self):
 279         return self.children.all().order_by('parent_number')
 280
    @property
    def name(self):
        """Alias for `title`."""
        return self.title
 284
 285     def language_code(self):
 286         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 287
 288     def language_name(self):
 289         return dict(settings.LANGUAGES).get(self.language_code(), "")
 290
 291     def is_foreign(self):
 292         return self.language_code() != settings.LANGUAGE_CODE
 293
 294     def set_audio_length(self):
 295         length = self.get_audio_length()
 296         if length > 0:
 297             self.audio_length = self.format_audio_length(length)
 298             self.save()
 299
 300     @staticmethod
 301     def format_audio_length(seconds):
 302         """
 303         >>> Book.format_audio_length(1)
 304         '0:01'
 305         >>> Book.format_audio_length(3661)
 306         '1:01:01'
 307         """
 308         if seconds < 60*60:
 309             minutes = seconds // 60
 310             seconds = seconds % 60
 311             return '%d:%02d' % (minutes, seconds)
 312         else:
 313             hours = seconds // 3600
 314             minutes = seconds % 3600 // 60
 315             seconds = seconds % 60
 316             return '%d:%02d:%02d' % (hours, minutes, seconds)
 317
 318     def get_audio_length(self):
 319         total = 0
 320         for media in self.get_mp3() or ():
 321             total += app_settings.GET_MP3_LENGTH(media.file.path)
 322         return int(total)
 323
 324     def get_time(self):
 325         return round(self.xml_file.size / 1000 * 40)
 326
 327     def has_media(self, type_):
 328         if type_ in Book.formats:
 329             return bool(getattr(self, "%s_file" % type_))
 330         else:
 331             return self.media.filter(type=type_).exists()
 332
 333     def get_media(self, type_):
 334         if self.has_media(type_):
 335             if type_ in Book.formats:
 336                 return getattr(self, "%s_file" % type_)
 337             else:
 338                 return self.media.filter(type=type_)
 339         else:
 340             return None
 341
 342     def get_mp3(self):
 343         return self.get_media("mp3")
 344
 345     def get_odt(self):
 346         return self.get_media("odt")
 347
 348     def get_ogg(self):
 349         return self.get_media("ogg")
 350
 351     def get_daisy(self):
 352         return self.get_media("daisy")
 353
 354     def get_audio_epub(self):
 355         return self.get_media("audio.epub")
 356
 357     def media_url(self, format_):
 358         media = self.get_media(format_)
 359         if media:
 360             if self.preview:
 361                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 362             else:
 363                 return media.url
 364         else:
 365             return None
 366
 367     def html_url(self):
 368         return self.media_url('html')
 369
 370     def html_nonotes_url(self):
 371         return self.media_url('html_nonotes')
 372
 373     def pdf_url(self):
 374         return self.media_url('pdf')
 375
 376     def epub_url(self):
 377         return self.media_url('epub')
 378
 379     def mobi_url(self):
 380         return self.media_url('mobi')
 381
 382     def txt_url(self):
 383         return self.media_url('txt')
 384
 385     def fb2_url(self):
 386         return self.media_url('fb2')
 387
 388     def xml_url(self):
 389         return self.media_url('xml')
 390
 391     def has_description(self):
 392         return len(self.description) > 0
 393     has_description.short_description = 'opis'
 394     has_description.boolean = True
 395
 396     def has_mp3_file(self):
 397         return self.has_media("mp3")
 398     has_mp3_file.short_description = 'MP3'
 399     has_mp3_file.boolean = True
 400
 401     def has_ogg_file(self):
 402         return self.has_media("ogg")
 403     has_ogg_file.short_description = 'OGG'
 404     has_ogg_file.boolean = True
 405
 406     def has_daisy_file(self):
 407         return self.has_media("daisy")
 408     has_daisy_file.short_description = 'DAISY'
 409     has_daisy_file.boolean = True
 410
 411     def has_sync_file(self):
 412         return settings.FEATURE_SYNCHRO and self.has_media("sync")
 413
 414     def build_sync_file(self):
 415         from lxml import html
 416         from django.core.files.base import ContentFile
 417         with self.html_file.open('rb') as f:
 418             h = html.fragment_fromstring(f.read().decode('utf-8'))
 419
 420         durations = [
 421             m['mp3'].duration
 422             for m in self.get_audiobooks()[0]
 423         ]
 424         if settings.MOCK_DURATIONS:
 425             durations = settings.MOCK_DURATIONS
 426
 427         sync = []
 428         ts = None
 429         sid = 1
 430         dirty = False
 431         for elem in h.iter():
 432             if elem.get('data-audio-ts'):
 433                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
 434                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
 435                 # check if inside verse
 436                 p = elem.getparent()
 437                 while p is not None:
 438                     # Workaround for missing ids.
 439                     if 'verse' in p.get('class', ''):
 440                         if not p.get('id'):
 441                             p.set('id', f'syn{sid}')
 442                             dirty = True
 443                             sid += 1
 444                         sync.append((ts, p.get('id')))
 445                         ts = None
 446                         break
 447                     p = p.getparent()
 448             elif ts:
 449                 cls = elem.get('class', '')
 450                 # Workaround for missing ids.
 451                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
 452                     if not elem.get('id'):
 453                         elem.set('id', f'syn{sid}')
 454                         dirty = True
 455                         sid += 1
 456                     sync.append((ts, elem.get('id')))
 457                     ts = None
 458         if dirty:
 459             htext = html.tostring(h, encoding='utf-8')
 460             with open(self.html_file.path, 'wb') as f:
 461                 f.write(htext)
 462         try:
 463             bm = self.media.get(type='sync')
 464         except:
 465             bm = BookMedia(book=self, type='sync')
 466         sync = (
 467             '27\n' + '\n'.join(
 468                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
 469             )).encode('latin1')
 470         bm.file.save(
 471             None, ContentFile(sync)
 472             )
 473
 474
 475     def get_sync(self):
 476         with self.get_media('sync').first().file.open('r') as f:
 477             sync = f.read().split('\n')
 478         offset = float(sync[0])
 479         items = []
 480         for line in sync[1:]:
 481             if not line:
 482                 continue
 483             start, end, elid = line.split()
 484             items.append([elid, float(start) + offset])
 485         return json.dumps(items)
 486
 487     def has_audio_epub_file(self):
 488         return self.has_media("audio.epub")
 489
 490     @property
 491     def media_daisy(self):
 492         return self.get_media('daisy')
 493
 494     @property
 495     def media_audio_epub(self):
 496         return self.get_media('audio.epub')
 497
 498     def get_audiobooks(self, with_children=False, processing=False):
 499         ogg_files = {}
 500         for m in self.media.filter(type='ogg').order_by().iterator():
 501             ogg_files[m.name] = m
 502
 503         audiobooks = []
 504         projects = set()
 505         total_duration = 0
 506         for mp3 in self.media.filter(type='mp3').iterator():
 507             # ogg files are always from the same project
 508             meta = mp3.get_extra_info_json()
 509             project = meta.get('project')
 510             if not project:
 511                 # temporary fallback
 512                 project = 'CzytamySłuchając'
 513
 514             projects.add((project, meta.get('funded_by', '')))
 515             total_duration += mp3.duration or 0
 516
 517             media = {'mp3': mp3}
 518
 519             ogg = ogg_files.get(mp3.name)
 520             if ogg:
 521                 media['ogg'] = ogg
 522             audiobooks.append(media)
 523
 524         if with_children:
 525             for child in self.get_children():
 526                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
 527                     with_children=True, processing=True)
 528                 audiobooks.append({'part': child})
 529                 audiobooks += ch_audiobooks
 530                 projects.update(ch_projects)
 531                 total_duration += ch_duration
 532
 533         if not processing:
 534             projects = sorted(projects)
 535             total_duration = '%d:%02d' % (
 536                 total_duration // 60,
 537                 total_duration % 60
 538             )
 539
 540         return audiobooks, projects, total_duration
 541
 542     def get_audiobooks_with_children(self):
 543         return self.get_audiobooks(with_children=True)
 544
 545     def wldocument(self, parse_dublincore=True, inherit=True):
 546         from catalogue.import_utils import ORMDocProvider
 547         from librarian.parser import WLDocument
 548
 549         if inherit and self.parent:
 550             meta_fallbacks = self.parent.cover_info()
 551         else:
 552             meta_fallbacks = None
 553
 554         return WLDocument.from_file(
 555             self.xml_file.path,
 556             provider=ORMDocProvider(self),
 557             parse_dublincore=parse_dublincore,
 558             meta_fallbacks=meta_fallbacks)
 559
 560     def wldocument2(self):
 561         from catalogue.import_utils import ORMDocProvider
 562         from librarian.document import WLDocument
 563         doc = WLDocument(
 564             self.xml_file.path,
 565             provider=ORMDocProvider(self)
 566         )
 567         doc.meta.update(self.cover_info())
 568         return doc
 569
 570
 571     @staticmethod
 572     def zip_format(format_):
 573         def pretty_file_name(book):
 574             return "%s/%s.%s" % (
 575                 book.get_extra_info_json()['author'],
 576                 book.slug,
 577                 format_)
 578
 579         field_name = "%s_file" % format_
 580         field = getattr(Book, field_name)
 581         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 582         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 583         return create_zip(paths, field.ZIP)
 584
 585     def zip_audiobooks(self, format_):
 586         bm = BookMedia.objects.filter(book=self, type=format_)
 587         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 588         licenses = set()
 589         for m in bm:
 590             license = constants.LICENSES.get(
 591                 m.get_extra_info_json().get('license'), {}
 592             ).get('locative')
 593             if license:
 594                 licenses.add(license)
 595         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 596             'licenses': licenses,
 597             'meta': self.wldocument2().meta,
 598         })
 599         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 600
 601     def search_index(self, index=None):
 602         if not self.findable:
 603             return
 604         from search.index import Index
 605         Index.index_book(self)
 606
 607     # will make problems in conjunction with paid previews
 608     def download_pictures(self, remote_gallery_url):
 609         # This is only needed for legacy relative image paths.
 610         gallery_path = self.gallery_path()
 611         # delete previous files, so we don't include old files in ebooks
 612         if os.path.isdir(gallery_path):
 613             for filename in os.listdir(gallery_path):
 614                 file_path = os.path.join(gallery_path, filename)
 615                 os.unlink(file_path)
 616         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 617         if ilustr_elements:
 618             makedirs(gallery_path)
 619             for ilustr in ilustr_elements:
 620                 ilustr_src = ilustr.get('src')
 621                 if '/' in ilustr_src:
 622                     continue
 623                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 624                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 625
 626     def load_abstract(self):
 627         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 628         if abstract is not None:
 629             self.abstract = transform_abstrakt(abstract)
 630         else:
 631             self.abstract = ''
 632
 633     def load_toc(self):
 634         self.toc = ''
 635         if self.html_file:
 636             parser = html.HTMLParser(encoding='utf-8')
 637             tree = html.parse(self.html_file.path, parser=parser)
 638             toc = tree.find('//div[@id="toc"]/ol')
 639             if toc is None or not len(toc):
 640                 return
 641             html_link = reverse('book_text', args=[self.slug])
 642             for a in toc.findall('.//a'):
 643                 a.attrib['href'] = html_link + a.attrib['href']
 644             self.toc = html.tostring(toc, encoding='unicode')
 645             # div#toc
 646
 647     @classmethod
 648     def from_xml_file(cls, xml_file, **kwargs):
 649         from django.core.files import File
 650         from librarian import dcparser
 651
 652         # use librarian to parse meta-data
 653         book_info = dcparser.parse(xml_file)
 654
 655         if not isinstance(xml_file, File):
 656             xml_file = File(open(xml_file))
 657
 658         try:
 659             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 660         finally:
 661             xml_file.close()
 662
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object stored as the book's `xml_file`.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `license`, `variant_of`, optional `parts`
                and `to_dict()`.
            overwrite: allow updating an existing book with the same slug.
            dont_build: names of builds to skip ('cover' or ebook format
                names); merged with `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (only if the book is
                findable and `settings.NO_SEARCH_INDEX` is not set).
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in `extra_info` when truthy.

        Returns:
            The saved book.

        Raises:
            Book.DoesNotExist: a book listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside `[a-z0-9-]`.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # The preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name become the book's tags; previously
        # assigned shelves ('set' tags) are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order, remembering which of them need to
        # be told that the parent's cover changed.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 791
 792     def update_references(self):
 793         Entity = apps.get_model('references', 'Entity')
 794         doc = self.wldocument2()
 795         doc._compat_assign_section_ids()
 796         doc._compat_assign_ordered_ids()
 797         refs = {}
 798         for ref_elem in doc.references():
 799             uri = ref_elem.attrib.get('href', '')
 800             if not uri:
 801                 continue
 802             if uri in refs:
 803                 ref = refs[uri]
 804             else:
 805                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
 806                 if entity_created:
 807                     try:
 808                         entity.populate()
 809                     except:
 810                         pass
 811                     else:
 812                         entity.save()
 813                 ref, ref_created = entity.reference_set.get_or_create(book=self)
 814                 refs[uri] = ref
 815                 if not ref_created:
 816                     ref.occurence_set.all().delete()
 817             sec = ref_elem.get_link()
 818             m = re.match(r'sec(\d+)', sec)
 819             assert m is not None
 820             sec = int(m.group(1))
 821             snippet = ref_elem.get_snippet()
 822             b = builders['html-snippet']()
 823             for s in snippet:
 824                 s.html_build(b)
 825             html = b.output().get_bytes().decode('utf-8')
 826
 827             ref.occurence_set.create(
 828                 section=sec,
 829                 html=html
 830             )
 831         self.reference_set.exclude(entity__uri__in=refs).delete()
 832
 833     @property
 834     def references(self):
 835         return self.reference_set.all().select_related('entity')
 836
 837     def update_has_audio(self):
 838         self.has_audio = False
 839         if self.media.filter(type='mp3').exists():
 840             self.has_audio = True
 841         if self.descendant.filter(has_audio=True).exists():
 842             self.has_audio = True
 843         self.save(update_fields=['has_audio'])
 844         if self.parent is not None:
 845             self.parent.update_has_audio()
 846
 847     def update_narrators(self):
 848         narrator_names = set()
 849         for bm in self.media.filter(type='mp3'):
 850             narrator_names.update(set(
 851                 a.strip() for a in re.split(r',|\si\s', bm.artist)
 852             ))
 853         narrators = []
 854
 855         for name in narrator_names:
 856             if not name: continue
 857             slug = slugify(name)
 858             try:
 859                 t = Tag.objects.get(category='author', slug=slug)
 860             except Tag.DoesNotExist:
 861                 sort_key = sortify(
 862                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
 863                 )
 864                 t = Tag.objects.create(
 865                     category='author',
 866                     name_pl=name,
 867                     slug=slug,
 868                     sort_key=sort_key,
 869                 )
 870             narrators.append(t)
 871         self.narrators.set(narrators)
 872
 873     @classmethod
 874     @transaction.atomic
 875     def repopulate_ancestors(cls):
 876         """Fixes the ancestry cache."""
 877         # TODO: table names
 878         cursor = connection.cursor()
 879         if connection.vendor == 'postgres':
 880             cursor.execute("TRUNCATE catalogue_book_ancestor")
 881             cursor.execute("""
 882                 WITH RECURSIVE ancestry AS (
 883                     SELECT book.id, book.parent_id
 884                     FROM catalogue_book AS book
 885                     WHERE book.parent_id IS NOT NULL
 886                     UNION
 887                     SELECT ancestor.id, book.parent_id
 888                     FROM ancestry AS ancestor, catalogue_book AS book
 889                     WHERE ancestor.parent_id = book.id
 890                         AND book.parent_id IS NOT NULL
 891                     )
 892                 INSERT INTO catalogue_book_ancestor
 893                     (from_book_id, to_book_id)
 894                     SELECT id, parent_id
 895                     FROM ancestry
 896                     ORDER BY id;
 897                 """)
 898         else:
 899             cursor.execute("DELETE FROM catalogue_book_ancestor")
 900             for b in cls.objects.exclude(parent=None):
 901                 parent = b.parent
 902                 while parent is not None:
 903                     b.ancestor.add(parent)
 904                     parent = parent.parent
 905
 906     @property
 907     def ancestors(self):
 908         if self.parent:
 909             for anc in self.parent.ancestors:
 910                 yield anc
 911             yield self.parent
 912         else:
 913             return []
 914
 915     def clear_cache(self):
 916         clear_cached_renders(self.mini_box)
 917         clear_cached_renders(self.mini_box_nolink)
 918
 919     def cover_info(self, inherit=True):
 920         """Returns a dictionary to serve as fallback for BookInfo.
 921
 922         For now, the only thing inherited is the cover image.
 923         """
 924         need = False
 925         info = {}
 926         for field in ('cover_url', 'cover_by', 'cover_source'):
 927             val = self.get_extra_info_json().get(field)
 928             if val:
 929                 info[field] = val
 930             else:
 931                 need = True
 932         if inherit and need and self.parent is not None:
 933             parent_info = self.parent.cover_info()
 934             parent_info.update(info)
 935             info = parent_info
 936         return info
 937
 938     def related_themes(self):
 939         return Tag.objects.usage_for_queryset(
 940             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 941             counts=True).filter(category='theme').order_by('-count')
 942
 943     def parent_cover_changed(self):
 944         """Called when parent book's cover image is changed."""
 945         if not self.cover_info(inherit=False):
 946             if 'cover' not in app_settings.DONT_BUILD:
 947                 self.cover.build_delay()
 948                 self.cover_clean.build_delay()
 949                 self.cover_thumb.build_delay()
 950                 self.cover_api_thumb.build_delay()
 951                 self.simple_cover.build_delay()
 952                 self.cover_ebookpoint.build_delay()
 953             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 954                 if format_ not in app_settings.DONT_BUILD:
 955                     getattr(self, '%s_file' % format_).build_delay()
 956             for child in self.children.all():
 957                 child.parent_cover_changed()
 958
 959     def other_versions(self):
 960         """Find other versions (i.e. in other languages) of the book."""
 961         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 962
 963     def parents(self):
 964         books = []
 965         parent = self.parent
 966         while parent is not None:
 967             books.insert(0, parent)
 968             parent = parent.parent
 969         return books
 970
 971     def pretty_title(self, html_links=False):
 972         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 973         books = self.parents() + [self]
 974         names.extend([(b.title, b.get_absolute_url()) for b in books])
 975
 976         if html_links:
 977             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 978         else:
 979             names = [tag[0] for tag in names]
 980         return ', '.join(names)
 981
 982     def publisher(self):
 983         publisher = self.get_extra_info_json()['publisher']
 984         if isinstance(publisher, str):
 985             return publisher
 986         elif isinstance(publisher, list):
 987             return ', '.join(publisher)
 988
 989     @classmethod
 990     def tagged_top_level(cls, tags):
 991         """ Returns top-level books tagged with `tags`.
 992
 993         It only returns those books which don't have ancestors which are
 994         also tagged with those tags.
 995
 996         """
 997         objects = cls.tagged.with_all(tags)
 998         return objects.filter(findable=True).exclude(ancestor__in=objects)
 999
1000     @classmethod
1001     def book_list(cls, book_filter=None):
1002         """Generates a hierarchical listing of all books.
1003
1004         Books are optionally filtered with a test function.
1005
1006         """
1007
1008         books_by_parent = {}
1009         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1010         if book_filter:
1011             books = books.filter(book_filter).distinct()
1012
1013             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1014             for book in books.iterator():
1015                 parent = book.parent_id
1016                 if parent not in book_ids:
1017                     parent = None
1018                 books_by_parent.setdefault(parent, []).append(book)
1019         else:
1020             for book in books.iterator():
1021                 books_by_parent.setdefault(book.parent_id, []).append(book)
1022
1023         orphans = []
1024         books_by_author = OrderedDict()
1025         for tag in Tag.objects.filter(category='author').iterator():
1026             books_by_author[tag] = []
1027
1028         for book in books_by_parent.get(None, ()):
1029             authors = list(book.authors().only('pk'))
1030             if authors:
1031                 for author in authors:
1032                     books_by_author[author].append(book)
1033             else:
1034                 orphans.append(book)
1035
1036         return books_by_author, orphans, books_by_parent
1037
    # Maps audience codes from a book's extra info to a
    # (sort rank, Polish display label) pair; looked up by audiences_pl(),
    # which sorts on the pair and returns only the labels.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1048
1049     def audiences_pl(self):
1050         audiences = self.get_extra_info_json().get('audiences', [])
1051         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1052         return [a[1] for a in audiences]
1053
1054     def stage_note(self):
1055         stage = self.get_extra_info_json().get('stage')
1056         if stage and stage < '0.4':
1057             return (_('Ten utwór wymaga uwspółcześnienia'),
1058                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1059         else:
1060             return None, None
1061
1062     def choose_fragments(self, number):
1063         fragments = self.fragments.order_by()
1064         fragments_count = fragments.count()
1065         if not fragments_count and self.children.exists():
1066             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1067             fragments_count = fragments.count()
1068         if fragments_count:
1069             if fragments_count > number:
1070                 offset = randint(0, fragments_count - number)
1071             else:
1072                 offset = 0
1073             return fragments[offset : offset + number]
1074         elif self.parent:
1075             return self.parent.choose_fragments(number)
1076         else:
1077             return []
1078
1079     def choose_fragment(self):
1080         fragments = self.choose_fragments(1)
1081         if fragments:
1082             return fragments[0]
1083         else:
1084             return None
1085
1086     def fragment_data(self):
1087         fragment = self.choose_fragment()
1088         if fragment:
1089             return {
1090                 'title': fragment.book.pretty_title(),
1091                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1092             }
1093         else:
1094             return None
1095
1096     def update_popularity(self):
1097         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1098         try:
1099             pop = self.popularity
1100             pop.count = count
1101             pop.save()
1102         except BookPopularity.DoesNotExist:
1103             BookPopularity.objects.create(book=self, count=count)
1104
1105     def ridero_link(self):
1106         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1107
1108     def like(self, user):
1109         from social.utils import likes, get_set, set_sets
1110         if not likes(user, self):
1111             tag = get_set(user, '')
1112             set_sets(user, self, [tag])
1113
1114     def unlike(self, user):
1115         from social.utils import likes, set_sets
1116         if likes(user, self):
1117             set_sets(user, self, [])
1118
1119     def full_sort_key(self):
1120         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1121
1122     def cover_color(self):
1123         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1124
1125     @cached_render('catalogue/book_mini_box.html')
1126     def mini_box(self):
1127         return {
1128             'book': self
1129         }
1130
1131     @cached_render('catalogue/book_mini_box.html')
1132     def mini_box_nolink(self):
1133         return {
1134             'book': self,
1135             'no_link': True,
1136         }
1137
1138
class BookPopularity(models.Model):
    """Popularity counter attached one-to-one to a Book."""
    # The counted book; reachable from the book as ``book.popularity`` and
    # removed together with it (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Popularity value, indexed in the database; Book.update_popularity()
    # writes the number of distinct users with the book in a set here.
    count = models.IntegerField(default=0, db_index=True)