src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Shared storage instance; passed as ``storage=`` to the ``cover`` field of Book.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# ``upload_to`` callables for the generated cover variants; each one fills
# the book's slug into a fixed ``book/<variant>/<slug>.jpg`` template.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
  54 def _ebook_upload_to(upload_path):
  55     return UploadToPath(upload_path)
  56
  57
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    # Descriptive data and sort keys; ``sort_key``, ``sort_key_author`` are
    # recomputed in save().
    title = models.CharField(_('title'), max_length=32767)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; used for ordering siblings.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded text; decoded by get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview flag, its end date and the key used in embargo links.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Format names; has_media()/get_media() treat names listed in ``formats``
    # as ``<format>_file`` attributes on the book itself.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent and the cached set of all ancestors (rebuilt by
    # repopulate_ancestors()).
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
 143
 144     class AlreadyExists(Exception):
 145         pass
 146
    class Meta:
        # Default ordering: author sort key first, then the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        # Explicit app label for this model.
        app_label = 'catalogue'
 152
 153     def __str__(self):
 154         return self.title
 155
 156     def get_extra_info_json(self):
 157         return json.loads(self.extra_info or '{}')
 158
 159     def get_initial(self):
 160         try:
 161             return re.search(r'\w', self.title, re.U).group(0)
 162         except AttributeError:
 163             return ''
 164
 165     def authors(self):
 166         return self.tags.filter(category='author')
 167
 168     def epochs(self):
 169         return self.tags.filter(category='epoch')
 170
 171     def genres(self):
 172         return self.tags.filter(category='genre')
 173
 174     def kinds(self):
 175         return self.tags.filter(category='kind')
 176
 177     def tag_unicode(self, category):
 178         relations = prefetched_relations(self, category)
 179         if relations:
 180             return ', '.join(rel.tag.name for rel in relations)
 181         else:
 182             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 183
 184     def tags_by_category(self):
 185         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 186
 187     def author_unicode(self):
 188         return self.cached_author
 189
 190     def kind_unicode(self):
 191         return self.tag_unicode('kind')
 192
 193     def epoch_unicode(self):
 194         return self.tag_unicode('epoch')
 195
 196     def genre_unicode(self):
 197         return self.tag_unicode('genre')
 198
 199     def translators(self):
 200         translators = self.get_extra_info_json().get('translators') or []
 201         return [
 202             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 203         ]
 204
 205     def translator(self):
 206         translators = self.get_extra_info_json().get('translators')
 207         if not translators:
 208             return None
 209         if len(translators) > 3:
 210             translators = translators[:2]
 211             others = ' i inni'
 212         else:
 213             others = ''
 214         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 215
 216     def cover_source(self):
 217         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 218
 219     @property
 220     def isbn_pdf(self):
 221         return self.get_extra_info_json().get('isbn_pdf')
 222
 223     @property
 224     def isbn_epub(self):
 225         return self.get_extra_info_json().get('isbn_epub')
 226
 227     @property
 228     def isbn_mobi(self):
 229         return self.get_extra_info_json().get('isbn_mobi')
 230
 231     def is_accessible_to(self, user):
 232         if not self.preview:
 233             return True
 234         if not user.is_authenticated:
 235             return False
 236         Membership = apps.get_model('club', 'Membership')
 237         if Membership.is_active_for(user):
 238             return True
 239         Funding = apps.get_model('funding', 'Funding')
 240         if Funding.objects.filter(user=user, offer__book=self):
 241             return True
 242         return False
 243
 244     def save(self, force_insert=False, force_update=False, **kwargs):
 245         from sortify import sortify
 246
 247         self.sort_key = sortify(self.title)[:120]
 248         self.title = str(self.title)  # ???
 249
 250         try:
 251             author = self.authors().first().sort_key
 252         except AttributeError:
 253             author = ''
 254         self.sort_key_author = author
 255
 256         self.cached_author = self.tag_unicode('author')
 257         self.has_audience = 'audience' in self.get_extra_info_json()
 258
 259         if self.preview and not self.preview_key:
 260             self.preview_key = get_random_hash(self.slug)[:32]
 261
 262         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 263
 264         return ret
 265
 266     def get_absolute_url(self):
 267         return reverse('book_detail', args=[self.slug])
 268
 269     def gallery_path(self):
 270         return gallery_path(self.slug)
 271
 272     def gallery_url(self):
 273         return gallery_url(self.slug)
 274
 275     def get_first_text(self):
 276         if self.html_file:
 277             return self
 278         child = self.children.all().order_by('parent_number').first()
 279         if child is not None:
 280             return child.get_first_text()
 281
 282     def get_last_text(self):
 283         if self.html_file:
 284             return self
 285         child = self.children.all().order_by('parent_number').last()
 286         if child is not None:
 287             return child.get_last_text()
 288
 289     def get_prev_text(self):
 290         if not self.parent:
 291             return None
 292         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 293         if sibling is not None:
 294             return sibling.get_last_text()
 295
 296         if self.parent.html_file:
 297             return self.parent
 298
 299         return self.parent.get_prev_text()
 300
 301     def get_next_text(self, inside=True):
 302         if inside:
 303             child = self.children.order_by('parent_number').first()
 304             if child is not None:
 305                 return child.get_first_text()
 306
 307         if not self.parent:
 308             return None
 309         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 310         if sibling is not None:
 311             return sibling.get_first_text()
 312         return self.parent.get_next_text(inside=False)
 313
 314     def get_child_audiobook(self):
 315         BookMedia = apps.get_model('catalogue', 'BookMedia')
 316         if not BookMedia.objects.filter(book__ancestor=self).exists():
 317             return None
 318         for child in self.children.order_by('parent_number').all():
 319             if child.has_mp3_file():
 320                 return child
 321             child_sub = child.get_child_audiobook()
 322             if child_sub is not None:
 323                 return child_sub
 324
 325     def get_siblings(self):
 326         if not self.parent:
 327             return []
 328         return self.parent.children.all().order_by('parent_number')
 329
 330     def get_children(self):
 331         return self.children.all().order_by('parent_number')
 332
 333     @property
 334     def name(self):
 335         return self.title
 336
 337     def language_code(self):
 338         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 339
 340     def language_name(self):
 341         return dict(settings.LANGUAGES).get(self.language_code(), "")
 342
 343     def is_foreign(self):
 344         return self.language_code() != settings.LANGUAGE_CODE
 345
 346     def set_audio_length(self):
 347         length = self.get_audio_length()
 348         if length > 0:
 349             self.audio_length = self.format_audio_length(length)
 350             self.save()
 351
 352     @staticmethod
 353     def format_audio_length(seconds):
 354         """
 355         >>> Book.format_audio_length(1)
 356         '0:01'
 357         >>> Book.format_audio_length(3661)
 358         '1:01:01'
 359         """
 360         if seconds < 60*60:
 361             minutes = seconds // 60
 362             seconds = seconds % 60
 363             return '%d:%02d' % (minutes, seconds)
 364         else:
 365             hours = seconds // 3600
 366             minutes = seconds % 3600 // 60
 367             seconds = seconds % 60
 368             return '%d:%02d:%02d' % (hours, minutes, seconds)
 369
 370     def get_audio_length(self):
 371         total = 0
 372         for media in self.get_mp3() or ():
 373             total += app_settings.GET_MP3_LENGTH(media.file.path)
 374         return int(total)
 375
 376     def has_media(self, type_):
 377         if type_ in Book.formats:
 378             return bool(getattr(self, "%s_file" % type_))
 379         else:
 380             return self.media.filter(type=type_).exists()
 381
 382     def has_audio(self):
 383         return self.has_media('mp3')
 384
 385     def get_media(self, type_):
 386         if self.has_media(type_):
 387             if type_ in Book.formats:
 388                 return getattr(self, "%s_file" % type_)
 389             else:
 390                 return self.media.filter(type=type_)
 391         else:
 392             return None
 393
 394     def get_mp3(self):
 395         return self.get_media("mp3")
 396
 397     def get_odt(self):
 398         return self.get_media("odt")
 399
 400     def get_ogg(self):
 401         return self.get_media("ogg")
 402
 403     def get_daisy(self):
 404         return self.get_media("daisy")
 405
 406     def get_audio_epub(self):
 407         return self.get_media("audio.epub")
 408
 409     def media_url(self, format_):
 410         media = self.get_media(format_)
 411         if media:
 412             if self.preview:
 413                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 414             else:
 415                 return media.url
 416         else:
 417             return None
 418
 419     def html_url(self):
 420         return self.media_url('html')
 421
 422     def pdf_url(self):
 423         return self.media_url('pdf')
 424
 425     def epub_url(self):
 426         return self.media_url('epub')
 427
 428     def mobi_url(self):
 429         return self.media_url('mobi')
 430
 431     def txt_url(self):
 432         return self.media_url('txt')
 433
 434     def fb2_url(self):
 435         return self.media_url('fb2')
 436
 437     def xml_url(self):
 438         return self.media_url('xml')
 439
 440     def has_description(self):
 441         return len(self.description) > 0
 442     has_description.short_description = _('description')
 443     has_description.boolean = True
 444
 445     def has_mp3_file(self):
 446         return self.has_media("mp3")
 447     has_mp3_file.short_description = 'MP3'
 448     has_mp3_file.boolean = True
 449
 450     def has_ogg_file(self):
 451         return self.has_media("ogg")
 452     has_ogg_file.short_description = 'OGG'
 453     has_ogg_file.boolean = True
 454
 455     def has_daisy_file(self):
 456         return self.has_media("daisy")
 457     has_daisy_file.short_description = 'DAISY'
 458     has_daisy_file.boolean = True
 459
 460     def has_audio_epub_file(self):
 461         return self.has_media("audio.epub")
 462
 463     @property
 464     def media_daisy(self):
 465         return self.get_media('daisy')
 466
 467     @property
 468     def media_audio_epub(self):
 469         return self.get_media('audio.epub')
 470
 471     def get_audiobooks(self):
 472         ogg_files = {}
 473         for m in self.media.filter(type='ogg').order_by().iterator():
 474             ogg_files[m.name] = m
 475
 476         audiobooks = []
 477         projects = set()
 478         total_duration = 0
 479         for mp3 in self.media.filter(type='mp3').iterator():
 480             # ogg files are always from the same project
 481             meta = mp3.get_extra_info_json()
 482             project = meta.get('project')
 483             if not project:
 484                 # temporary fallback
 485                 project = 'CzytamySłuchając'
 486
 487             projects.add((project, meta.get('funded_by', '')))
 488             total_duration += mp3.duration or 0
 489
 490             media = {'mp3': mp3}
 491
 492             ogg = ogg_files.get(mp3.name)
 493             if ogg:
 494                 media['ogg'] = ogg
 495             audiobooks.append(media)
 496
 497         projects = sorted(projects)
 498         total_duration = '%d:%02d' % (
 499             total_duration // 60,
 500             total_duration % 60
 501         )
 502         return audiobooks, projects, total_duration
 503
 504     def wldocument(self, parse_dublincore=True, inherit=True):
 505         from catalogue.import_utils import ORMDocProvider
 506         from librarian.parser import WLDocument
 507
 508         if inherit and self.parent:
 509             meta_fallbacks = self.parent.cover_info()
 510         else:
 511             meta_fallbacks = None
 512
 513         return WLDocument.from_file(
 514             self.xml_file.path,
 515             provider=ORMDocProvider(self),
 516             parse_dublincore=parse_dublincore,
 517             meta_fallbacks=meta_fallbacks)
 518
 519     def wldocument2(self):
 520         from catalogue.import_utils import ORMDocProvider
 521         from librarian.document import WLDocument
 522         doc = WLDocument(
 523             self.xml_file.path,
 524             provider=ORMDocProvider(self)
 525         )
 526         doc.meta.update(self.cover_info())
 527         return doc
 528
 529
 530     @staticmethod
 531     def zip_format(format_):
 532         def pretty_file_name(book):
 533             return "%s/%s.%s" % (
 534                 book.get_extra_info_json()['author'],
 535                 book.slug,
 536                 format_)
 537
 538         field_name = "%s_file" % format_
 539         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 540         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 541         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 542
 543     def zip_audiobooks(self, format_):
 544         bm = BookMedia.objects.filter(book=self, type=format_)
 545         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 546         licenses = set()
 547         for m in bm:
 548             license = constants.LICENSES.get(
 549                 m.get_extra_info_json().get('license'), {}
 550             ).get('locative')
 551             if license:
 552                 licenses.add(license)
 553         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 554             'licenses': licenses,
 555         })
 556         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 557
 558     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 559         if not self.findable:
 560             return
 561         if index is None:
 562             from search.index import Index
 563             index = Index()
 564         try:
 565             index.index_book(self, book_info)
 566             if index_tags:
 567                 index.index_tags()
 568             if commit:
 569                 index.index.commit()
 570         except Exception as e:
 571             index.index.rollback()
 572             raise e
 573
 574     # will make problems in conjunction with paid previews
 575     def download_pictures(self, remote_gallery_url):
 576         # This is only needed for legacy relative image paths.
 577         gallery_path = self.gallery_path()
 578         # delete previous files, so we don't include old files in ebooks
 579         if os.path.isdir(gallery_path):
 580             for filename in os.listdir(gallery_path):
 581                 file_path = os.path.join(gallery_path, filename)
 582                 os.unlink(file_path)
 583         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 584         if ilustr_elements:
 585             makedirs(gallery_path)
 586             for ilustr in ilustr_elements:
 587                 ilustr_src = ilustr.get('src')
 588                 if '/' in ilustr_src:
 589                     continue
 590                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 591                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 592
 593     def load_abstract(self):
 594         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 595         if abstract is not None:
 596             self.abstract = transform_abstrakt(abstract)
 597         else:
 598             self.abstract = ''
 599
 600     def load_toc(self):
 601         self.toc = ''
 602         if self.html_file:
 603             parser = html.HTMLParser(encoding='utf-8')
 604             tree = html.parse(self.html_file.path, parser=parser)
 605             toc = tree.find('//div[@id="toc"]/ol')
 606             if toc is None or not len(toc):
 607                 return
 608             html_link = reverse('book_text', args=[self.slug])
 609             for a in toc.findall('.//a'):
 610                 a.attrib['href'] = html_link + a.attrib['href']
 611             self.toc = html.tostring(toc, encoding='unicode')
 612             # div#toc
 613
 614     @classmethod
 615     def from_xml_file(cls, xml_file, **kwargs):
 616         from django.core.files import File
 617         from librarian import dcparser
 618
 619         # use librarian to parse meta-data
 620         book_info = dcparser.parse(xml_file)
 621
 622         if not isinstance(xml_file, File):
 623             xml_file = File(open(xml_file))
 624
 625         try:
 626             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 627         finally:
 628             xml_file.close()
 629
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a Book from a raw XML file and its parsed metadata.

        Args:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of`` and (if present)
                ``parts``, and calls ``to_dict()``.
            overwrite: when false, an existing book with the same slug
                raises ``Book.AlreadyExists``.
            dont_build: names of formats to skip when scheduling builds;
                merged with ``app_settings.DONT_BUILD``.
            search_index: schedule the indexing task (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are fetched with
                download_pictures().
            days: for a newly created book, a non-zero value marks it as
                a preview lasting this many days from today.
            findable: stored on the book.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info`` is missing.
            ValueError: the slug has characters outside ``a-z0-9-``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from metadata replace the old ones; shelves ('set') are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for
        # books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 749
 750     def get_master(self):
 751         master_tags = [
 752             'opowiadanie',
 753             'powiesc',
 754             'dramat_wierszowany_l',
 755             'dramat_wierszowany_lp',
 756             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 757             'wywiad',
 758         ]
 759         from librarian.parser import WLDocument
 760         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 761         root = wld.edoc.getroot()
 762         for master in root.iter():
 763             if master.tag in master_tags:
 764                 return master
 765
 766     def update_references(self):
 767         from references.models import Entity, Reference
 768         master = self.get_master()
 769         if master is None:
 770             master = []
 771         found = set()
 772         for i, sec in enumerate(master):
 773             for ref in sec.findall('.//ref'):
 774                 href = ref.attrib.get('href', '')
 775                 if not href or href in found:
 776                     continue
 777                 found.add(href)
 778                 entity, created = Entity.objects.get_or_create(
 779                     uri=href
 780                 )
 781                 ref, created = Reference.objects.get_or_create(
 782                     book=self,
 783                     entity=entity
 784                 )
 785                 ref.first_section = 'sec%d' % (i + 1)
 786                 entity.populate()
 787                 entity.save()
 788         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 789
 790     @property
 791     def references(self):
 792         return self.reference_set.all().select_related('entity')
 793
 794     @classmethod
 795     @transaction.atomic
 796     def repopulate_ancestors(cls):
 797         """Fixes the ancestry cache."""
 798         # TODO: table names
 799         cursor = connection.cursor()
 800         if connection.vendor == 'postgres':
 801             cursor.execute("TRUNCATE catalogue_book_ancestor")
 802             cursor.execute("""
 803                 WITH RECURSIVE ancestry AS (
 804                     SELECT book.id, book.parent_id
 805                     FROM catalogue_book AS book
 806                     WHERE book.parent_id IS NOT NULL
 807                     UNION
 808                     SELECT ancestor.id, book.parent_id
 809                     FROM ancestry AS ancestor, catalogue_book AS book
 810                     WHERE ancestor.parent_id = book.id
 811                         AND book.parent_id IS NOT NULL
 812                     )
 813                 INSERT INTO catalogue_book_ancestor
 814                     (from_book_id, to_book_id)
 815                     SELECT id, parent_id
 816                     FROM ancestry
 817                     ORDER BY id;
 818                 """)
 819         else:
 820             cursor.execute("DELETE FROM catalogue_book_ancestor")
 821             for b in cls.objects.exclude(parent=None):
 822                 parent = b.parent
 823                 while parent is not None:
 824                     b.ancestor.add(parent)
 825                     parent = parent.parent
 826
 827     @property
 828     def ancestors(self):
 829         if self.parent:
 830             for anc in self.parent.ancestors:
 831                 yield anc
 832             yield self.parent
 833         else:
 834             return []
 835
 836     def clear_cache(self):
 837         clear_cached_renders(self.mini_box)
 838         clear_cached_renders(self.mini_box_nolink)
 839
 840     def cover_info(self, inherit=True):
 841         """Returns a dictionary to serve as fallback for BookInfo.
 842
 843         For now, the only thing inherited is the cover image.
 844         """
 845         need = False
 846         info = {}
 847         for field in ('cover_url', 'cover_by', 'cover_source'):
 848             val = self.get_extra_info_json().get(field)
 849             if val:
 850                 info[field] = val
 851             else:
 852                 need = True
 853         if inherit and need and self.parent is not None:
 854             parent_info = self.parent.cover_info()
 855             parent_info.update(info)
 856             info = parent_info
 857         return info
 858
 859     def related_themes(self):
 860         return Tag.objects.usage_for_queryset(
 861             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 862             counts=True).filter(category='theme').order_by('-count')
 863
 864     def parent_cover_changed(self):
 865         """Called when parent book's cover image is changed."""
 866         if not self.cover_info(inherit=False):
 867             if 'cover' not in app_settings.DONT_BUILD:
 868                 self.cover.build_delay()
 869                 self.cover_clean.build_delay()
 870                 self.cover_thumb.build_delay()
 871                 self.cover_api_thumb.build_delay()
 872                 self.simple_cover.build_delay()
 873                 self.cover_ebookpoint.build_delay()
 874             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 875                 if format_ not in app_settings.DONT_BUILD:
 876                     getattr(self, '%s_file' % format_).build_delay()
 877             for child in self.children.all():
 878                 child.parent_cover_changed()
 879
 880     def other_versions(self):
 881         """Find other versions (i.e. in other languages) of the book."""
 882         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 883
 884     def parents(self):
 885         books = []
 886         parent = self.parent
 887         while parent is not None:
 888             books.insert(0, parent)
 889             parent = parent.parent
 890         return books
 891
 892     def pretty_title(self, html_links=False):
 893         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 894         books = self.parents() + [self]
 895         names.extend([(b.title, b.get_absolute_url()) for b in books])
 896
 897         if html_links:
 898             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 899         else:
 900             names = [tag[0] for tag in names]
 901         return ', '.join(names)
 902
 903     def publisher(self):
 904         publisher = self.get_extra_info_json()['publisher']
 905         if isinstance(publisher, str):
 906             return publisher
 907         elif isinstance(publisher, list):
 908             return ', '.join(publisher)
 909
 910     @classmethod
 911     def tagged_top_level(cls, tags):
 912         """ Returns top-level books tagged with `tags`.
 913
 914         It only returns those books which don't have ancestors which are
 915         also tagged with those tags.
 916
 917         """
 918         objects = cls.tagged.with_all(tags)
 919         return objects.filter(findable=True).exclude(ancestor__in=objects)
 920
 921     @classmethod
 922     def book_list(cls, book_filter=None):
 923         """Generates a hierarchical listing of all books.
 924
 925         Books are optionally filtered with a test function.
 926
 927         """
 928
 929         books_by_parent = {}
 930         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 931         if book_filter:
 932             books = books.filter(book_filter).distinct()
 933
 934             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 935             for book in books.iterator():
 936                 parent = book.parent_id
 937                 if parent not in book_ids:
 938                     parent = None
 939                 books_by_parent.setdefault(parent, []).append(book)
 940         else:
 941             for book in books.iterator():
 942                 books_by_parent.setdefault(book.parent_id, []).append(book)
 943
 944         orphans = []
 945         books_by_author = OrderedDict()
 946         for tag in Tag.objects.filter(category='author').iterator():
 947             books_by_author[tag] = []
 948
 949         for book in books_by_parent.get(None, ()):
 950             authors = list(book.authors().only('pk'))
 951             if authors:
 952                 for author in authors:
 953                     books_by_author[author].append(book)
 954             else:
 955                 orphans.append(book)
 956
 957         return books_by_author, orphans, books_by_parent
 958
    # Maps an audience code from the book's extra info to a
    # (sort rank, Polish label) pair; audiences_pl() sorts by the pair and
    # returns only the labels. Several codes share one pair on purpose, so
    # they collapse into a single label.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 969
 970     def audiences_pl(self):
 971         audiences = self.get_extra_info_json().get('audiences', [])
 972         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 973         return [a[1] for a in audiences]
 974
 975     def stage_note(self):
 976         stage = self.get_extra_info_json().get('stage')
 977         if stage and stage < '0.4':
 978             return (_('This work needs modernisation'),
 979                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 980         else:
 981             return None, None
 982
 983     def choose_fragments(self, number):
 984         fragments = self.fragments.order_by()
 985         fragments_count = fragments.count()
 986         if not fragments_count and self.children.exists():
 987             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 988             fragments_count = fragments.count()
 989         if fragments_count:
 990             if fragments_count > number:
 991                 offset = randint(0, fragments_count - number)
 992             else:
 993                 offset = 0
 994             return fragments[offset : offset + number]
 995         elif self.parent:
 996             return self.parent.choose_fragments(number)
 997         else:
 998             return []
 999
1000     def choose_fragment(self):
1001         fragments = self.choose_fragments(1)
1002         if fragments:
1003             return fragments[0]
1004         else:
1005             return None
1006
1007     def fragment_data(self):
1008         fragment = self.choose_fragment()
1009         if fragment:
1010             return {
1011                 'title': fragment.book.pretty_title(),
1012                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1013             }
1014         else:
1015             return None
1016
1017     def update_popularity(self):
1018         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1019         try:
1020             pop = self.popularity
1021             pop.count = count
1022             pop.save()
1023         except BookPopularity.DoesNotExist:
1024             BookPopularity.objects.create(book=self, count=count)
1025
1026     def ridero_link(self):
1027         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1028
1029     def like(self, user):
1030         from social.utils import likes, get_set, set_sets
1031         if not likes(user, self):
1032             tag = get_set(user, '')
1033             set_sets(user, self, [tag])
1034
1035     def unlike(self, user):
1036         from social.utils import likes, set_sets
1037         if likes(user, self):
1038             set_sets(user, self, [])
1039
1040     def full_sort_key(self):
1041         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1042
1043     def cover_color(self):
1044         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1045
1046     @cached_render('catalogue/book_mini_box.html')
1047     def mini_box(self):
1048         return {
1049             'book': self
1050         }
1051
1052     @cached_render('catalogue/book_mini_box.html')
1053     def mini_box_nolink(self):
1054         return {
1055             'book': self,
1056             'no_link': True,
1057         }
1058
def add_file_fields():
    """Add a ``<format>_file`` field to Book for every entry of ``Book.formats``.

    Every format except ``xml`` also gets a ``<format>_file_etag`` char field.
    """
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        field_name = "%s_file" % format_
        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')
1077
1078
# Attach the per-format file fields to Book as soon as this module is imported.
add_file_fields()
1080
1081
1082 class BookPopularity(models.Model):
1083     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1084     count = models.IntegerField(default=0, db_index=True)