src/catalogue/models/book.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from collections import OrderedDict
   5 import json
   6 from datetime import date, timedelta
   7 from random import randint
   8 import os.path
   9 import re
  10 from urllib.request import urlretrieve
  11 from django.apps import apps
  12 from django.conf import settings
  13 from django.db import connection, models, transaction
  14 import django.dispatch
  15 from django.contrib.contenttypes.fields import GenericRelation
  16 from django.template.loader import render_to_string
  17 from django.urls import reverse
  18 from django.utils.translation import ugettext_lazy as _, get_language
  19 from django.utils.deconstruct import deconstructible
  20 from fnpdjango.storage import BofhFileSystemStorage
  21 from lxml import html
  22 from librarian.cover import WLCover
  23 from librarian.html import transform_abstrakt
  24 from newtagging import managers
  25 from catalogue import constants
  26 from catalogue.fields import EbookField
  27 from catalogue.models import Tag, Fragment, BookMedia
  28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
  29 from catalogue.models.tag import prefetched_relations
  30 from catalogue import app_settings
  31 from catalogue import tasks
  32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
  33
# Shared storage instance; passed as ``storage=`` to the ``Book.cover`` field.
bofh_storage = BofhFileSystemStorage()
  35
  36
  37 @deconstructible
  38 class UploadToPath(object):
  39     def __init__(self, path):
  40         self.path = path
  41
  42     def __call__(self, instance, filename):
  43         return self.path % instance.slug
  44
  45
# ``upload_to`` callables for the cover fields of ``Book``; each template is
# filled with the book's slug by ``UploadToPath.__call__``.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
  52
  53
  54 def _ebook_upload_to(upload_path):
  55     return UploadToPath(upload_path)
  56
  57
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree through ``parent``/``children``; ``ancestor`` holds the
    ancestry cache rebuilt by ``repopulate_ancestors()``.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta(): slug of the book this one is a variant of,
    # or the book's own slug.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON text; decode it with get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() for preview books that do not have a key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Format names; has_media()/get_media() map each name in ``formats`` to
    # the ``<format>_file`` attribute.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rewritten as a whole by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta(); the sender of
    # ``html_built`` is not in this part of the file.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
 141
 142     class AlreadyExists(Exception):
 143         pass
 144
    class Meta:
        # Default ordering: by the author sort key, then by the title sort key
        # (both are recomputed in Book.save()).
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
 150
 151     def __str__(self):
 152         return self.title
 153
 154     def get_extra_info_json(self):
 155         return json.loads(self.extra_info or '{}')
 156
 157     def get_initial(self):
 158         try:
 159             return re.search(r'\w', self.title, re.U).group(0)
 160         except AttributeError:
 161             return ''
 162
 163     def authors(self):
 164         return self.tags.filter(category='author')
 165
 166     def epochs(self):
 167         return self.tags.filter(category='epoch')
 168
 169     def genres(self):
 170         return self.tags.filter(category='genre')
 171
 172     def kinds(self):
 173         return self.tags.filter(category='kind')
 174
 175     def tag_unicode(self, category):
 176         relations = prefetched_relations(self, category)
 177         if relations:
 178             return ', '.join(rel.tag.name for rel in relations)
 179         else:
 180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 181
 182     def tags_by_category(self):
 183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
 184
 185     def author_unicode(self):
 186         return self.cached_author
 187
 188     def kind_unicode(self):
 189         return self.tag_unicode('kind')
 190
 191     def epoch_unicode(self):
 192         return self.tag_unicode('epoch')
 193
 194     def genre_unicode(self):
 195         return self.tag_unicode('genre')
 196
 197     def translators(self):
 198         translators = self.get_extra_info_json().get('translators') or []
 199         return [
 200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
 201         ]
 202
 203     def translator(self):
 204         translators = self.get_extra_info_json().get('translators')
 205         if not translators:
 206             return None
 207         if len(translators) > 3:
 208             translators = translators[:2]
 209             others = ' i inni'
 210         else:
 211             others = ''
 212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
 213
 214     def cover_source(self):
 215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
 216
 217     @property
 218     def isbn_pdf(self):
 219         return self.get_extra_info_json().get('isbn_pdf')
 220
 221     @property
 222     def isbn_epub(self):
 223         return self.get_extra_info_json().get('isbn_epub')
 224
 225     @property
 226     def isbn_mobi(self):
 227         return self.get_extra_info_json().get('isbn_mobi')
 228
 229
 230     def save(self, force_insert=False, force_update=False, **kwargs):
 231         from sortify import sortify
 232
 233         self.sort_key = sortify(self.title)[:120]
 234         self.title = str(self.title)  # ???
 235
 236         try:
 237             author = self.authors().first().sort_key
 238         except AttributeError:
 239             author = ''
 240         self.sort_key_author = author
 241
 242         self.cached_author = self.tag_unicode('author')
 243         self.has_audience = 'audience' in self.get_extra_info_json()
 244
 245         if self.preview and not self.preview_key:
 246             self.preview_key = get_random_hash(self.slug)[:32]
 247
 248         ret = super(Book, self).save(force_insert, force_update, **kwargs)
 249
 250         return ret
 251
 252     def get_absolute_url(self):
 253         return reverse('book_detail', args=[self.slug])
 254
 255     def gallery_path(self):
 256         return gallery_path(self.slug)
 257
 258     def gallery_url(self):
 259         return gallery_url(self.slug)
 260
 261     def get_first_text(self):
 262         if self.html_file:
 263             return self
 264         child = self.children.all().order_by('parent_number').first()
 265         if child is not None:
 266             return child.get_first_text()
 267
 268     def get_last_text(self):
 269         if self.html_file:
 270             return self
 271         child = self.children.all().order_by('parent_number').last()
 272         if child is not None:
 273             return child.get_last_text()
 274
 275     def get_prev_text(self):
 276         if not self.parent:
 277             return None
 278         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
 279         if sibling is not None:
 280             return sibling.get_last_text()
 281
 282         if self.parent.html_file:
 283             return self.parent
 284
 285         return self.parent.get_prev_text()
 286
 287     def get_next_text(self):
 288         child = self.children.order_by('parent_number').first()
 289         if child is not None:
 290             return child.get_first_text()
 291
 292         if not self.parent:
 293             return None
 294         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
 295         if sibling is not None:
 296             return sibling.get_first_text()
 297         return self.parent.get_next_text()
 298
 299     def get_child_audiobook(self):
 300         BookMedia = apps.get_model('catalogue', 'BookMedia')
 301         if not BookMedia.objects.filter(book__ancestor=self).exists():
 302             return None
 303         for child in self.children.all():
 304             if child.has_mp3_file():
 305                 return child
 306             child_sub = child.get_child_audiobook()
 307             if child_sub is not None:
 308                 return child_sub
 309
 310     def get_siblings(self):
 311         if not self.parent:
 312             return []
 313         return self.parent.children.all().order_by('parent_number')
 314
 315     def get_children(self):
 316         return self.children.all().order_by('parent_number')
 317
 318     @property
 319     def name(self):
 320         return self.title
 321
 322     def language_code(self):
 323         return constants.LANGUAGES_3TO2.get(self.language, self.language)
 324
 325     def language_name(self):
 326         return dict(settings.LANGUAGES).get(self.language_code(), "")
 327
 328     def is_foreign(self):
 329         return self.language_code() != settings.LANGUAGE_CODE
 330
 331     def set_audio_length(self):
 332         length = self.get_audio_length()
 333         if length > 0:
 334             self.audio_length = self.format_audio_length(length)
 335             self.save()
 336
 337     @staticmethod
 338     def format_audio_length(seconds):
 339         """
 340         >>> Book.format_audio_length(1)
 341         '0:01'
 342         >>> Book.format_audio_length(3661)
 343         '1:01:01'
 344         """
 345         if seconds < 60*60:
 346             minutes = seconds // 60
 347             seconds = seconds % 60
 348             return '%d:%02d' % (minutes, seconds)
 349         else:
 350             hours = seconds // 3600
 351             minutes = seconds % 3600 // 60
 352             seconds = seconds % 60
 353             return '%d:%02d:%02d' % (hours, minutes, seconds)
 354
 355     def get_audio_length(self):
 356         total = 0
 357         for media in self.get_mp3() or ():
 358             total += app_settings.GET_MP3_LENGTH(media.file.path)
 359         return int(total)
 360
 361     def has_media(self, type_):
 362         if type_ in Book.formats:
 363             return bool(getattr(self, "%s_file" % type_))
 364         else:
 365             return self.media.filter(type=type_).exists()
 366
 367     def has_audio(self):
 368         return self.has_media('mp3')
 369
 370     def get_media(self, type_):
 371         if self.has_media(type_):
 372             if type_ in Book.formats:
 373                 return getattr(self, "%s_file" % type_)
 374             else:
 375                 return self.media.filter(type=type_)
 376         else:
 377             return None
 378
 379     def get_mp3(self):
 380         return self.get_media("mp3")
 381
 382     def get_odt(self):
 383         return self.get_media("odt")
 384
 385     def get_ogg(self):
 386         return self.get_media("ogg")
 387
 388     def get_daisy(self):
 389         return self.get_media("daisy")
 390
 391     def media_url(self, format_):
 392         media = self.get_media(format_)
 393         if media:
 394             if self.preview:
 395                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
 396             else:
 397                 return media.url
 398         else:
 399             return None
 400
 401     def html_url(self):
 402         return self.media_url('html')
 403
 404     def pdf_url(self):
 405         return self.media_url('pdf')
 406
 407     def epub_url(self):
 408         return self.media_url('epub')
 409
 410     def mobi_url(self):
 411         return self.media_url('mobi')
 412
 413     def txt_url(self):
 414         return self.media_url('txt')
 415
 416     def fb2_url(self):
 417         return self.media_url('fb2')
 418
 419     def xml_url(self):
 420         return self.media_url('xml')
 421
 422     def has_description(self):
 423         return len(self.description) > 0
 424     has_description.short_description = _('description')
 425     has_description.boolean = True
 426
 427     def has_mp3_file(self):
 428         return self.has_media("mp3")
 429     has_mp3_file.short_description = 'MP3'
 430     has_mp3_file.boolean = True
 431
 432     def has_ogg_file(self):
 433         return self.has_media("ogg")
 434     has_ogg_file.short_description = 'OGG'
 435     has_ogg_file.boolean = True
 436
 437     def has_daisy_file(self):
 438         return self.has_media("daisy")
 439     has_daisy_file.short_description = 'DAISY'
 440     has_daisy_file.boolean = True
 441
 442     @property
 443     def media_daisy(self):
 444         return self.get_media('daisy')
 445
 446     def get_audiobooks(self):
 447         ogg_files = {}
 448         for m in self.media.filter(type='ogg').order_by().iterator():
 449             ogg_files[m.name] = m
 450
 451         audiobooks = []
 452         projects = set()
 453         total_duration = 0
 454         for mp3 in self.media.filter(type='mp3').iterator():
 455             # ogg files are always from the same project
 456             meta = mp3.get_extra_info_json()
 457             project = meta.get('project')
 458             if not project:
 459                 # temporary fallback
 460                 project = 'CzytamySłuchając'
 461
 462             projects.add((project, meta.get('funded_by', '')))
 463             total_duration += mp3.duration or 0
 464
 465             media = {'mp3': mp3}
 466
 467             ogg = ogg_files.get(mp3.name)
 468             if ogg:
 469                 media['ogg'] = ogg
 470             audiobooks.append(media)
 471
 472         projects = sorted(projects)
 473         total_duration = '%d:%02d' % (
 474             total_duration // 60,
 475             total_duration % 60
 476         )
 477         return audiobooks, projects, total_duration
 478
 479     def wldocument(self, parse_dublincore=True, inherit=True):
 480         from catalogue.import_utils import ORMDocProvider
 481         from librarian.parser import WLDocument
 482
 483         if inherit and self.parent:
 484             meta_fallbacks = self.parent.cover_info()
 485         else:
 486             meta_fallbacks = None
 487
 488         return WLDocument.from_file(
 489             self.xml_file.path,
 490             provider=ORMDocProvider(self),
 491             parse_dublincore=parse_dublincore,
 492             meta_fallbacks=meta_fallbacks)
 493
 494     def wldocument2(self):
 495         from catalogue.import_utils import ORMDocProvider
 496         from librarian.document import WLDocument
 497         doc = WLDocument(
 498             self.xml_file.path,
 499             provider=ORMDocProvider(self)
 500         )
 501         doc.meta.update(self.cover_info())
 502         return doc
 503
 504
 505     @staticmethod
 506     def zip_format(format_):
 507         def pretty_file_name(book):
 508             return "%s/%s.%s" % (
 509                 book.get_extra_info_json()['author'],
 510                 book.slug,
 511                 format_)
 512
 513         field_name = "%s_file" % format_
 514         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
 515         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
 516         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
 517
 518     def zip_audiobooks(self, format_):
 519         bm = BookMedia.objects.filter(book=self, type=format_)
 520         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
 521         licenses = set()
 522         for m in bm:
 523             license = constants.LICENSES.get(
 524                 m.get_extra_info_json().get('license'), {}
 525             ).get('locative')
 526             if license:
 527                 licenses.add(license)
 528         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
 529             'licenses': licenses,
 530         })
 531         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 532
 533     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
 534         if not self.findable:
 535             return
 536         if index is None:
 537             from search.index import Index
 538             index = Index()
 539         try:
 540             index.index_book(self, book_info)
 541             if index_tags:
 542                 index.index_tags()
 543             if commit:
 544                 index.index.commit()
 545         except Exception as e:
 546             index.index.rollback()
 547             raise e
 548
 549     # will make problems in conjunction with paid previews
 550     def download_pictures(self, remote_gallery_url):
 551         gallery_path = self.gallery_path()
 552         # delete previous files, so we don't include old files in ebooks
 553         if os.path.isdir(gallery_path):
 554             for filename in os.listdir(gallery_path):
 555                 file_path = os.path.join(gallery_path, filename)
 556                 os.unlink(file_path)
 557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
 558         if ilustr_elements:
 559             makedirs(gallery_path)
 560             for ilustr in ilustr_elements:
 561                 ilustr_src = ilustr.get('src')
 562                 ilustr_path = os.path.join(gallery_path, ilustr_src)
 563                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
 564
 565     def load_abstract(self):
 566         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
 567         if abstract is not None:
 568             self.abstract = transform_abstrakt(abstract)
 569         else:
 570             self.abstract = ''
 571
 572     def load_toc(self):
 573         self.toc = ''
 574         if self.html_file:
 575             parser = html.HTMLParser(encoding='utf-8')
 576             tree = html.parse(self.html_file.path, parser=parser)
 577             toc = tree.find('//div[@id="toc"]/ol')
 578             if toc is None or not len(toc):
 579                 return
 580             html_link = reverse('book_text', args=[self.slug])
 581             for a in toc.findall('.//a'):
 582                 a.attrib['href'] = html_link + a.attrib['href']
 583             self.toc = html.tostring(toc, encoding='unicode')
 584             # div#toc
 585
 586     @classmethod
 587     def from_xml_file(cls, xml_file, **kwargs):
 588         from django.core.files import File
 589         from librarian import dcparser
 590
 591         # use librarian to parse meta-data
 592         book_info = dcparser.parse(xml_file)
 593
 594         if not isinstance(xml_file, File):
 595             xml_file = File(open(xml_file))
 596
 597         try:
 598             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 599         finally:
 600             xml_file.close()
 601
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url``,
                ``parts`` (optional), ``language``, ``title`` and
                ``variant_of`` and calls ``to_dict()`` on it.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip, merged with
                ``app_settings.DONT_BUILD``.
            search_index: queue an indexing task (only when the
                ``NO_SEARCH_INDEX`` setting is off and ``findable`` is true).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the slug exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only at creation time.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered to detect a cover change after the metadata update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; shelves ('set' tags) are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
 721
 722     def get_master(self):
 723         master_tags = [
 724             'opowiadanie',
 725             'powiesc',
 726             'dramat_wierszowany_l',
 727             'dramat_wierszowany_lp',
 728             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
 729             'wywiad',
 730         ]
 731         from librarian.parser import WLDocument
 732         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
 733         root = wld.edoc.getroot()
 734         for master in root.iter():
 735             if master.tag in master_tags:
 736                 return master
 737
 738     def update_references(self):
 739         from references.models import Entity, Reference
 740         master = self.get_master()
 741         found = set()
 742         for i, sec in enumerate(master):
 743             for ref in sec.findall('.//ref'):
 744                 href = ref.attrib.get('href', '')
 745                 if not href or href in found:
 746                     continue
 747                 found.add(href)
 748                 entity, created = Entity.objects.get_or_create(
 749                     uri=href
 750                 )
 751                 ref, created = Reference.objects.get_or_create(
 752                     book=self,
 753                     entity=entity
 754                 )
 755                 ref.first_section = 'sec%d' % (i + 1)
 756                 entity.populate()
 757                 entity.save()
 758         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
 759
 760     @property
 761     def references(self):
 762         return self.reference_set.all().select_related('entity')
 763
 764     @classmethod
 765     @transaction.atomic
 766     def repopulate_ancestors(cls):
 767         """Fixes the ancestry cache."""
 768         # TODO: table names
 769         cursor = connection.cursor()
 770         if connection.vendor == 'postgres':
 771             cursor.execute("TRUNCATE catalogue_book_ancestor")
 772             cursor.execute("""
 773                 WITH RECURSIVE ancestry AS (
 774                     SELECT book.id, book.parent_id
 775                     FROM catalogue_book AS book
 776                     WHERE book.parent_id IS NOT NULL
 777                     UNION
 778                     SELECT ancestor.id, book.parent_id
 779                     FROM ancestry AS ancestor, catalogue_book AS book
 780                     WHERE ancestor.parent_id = book.id
 781                         AND book.parent_id IS NOT NULL
 782                     )
 783                 INSERT INTO catalogue_book_ancestor
 784                     (from_book_id, to_book_id)
 785                     SELECT id, parent_id
 786                     FROM ancestry
 787                     ORDER BY id;
 788                 """)
 789         else:
 790             cursor.execute("DELETE FROM catalogue_book_ancestor")
 791             for b in cls.objects.exclude(parent=None):
 792                 parent = b.parent
 793                 while parent is not None:
 794                     b.ancestor.add(parent)
 795                     parent = parent.parent
 796
 797     @property
 798     def ancestors(self):
 799         if self.parent:
 800             for anc in self.parent.ancestors:
 801                 yield anc
 802             yield self.parent
 803         else:
 804             return []
 805
 806     def clear_cache(self):
 807         clear_cached_renders(self.mini_box)
 808         clear_cached_renders(self.mini_box_nolink)
 809
 810     def cover_info(self, inherit=True):
 811         """Returns a dictionary to serve as fallback for BookInfo.
 812
 813         For now, the only thing inherited is the cover image.
 814         """
 815         need = False
 816         info = {}
 817         for field in ('cover_url', 'cover_by', 'cover_source'):
 818             val = self.get_extra_info_json().get(field)
 819             if val:
 820                 info[field] = val
 821             else:
 822                 need = True
 823         if inherit and need and self.parent is not None:
 824             parent_info = self.parent.cover_info()
 825             parent_info.update(info)
 826             info = parent_info
 827         return info
 828
 829     def related_themes(self):
 830         return Tag.objects.usage_for_queryset(
 831             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
 832             counts=True).filter(category='theme')
 833
 834     def parent_cover_changed(self):
 835         """Called when parent book's cover image is changed."""
 836         if not self.cover_info(inherit=False):
 837             if 'cover' not in app_settings.DONT_BUILD:
 838                 self.cover.build_delay()
 839                 self.cover_clean.build_delay()
 840                 self.cover_thumb.build_delay()
 841                 self.cover_api_thumb.build_delay()
 842                 self.simple_cover.build_delay()
 843                 self.cover_ebookpoint.build_delay()
 844             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
 845                 if format_ not in app_settings.DONT_BUILD:
 846                     getattr(self, '%s_file' % format_).build_delay()
 847             for child in self.children.all():
 848                 child.parent_cover_changed()
 849
 850     def other_versions(self):
 851         """Find other versions (i.e. in other languages) of the book."""
 852         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
 853
 854     def parents(self):
 855         books = []
 856         parent = self.parent
 857         while parent is not None:
 858             books.insert(0, parent)
 859             parent = parent.parent
 860         return books
 861
 862     def pretty_title(self, html_links=False):
 863         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
 864         books = self.parents() + [self]
 865         names.extend([(b.title, b.get_absolute_url()) for b in books])
 866
 867         if html_links:
 868             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
 869         else:
 870             names = [tag[0] for tag in names]
 871         return ', '.join(names)
 872
 873     def publisher(self):
 874         publisher = self.get_extra_info_json()['publisher']
 875         if isinstance(publisher, str):
 876             return publisher
 877         elif isinstance(publisher, list):
 878             return ', '.join(publisher)
 879
 880     @classmethod
 881     def tagged_top_level(cls, tags):
 882         """ Returns top-level books tagged with `tags`.
 883
 884         It only returns those books which don't have ancestors which are
 885         also tagged with those tags.
 886
 887         """
 888         objects = cls.tagged.with_all(tags)
 889         return objects.filter(findable=True).exclude(ancestor__in=objects)
 890
 891     @classmethod
 892     def book_list(cls, book_filter=None):
 893         """Generates a hierarchical listing of all books.
 894
 895         Books are optionally filtered with a test function.
 896
 897         """
 898
 899         books_by_parent = {}
 900         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
 901         if book_filter:
 902             books = books.filter(book_filter).distinct()
 903
 904             book_ids = set(b['pk'] for b in books.values("pk").iterator())
 905             for book in books.iterator():
 906                 parent = book.parent_id
 907                 if parent not in book_ids:
 908                     parent = None
 909                 books_by_parent.setdefault(parent, []).append(book)
 910         else:
 911             for book in books.iterator():
 912                 books_by_parent.setdefault(book.parent_id, []).append(book)
 913
 914         orphans = []
 915         books_by_author = OrderedDict()
 916         for tag in Tag.objects.filter(category='author').iterator():
 917             books_by_author[tag] = []
 918
 919         for book in books_by_parent.get(None, ()):
 920             authors = list(book.authors().only('pk'))
 921             if authors:
 922                 for author in authors:
 923                     books_by_author[author].append(book)
 924             else:
 925                 orphans.append(book)
 926
 927         return books_by_author, orphans, books_by_parent
 928
    # Maps an audience code to a (sort rank, Polish label) pair.  Used by
    # audiences_pl(), which sorts by the pair and returns only the labels;
    # several codes deliberately share one pair.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
 939
 940     def audiences_pl(self):
 941         audiences = self.get_extra_info_json().get('audiences', [])
 942         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
 943         return [a[1] for a in audiences]
 944
 945     def stage_note(self):
 946         stage = self.get_extra_info_json().get('stage')
 947         if stage and stage < '0.4':
 948             return (_('This work needs modernisation'),
 949                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
 950         else:
 951             return None, None
 952
 953     def choose_fragments(self, number):
 954         fragments = self.fragments.order_by()
 955         fragments_count = fragments.count()
 956         if not fragments_count and self.children.exists():
 957             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
 958             fragments_count = fragments.count()
 959         if fragments_count:
 960             if fragments_count > number:
 961                 offset = randint(0, fragments_count - number)
 962             else:
 963                 offset = 0
 964             return fragments[offset : offset + number]
 965         elif self.parent:
 966             return self.parent.choose_fragments(number)
 967         else:
 968             return []
 969
 970     def choose_fragment(self):
 971         fragments = self.choose_fragments(1)
 972         if fragments:
 973             return fragments[0]
 974         else:
 975             return None
 976
 977     def fragment_data(self):
 978         fragment = self.choose_fragment()
 979         if fragment:
 980             return {
 981                 'title': fragment.book.pretty_title(),
 982                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
 983             }
 984         else:
 985             return None
 986
 987     def update_popularity(self):
 988         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
 989         try:
 990             pop = self.popularity
 991             pop.count = count
 992             pop.save()
 993         except BookPopularity.DoesNotExist:
 994             BookPopularity.objects.create(book=self, count=count)
 995
 996     def ridero_link(self):
 997         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
 998
 999     def like(self, user):
1000         from social.utils import likes, get_set, set_sets
1001         if not likes(user, self):
1002             tag = get_set(user, '')
1003             set_sets(user, self, [tag])
1004
1005     def unlike(self, user):
1006         from social.utils import likes, set_sets
1007         if likes(user, self):
1008             set_sets(user, self, [])
1009
1010     def full_sort_key(self):
1011         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1012
1013     def cover_color(self):
1014         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1015
1016     @cached_render('catalogue/book_mini_box.html')
1017     def mini_box(self):
1018         return {
1019             'book': self
1020         }
1021
1022     @cached_render('catalogue/book_mini_box.html')
1023     def mini_box_nolink(self):
1024         return {
1025             'book': self,
1026             'no_link': True,
1027         }
1028
def add_file_fields():
    """Add a `<format>_file` EbookField to Book for every format in Book.formats.

    Every format except 'xml' also gets a `<format>_file_etag` CharField.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')
1047
1048
# Runs at import time, so Book has its per-format file fields as soon as
# this module is loaded.
add_file_fields()
1050
1051
# One row per book holding a popularity counter; Book.update_popularity()
# writes the number of distinct users having a 'set' tag on the book.
class BookPopularity(models.Model):
    # Reachable from the book as `book.popularity`.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter; starts at 0.
    count = models.IntegerField(default=0, db_index=True)