d0487cd5ff2cb53b7f6bb82393fc219a3d4dc77a
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 import requests
11 from slugify import slugify
12 from sortify import sortify
13 from urllib.request import urlretrieve
14 from django.apps import apps
15 from django.conf import settings
16 from django.db import connection, models, transaction
17 import django.dispatch
18 from django.contrib.contenttypes.fields import GenericRelation
19 from django.template.loader import render_to_string
20 from django.urls import reverse
21 from django.utils.translation import gettext_lazy as _, get_language
22 from fnpdjango.storage import BofhFileSystemStorage
23 from lxml import html
24 from librarian.cover import WLCover
25 from librarian.html import transform_abstrakt
26 from librarian.builders import builders
27 from newtagging import managers
28 from catalogue import constants
29 from catalogue import fields
30 from catalogue.models import Tag, Fragment, BookMedia
31 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
32 from catalogue.models.tag import prefetched_relations
33 from catalogue import app_settings
34 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
35
# Storage instance shared by the file fields of Book that are declared with
# ``storage=bofh_storage`` (the generated text/ebook files and the cover).
bofh_storage = BofhFileSystemStorage()
37
38
39 class Book(models.Model):
40     """Represents a book imported from WL-XML."""
41     title = models.CharField('tytuł', max_length=32767)
42     sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
43     sort_key_author = models.CharField(
44         'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
45     slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
46     common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
47     language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
48     description = models.TextField('opis', blank=True)
49     license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
50     abstract = models.TextField('abstrakt', blank=True)
51     toc = models.TextField('spis treści', blank=True)
52     created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
53     changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
54     parent_number = models.IntegerField('numer w ramach rodzica', default=0)
55     extra_info = models.TextField('dodatkowe informacje', default='{}')
56     gazeta_link = models.CharField(blank=True, max_length=240)
57     wiki_link = models.CharField(blank=True, max_length=240)
58     print_on_demand = models.BooleanField('druk na żądanie', default=False)
59     recommended = models.BooleanField('polecane', default=False)
60     audio_length = models.CharField('długość audio', blank=True, max_length=8)
61     preview = models.BooleanField('prapremiera', default=False)
62     preview_until = models.DateField('prapremiera do', blank=True, null=True)
63     preview_key = models.CharField(max_length=32, blank=True, null=True)
64     findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
65     can_sell = models.BooleanField('do sprzedaży', default=True)
66     can_sell_mp3 = models.BooleanField('do sprzedaży mp3', default=True)
67     isbn_mp3 = models.CharField('ISBN audiobooka', max_length=32, blank=True)
68
69     # files generated during publication
70     xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
71     html_file = fields.HtmlField(storage=bofh_storage)
72     html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
73     fb2_file = fields.Fb2Field(storage=bofh_storage)
74     txt_file = fields.TxtField(storage=bofh_storage)
75     epub_file = fields.EpubField(storage=bofh_storage)
76     mobi_file = fields.MobiField(storage=bofh_storage)
77     pdf_file = fields.PdfField(storage=bofh_storage)
78
79     cover = fields.CoverField('okładka', storage=bofh_storage)
80     # Cleaner version of cover for thumbs
81     cover_clean = fields.CoverCleanField('czysta okładka')
82     cover_thumb = fields.CoverThumbField('miniatura okładki')
83     cover_api_thumb = fields.CoverApiThumbField(
84         'mniaturka okładki dla aplikacji')
85     simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
86     cover_ebookpoint = fields.CoverEbookpointField(
87         'okładka dla Ebookpoint')
88
89     ebook_formats = constants.EBOOK_FORMATS
90     formats = ebook_formats + ['html', 'xml', 'html_nonotes']
91
92     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
93     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
94
95     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
96     has_audience = models.BooleanField(default=False)
97
98     objects = models.Manager()
99     tagged = managers.ModelTaggedItemManager(Tag)
100     tags = managers.TagDescriptor(Tag)
101     tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
102     translators = models.ManyToManyField(Tag, blank=True)
103     narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
104     has_audio = models.BooleanField(default=False)
105     read_time = models.IntegerField(blank=True, null=True)
106     pages = models.IntegerField(blank=True, null=True)
107     
108     html_built = django.dispatch.Signal()
109     published = django.dispatch.Signal()
110
111     SORT_KEY_SEP = '$'
112
113     is_book = True
114
    class AlreadyExists(Exception):
        """Raised by ``from_text_and_meta`` when the slug exists and ``overwrite`` is false."""
        pass
117
    class Meta:
        # Default ordering: author sort key first, then the title-based sort key
        # (both are recomputed in Book.save()).
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
123
124     def __str__(self):
125         return self.title
126
127     def get_extra_info_json(self):
128         return json.loads(self.extra_info or '{}')
129
130     def get_initial(self):
131         try:
132             return re.search(r'\w', self.title, re.U).group(0)
133         except AttributeError:
134             return ''
135
136     def authors(self):
137         return self.tags.filter(category='author')
138
139     def epochs(self):
140         return self.tags.filter(category='epoch')
141
142     def genres(self):
143         return self.tags.filter(category='genre')
144
145     def kinds(self):
146         return self.tags.filter(category='kind')
147
148     def tag_unicode(self, category):
149         relations = prefetched_relations(self, category)
150         if relations:
151             return ', '.join(rel.tag.name for rel in relations)
152         else:
153             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
154
155     def tags_by_category(self):
156         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
157
158     def author_unicode(self):
159         return self.cached_author
160
161     def kind_unicode(self):
162         return self.tag_unicode('kind')
163
164     def epoch_unicode(self):
165         return self.tag_unicode('epoch')
166
167     def genre_unicode(self):
168         return self.tag_unicode('genre')
169
170     def translator(self):
171         translators = self.get_extra_info_json().get('translators')
172         if not translators:
173             return None
174         if len(translators) > 3:
175             translators = translators[:2]
176             others = ' i inni'
177         else:
178             others = ''
179         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
180
181     def cover_source(self):
182         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
183
184     @property
185     def isbn_pdf(self):
186         return self.get_extra_info_json().get('isbn_pdf')
187
188     @property
189     def isbn_epub(self):
190         return self.get_extra_info_json().get('isbn_epub')
191
192     @property
193     def isbn_mobi(self):
194         return self.get_extra_info_json().get('isbn_mobi')
195
196     @property
197     def redakcja(self):
198         return self.get_extra_info_json().get('about')
199     
200     def is_accessible_to(self, user):
201         if not self.preview:
202             return True
203         if not user.is_authenticated:
204             return False
205         Membership = apps.get_model('club', 'Membership')
206         if Membership.is_active_for(user):
207             return True
208         Funding = apps.get_model('funding', 'Funding')
209         if Funding.objects.filter(user=user, offer__book=self):
210             return True
211         return False
212
213     def save(self, force_insert=False, force_update=False, **kwargs):
214         from sortify import sortify
215
216         self.sort_key = sortify(self.title)[:120]
217         self.title = str(self.title)  # ???
218
219         try:
220             author = self.authors().first().sort_key
221         except AttributeError:
222             author = ''
223         self.sort_key_author = author
224
225         self.cached_author = self.tag_unicode('author')
226         self.has_audience = 'audience' in self.get_extra_info_json()
227
228         if self.preview and not self.preview_key:
229             self.preview_key = get_random_hash(self.slug)[:32]
230
231         ret = super(Book, self).save(force_insert, force_update, **kwargs)
232
233         return ret
234
235     def get_absolute_url(self):
236         return reverse('book_detail', args=[self.slug])
237
238     def gallery_path(self):
239         return gallery_path(self.slug)
240
241     def gallery_url(self):
242         return gallery_url(self.slug)
243
244     def get_first_text(self):
245         if self.html_file:
246             return self
247         child = self.children.all().order_by('parent_number').first()
248         if child is not None:
249             return child.get_first_text()
250
251     def get_last_text(self):
252         if self.html_file:
253             return self
254         child = self.children.all().order_by('parent_number').last()
255         if child is not None:
256             return child.get_last_text()
257
258     def get_prev_text(self):
259         if not self.parent:
260             return None
261         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
262         if sibling is not None:
263             return sibling.get_last_text()
264
265         if self.parent.html_file:
266             return self.parent
267
268         return self.parent.get_prev_text()
269
270     def get_next_text(self, inside=True):
271         if inside:
272             child = self.children.order_by('parent_number').first()
273             if child is not None:
274                 return child.get_first_text()
275
276         if not self.parent:
277             return None
278         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
279         if sibling is not None:
280             return sibling.get_first_text()
281         return self.parent.get_next_text(inside=False)
282
283     def get_siblings(self):
284         if not self.parent:
285             return []
286         return self.parent.children.all().order_by('parent_number')
287
288     def get_children(self):
289         return self.children.all().order_by('parent_number')
290
291     @property
292     def name(self):
293         return self.title
294
295     def language_code(self):
296         return constants.LANGUAGES_3TO2.get(self.language, self.language)
297
298     def language_name(self):
299         return dict(settings.LANGUAGES).get(self.language_code(), "")
300
301     def is_foreign(self):
302         return self.language_code() != settings.LANGUAGE_CODE
303
304     def set_audio_length(self):
305         length = self.get_audio_length()
306         if length > 0:
307             self.audio_length = self.format_audio_length(length)
308             self.save()
309
310     @staticmethod
311     def format_audio_length(seconds):
312         """
313         >>> Book.format_audio_length(1)
314         '0:01'
315         >>> Book.format_audio_length(3661)
316         '1:01:01'
317         """
318         if seconds < 60*60:
319             minutes = seconds // 60
320             seconds = seconds % 60
321             return '%d:%02d' % (minutes, seconds)
322         else:
323             hours = seconds // 3600
324             minutes = seconds % 3600 // 60
325             seconds = seconds % 60
326             return '%d:%02d:%02d' % (hours, minutes, seconds)
327
328     def get_audio_length(self):
329         total = 0
330         for media in self.get_mp3() or ():
331             total += app_settings.GET_MP3_LENGTH(media.file.path)
332         return int(total)
333
334     def get_time(self):
335         return round(self.xml_file.size / 1000 * 40)
336     
337     def has_media(self, type_):
338         if type_ in Book.formats:
339             return bool(getattr(self, "%s_file" % type_))
340         else:
341             return self.media.filter(type=type_).exists()
342
343     def get_media(self, type_):
344         if self.has_media(type_):
345             if type_ in Book.formats:
346                 return getattr(self, "%s_file" % type_)
347             else:
348                 return self.media.filter(type=type_)
349         else:
350             return None
351
352     def get_mp3(self):
353         return self.get_media("mp3")
354
355     def get_odt(self):
356         return self.get_media("odt")
357
358     def get_ogg(self):
359         return self.get_media("ogg")
360
361     def get_daisy(self):
362         return self.get_media("daisy")
363
364     def get_audio_epub(self):
365         return self.get_media("audio.epub")
366
367     def media_url(self, format_):
368         media = self.get_media(format_)
369         if media:
370             if self.preview:
371                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
372             else:
373                 return media.url
374         else:
375             return None
376
377     def html_url(self):
378         return self.media_url('html')
379
380     def html_nonotes_url(self):
381         return self.media_url('html_nonotes')
382
383     def pdf_url(self):
384         return self.media_url('pdf')
385
386     def epub_url(self):
387         return self.media_url('epub')
388
389     def mobi_url(self):
390         return self.media_url('mobi')
391
392     def txt_url(self):
393         return self.media_url('txt')
394
395     def fb2_url(self):
396         return self.media_url('fb2')
397
398     def xml_url(self):
399         return self.media_url('xml')
400
401     def has_description(self):
402         return len(self.description) > 0
403     has_description.short_description = 'opis'
404     has_description.boolean = True
405
406     def has_mp3_file(self):
407         return self.has_media("mp3")
408     has_mp3_file.short_description = 'MP3'
409     has_mp3_file.boolean = True
410
411     def has_ogg_file(self):
412         return self.has_media("ogg")
413     has_ogg_file.short_description = 'OGG'
414     has_ogg_file.boolean = True
415
416     def has_daisy_file(self):
417         return self.has_media("daisy")
418     has_daisy_file.short_description = 'DAISY'
419     has_daisy_file.boolean = True
420
421     def has_sync_file(self):
422         return settings.FEATURE_SYNCHRO and self.has_media("sync")
423
424     def build_sync_file(self):
425         from lxml import html
426         from django.core.files.base import ContentFile
427         with self.html_file.open('rb') as f:
428             h = html.fragment_fromstring(f.read().decode('utf-8'))
429
430         durations = [
431             m['mp3'].duration
432             for m in self.get_audiobooks()[0]
433         ]
434         if settings.MOCK_DURATIONS:
435             durations = settings.MOCK_DURATIONS
436
437         sync = []
438         ts = None
439         sid = 1
440         dirty = False
441         for elem in h.iter():
442             if elem.get('data-audio-ts'):
443                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
444                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
445                 # check if inside verse
446                 p = elem.getparent()
447                 while p is not None:
448                     # Workaround for missing ids.
449                     if 'verse' in p.get('class', ''):
450                         if not p.get('id'):
451                             p.set('id', f'syn{sid}')
452                             dirty = True
453                             sid += 1
454                         sync.append((ts, p.get('id')))
455                         ts = None
456                         break
457                     p = p.getparent()
458             elif ts:
459                 cls = elem.get('class', '')
460                 # Workaround for missing ids.
461                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
462                     if not elem.get('id'):
463                         elem.set('id', f'syn{sid}')
464                         dirty = True
465                         sid += 1
466                     sync.append((ts, elem.get('id')))
467                     ts = None
468         if dirty:
469             htext = html.tostring(h, encoding='utf-8')
470             with open(self.html_file.path, 'wb') as f:
471                 f.write(htext)
472         try:
473             bm = self.media.get(type='sync')
474         except:
475             bm = BookMedia(book=self, type='sync')
476         sync = (
477             '27\n' + '\n'.join(
478                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
479             )).encode('latin1')
480         bm.file.save(
481             None, ContentFile(sync)
482             )
483
484     def get_sync(self):
485         if not self.has_sync_file():
486             return []
487         with self.get_media('sync').first().file.open('r') as f:
488             sync = f.read().split('\n')
489         offset = float(sync[0])
490         items = []
491         for line in sync[1:]:
492             if not line:
493                 continue
494             start, end, elid = line.split()
495             items.append([elid, float(start) + offset])
496         return items
497
498     def sync_ts(self, ts):
499         elid = None
500         for cur_id, t in self.get_sync():
501             if ts >= t:
502                 elid = cur_id
503             else:
504                 break
505         return elid
506
507     def sync_elid(self, elid):
508         for cur_id, t in self.get_sync():
509             if cur_id == elid:
510                 return t
511
512     def has_audio_epub_file(self):
513         return self.has_media("audio.epub")
514
515     @property
516     def media_daisy(self):
517         return self.get_media('daisy')
518
519     @property
520     def media_audio_epub(self):
521         return self.get_media('audio.epub')
522
523     def get_audiobooks(self, with_children=False, processing=False):
524         ogg_files = {}
525         for m in self.media.filter(type='ogg').order_by().iterator():
526             ogg_files[m.name] = m
527
528         audiobooks = []
529         projects = set()
530         total_duration = 0
531         for mp3 in self.media.filter(type='mp3').iterator():
532             # ogg files are always from the same project
533             meta = mp3.get_extra_info_json()
534             project = meta.get('project')
535             if not project:
536                 # temporary fallback
537                 project = 'CzytamySłuchając'
538
539             projects.add((project, meta.get('funded_by', '')))
540             total_duration += mp3.duration or 0
541
542             media = {'mp3': mp3}
543
544             ogg = ogg_files.get(mp3.name)
545             if ogg:
546                 media['ogg'] = ogg
547             audiobooks.append(media)
548
549         if with_children:
550             for child in self.get_children():
551                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
552                     with_children=True, processing=True)
553                 audiobooks.append({'part': child})
554                 audiobooks += ch_audiobooks
555                 projects.update(ch_projects)
556                 total_duration += ch_duration
557
558         if not processing:
559             projects = sorted(projects)
560             total_duration = '%d:%02d' % (
561                 total_duration // 60,
562                 total_duration % 60
563             )
564
565         return audiobooks, projects, total_duration
566
567     def get_audiobooks_with_children(self):
568         return self.get_audiobooks(with_children=True)
569     
570     def wldocument(self, parse_dublincore=True, inherit=True):
571         from catalogue.import_utils import ORMDocProvider
572         from librarian.parser import WLDocument
573
574         if inherit and self.parent:
575             meta_fallbacks = self.parent.cover_info()
576         else:
577             meta_fallbacks = None
578
579         return WLDocument.from_file(
580             self.xml_file.path,
581             provider=ORMDocProvider(self),
582             parse_dublincore=parse_dublincore,
583             meta_fallbacks=meta_fallbacks)
584
585     def wldocument2(self):
586         from catalogue.import_utils import ORMDocProvider
587         from librarian.document import WLDocument
588         doc = WLDocument(
589             self.xml_file.path,
590             provider=ORMDocProvider(self)
591         )
592         doc.meta.update(self.cover_info())
593         return doc
594
595
596     @staticmethod
597     def zip_format(format_):
598         def pretty_file_name(book):
599             return "%s/%s.%s" % (
600                 book.get_extra_info_json()['author'],
601                 book.slug,
602                 format_)
603
604         field_name = "%s_file" % format_
605         field = getattr(Book, field_name)
606         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
607         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
608         return create_zip(paths, field.ZIP)
609
610     def zip_audiobooks(self, format_):
611         bm = BookMedia.objects.filter(book=self, type=format_)
612         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
613         licenses = set()
614         for m in bm:
615             license = constants.LICENSES.get(
616                 m.get_extra_info_json().get('license'), {}
617             ).get('locative')
618             if license:
619                 licenses.add(license)
620         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
621             'licenses': licenses,
622             'meta': self.wldocument2().meta,
623         })
624         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
625
626     def search_index(self, index=None):
627         if not self.findable:
628             return
629         from search.index import Index
630         Index.index_book(self)
631
632     # will make problems in conjunction with paid previews
633     def download_pictures(self, remote_gallery_url):
634         # This is only needed for legacy relative image paths.
635         gallery_path = self.gallery_path()
636         # delete previous files, so we don't include old files in ebooks
637         if os.path.isdir(gallery_path):
638             for filename in os.listdir(gallery_path):
639                 file_path = os.path.join(gallery_path, filename)
640                 os.unlink(file_path)
641         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
642         if ilustr_elements:
643             makedirs(gallery_path)
644             for ilustr in ilustr_elements:
645                 ilustr_src = ilustr.get('src')
646                 if '/' in ilustr_src:
647                     continue
648                 ilustr_path = os.path.join(gallery_path, ilustr_src)
649                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
650
651     def load_abstract(self):
652         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
653         if abstract is not None:
654             self.abstract = transform_abstrakt(abstract)
655         else:
656             self.abstract = ''
657
658     def load_toc(self):
659         self.toc = ''
660         if self.html_file:
661             parser = html.HTMLParser(encoding='utf-8')
662             tree = html.parse(self.html_file.path, parser=parser)
663             toc = tree.find('//div[@id="toc"]/ol')
664             if toc is None or not len(toc):
665                 return
666             html_link = reverse('book_text', args=[self.slug])
667             for a in toc.findall('.//a'):
668                 a.attrib['href'] = html_link + a.attrib['href']
669             self.toc = html.tostring(toc, encoding='unicode')
670             # div#toc
671
672     @classmethod
673     def from_xml_file(cls, xml_file, **kwargs):
674         from django.core.files import File
675         from librarian import dcparser
676
677         # use librarian to parse meta-data
678         book_info = dcparser.parse(xml_file)
679
680         if not isinstance(xml_file, File):
681             xml_file = File(open(xml_file))
682
683         try:
684             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
685         finally:
686             xml_file.close()
687
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None, isbn_mp3=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                ``to_dict()`` and, if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of formats (or 'cover') not to build; merged
                with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` to be false).
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono, logo_alt: when truthy, added to ``extra_info``.
            can_sell, isbn_mp3: when not None, stored on the book.

        Returns:
            The saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        if isbn_mp3 is not None:
            book.isbn_mp3 = isbn_mp3
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name go to book.tags, together with the
        # shelves ('set' tags) collected before the update.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is their position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
822
823     def update_stats(self):
824         stats = self.wldocument2().get_statistics()['total']
825         self.pages = round(
826             stats.get('verses_with_fn', 0) / 30 +
827             stats.get('chars_out_verse_with_fn', 0) / 1800)
828         self.read_time = round(self.get_time())
829         self.save(update_fields=['pages', 'read_time'])
830         if self.parent is not None:
831             self.parent.update_stats()
832
833     def update_references(self):
834         Entity = apps.get_model('references', 'Entity')
835         doc = self.wldocument2()
836         doc._compat_assign_section_ids()
837         doc._compat_assign_ordered_ids()
838         refs = {}
839         for ref_elem in doc.references():
840             uri = ref_elem.attrib.get('href', '')
841             if not uri:
842                 continue
843             if uri in refs:
844                 ref = refs[uri]
845             else:
846                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
847                 if entity_created:
848                     try:
849                         entity.populate()
850                     except:
851                         pass
852                     else:
853                         entity.save()
854                 ref, ref_created = entity.reference_set.get_or_create(book=self)
855                 refs[uri] = ref
856                 if not ref_created:
857                     ref.occurence_set.all().delete()
858             sec = ref_elem.get_link()
859             m = re.match(r'sec(\d+)', sec)
860             assert m is not None
861             sec = int(m.group(1))
862             snippet = ref_elem.get_snippet()
863             b = builders['html-snippet']()
864             for s in snippet:
865                 s.html_build(b)
866             html = b.output().get_bytes().decode('utf-8')
867
868             ref.occurence_set.create(
869                 section=sec,
870                 html=html
871             )
872         self.reference_set.exclude(entity__uri__in=refs).delete()
873
874     @property
875     def references(self):
876         return self.reference_set.all().select_related('entity')
877
878     def update_has_audio(self):
879         self.has_audio = False
880         if self.media.filter(type='mp3').exists():
881             self.has_audio = True
882         if self.descendant.filter(has_audio=True).exists():
883             self.has_audio = True
884         self.save(update_fields=['has_audio'])
885         if self.parent is not None:
886             self.parent.update_has_audio()
887
888     def update_narrators(self):
889         narrator_names = set()
890         for bm in self.media.filter(type='mp3'):
891             narrator_names.update(set(
892                 a.strip() for a in re.split(r',|\si\s', bm.artist)
893             ))
894         narrators = []
895
896         for name in narrator_names:
897             if not name: continue
898             slug = slugify(name)
899             try:
900                 t = Tag.objects.get(category='author', slug=slug)
901             except Tag.DoesNotExist:
902                 sort_key = sortify(
903                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
904                 )
905                 t = Tag.objects.create(
906                     category='author',
907                     name_pl=name,
908                     slug=slug,
909                     sort_key=sort_key,
910                 )
911             narrators.append(t)
912         self.narrators.set(narrators)
913
914     def update_can_sell_mp3(self):
915         ret = True
916         for child in self.get_children():
917             child.update_can_sell_mp3()
918             if not child.can_sell_mp3:
919                 ret = False
920         if self.has_mp3_file():
921             audio_items = requests.get(f'https://audio.wolnelektury.pl/archive/book/{self.slug}.json').json()['items']
922             if not all(x['project']['can_sell'] for x in audio_items):
923                 ret = False
924         self.can_sell_audio = ret
925
926     @classmethod
927     @transaction.atomic
928     def repopulate_ancestors(cls):
929         """Fixes the ancestry cache."""
930         # TODO: table names
931         cursor = connection.cursor()
932         if connection.vendor == 'postgres':
933             cursor.execute("TRUNCATE catalogue_book_ancestor")
934             cursor.execute("""
935                 WITH RECURSIVE ancestry AS (
936                     SELECT book.id, book.parent_id
937                     FROM catalogue_book AS book
938                     WHERE book.parent_id IS NOT NULL
939                     UNION
940                     SELECT ancestor.id, book.parent_id
941                     FROM ancestry AS ancestor, catalogue_book AS book
942                     WHERE ancestor.parent_id = book.id
943                         AND book.parent_id IS NOT NULL
944                     )
945                 INSERT INTO catalogue_book_ancestor
946                     (from_book_id, to_book_id)
947                     SELECT id, parent_id
948                     FROM ancestry
949                     ORDER BY id;
950                 """)
951         else:
952             cursor.execute("DELETE FROM catalogue_book_ancestor")
953             for b in cls.objects.exclude(parent=None):
954                 parent = b.parent
955                 while parent is not None:
956                     b.ancestor.add(parent)
957                     parent = parent.parent
958
959     @property
960     def ancestors(self):
961         if self.parent:
962             for anc in self.parent.ancestors:
963                 yield anc
964             yield self.parent
965         else:
966             return []
967
968     def clear_cache(self):
969         clear_cached_renders(self.mini_box)
970         clear_cached_renders(self.mini_box_nolink)
971
972     def cover_info(self, inherit=True):
973         """Returns a dictionary to serve as fallback for BookInfo.
974
975         For now, the only thing inherited is the cover image.
976         """
977         need = False
978         info = {}
979         for field in ('cover_url', 'cover_by', 'cover_source'):
980             val = self.get_extra_info_json().get(field)
981             if val:
982                 info[field] = val
983             else:
984                 need = True
985         if inherit and need and self.parent is not None:
986             parent_info = self.parent.cover_info()
987             parent_info.update(info)
988             info = parent_info
989         return info
990
991     def related_themes(self):
992         return Tag.objects.usage_for_queryset(
993             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
994             counts=True).filter(category='theme').order_by('-count')
995
996     def parent_cover_changed(self):
997         """Called when parent book's cover image is changed."""
998         if not self.cover_info(inherit=False):
999             if 'cover' not in app_settings.DONT_BUILD:
1000                 self.cover.build_delay()
1001                 self.cover_clean.build_delay()
1002                 self.cover_thumb.build_delay()
1003                 self.cover_api_thumb.build_delay()
1004                 self.simple_cover.build_delay()
1005                 self.cover_ebookpoint.build_delay()
1006             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
1007                 if format_ not in app_settings.DONT_BUILD:
1008                     getattr(self, '%s_file' % format_).build_delay()
1009             for child in self.children.all():
1010                 child.parent_cover_changed()
1011
1012     def other_versions(self):
1013         """Find other versions (i.e. in other languages) of the book."""
1014         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
1015
1016     def parents(self):
1017         books = []
1018         parent = self.parent
1019         while parent is not None:
1020             books.insert(0, parent)
1021             parent = parent.parent
1022         return books
1023
1024     def pretty_title(self, html_links=False):
1025         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1026         books = self.parents() + [self]
1027         names.extend([(b.title, b.get_absolute_url()) for b in books])
1028
1029         if html_links:
1030             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1031         else:
1032             names = [tag[0] for tag in names]
1033         return ', '.join(names)
1034
1035     def publisher(self):
1036         publisher = self.get_extra_info_json()['publisher']
1037         if isinstance(publisher, str):
1038             return publisher
1039         elif isinstance(publisher, list):
1040             return ', '.join(publisher)
1041
1042     def get_recommended(self, limit=4):
1043         books_qs = type(self).objects.filter(findable=True)
1044         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1045         books = type(self).tagged.related_to(self, books_qs)[:limit]
1046         return books
1047
1048     @classmethod
1049     def tagged_top_level(cls, tags):
1050         """ Returns top-level books tagged with `tags`.
1051
1052         It only returns those books which don't have ancestors which are
1053         also tagged with those tags.
1054
1055         """
1056         objects = cls.tagged.with_all(tags)
1057         return objects.filter(findable=True).exclude(ancestor__in=objects)
1058
1059     @classmethod
1060     def book_list(cls, book_filter=None):
1061         """Generates a hierarchical listing of all books.
1062
1063         Books are optionally filtered with a test function.
1064
1065         """
1066
1067         books_by_parent = {}
1068         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1069         if book_filter:
1070             books = books.filter(book_filter).distinct()
1071
1072             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1073             for book in books.iterator():
1074                 parent = book.parent_id
1075                 if parent not in book_ids:
1076                     parent = None
1077                 books_by_parent.setdefault(parent, []).append(book)
1078         else:
1079             for book in books.iterator():
1080                 books_by_parent.setdefault(book.parent_id, []).append(book)
1081
1082         orphans = []
1083         books_by_author = OrderedDict()
1084         for tag in Tag.objects.filter(category='author').iterator():
1085             books_by_author[tag] = []
1086
1087         for book in books_by_parent.get(None, ()):
1088             authors = list(book.authors().only('pk'))
1089             if authors:
1090                 for author in authors:
1091                     books_by_author[author].append(book)
1092             else:
1093                 orphans.append(book)
1094
1095         return books_by_author, orphans, books_by_parent
1096
1097     _audiences_pl = {
1098         "SP": (1, "szkoła podstawowa"),
1099         "SP1": (1, "szkoła podstawowa"),
1100         "SP2": (1, "szkoła podstawowa"),
1101         "SP3": (1, "szkoła podstawowa"),
1102         "P": (1, "szkoła podstawowa"),
1103         "G": (2, "gimnazjum"),
1104         "L": (3, "liceum"),
1105         "LP": (3, "liceum"),
1106     }
1107
1108     def audiences_pl(self):
1109         audiences = self.get_extra_info_json().get('audiences', [])
1110         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1111         return [a[1] for a in audiences]
1112
1113     def stage_note(self):
1114         stage = self.get_extra_info_json().get('stage')
1115         if stage and stage < '0.4':
1116             return (_('Ten utwór wymaga uwspółcześnienia'),
1117                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1118         else:
1119             return None, None
1120
1121     def choose_fragments(self, number):
1122         fragments = self.fragments.order_by()
1123         fragments_count = fragments.count()
1124         if not fragments_count and self.children.exists():
1125             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1126             fragments_count = fragments.count()
1127         if fragments_count:
1128             if fragments_count > number:
1129                 offset = randint(0, fragments_count - number)
1130             else:
1131                 offset = 0
1132             return fragments[offset : offset + number]
1133         elif self.parent:
1134             return self.parent.choose_fragments(number)
1135         else:
1136             return []
1137
1138     def choose_fragment(self):
1139         fragments = self.choose_fragments(1)
1140         if fragments:
1141             return fragments[0]
1142         else:
1143             return None
1144
1145     def fragment_data(self):
1146         fragment = self.choose_fragment()
1147         if fragment:
1148             return {
1149                 'title': fragment.book.pretty_title(),
1150                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1151             }
1152         else:
1153             return None
1154
1155     def update_popularity(self):
1156         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1157         try:
1158             pop = self.popularity
1159             pop.count = count
1160             pop.save()
1161         except BookPopularity.DoesNotExist:
1162             BookPopularity.objects.create(book=self, count=count)
1163
1164     def ridero_link(self):
1165         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1166
1167     def elevenreader_link(self):
1168         first_text = self.get_first_text()
1169         if first_text is None:
1170             return None
1171         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1172
1173     def content_warnings(self):
1174         warnings_def = {
1175             'wulgaryzmy': _('wulgaryzmy'),
1176         }
1177         warnings = self.get_extra_info_json().get('content_warnings', [])
1178         warnings = [
1179             warnings_def.get(w, w)
1180             for w in warnings
1181         ]
1182         warnings.sort()
1183         return warnings
1184
1185     def full_sort_key(self):
1186         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1187
1188     def cover_color(self):
1189         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1190
1191     @cached_render('catalogue/book_mini_box.html')
1192     def mini_box(self):
1193         return {
1194             'book': self
1195         }
1196
1197     @cached_render('catalogue/book_mini_box.html')
1198     def mini_box_nolink(self):
1199         return {
1200             'book': self,
1201             'no_link': True,
1202         }
1203
1204
1205 class BookPopularity(models.Model):
1206     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1207     count = models.IntegerField(default=0, db_index=True)