Use actual sync for bookmarks.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from slugify import slugify
11 from sortify import sortify
12 from urllib.request import urlretrieve
13 from django.apps import apps
14 from django.conf import settings
15 from django.db import connection, models, transaction
16 import django.dispatch
17 from django.contrib.contenttypes.fields import GenericRelation
18 from django.template.loader import render_to_string
19 from django.urls import reverse
20 from django.utils.translation import gettext_lazy as _, get_language
21 from fnpdjango.storage import BofhFileSystemStorage
22 from lxml import html
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from librarian.builders import builders
26 from newtagging import managers
27 from catalogue import constants
28 from catalogue import fields
29 from catalogue.models import Tag, Fragment, BookMedia
30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
31 from catalogue.models.tag import prefetched_relations
32 from catalogue import app_settings
33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
34
# Single storage instance passed as ``storage=`` to several file fields
# declared on Book (XML/HTML/ebook files and the main cover).
bofh_storage = BofhFileSystemStorage()
36
37
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree through ``parent`` (reverse name: ``children``), with
    ``parent_number`` giving the position among siblings.  ``ancestor``
    (reverse name: ``descendant``) is a separate, non-editable many-to-many
    relation to other books.
    """
    title = models.CharField('tytuł', max_length=32767)
    # Both sort keys are recomputed on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded dict stored as text; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Written by set_audio_length() using format_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state; save() generates preview_key when preview is set and
    # no key exists yet.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names that has_media()/get_media() resolve to a
    # "<format>_file" field on this model instead of a BookMedia row.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    # Maintained by update_has_audio().
    has_audio = models.BooleanField(default=False)

    # Model-level signals; ``published`` is sent at the end of
    # from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug is already taken and
        # overwriting was not requested.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
117
118     def __str__(self):
119         return self.title
120
121     def get_extra_info_json(self):
122         return json.loads(self.extra_info or '{}')
123
124     def get_initial(self):
125         try:
126             return re.search(r'\w', self.title, re.U).group(0)
127         except AttributeError:
128             return ''
129
130     def authors(self):
131         return self.tags.filter(category='author')
132
133     def epochs(self):
134         return self.tags.filter(category='epoch')
135
136     def genres(self):
137         return self.tags.filter(category='genre')
138
139     def kinds(self):
140         return self.tags.filter(category='kind')
141
142     def tag_unicode(self, category):
143         relations = prefetched_relations(self, category)
144         if relations:
145             return ', '.join(rel.tag.name for rel in relations)
146         else:
147             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
148
149     def tags_by_category(self):
150         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
151
152     def author_unicode(self):
153         return self.cached_author
154
155     def kind_unicode(self):
156         return self.tag_unicode('kind')
157
158     def epoch_unicode(self):
159         return self.tag_unicode('epoch')
160
161     def genre_unicode(self):
162         return self.tag_unicode('genre')
163
164     def translator(self):
165         translators = self.get_extra_info_json().get('translators')
166         if not translators:
167             return None
168         if len(translators) > 3:
169             translators = translators[:2]
170             others = ' i inni'
171         else:
172             others = ''
173         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
174
175     def cover_source(self):
176         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
177
178     @property
179     def isbn_pdf(self):
180         return self.get_extra_info_json().get('isbn_pdf')
181
182     @property
183     def isbn_epub(self):
184         return self.get_extra_info_json().get('isbn_epub')
185
186     @property
187     def isbn_mobi(self):
188         return self.get_extra_info_json().get('isbn_mobi')
189
190     def is_accessible_to(self, user):
191         if not self.preview:
192             return True
193         if not user.is_authenticated:
194             return False
195         Membership = apps.get_model('club', 'Membership')
196         if Membership.is_active_for(user):
197             return True
198         Funding = apps.get_model('funding', 'Funding')
199         if Funding.objects.filter(user=user, offer__book=self):
200             return True
201         return False
202
203     def save(self, force_insert=False, force_update=False, **kwargs):
204         from sortify import sortify
205
206         self.sort_key = sortify(self.title)[:120]
207         self.title = str(self.title)  # ???
208
209         try:
210             author = self.authors().first().sort_key
211         except AttributeError:
212             author = ''
213         self.sort_key_author = author
214
215         self.cached_author = self.tag_unicode('author')
216         self.has_audience = 'audience' in self.get_extra_info_json()
217
218         if self.preview and not self.preview_key:
219             self.preview_key = get_random_hash(self.slug)[:32]
220
221         ret = super(Book, self).save(force_insert, force_update, **kwargs)
222
223         return ret
224
225     def get_absolute_url(self):
226         return reverse('book_detail', args=[self.slug])
227
228     def gallery_path(self):
229         return gallery_path(self.slug)
230
231     def gallery_url(self):
232         return gallery_url(self.slug)
233
234     def get_first_text(self):
235         if self.html_file:
236             return self
237         child = self.children.all().order_by('parent_number').first()
238         if child is not None:
239             return child.get_first_text()
240
241     def get_last_text(self):
242         if self.html_file:
243             return self
244         child = self.children.all().order_by('parent_number').last()
245         if child is not None:
246             return child.get_last_text()
247
248     def get_prev_text(self):
249         if not self.parent:
250             return None
251         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
252         if sibling is not None:
253             return sibling.get_last_text()
254
255         if self.parent.html_file:
256             return self.parent
257
258         return self.parent.get_prev_text()
259
260     def get_next_text(self, inside=True):
261         if inside:
262             child = self.children.order_by('parent_number').first()
263             if child is not None:
264                 return child.get_first_text()
265
266         if not self.parent:
267             return None
268         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
269         if sibling is not None:
270             return sibling.get_first_text()
271         return self.parent.get_next_text(inside=False)
272
273     def get_siblings(self):
274         if not self.parent:
275             return []
276         return self.parent.children.all().order_by('parent_number')
277
278     def get_children(self):
279         return self.children.all().order_by('parent_number')
280
281     @property
282     def name(self):
283         return self.title
284
285     def language_code(self):
286         return constants.LANGUAGES_3TO2.get(self.language, self.language)
287
288     def language_name(self):
289         return dict(settings.LANGUAGES).get(self.language_code(), "")
290
291     def is_foreign(self):
292         return self.language_code() != settings.LANGUAGE_CODE
293
294     def set_audio_length(self):
295         length = self.get_audio_length()
296         if length > 0:
297             self.audio_length = self.format_audio_length(length)
298             self.save()
299
300     @staticmethod
301     def format_audio_length(seconds):
302         """
303         >>> Book.format_audio_length(1)
304         '0:01'
305         >>> Book.format_audio_length(3661)
306         '1:01:01'
307         """
308         if seconds < 60*60:
309             minutes = seconds // 60
310             seconds = seconds % 60
311             return '%d:%02d' % (minutes, seconds)
312         else:
313             hours = seconds // 3600
314             minutes = seconds % 3600 // 60
315             seconds = seconds % 60
316             return '%d:%02d:%02d' % (hours, minutes, seconds)
317
318     def get_audio_length(self):
319         total = 0
320         for media in self.get_mp3() or ():
321             total += app_settings.GET_MP3_LENGTH(media.file.path)
322         return int(total)
323
324     def get_time(self):
325         return round(self.xml_file.size / 1000 * 40)
326     
327     def has_media(self, type_):
328         if type_ in Book.formats:
329             return bool(getattr(self, "%s_file" % type_))
330         else:
331             return self.media.filter(type=type_).exists()
332
333     def get_media(self, type_):
334         if self.has_media(type_):
335             if type_ in Book.formats:
336                 return getattr(self, "%s_file" % type_)
337             else:
338                 return self.media.filter(type=type_)
339         else:
340             return None
341
342     def get_mp3(self):
343         return self.get_media("mp3")
344
345     def get_odt(self):
346         return self.get_media("odt")
347
348     def get_ogg(self):
349         return self.get_media("ogg")
350
351     def get_daisy(self):
352         return self.get_media("daisy")
353
354     def get_audio_epub(self):
355         return self.get_media("audio.epub")
356
357     def media_url(self, format_):
358         media = self.get_media(format_)
359         if media:
360             if self.preview:
361                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
362             else:
363                 return media.url
364         else:
365             return None
366
367     def html_url(self):
368         return self.media_url('html')
369
370     def html_nonotes_url(self):
371         return self.media_url('html_nonotes')
372
373     def pdf_url(self):
374         return self.media_url('pdf')
375
376     def epub_url(self):
377         return self.media_url('epub')
378
379     def mobi_url(self):
380         return self.media_url('mobi')
381
382     def txt_url(self):
383         return self.media_url('txt')
384
385     def fb2_url(self):
386         return self.media_url('fb2')
387
388     def xml_url(self):
389         return self.media_url('xml')
390
391     def has_description(self):
392         return len(self.description) > 0
393     has_description.short_description = 'opis'
394     has_description.boolean = True
395
396     def has_mp3_file(self):
397         return self.has_media("mp3")
398     has_mp3_file.short_description = 'MP3'
399     has_mp3_file.boolean = True
400
401     def has_ogg_file(self):
402         return self.has_media("ogg")
403     has_ogg_file.short_description = 'OGG'
404     has_ogg_file.boolean = True
405
406     def has_daisy_file(self):
407         return self.has_media("daisy")
408     has_daisy_file.short_description = 'DAISY'
409     has_daisy_file.boolean = True
410
411     def has_sync_file(self):
412         return settings.FEATURE_SYNCHRO and self.has_media("sync")
413
414     def build_sync_file(self):
415         from lxml import html
416         from django.core.files.base import ContentFile
417         with self.html_file.open('rb') as f:
418             h = html.fragment_fromstring(f.read().decode('utf-8'))
419
420         durations = [
421             m['mp3'].duration
422             for m in self.get_audiobooks()[0]
423         ]
424         if settings.MOCK_DURATIONS:
425             durations = settings.MOCK_DURATIONS
426
427         sync = []
428         ts = None
429         sid = 1
430         dirty = False
431         for elem in h.iter():
432             if elem.get('data-audio-ts'):
433                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
434                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
435                 # check if inside verse
436                 p = elem.getparent()
437                 while p is not None:
438                     # Workaround for missing ids.
439                     if 'verse' in p.get('class', ''):
440                         if not p.get('id'):
441                             p.set('id', f'syn{sid}')
442                             dirty = True
443                             sid += 1
444                         sync.append((ts, p.get('id')))
445                         ts = None
446                         break
447                     p = p.getparent()
448             elif ts:
449                 cls = elem.get('class', '')
450                 # Workaround for missing ids.
451                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
452                     if not elem.get('id'):
453                         elem.set('id', f'syn{sid}')
454                         dirty = True
455                         sid += 1
456                     sync.append((ts, elem.get('id')))
457                     ts = None
458         if dirty:
459             htext = html.tostring(h, encoding='utf-8')
460             with open(self.html_file.path, 'wb') as f:
461                 f.write(htext)
462         try:
463             bm = self.media.get(type='sync')
464         except:
465             bm = BookMedia(book=self, type='sync')
466         sync = (
467             '27\n' + '\n'.join(
468                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
469             )).encode('latin1')
470         bm.file.save(
471             None, ContentFile(sync)
472             )
473
474     def get_sync(self):
475         if not self.has_sync_file():
476             return []
477         with self.get_media('sync').first().file.open('r') as f:
478             sync = f.read().split('\n')
479         offset = float(sync[0])
480         items = []
481         for line in sync[1:]:
482             if not line:
483                 continue
484             start, end, elid = line.split()
485             items.append([elid, float(start) + offset])
486         return items
487
488     def sync_ts(self, ts):
489         elid = None
490         for cur_id, t in self.get_sync():
491             if ts >= t:
492                 elid = cur_id
493             else:
494                 break
495         return elid
496
497     def sync_elid(self, elid):
498         for cur_id, t in self.get_sync():
499             if cur_id == elid:
500                 return t
501
502     def has_audio_epub_file(self):
503         return self.has_media("audio.epub")
504
505     @property
506     def media_daisy(self):
507         return self.get_media('daisy')
508
509     @property
510     def media_audio_epub(self):
511         return self.get_media('audio.epub')
512
513     def get_audiobooks(self, with_children=False, processing=False):
514         ogg_files = {}
515         for m in self.media.filter(type='ogg').order_by().iterator():
516             ogg_files[m.name] = m
517
518         audiobooks = []
519         projects = set()
520         total_duration = 0
521         for mp3 in self.media.filter(type='mp3').iterator():
522             # ogg files are always from the same project
523             meta = mp3.get_extra_info_json()
524             project = meta.get('project')
525             if not project:
526                 # temporary fallback
527                 project = 'CzytamySłuchając'
528
529             projects.add((project, meta.get('funded_by', '')))
530             total_duration += mp3.duration or 0
531
532             media = {'mp3': mp3}
533
534             ogg = ogg_files.get(mp3.name)
535             if ogg:
536                 media['ogg'] = ogg
537             audiobooks.append(media)
538
539         if with_children:
540             for child in self.get_children():
541                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
542                     with_children=True, processing=True)
543                 audiobooks.append({'part': child})
544                 audiobooks += ch_audiobooks
545                 projects.update(ch_projects)
546                 total_duration += ch_duration
547
548         if not processing:
549             projects = sorted(projects)
550             total_duration = '%d:%02d' % (
551                 total_duration // 60,
552                 total_duration % 60
553             )
554
555         return audiobooks, projects, total_duration
556
557     def get_audiobooks_with_children(self):
558         return self.get_audiobooks(with_children=True)
559     
560     def wldocument(self, parse_dublincore=True, inherit=True):
561         from catalogue.import_utils import ORMDocProvider
562         from librarian.parser import WLDocument
563
564         if inherit and self.parent:
565             meta_fallbacks = self.parent.cover_info()
566         else:
567             meta_fallbacks = None
568
569         return WLDocument.from_file(
570             self.xml_file.path,
571             provider=ORMDocProvider(self),
572             parse_dublincore=parse_dublincore,
573             meta_fallbacks=meta_fallbacks)
574
575     def wldocument2(self):
576         from catalogue.import_utils import ORMDocProvider
577         from librarian.document import WLDocument
578         doc = WLDocument(
579             self.xml_file.path,
580             provider=ORMDocProvider(self)
581         )
582         doc.meta.update(self.cover_info())
583         return doc
584
585
586     @staticmethod
587     def zip_format(format_):
588         def pretty_file_name(book):
589             return "%s/%s.%s" % (
590                 book.get_extra_info_json()['author'],
591                 book.slug,
592                 format_)
593
594         field_name = "%s_file" % format_
595         field = getattr(Book, field_name)
596         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
597         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
598         return create_zip(paths, field.ZIP)
599
600     def zip_audiobooks(self, format_):
601         bm = BookMedia.objects.filter(book=self, type=format_)
602         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
603         licenses = set()
604         for m in bm:
605             license = constants.LICENSES.get(
606                 m.get_extra_info_json().get('license'), {}
607             ).get('locative')
608             if license:
609                 licenses.add(license)
610         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
611             'licenses': licenses,
612             'meta': self.wldocument2().meta,
613         })
614         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
615
616     def search_index(self, index=None):
617         if not self.findable:
618             return
619         from search.index import Index
620         Index.index_book(self)
621
622     # will make problems in conjunction with paid previews
623     def download_pictures(self, remote_gallery_url):
624         # This is only needed for legacy relative image paths.
625         gallery_path = self.gallery_path()
626         # delete previous files, so we don't include old files in ebooks
627         if os.path.isdir(gallery_path):
628             for filename in os.listdir(gallery_path):
629                 file_path = os.path.join(gallery_path, filename)
630                 os.unlink(file_path)
631         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
632         if ilustr_elements:
633             makedirs(gallery_path)
634             for ilustr in ilustr_elements:
635                 ilustr_src = ilustr.get('src')
636                 if '/' in ilustr_src:
637                     continue
638                 ilustr_path = os.path.join(gallery_path, ilustr_src)
639                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
640
641     def load_abstract(self):
642         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
643         if abstract is not None:
644             self.abstract = transform_abstrakt(abstract)
645         else:
646             self.abstract = ''
647
648     def load_toc(self):
649         self.toc = ''
650         if self.html_file:
651             parser = html.HTMLParser(encoding='utf-8')
652             tree = html.parse(self.html_file.path, parser=parser)
653             toc = tree.find('//div[@id="toc"]/ol')
654             if toc is None or not len(toc):
655                 return
656             html_link = reverse('book_text', args=[self.slug])
657             for a in toc.findall('.//a'):
658                 a.attrib['href'] = html_link + a.attrib['href']
659             self.toc = html.tostring(toc, encoding='unicode')
660             # div#toc
661
662     @classmethod
663     def from_xml_file(cls, xml_file, **kwargs):
664         from django.core.files import File
665         from librarian import dcparser
666
667         # use librarian to parse meta-data
668         book_info = dcparser.parse(xml_file)
669
670         if not isinstance(xml_file, File):
671             xml_file = File(open(xml_file))
672
673         try:
674             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
675         finally:
676             xml_file.close()
677
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; the code reads ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                ``to_dict()`` and, when present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of names ('cover' or ebook formats) to skip
                building; merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being falsy).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview lasting that many days from today.
            findable: stored on the book's ``findable`` field.
            logo, logo_mono, logo_alt: when truthy, stored in ``extra_info``
                under the same keys.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name are assigned directly; previously
        # saved 'set' tags (shelves) are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order, numbering them from 0.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # These formats are only built for books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
806
807     def update_references(self):
808         Entity = apps.get_model('references', 'Entity')
809         doc = self.wldocument2()
810         doc._compat_assign_section_ids()
811         doc._compat_assign_ordered_ids()
812         refs = {}
813         for ref_elem in doc.references():
814             uri = ref_elem.attrib.get('href', '')
815             if not uri:
816                 continue
817             if uri in refs:
818                 ref = refs[uri]
819             else:
820                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
821                 if entity_created:
822                     try:
823                         entity.populate()
824                     except:
825                         pass
826                     else:
827                         entity.save()
828                 ref, ref_created = entity.reference_set.get_or_create(book=self)
829                 refs[uri] = ref
830                 if not ref_created:
831                     ref.occurence_set.all().delete()
832             sec = ref_elem.get_link()
833             m = re.match(r'sec(\d+)', sec)
834             assert m is not None
835             sec = int(m.group(1))
836             snippet = ref_elem.get_snippet()
837             b = builders['html-snippet']()
838             for s in snippet:
839                 s.html_build(b)
840             html = b.output().get_bytes().decode('utf-8')
841
842             ref.occurence_set.create(
843                 section=sec,
844                 html=html
845             )
846         self.reference_set.exclude(entity__uri__in=refs).delete()
847
848     @property
849     def references(self):
850         return self.reference_set.all().select_related('entity')
851
852     def update_has_audio(self):
853         self.has_audio = False
854         if self.media.filter(type='mp3').exists():
855             self.has_audio = True
856         if self.descendant.filter(has_audio=True).exists():
857             self.has_audio = True
858         self.save(update_fields=['has_audio'])
859         if self.parent is not None:
860             self.parent.update_has_audio()
861
862     def update_narrators(self):
863         narrator_names = set()
864         for bm in self.media.filter(type='mp3'):
865             narrator_names.update(set(
866                 a.strip() for a in re.split(r',|\si\s', bm.artist)
867             ))
868         narrators = []
869
870         for name in narrator_names:
871             if not name: continue
872             slug = slugify(name)
873             try:
874                 t = Tag.objects.get(category='author', slug=slug)
875             except Tag.DoesNotExist:
876                 sort_key = sortify(
877                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
878                 )
879                 t = Tag.objects.create(
880                     category='author',
881                     name_pl=name,
882                     slug=slug,
883                     sort_key=sort_key,
884                 )
885             narrators.append(t)
886         self.narrators.set(narrators)
887
888     @classmethod
889     @transaction.atomic
890     def repopulate_ancestors(cls):
891         """Fixes the ancestry cache."""
892         # TODO: table names
893         cursor = connection.cursor()
894         if connection.vendor == 'postgres':
895             cursor.execute("TRUNCATE catalogue_book_ancestor")
896             cursor.execute("""
897                 WITH RECURSIVE ancestry AS (
898                     SELECT book.id, book.parent_id
899                     FROM catalogue_book AS book
900                     WHERE book.parent_id IS NOT NULL
901                     UNION
902                     SELECT ancestor.id, book.parent_id
903                     FROM ancestry AS ancestor, catalogue_book AS book
904                     WHERE ancestor.parent_id = book.id
905                         AND book.parent_id IS NOT NULL
906                     )
907                 INSERT INTO catalogue_book_ancestor
908                     (from_book_id, to_book_id)
909                     SELECT id, parent_id
910                     FROM ancestry
911                     ORDER BY id;
912                 """)
913         else:
914             cursor.execute("DELETE FROM catalogue_book_ancestor")
915             for b in cls.objects.exclude(parent=None):
916                 parent = b.parent
917                 while parent is not None:
918                     b.ancestor.add(parent)
919                     parent = parent.parent
920
921     @property
922     def ancestors(self):
923         if self.parent:
924             for anc in self.parent.ancestors:
925                 yield anc
926             yield self.parent
927         else:
928             return []
929
930     def clear_cache(self):
931         clear_cached_renders(self.mini_box)
932         clear_cached_renders(self.mini_box_nolink)
933
934     def cover_info(self, inherit=True):
935         """Returns a dictionary to serve as fallback for BookInfo.
936
937         For now, the only thing inherited is the cover image.
938         """
939         need = False
940         info = {}
941         for field in ('cover_url', 'cover_by', 'cover_source'):
942             val = self.get_extra_info_json().get(field)
943             if val:
944                 info[field] = val
945             else:
946                 need = True
947         if inherit and need and self.parent is not None:
948             parent_info = self.parent.cover_info()
949             parent_info.update(info)
950             info = parent_info
951         return info
952
953     def related_themes(self):
954         return Tag.objects.usage_for_queryset(
955             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
956             counts=True).filter(category='theme').order_by('-count')
957
958     def parent_cover_changed(self):
959         """Called when parent book's cover image is changed."""
960         if not self.cover_info(inherit=False):
961             if 'cover' not in app_settings.DONT_BUILD:
962                 self.cover.build_delay()
963                 self.cover_clean.build_delay()
964                 self.cover_thumb.build_delay()
965                 self.cover_api_thumb.build_delay()
966                 self.simple_cover.build_delay()
967                 self.cover_ebookpoint.build_delay()
968             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
969                 if format_ not in app_settings.DONT_BUILD:
970                     getattr(self, '%s_file' % format_).build_delay()
971             for child in self.children.all():
972                 child.parent_cover_changed()
973
974     def other_versions(self):
975         """Find other versions (i.e. in other languages) of the book."""
976         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
977
978     def parents(self):
979         books = []
980         parent = self.parent
981         while parent is not None:
982             books.insert(0, parent)
983             parent = parent.parent
984         return books
985
986     def pretty_title(self, html_links=False):
987         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
988         books = self.parents() + [self]
989         names.extend([(b.title, b.get_absolute_url()) for b in books])
990
991         if html_links:
992             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
993         else:
994             names = [tag[0] for tag in names]
995         return ', '.join(names)
996
997     def publisher(self):
998         publisher = self.get_extra_info_json()['publisher']
999         if isinstance(publisher, str):
1000             return publisher
1001         elif isinstance(publisher, list):
1002             return ', '.join(publisher)
1003
1004     @classmethod
1005     def tagged_top_level(cls, tags):
1006         """ Returns top-level books tagged with `tags`.
1007
1008         It only returns those books which don't have ancestors which are
1009         also tagged with those tags.
1010
1011         """
1012         objects = cls.tagged.with_all(tags)
1013         return objects.filter(findable=True).exclude(ancestor__in=objects)
1014
1015     @classmethod
1016     def book_list(cls, book_filter=None):
1017         """Generates a hierarchical listing of all books.
1018
1019         Books are optionally filtered with a test function.
1020
1021         """
1022
1023         books_by_parent = {}
1024         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1025         if book_filter:
1026             books = books.filter(book_filter).distinct()
1027
1028             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1029             for book in books.iterator():
1030                 parent = book.parent_id
1031                 if parent not in book_ids:
1032                     parent = None
1033                 books_by_parent.setdefault(parent, []).append(book)
1034         else:
1035             for book in books.iterator():
1036                 books_by_parent.setdefault(book.parent_id, []).append(book)
1037
1038         orphans = []
1039         books_by_author = OrderedDict()
1040         for tag in Tag.objects.filter(category='author').iterator():
1041             books_by_author[tag] = []
1042
1043         for book in books_by_parent.get(None, ()):
1044             authors = list(book.authors().only('pk'))
1045             if authors:
1046                 for author in authors:
1047                     books_by_author[author].append(book)
1048             else:
1049                 orphans.append(book)
1050
1051         return books_by_author, orphans, books_by_parent
1052
    # Maps audience codes from the book's extra info to pairs of
    # (sort rank, Polish label). audiences_pl() sorts by the pair and
    # returns the labels; several codes share one pair, so they collapse
    # into a single label there.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1063
1064     def audiences_pl(self):
1065         audiences = self.get_extra_info_json().get('audiences', [])
1066         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1067         return [a[1] for a in audiences]
1068
1069     def stage_note(self):
1070         stage = self.get_extra_info_json().get('stage')
1071         if stage and stage < '0.4':
1072             return (_('Ten utwór wymaga uwspółcześnienia'),
1073                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1074         else:
1075             return None, None
1076
1077     def choose_fragments(self, number):
1078         fragments = self.fragments.order_by()
1079         fragments_count = fragments.count()
1080         if not fragments_count and self.children.exists():
1081             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1082             fragments_count = fragments.count()
1083         if fragments_count:
1084             if fragments_count > number:
1085                 offset = randint(0, fragments_count - number)
1086             else:
1087                 offset = 0
1088             return fragments[offset : offset + number]
1089         elif self.parent:
1090             return self.parent.choose_fragments(number)
1091         else:
1092             return []
1093
1094     def choose_fragment(self):
1095         fragments = self.choose_fragments(1)
1096         if fragments:
1097             return fragments[0]
1098         else:
1099             return None
1100
1101     def fragment_data(self):
1102         fragment = self.choose_fragment()
1103         if fragment:
1104             return {
1105                 'title': fragment.book.pretty_title(),
1106                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1107             }
1108         else:
1109             return None
1110
1111     def update_popularity(self):
1112         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1113         try:
1114             pop = self.popularity
1115             pop.count = count
1116             pop.save()
1117         except BookPopularity.DoesNotExist:
1118             BookPopularity.objects.create(book=self, count=count)
1119
1120     def ridero_link(self):
1121         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1122
1123     def elevenreader_link(self):
1124         first_text = self.get_first_text()
1125         if first_text is None:
1126             return None
1127         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1128
1129     def content_warnings(self):
1130         warnings_def = {
1131             'wulgaryzmy': _('wulgaryzmy'),
1132         }
1133         warnings = self.get_extra_info_json().get('content_warnings', [])
1134         warnings = [
1135             warnings_def.get(w, w)
1136             for w in warnings
1137         ]
1138         warnings.sort()
1139         return warnings
1140
1141     def full_sort_key(self):
1142         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1143
1144     def cover_color(self):
1145         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1146
1147     @cached_render('catalogue/book_mini_box.html')
1148     def mini_box(self):
1149         return {
1150             'book': self
1151         }
1152
1153     @cached_render('catalogue/book_mini_box.html')
1154     def mini_box_nolink(self):
1155         return {
1156             'book': self,
1157             'no_link': True,
1158         }
1159
1160
class BookPopularity(models.Model):
    """Popularity counter stored in a separate one-to-one record per book."""
    # The book this counter belongs to; reachable from the book as
    # ``popularity`` and deleted together with it (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed integer counter, 0 by default.
    count = models.IntegerField(default=0, db_index=True)