Serve partner audiobooks with isbns
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from slugify import slugify
11 from sortify import sortify
12 from urllib.request import urlretrieve
13 from django.apps import apps
14 from django.conf import settings
15 from django.db import connection, models, transaction
16 import django.dispatch
17 from django.contrib.contenttypes.fields import GenericRelation
18 from django.template.loader import render_to_string
19 from django.urls import reverse
20 from django.utils.translation import gettext_lazy as _, get_language
21 from fnpdjango.storage import BofhFileSystemStorage
22 from lxml import html
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from librarian.builders import builders
26 from newtagging import managers
27 from catalogue import constants
28 from catalogue import fields
29 from catalogue.models import Tag, Fragment, BookMedia
30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
31 from catalogue.models.tag import prefetched_relations
32 from catalogue import app_settings
33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
34
35 bofh_storage = BofhFileSystemStorage()
36
37
38 class Book(models.Model):
39     """Represents a book imported from WL-XML."""
40     title = models.CharField('tytuł', max_length=32767)
41     sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
42     sort_key_author = models.CharField(
43         'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
44     slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
45     common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
46     language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
47     description = models.TextField('opis', blank=True)
48     license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
49     abstract = models.TextField('abstrakt', blank=True)
50     toc = models.TextField('spis treści', blank=True)
51     created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
52     changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
53     parent_number = models.IntegerField('numer w ramach rodzica', default=0)
54     extra_info = models.TextField('dodatkowe informacje', default='{}')
55     gazeta_link = models.CharField(blank=True, max_length=240)
56     wiki_link = models.CharField(blank=True, max_length=240)
57     print_on_demand = models.BooleanField('druk na żądanie', default=False)
58     recommended = models.BooleanField('polecane', default=False)
59     audio_length = models.CharField('długość audio', blank=True, max_length=8)
60     preview = models.BooleanField('prapremiera', default=False)
61     preview_until = models.DateField('prapremiera do', blank=True, null=True)
62     preview_key = models.CharField(max_length=32, blank=True, null=True)
63     findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
64     can_sell = models.BooleanField('do sprzedaży', default=True)
65     isbn_mp3 = models.CharField('ISBN audiobooka', max_length=32, blank=True)
66
67     # files generated during publication
68     xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
69     html_file = fields.HtmlField(storage=bofh_storage)
70     html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
71     fb2_file = fields.Fb2Field(storage=bofh_storage)
72     txt_file = fields.TxtField(storage=bofh_storage)
73     epub_file = fields.EpubField(storage=bofh_storage)
74     mobi_file = fields.MobiField(storage=bofh_storage)
75     pdf_file = fields.PdfField(storage=bofh_storage)
76
77     cover = fields.CoverField('okładka', storage=bofh_storage)
78     # Cleaner version of cover for thumbs
79     cover_clean = fields.CoverCleanField('czysta okładka')
80     cover_thumb = fields.CoverThumbField('miniatura okładki')
81     cover_api_thumb = fields.CoverApiThumbField(
82         'mniaturka okładki dla aplikacji')
83     simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
84     cover_ebookpoint = fields.CoverEbookpointField(
85         'okładka dla Ebookpoint')
86
87     ebook_formats = constants.EBOOK_FORMATS
88     formats = ebook_formats + ['html', 'xml', 'html_nonotes']
89
90     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
91     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
92
93     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
94     has_audience = models.BooleanField(default=False)
95
96     objects = models.Manager()
97     tagged = managers.ModelTaggedItemManager(Tag)
98     tags = managers.TagDescriptor(Tag)
99     tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
100     translators = models.ManyToManyField(Tag, blank=True)
101     narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
102     has_audio = models.BooleanField(default=False)
103     read_time = models.IntegerField(blank=True, null=True)
104     pages = models.IntegerField(blank=True, null=True)
105     
106     html_built = django.dispatch.Signal()
107     published = django.dispatch.Signal()
108
109     SORT_KEY_SEP = '$'
110
111     is_book = True
112
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass
115
    class Meta:
        # Default ordering: author sort key first, then the book's own sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
121
    def __str__(self):
        """Return the book's title."""
        return self.title
124
    def get_extra_info_json(self):
        """Return `extra_info` decoded from JSON; an empty value decodes to {}."""
        return json.loads(self.extra_info or '{}')
127
128     def get_initial(self):
129         try:
130             return re.search(r'\w', self.title, re.U).group(0)
131         except AttributeError:
132             return ''
133
    def authors(self):
        """Return this book's tags of category 'author'."""
        return self.tags.filter(category='author')
136
    def epochs(self):
        """Return this book's tags of category 'epoch'."""
        return self.tags.filter(category='epoch')
139
    def genres(self):
        """Return this book's tags of category 'genre'."""
        return self.tags.filter(category='genre')
142
    def kinds(self):
        """Return this book's tags of category 'kind'."""
        return self.tags.filter(category='kind')
145
146     def tag_unicode(self, category):
147         relations = prefetched_relations(self, category)
148         if relations:
149             return ', '.join(rel.tag.name for rel in relations)
150         else:
151             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
152
    def tags_by_category(self):
        """Return split_tags() of this book's tags, leaving out 'set' and 'theme' tags."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
155
    def author_unicode(self):
        """Return the cached author string (refreshed in save())."""
        return self.cached_author
158
    def kind_unicode(self):
        """Return the comma-separated names of this book's 'kind' tags."""
        return self.tag_unicode('kind')
161
    def epoch_unicode(self):
        """Return the comma-separated names of this book's 'epoch' tags."""
        return self.tag_unicode('epoch')
164
    def genre_unicode(self):
        """Return the comma-separated names of this book's 'genre' tags."""
        return self.tag_unicode('genre')
167
168     def translator(self):
169         translators = self.get_extra_info_json().get('translators')
170         if not translators:
171             return None
172         if len(translators) > 3:
173             translators = translators[:2]
174             others = ' i inni'
175         else:
176             others = ''
177         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
178
179     def cover_source(self):
180         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
181
    @property
    def isbn_pdf(self):
        """The 'isbn_pdf' entry of extra_info, or None if absent."""
        return self.get_extra_info_json().get('isbn_pdf')
185
    @property
    def isbn_epub(self):
        """The 'isbn_epub' entry of extra_info, or None if absent."""
        return self.get_extra_info_json().get('isbn_epub')
189
    @property
    def isbn_mobi(self):
        """The 'isbn_mobi' entry of extra_info, or None if absent."""
        return self.get_extra_info_json().get('isbn_mobi')
193
    @property
    def redakcja(self):
        """The 'about' entry of extra_info, or None if absent."""
        return self.get_extra_info_json().get('about')
197     
198     def is_accessible_to(self, user):
199         if not self.preview:
200             return True
201         if not user.is_authenticated:
202             return False
203         Membership = apps.get_model('club', 'Membership')
204         if Membership.is_active_for(user):
205             return True
206         Funding = apps.get_model('funding', 'Funding')
207         if Funding.objects.filter(user=user, offer__book=self):
208             return True
209         return False
210
211     def save(self, force_insert=False, force_update=False, **kwargs):
212         from sortify import sortify
213
214         self.sort_key = sortify(self.title)[:120]
215         self.title = str(self.title)  # ???
216
217         try:
218             author = self.authors().first().sort_key
219         except AttributeError:
220             author = ''
221         self.sort_key_author = author
222
223         self.cached_author = self.tag_unicode('author')
224         self.has_audience = 'audience' in self.get_extra_info_json()
225
226         if self.preview and not self.preview_key:
227             self.preview_key = get_random_hash(self.slug)[:32]
228
229         ret = super(Book, self).save(force_insert, force_update, **kwargs)
230
231         return ret
232
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' view for this book's slug."""
        return reverse('book_detail', args=[self.slug])
235
    def gallery_path(self):
        """Return the gallery path for this book's slug."""
        # Inside the method body the name resolves to the module-level
        # helper imported from catalogue.utils, not to this method.
        return gallery_path(self.slug)
238
    def gallery_url(self):
        """Return the gallery URL for this book's slug."""
        # Resolves to the module-level helper imported from catalogue.utils.
        return gallery_url(self.slug)
241
242     def get_first_text(self):
243         if self.html_file:
244             return self
245         child = self.children.all().order_by('parent_number').first()
246         if child is not None:
247             return child.get_first_text()
248
249     def get_last_text(self):
250         if self.html_file:
251             return self
252         child = self.children.all().order_by('parent_number').last()
253         if child is not None:
254             return child.get_last_text()
255
256     def get_prev_text(self):
257         if not self.parent:
258             return None
259         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
260         if sibling is not None:
261             return sibling.get_last_text()
262
263         if self.parent.html_file:
264             return self.parent
265
266         return self.parent.get_prev_text()
267
268     def get_next_text(self, inside=True):
269         if inside:
270             child = self.children.order_by('parent_number').first()
271             if child is not None:
272                 return child.get_first_text()
273
274         if not self.parent:
275             return None
276         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
277         if sibling is not None:
278             return sibling.get_first_text()
279         return self.parent.get_next_text(inside=False)
280
    def get_siblings(self):
        """Return the parent's children ordered by `parent_number`.

        The result includes this book itself.  Returns an empty list (not a
        queryset) when the book has no parent.
        """
        if not self.parent:
            return []
        return self.parent.children.all().order_by('parent_number')
285
    def get_children(self):
        """Return this book's children ordered by `parent_number`."""
        return self.children.all().order_by('parent_number')
288
    @property
    def name(self):
        """Alias for `title`."""
        return self.title
292
    def language_code(self):
        """Return `language` mapped through constants.LANGUAGES_3TO2.

        Codes missing from the mapping are returned unchanged.
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
295
    def language_name(self):
        """Return the settings.LANGUAGES name for language_code(), or "" if unknown."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
298
    def is_foreign(self):
        """Return True if the book's language code differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE
301
302     def set_audio_length(self):
303         length = self.get_audio_length()
304         if length > 0:
305             self.audio_length = self.format_audio_length(length)
306             self.save()
307
308     @staticmethod
309     def format_audio_length(seconds):
310         """
311         >>> Book.format_audio_length(1)
312         '0:01'
313         >>> Book.format_audio_length(3661)
314         '1:01:01'
315         """
316         if seconds < 60*60:
317             minutes = seconds // 60
318             seconds = seconds % 60
319             return '%d:%02d' % (minutes, seconds)
320         else:
321             hours = seconds // 3600
322             minutes = seconds % 3600 // 60
323             seconds = seconds % 60
324             return '%d:%02d:%02d' % (hours, minutes, seconds)
325
326     def get_audio_length(self):
327         total = 0
328         for media in self.get_mp3() or ():
329             total += app_settings.GET_MP3_LENGTH(media.file.path)
330         return int(total)
331
    def get_time(self):
        """Return an estimate derived from XML size: 40 per 1000 bytes, rounded.

        update_stats() stores the result as `read_time`; the unit is
        presumably seconds — TODO confirm.
        """
        return round(self.xml_file.size / 1000 * 40)
334     
335     def has_media(self, type_):
336         if type_ in Book.formats:
337             return bool(getattr(self, "%s_file" % type_))
338         else:
339             return self.media.filter(type=type_).exists()
340
341     def get_media(self, type_):
342         if self.has_media(type_):
343             if type_ in Book.formats:
344                 return getattr(self, "%s_file" % type_)
345             else:
346                 return self.media.filter(type=type_)
347         else:
348             return None
349
    def get_mp3(self):
        """Return the book's 'mp3' media, or None if there is none."""
        return self.get_media("mp3")
352
    def get_odt(self):
        """Return the book's 'odt' media, or None if there is none."""
        return self.get_media("odt")
355
    def get_ogg(self):
        """Return the book's 'ogg' media, or None if there is none."""
        return self.get_media("ogg")
358
    def get_daisy(self):
        """Return the book's 'daisy' media, or None if there is none."""
        return self.get_media("daisy")
361
    def get_audio_epub(self):
        """Return the book's 'audio.epub' media, or None if there is none."""
        return self.get_media("audio.epub")
364
365     def media_url(self, format_):
366         media = self.get_media(format_)
367         if media:
368             if self.preview:
369                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
370             else:
371                 return media.url
372         else:
373             return None
374
    def html_url(self):
        """Return the URL of the HTML file, or None if there is none."""
        return self.media_url('html')
377
    def html_nonotes_url(self):
        """Return the URL of the 'html_nonotes' file, or None if there is none."""
        return self.media_url('html_nonotes')
380
    def pdf_url(self):
        """Return the URL of the PDF file, or None if there is none."""
        return self.media_url('pdf')
383
    def epub_url(self):
        """Return the URL of the EPUB file, or None if there is none."""
        return self.media_url('epub')
386
    def mobi_url(self):
        """Return the URL of the MOBI file, or None if there is none."""
        return self.media_url('mobi')
389
    def txt_url(self):
        """Return the URL of the TXT file, or None if there is none."""
        return self.media_url('txt')
392
    def fb2_url(self):
        """Return the URL of the FB2 file, or None if there is none."""
        return self.media_url('fb2')
395
    def xml_url(self):
        """Return the URL of the XML file, or None if there is none."""
        return self.media_url('xml')
398
    def has_description(self):
        """Return True if the description is non-empty."""
        return len(self.description) > 0
    # Function attributes: a short label and a boolean-display flag.
    has_description.short_description = 'opis'
    has_description.boolean = True
403
    def has_mp3_file(self):
        """Return True if the book has any 'mp3' media."""
        return self.has_media("mp3")
    # Function attributes: a short label and a boolean-display flag.
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
408
    def has_ogg_file(self):
        """Return True if the book has any 'ogg' media."""
        return self.has_media("ogg")
    # Function attributes: a short label and a boolean-display flag.
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
413
    def has_daisy_file(self):
        """Return True if the book has any 'daisy' media."""
        return self.has_media("daisy")
    # Function attributes: a short label and a boolean-display flag.
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
418
    def has_sync_file(self):
        """Return a truthy value if FEATURE_SYNCHRO is on and 'sync' media exists.

        When the feature setting is falsy, that setting's value is returned
        as-is rather than a strict False.
        """
        return settings.FEATURE_SYNCHRO and self.has_media("sync")
421
422     def build_sync_file(self):
423         from lxml import html
424         from django.core.files.base import ContentFile
425         with self.html_file.open('rb') as f:
426             h = html.fragment_fromstring(f.read().decode('utf-8'))
427
428         durations = [
429             m['mp3'].duration
430             for m in self.get_audiobooks()[0]
431         ]
432         if settings.MOCK_DURATIONS:
433             durations = settings.MOCK_DURATIONS
434
435         sync = []
436         ts = None
437         sid = 1
438         dirty = False
439         for elem in h.iter():
440             if elem.get('data-audio-ts'):
441                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
442                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
443                 # check if inside verse
444                 p = elem.getparent()
445                 while p is not None:
446                     # Workaround for missing ids.
447                     if 'verse' in p.get('class', ''):
448                         if not p.get('id'):
449                             p.set('id', f'syn{sid}')
450                             dirty = True
451                             sid += 1
452                         sync.append((ts, p.get('id')))
453                         ts = None
454                         break
455                     p = p.getparent()
456             elif ts:
457                 cls = elem.get('class', '')
458                 # Workaround for missing ids.
459                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
460                     if not elem.get('id'):
461                         elem.set('id', f'syn{sid}')
462                         dirty = True
463                         sid += 1
464                     sync.append((ts, elem.get('id')))
465                     ts = None
466         if dirty:
467             htext = html.tostring(h, encoding='utf-8')
468             with open(self.html_file.path, 'wb') as f:
469                 f.write(htext)
470         try:
471             bm = self.media.get(type='sync')
472         except:
473             bm = BookMedia(book=self, type='sync')
474         sync = (
475             '27\n' + '\n'.join(
476                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
477             )).encode('latin1')
478         bm.file.save(
479             None, ContentFile(sync)
480             )
481
482     def get_sync(self):
483         if not self.has_sync_file():
484             return []
485         with self.get_media('sync').first().file.open('r') as f:
486             sync = f.read().split('\n')
487         offset = float(sync[0])
488         items = []
489         for line in sync[1:]:
490             if not line:
491                 continue
492             start, end, elid = line.split()
493             items.append([elid, float(start) + offset])
494         return items
495
496     def sync_ts(self, ts):
497         elid = None
498         for cur_id, t in self.get_sync():
499             if ts >= t:
500                 elid = cur_id
501             else:
502                 break
503         return elid
504
505     def sync_elid(self, elid):
506         for cur_id, t in self.get_sync():
507             if cur_id == elid:
508                 return t
509
    def has_audio_epub_file(self):
        """Return True if the book has any 'audio.epub' media."""
        return self.has_media("audio.epub")
512
    @property
    def media_daisy(self):
        """The book's 'daisy' media, or None if there is none."""
        return self.get_media('daisy')
516
    @property
    def media_audio_epub(self):
        """The book's 'audio.epub' media, or None if there is none."""
        return self.get_media('audio.epub')
520
521     def get_audiobooks(self, with_children=False, processing=False):
522         ogg_files = {}
523         for m in self.media.filter(type='ogg').order_by().iterator():
524             ogg_files[m.name] = m
525
526         audiobooks = []
527         projects = set()
528         total_duration = 0
529         for mp3 in self.media.filter(type='mp3').iterator():
530             # ogg files are always from the same project
531             meta = mp3.get_extra_info_json()
532             project = meta.get('project')
533             if not project:
534                 # temporary fallback
535                 project = 'CzytamySłuchając'
536
537             projects.add((project, meta.get('funded_by', '')))
538             total_duration += mp3.duration or 0
539
540             media = {'mp3': mp3}
541
542             ogg = ogg_files.get(mp3.name)
543             if ogg:
544                 media['ogg'] = ogg
545             audiobooks.append(media)
546
547         if with_children:
548             for child in self.get_children():
549                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
550                     with_children=True, processing=True)
551                 audiobooks.append({'part': child})
552                 audiobooks += ch_audiobooks
553                 projects.update(ch_projects)
554                 total_duration += ch_duration
555
556         if not processing:
557             projects = sorted(projects)
558             total_duration = '%d:%02d' % (
559                 total_duration // 60,
560                 total_duration % 60
561             )
562
563         return audiobooks, projects, total_duration
564
    def get_audiobooks_with_children(self):
        """Return get_audiobooks() including the audiobooks of all descendants."""
        return self.get_audiobooks(with_children=True)
567     
568     def wldocument(self, parse_dublincore=True, inherit=True):
569         from catalogue.import_utils import ORMDocProvider
570         from librarian.parser import WLDocument
571
572         if inherit and self.parent:
573             meta_fallbacks = self.parent.cover_info()
574         else:
575             meta_fallbacks = None
576
577         return WLDocument.from_file(
578             self.xml_file.path,
579             provider=ORMDocProvider(self),
580             parse_dublincore=parse_dublincore,
581             meta_fallbacks=meta_fallbacks)
582
583     def wldocument2(self):
584         from catalogue.import_utils import ORMDocProvider
585         from librarian.document import WLDocument
586         doc = WLDocument(
587             self.xml_file.path,
588             provider=ORMDocProvider(self)
589         )
590         doc.meta.update(self.cover_info())
591         return doc
592
593
594     @staticmethod
595     def zip_format(format_):
596         def pretty_file_name(book):
597             return "%s/%s.%s" % (
598                 book.get_extra_info_json()['author'],
599                 book.slug,
600                 format_)
601
602         field_name = "%s_file" % format_
603         field = getattr(Book, field_name)
604         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
605         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
606         return create_zip(paths, field.ZIP)
607
608     def zip_audiobooks(self, format_):
609         bm = BookMedia.objects.filter(book=self, type=format_)
610         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
611         licenses = set()
612         for m in bm:
613             license = constants.LICENSES.get(
614                 m.get_extra_info_json().get('license'), {}
615             ).get('locative')
616             if license:
617                 licenses.add(license)
618         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
619             'licenses': licenses,
620             'meta': self.wldocument2().meta,
621         })
622         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
623
    def search_index(self, index=None):
        """Index this book in the search index, unless it is not findable.

        The `index` argument is accepted but not used by this method.
        """
        if not self.findable:
            return
        # Imported locally rather than at module level.
        from search.index import Index
        Index.index_book(self)
629
630     # will make problems in conjunction with paid previews
631     def download_pictures(self, remote_gallery_url):
632         # This is only needed for legacy relative image paths.
633         gallery_path = self.gallery_path()
634         # delete previous files, so we don't include old files in ebooks
635         if os.path.isdir(gallery_path):
636             for filename in os.listdir(gallery_path):
637                 file_path = os.path.join(gallery_path, filename)
638                 os.unlink(file_path)
639         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
640         if ilustr_elements:
641             makedirs(gallery_path)
642             for ilustr in ilustr_elements:
643                 ilustr_src = ilustr.get('src')
644                 if '/' in ilustr_src:
645                     continue
646                 ilustr_path = os.path.join(gallery_path, ilustr_src)
647                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
648
649     def load_abstract(self):
650         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
651         if abstract is not None:
652             self.abstract = transform_abstrakt(abstract)
653         else:
654             self.abstract = ''
655
656     def load_toc(self):
657         self.toc = ''
658         if self.html_file:
659             parser = html.HTMLParser(encoding='utf-8')
660             tree = html.parse(self.html_file.path, parser=parser)
661             toc = tree.find('//div[@id="toc"]/ol')
662             if toc is None or not len(toc):
663                 return
664             html_link = reverse('book_text', args=[self.slug])
665             for a in toc.findall('.//a'):
666                 a.attrib['href'] = html_link + a.attrib['href']
667             self.toc = html.tostring(toc, encoding='unicode')
668             # div#toc
669
670     @classmethod
671     def from_xml_file(cls, xml_file, **kwargs):
672         from django.core.files import File
673         from librarian import dcparser
674
675         # use librarian to parse meta-data
676         book_info = dcparser.parse(xml_file)
677
678         if not isinstance(xml_file, File):
679             xml_file = File(open(xml_file))
680
681         try:
682             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
683         finally:
684             xml_file.close()
685
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None, isbn_mp3=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `license`, `variant_of`, optional
                `parts`, and `to_dict()`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of build names to skip ('cover' or ebook
                format names); combined with app_settings.DONT_BUILD.
            search_index: schedule search indexing (only if the book is
                findable and settings.NO_SEARCH_INDEX is off).
            remote_gallery_url: if given, illustrations are downloaded
                from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today + `days`.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in extra_info when truthy.
            can_sell, isbn_mp3: stored on the book when not None.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside [a-z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        if isbn_mp3 is not None:
            book.isbn_mp3 = isbn_mp3
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name become the book's tags; the shelves
        # ('set' tags) saved above are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Notify listeners of the `published` signal.
        cls.published.send(sender=cls, instance=book)
        return book
820
821     def update_stats(self):
822         stats = self.wldocument2().get_statistics()['total']
823         self.pages = round(
824             stats.get('verses_with_fn', 0) / 30 +
825             stats.get('chars_out_verse_with_fn', 0) / 1800)
826         self.read_time = round(self.get_time())
827         self.save(update_fields=['pages', 'read_time'])
828         if self.parent is not None:
829             self.parent.update_stats()
830
831     def update_references(self):
832         Entity = apps.get_model('references', 'Entity')
833         doc = self.wldocument2()
834         doc._compat_assign_section_ids()
835         doc._compat_assign_ordered_ids()
836         refs = {}
837         for ref_elem in doc.references():
838             uri = ref_elem.attrib.get('href', '')
839             if not uri:
840                 continue
841             if uri in refs:
842                 ref = refs[uri]
843             else:
844                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
845                 if entity_created:
846                     try:
847                         entity.populate()
848                     except:
849                         pass
850                     else:
851                         entity.save()
852                 ref, ref_created = entity.reference_set.get_or_create(book=self)
853                 refs[uri] = ref
854                 if not ref_created:
855                     ref.occurence_set.all().delete()
856             sec = ref_elem.get_link()
857             m = re.match(r'sec(\d+)', sec)
858             assert m is not None
859             sec = int(m.group(1))
860             snippet = ref_elem.get_snippet()
861             b = builders['html-snippet']()
862             for s in snippet:
863                 s.html_build(b)
864             html = b.output().get_bytes().decode('utf-8')
865
866             ref.occurence_set.create(
867                 section=sec,
868                 html=html
869             )
870         self.reference_set.exclude(entity__uri__in=refs).delete()
871
872     @property
873     def references(self):
874         return self.reference_set.all().select_related('entity')
875
876     def update_has_audio(self):
877         self.has_audio = False
878         if self.media.filter(type='mp3').exists():
879             self.has_audio = True
880         if self.descendant.filter(has_audio=True).exists():
881             self.has_audio = True
882         self.save(update_fields=['has_audio'])
883         if self.parent is not None:
884             self.parent.update_has_audio()
885
886     def update_narrators(self):
887         narrator_names = set()
888         for bm in self.media.filter(type='mp3'):
889             narrator_names.update(set(
890                 a.strip() for a in re.split(r',|\si\s', bm.artist)
891             ))
892         narrators = []
893
894         for name in narrator_names:
895             if not name: continue
896             slug = slugify(name)
897             try:
898                 t = Tag.objects.get(category='author', slug=slug)
899             except Tag.DoesNotExist:
900                 sort_key = sortify(
901                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
902                 )
903                 t = Tag.objects.create(
904                     category='author',
905                     name_pl=name,
906                     slug=slug,
907                     sort_key=sort_key,
908                 )
909             narrators.append(t)
910         self.narrators.set(narrators)
911
912     @classmethod
913     @transaction.atomic
914     def repopulate_ancestors(cls):
915         """Fixes the ancestry cache."""
916         # TODO: table names
917         cursor = connection.cursor()
918         if connection.vendor == 'postgres':
919             cursor.execute("TRUNCATE catalogue_book_ancestor")
920             cursor.execute("""
921                 WITH RECURSIVE ancestry AS (
922                     SELECT book.id, book.parent_id
923                     FROM catalogue_book AS book
924                     WHERE book.parent_id IS NOT NULL
925                     UNION
926                     SELECT ancestor.id, book.parent_id
927                     FROM ancestry AS ancestor, catalogue_book AS book
928                     WHERE ancestor.parent_id = book.id
929                         AND book.parent_id IS NOT NULL
930                     )
931                 INSERT INTO catalogue_book_ancestor
932                     (from_book_id, to_book_id)
933                     SELECT id, parent_id
934                     FROM ancestry
935                     ORDER BY id;
936                 """)
937         else:
938             cursor.execute("DELETE FROM catalogue_book_ancestor")
939             for b in cls.objects.exclude(parent=None):
940                 parent = b.parent
941                 while parent is not None:
942                     b.ancestor.add(parent)
943                     parent = parent.parent
944
945     @property
946     def ancestors(self):
947         if self.parent:
948             for anc in self.parent.ancestors:
949                 yield anc
950             yield self.parent
951         else:
952             return []
953
954     def clear_cache(self):
955         clear_cached_renders(self.mini_box)
956         clear_cached_renders(self.mini_box_nolink)
957
958     def cover_info(self, inherit=True):
959         """Returns a dictionary to serve as fallback for BookInfo.
960
961         For now, the only thing inherited is the cover image.
962         """
963         need = False
964         info = {}
965         for field in ('cover_url', 'cover_by', 'cover_source'):
966             val = self.get_extra_info_json().get(field)
967             if val:
968                 info[field] = val
969             else:
970                 need = True
971         if inherit and need and self.parent is not None:
972             parent_info = self.parent.cover_info()
973             parent_info.update(info)
974             info = parent_info
975         return info
976
977     def related_themes(self):
978         return Tag.objects.usage_for_queryset(
979             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
980             counts=True).filter(category='theme').order_by('-count')
981
982     def parent_cover_changed(self):
983         """Called when parent book's cover image is changed."""
984         if not self.cover_info(inherit=False):
985             if 'cover' not in app_settings.DONT_BUILD:
986                 self.cover.build_delay()
987                 self.cover_clean.build_delay()
988                 self.cover_thumb.build_delay()
989                 self.cover_api_thumb.build_delay()
990                 self.simple_cover.build_delay()
991                 self.cover_ebookpoint.build_delay()
992             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
993                 if format_ not in app_settings.DONT_BUILD:
994                     getattr(self, '%s_file' % format_).build_delay()
995             for child in self.children.all():
996                 child.parent_cover_changed()
997
998     def other_versions(self):
999         """Find other versions (i.e. in other languages) of the book."""
1000         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
1001
1002     def parents(self):
1003         books = []
1004         parent = self.parent
1005         while parent is not None:
1006             books.insert(0, parent)
1007             parent = parent.parent
1008         return books
1009
1010     def pretty_title(self, html_links=False):
1011         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1012         books = self.parents() + [self]
1013         names.extend([(b.title, b.get_absolute_url()) for b in books])
1014
1015         if html_links:
1016             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1017         else:
1018             names = [tag[0] for tag in names]
1019         return ', '.join(names)
1020
1021     def publisher(self):
1022         publisher = self.get_extra_info_json()['publisher']
1023         if isinstance(publisher, str):
1024             return publisher
1025         elif isinstance(publisher, list):
1026             return ', '.join(publisher)
1027
1028     def get_recommended(self, limit=4):
1029         books_qs = type(self).objects.filter(findable=True)
1030         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1031         books = type(self).tagged.related_to(self, books_qs)[:limit]
1032         return books
1033
1034     @classmethod
1035     def tagged_top_level(cls, tags):
1036         """ Returns top-level books tagged with `tags`.
1037
1038         It only returns those books which don't have ancestors which are
1039         also tagged with those tags.
1040
1041         """
1042         objects = cls.tagged.with_all(tags)
1043         return objects.filter(findable=True).exclude(ancestor__in=objects)
1044
1045     @classmethod
1046     def book_list(cls, book_filter=None):
1047         """Generates a hierarchical listing of all books.
1048
1049         Books are optionally filtered with a test function.
1050
1051         """
1052
1053         books_by_parent = {}
1054         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1055         if book_filter:
1056             books = books.filter(book_filter).distinct()
1057
1058             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1059             for book in books.iterator():
1060                 parent = book.parent_id
1061                 if parent not in book_ids:
1062                     parent = None
1063                 books_by_parent.setdefault(parent, []).append(book)
1064         else:
1065             for book in books.iterator():
1066                 books_by_parent.setdefault(book.parent_id, []).append(book)
1067
1068         orphans = []
1069         books_by_author = OrderedDict()
1070         for tag in Tag.objects.filter(category='author').iterator():
1071             books_by_author[tag] = []
1072
1073         for book in books_by_parent.get(None, ()):
1074             authors = list(book.authors().only('pk'))
1075             if authors:
1076                 for author in authors:
1077                     books_by_author[author].append(book)
1078             else:
1079                 orphans.append(book)
1080
1081         return books_by_author, orphans, books_by_parent
1082
    # Maps audience codes found in the book's extra info to
    # (sort rank, Polish label) pairs.  audiences_pl() sorts by these pairs
    # and returns only the labels; several codes share the same pair.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1093
1094     def audiences_pl(self):
1095         audiences = self.get_extra_info_json().get('audiences', [])
1096         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1097         return [a[1] for a in audiences]
1098
1099     def stage_note(self):
1100         stage = self.get_extra_info_json().get('stage')
1101         if stage and stage < '0.4':
1102             return (_('Ten utwór wymaga uwspółcześnienia'),
1103                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1104         else:
1105             return None, None
1106
1107     def choose_fragments(self, number):
1108         fragments = self.fragments.order_by()
1109         fragments_count = fragments.count()
1110         if not fragments_count and self.children.exists():
1111             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1112             fragments_count = fragments.count()
1113         if fragments_count:
1114             if fragments_count > number:
1115                 offset = randint(0, fragments_count - number)
1116             else:
1117                 offset = 0
1118             return fragments[offset : offset + number]
1119         elif self.parent:
1120             return self.parent.choose_fragments(number)
1121         else:
1122             return []
1123
1124     def choose_fragment(self):
1125         fragments = self.choose_fragments(1)
1126         if fragments:
1127             return fragments[0]
1128         else:
1129             return None
1130
1131     def fragment_data(self):
1132         fragment = self.choose_fragment()
1133         if fragment:
1134             return {
1135                 'title': fragment.book.pretty_title(),
1136                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1137             }
1138         else:
1139             return None
1140
1141     def update_popularity(self):
1142         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1143         try:
1144             pop = self.popularity
1145             pop.count = count
1146             pop.save()
1147         except BookPopularity.DoesNotExist:
1148             BookPopularity.objects.create(book=self, count=count)
1149
1150     def ridero_link(self):
1151         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1152
1153     def elevenreader_link(self):
1154         first_text = self.get_first_text()
1155         if first_text is None:
1156             return None
1157         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1158
1159     def content_warnings(self):
1160         warnings_def = {
1161             'wulgaryzmy': _('wulgaryzmy'),
1162         }
1163         warnings = self.get_extra_info_json().get('content_warnings', [])
1164         warnings = [
1165             warnings_def.get(w, w)
1166             for w in warnings
1167         ]
1168         warnings.sort()
1169         return warnings
1170
1171     def full_sort_key(self):
1172         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1173
1174     def cover_color(self):
1175         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1176
1177     @cached_render('catalogue/book_mini_box.html')
1178     def mini_box(self):
1179         return {
1180             'book': self
1181         }
1182
1183     @cached_render('catalogue/book_mini_box.html')
1184     def mini_box_nolink(self):
1185         return {
1186             'book': self,
1187             'no_link': True,
1188         }
1189
1190
1191 class BookPopularity(models.Model):
1192     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1193     count = models.IntegerField(default=0, db_index=True)