fix
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 import requests
11 from slugify import slugify
12 from sortify import sortify
13 from urllib.request import urlretrieve
14 from django.apps import apps
15 from django.conf import settings
16 from django.db import connection, models, transaction
17 import django.dispatch
18 from django.contrib.contenttypes.fields import GenericRelation
19 from django.template.loader import render_to_string
20 from django.urls import reverse
21 from django.utils.translation import gettext_lazy as _, get_language
22 from fnpdjango.storage import BofhFileSystemStorage
23 from lxml import html
24 from librarian.cover import WLCover
25 from librarian.html import transform_abstrakt
26 from librarian.builders import builders
27 from newtagging import managers
28 from catalogue import constants
29 from catalogue import fields
30 from catalogue.models import Tag, Fragment, BookMedia
31 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
32 from catalogue.models.tag import prefetched_relations
33 from catalogue import app_settings
34 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
35
36 bofh_storage = BofhFileSystemStorage()
37
38
39 class Book(models.Model):
40     """Represents a book imported from WL-XML."""
    # --- Descriptive metadata ---
    title = models.CharField('tytuł', max_length=32767)
    # sort_key is recomputed from the title (via sortify) on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
    # sort_key_author is set in save() to the sort_key of the first author tag ('' when there is none).
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta() to the slug of `variant_of` when present, otherwise to the book's own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    # abstract and toc are filled by load_abstract() and load_toc().
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children; children are ordered by this field.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded text; decode it with get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Formatted by format_audio_length() and stored by set_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview books are access-restricted (see is_accessible_to() and media_url()).
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    # Generated in save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
    can_sell = models.BooleanField('do sprzedaży', default=True)
    can_sell_mp3 = models.BooleanField('do sprzedaży mp3', default=False)
    isbn_mp3 = models.CharField('ISBN audiobooka', max_length=32, blank=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names that map onto a "<format>_file" field of this model;
    # has_media()/get_media() treat any other type as a BookMedia type.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: `parent` is the direct container, `ancestor` the many-to-many set of containers.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save(): author names and presence of 'audience' in extra_info.
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    # read_time and pages are computed by update_stats().
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)
    
    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
114
115     class AlreadyExists(Exception):
116         pass
117
118     class Meta:
119         ordering = ('sort_key_author', 'sort_key')
120         verbose_name = 'książka'
121         verbose_name_plural = 'książki'
122         app_label = 'catalogue'
123
124     def __str__(self):
125         return self.title
126
127     def get_extra_info_json(self):
128         return json.loads(self.extra_info or '{}')
129
130     def get_initial(self):
131         try:
132             return re.search(r'\w', self.title, re.U).group(0)
133         except AttributeError:
134             return ''
135
136     def authors(self):
137         return self.tags.filter(category='author')
138
139     def epochs(self):
140         return self.tags.filter(category='epoch')
141
142     def genres(self):
143         return self.tags.filter(category='genre')
144
145     def kinds(self):
146         return self.tags.filter(category='kind')
147
148     def tag_unicode(self, category):
149         relations = prefetched_relations(self, category)
150         if relations:
151             return ', '.join(rel.tag.name for rel in relations)
152         else:
153             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
154
155     def tags_by_category(self):
156         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
157
158     def author_unicode(self):
159         return self.cached_author
160
161     def kind_unicode(self):
162         return self.tag_unicode('kind')
163
164     def epoch_unicode(self):
165         return self.tag_unicode('epoch')
166
167     def genre_unicode(self):
168         return self.tag_unicode('genre')
169
170     def translator(self):
171         translators = self.get_extra_info_json().get('translators')
172         if not translators:
173             return None
174         if len(translators) > 3:
175             translators = translators[:2]
176             others = ' i inni'
177         else:
178             others = ''
179         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
180
181     def cover_source(self):
182         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
183
184     @property
185     def isbn_pdf(self):
186         return self.get_extra_info_json().get('isbn_pdf')
187
188     @property
189     def isbn_epub(self):
190         return self.get_extra_info_json().get('isbn_epub')
191
192     @property
193     def isbn_mobi(self):
194         return self.get_extra_info_json().get('isbn_mobi')
195
196     @property
197     def redakcja(self):
198         return self.get_extra_info_json().get('about')
199     
200     def is_accessible_to(self, user):
201         if not self.preview:
202             return True
203         if not user.is_authenticated:
204             return False
205         Membership = apps.get_model('club', 'Membership')
206         if Membership.is_active_for(user):
207             return True
208         Funding = apps.get_model('funding', 'Funding')
209         if Funding.objects.filter(user=user, offer__book=self):
210             return True
211         return False
212
213     def save(self, force_insert=False, force_update=False, **kwargs):
214         from sortify import sortify
215
216         self.sort_key = sortify(self.title)[:120]
217         self.title = str(self.title)  # ???
218
219         try:
220             author = self.authors().first().sort_key
221         except AttributeError:
222             author = ''
223         self.sort_key_author = author
224
225         self.cached_author = self.tag_unicode('author')
226         self.has_audience = 'audience' in self.get_extra_info_json()
227
228         if self.preview and not self.preview_key:
229             self.preview_key = get_random_hash(self.slug)[:32]
230
231         ret = super(Book, self).save(force_insert, force_update, **kwargs)
232
233         return ret
234
235     def get_absolute_url(self):
236         return reverse('book_detail', args=[self.slug])
237
238     def gallery_path(self):
239         return gallery_path(self.slug)
240
241     def gallery_url(self):
242         return gallery_url(self.slug)
243
244     def get_first_text(self):
245         if self.html_file:
246             return self
247         child = self.children.all().order_by('parent_number').first()
248         if child is not None:
249             return child.get_first_text()
250
251     def get_last_text(self):
252         if self.html_file:
253             return self
254         child = self.children.all().order_by('parent_number').last()
255         if child is not None:
256             return child.get_last_text()
257
258     def get_prev_text(self):
259         if not self.parent:
260             return None
261         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
262         if sibling is not None:
263             return sibling.get_last_text()
264
265         if self.parent.html_file:
266             return self.parent
267
268         return self.parent.get_prev_text()
269
270     def get_next_text(self, inside=True):
271         if inside:
272             child = self.children.order_by('parent_number').first()
273             if child is not None:
274                 return child.get_first_text()
275
276         if not self.parent:
277             return None
278         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
279         if sibling is not None:
280             return sibling.get_first_text()
281         return self.parent.get_next_text(inside=False)
282
283     def get_siblings(self):
284         if not self.parent:
285             return []
286         return self.parent.children.all().order_by('parent_number')
287
288     def get_children(self):
289         return self.children.all().order_by('parent_number')
290
291     @property
292     def name(self):
293         return self.title
294
295     def language_code(self):
296         return constants.LANGUAGES_3TO2.get(self.language, self.language)
297
298     def language_name(self):
299         return dict(settings.LANGUAGES).get(self.language_code(), "")
300
301     def is_foreign(self):
302         return self.language_code() != settings.LANGUAGE_CODE
303
304     def set_audio_length(self):
305         length = self.get_audio_length()
306         if length > 0:
307             self.audio_length = self.format_audio_length(length)
308             self.save()
309
310     @staticmethod
311     def format_audio_length(seconds):
312         """
313         >>> Book.format_audio_length(1)
314         '0:01'
315         >>> Book.format_audio_length(3661)
316         '1:01:01'
317         """
318         if seconds < 60*60:
319             minutes = seconds // 60
320             seconds = seconds % 60
321             return '%d:%02d' % (minutes, seconds)
322         else:
323             hours = seconds // 3600
324             minutes = seconds % 3600 // 60
325             seconds = seconds % 60
326             return '%d:%02d:%02d' % (hours, minutes, seconds)
327
328     def get_audio_length(self):
329         total = 0
330         for media in self.get_mp3() or ():
331             total += app_settings.GET_MP3_LENGTH(media.file.path)
332         return int(total)
333
334     def get_time(self):
335         return round(self.xml_file.size / 1000 * 40)
336     
337     def has_media(self, type_):
338         if type_ in Book.formats:
339             return bool(getattr(self, "%s_file" % type_))
340         else:
341             return self.media.filter(type=type_).exists()
342
343     def get_media(self, type_):
344         if self.has_media(type_):
345             if type_ in Book.formats:
346                 return getattr(self, "%s_file" % type_)
347             else:
348                 return self.media.filter(type=type_)
349         else:
350             return None
351
352     def get_mp3(self):
353         return self.get_media("mp3")
354
355     def get_odt(self):
356         return self.get_media("odt")
357
358     def get_ogg(self):
359         return self.get_media("ogg")
360
361     def get_daisy(self):
362         return self.get_media("daisy")
363
364     def get_audio_epub(self):
365         return self.get_media("audio.epub")
366
367     def media_url(self, format_):
368         media = self.get_media(format_)
369         if media:
370             if self.preview:
371                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
372             else:
373                 return media.url
374         else:
375             return None
376
377     def html_url(self):
378         return self.media_url('html')
379
380     def html_nonotes_url(self):
381         return self.media_url('html_nonotes')
382
383     def pdf_url(self):
384         return self.media_url('pdf')
385
386     def epub_url(self):
387         return self.media_url('epub')
388
389     def mobi_url(self):
390         return self.media_url('mobi')
391
392     def txt_url(self):
393         return self.media_url('txt')
394
395     def fb2_url(self):
396         return self.media_url('fb2')
397
398     def xml_url(self):
399         return self.media_url('xml')
400
401     def has_description(self):
402         return len(self.description) > 0
403     has_description.short_description = 'opis'
404     has_description.boolean = True
405
406     def has_mp3_file(self):
407         return self.has_media("mp3")
408     has_mp3_file.short_description = 'MP3'
409     has_mp3_file.boolean = True
410
411     def has_ogg_file(self):
412         return self.has_media("ogg")
413     has_ogg_file.short_description = 'OGG'
414     has_ogg_file.boolean = True
415
416     def has_daisy_file(self):
417         return self.has_media("daisy")
418     has_daisy_file.short_description = 'DAISY'
419     has_daisy_file.boolean = True
420
421     def has_sync_file(self):
422         return settings.FEATURE_SYNCHRO and self.has_media("sync")
423
424     def build_sync_file(self):
425         from lxml import html
426         from django.core.files.base import ContentFile
427         with self.html_file.open('rb') as f:
428             h = html.fragment_fromstring(f.read().decode('utf-8'))
429
430         durations = [
431             m['mp3'].duration
432             for m in self.get_audiobooks()[0]
433         ]
434         if settings.MOCK_DURATIONS:
435             durations = settings.MOCK_DURATIONS
436
437         sync = []
438         ts = None
439         sid = 1
440         dirty = False
441         for elem in h.iter():
442             if elem.get('data-audio-ts'):
443                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
444                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
445                 # check if inside verse
446                 p = elem.getparent()
447                 while p is not None:
448                     # Workaround for missing ids.
449                     if 'verse' in p.get('class', ''):
450                         if not p.get('id'):
451                             p.set('id', f'syn{sid}')
452                             dirty = True
453                             sid += 1
454                         sync.append((ts, p.get('id')))
455                         ts = None
456                         break
457                     p = p.getparent()
458             elif ts:
459                 cls = elem.get('class', '')
460                 # Workaround for missing ids.
461                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
462                     if not elem.get('id'):
463                         elem.set('id', f'syn{sid}')
464                         dirty = True
465                         sid += 1
466                     sync.append((ts, elem.get('id')))
467                     ts = None
468         if dirty:
469             htext = html.tostring(h, encoding='utf-8')
470             with open(self.html_file.path, 'wb') as f:
471                 f.write(htext)
472         try:
473             bm = self.media.get(type='sync')
474         except:
475             bm = BookMedia(book=self, type='sync')
476         sync = (
477             '27\n' + '\n'.join(
478                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
479             )).encode('latin1')
480         bm.file.save(
481             None, ContentFile(sync)
482             )
483
484     def get_sync(self):
485         if not self.has_sync_file():
486             return []
487         with self.get_media('sync').first().file.open('r') as f:
488             sync = f.read().split('\n')
489         offset = float(sync[0])
490         items = []
491         for line in sync[1:]:
492             if not line:
493                 continue
494             start, end, elid = line.split()
495             items.append([elid, float(start) + offset])
496         return items
497
498     def sync_ts(self, ts):
499         elid = None
500         for cur_id, t in self.get_sync():
501             if ts >= t:
502                 elid = cur_id
503             else:
504                 break
505         return elid
506
507     def sync_elid(self, elid):
508         for cur_id, t in self.get_sync():
509             if cur_id == elid:
510                 return t
511
512     def has_audio_epub_file(self):
513         return self.has_media("audio.epub")
514
515     @property
516     def media_daisy(self):
517         return self.get_media('daisy')
518
519     @property
520     def media_audio_epub(self):
521         return self.get_media('audio.epub')
522
523     def get_audiobooks(self, with_children=False, processing=False):
524         ogg_files = {}
525         for m in self.media.filter(type='ogg').order_by().iterator():
526             ogg_files[m.name] = m
527
528         audiobooks = []
529         projects = set()
530         total_duration = 0
531         for mp3 in self.media.filter(type='mp3').iterator():
532             # ogg files are always from the same project
533             meta = mp3.get_extra_info_json()
534             project = meta.get('project')
535             if not project:
536                 # temporary fallback
537                 project = 'CzytamySłuchając'
538
539             projects.add((project, meta.get('funded_by', '')))
540             total_duration += mp3.duration or 0
541
542             media = {'mp3': mp3}
543
544             ogg = ogg_files.get(mp3.name)
545             if ogg:
546                 media['ogg'] = ogg
547             audiobooks.append(media)
548
549         if with_children:
550             for child in self.get_children():
551                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
552                     with_children=True, processing=True)
553                 audiobooks.append({'part': child})
554                 audiobooks += ch_audiobooks
555                 projects.update(ch_projects)
556                 total_duration += ch_duration
557
558         if not processing:
559             projects = sorted(projects)
560             total_duration = '%d:%02d' % (
561                 total_duration // 60,
562                 total_duration % 60
563             )
564
565         return audiobooks, projects, total_duration
566
567     def get_audiobooks_with_children(self):
568         return self.get_audiobooks(with_children=True)
569     
570     def wldocument(self, parse_dublincore=True, inherit=True):
571         from catalogue.import_utils import ORMDocProvider
572         from librarian.parser import WLDocument
573
574         if inherit and self.parent:
575             meta_fallbacks = self.parent.cover_info()
576         else:
577             meta_fallbacks = None
578
579         return WLDocument.from_file(
580             self.xml_file.path,
581             provider=ORMDocProvider(self),
582             parse_dublincore=parse_dublincore,
583             meta_fallbacks=meta_fallbacks)
584
585     def wldocument2(self):
586         from catalogue.import_utils import ORMDocProvider
587         from librarian.document import WLDocument
588         doc = WLDocument(
589             self.xml_file.path,
590             provider=ORMDocProvider(self)
591         )
592         doc.meta.update(self.cover_info())
593         return doc
594
595
596     @staticmethod
597     def zip_format(format_):
598         def pretty_file_name(book):
599             return "%s/%s.%s" % (
600                 book.get_extra_info_json()['author'],
601                 book.slug,
602                 format_)
603
604         field_name = "%s_file" % format_
605         field = getattr(Book, field_name)
606         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
607         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
608         return create_zip(paths, field.ZIP)
609
610     def zip_audiobooks(self, format_):
611         bm = BookMedia.objects.filter(book=self, type=format_)
612         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
613         licenses = set()
614         for m in bm:
615             license = constants.LICENSES.get(
616                 m.get_extra_info_json().get('license'), {}
617             ).get('locative')
618             if license:
619                 licenses.add(license)
620         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
621             'licenses': licenses,
622             'meta': self.wldocument2().meta,
623         })
624         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
625
626     def search_index(self, index=None):
627         if not self.findable:
628             return
629         from search.index import Index
630         Index.index_book(self)
631
632     # will make problems in conjunction with paid previews
633     def download_pictures(self, remote_gallery_url):
634         # This is only needed for legacy relative image paths.
635         gallery_path = self.gallery_path()
636         # delete previous files, so we don't include old files in ebooks
637         if os.path.isdir(gallery_path):
638             for filename in os.listdir(gallery_path):
639                 file_path = os.path.join(gallery_path, filename)
640                 os.unlink(file_path)
641         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
642         if ilustr_elements:
643             makedirs(gallery_path)
644             for ilustr in ilustr_elements:
645                 ilustr_src = ilustr.get('src')
646                 if '/' in ilustr_src:
647                     continue
648                 ilustr_path = os.path.join(gallery_path, ilustr_src)
649                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
650
651     def load_abstract(self):
652         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
653         if abstract is not None:
654             self.abstract = transform_abstrakt(abstract)
655         else:
656             self.abstract = ''
657
658     def load_toc(self):
659         self.toc = ''
660         if self.html_file:
661             parser = html.HTMLParser(encoding='utf-8')
662             tree = html.parse(self.html_file.path, parser=parser)
663             toc = tree.find('//div[@id="toc"]/ol')
664             if toc is None or not len(toc):
665                 return
666             html_link = reverse('book_text', args=[self.slug])
667             for a in toc.findall('.//a'):
668                 a.attrib['href'] = html_link + a.attrib['href']
669             self.toc = html.tostring(toc, encoding='unicode')
670             # div#toc
671
672     @classmethod
673     def from_xml_file(cls, xml_file, **kwargs):
674         from django.core.files import File
675         from librarian import dcparser
676
677         # use librarian to parse meta-data
678         book_info = dcparser.parse(xml_file)
679
680         if not isinstance(xml_file, File):
681             xml_file = File(open(xml_file))
682
683         try:
684             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
685         finally:
686             xml_file.close()
687
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None, isbn_mp3=None):
        """Create or update a book from its XML file and parsed metadata, and schedule builds.

        Args:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; the slug comes from ``book_info.url.slug``.
            overwrite: allow updating a book that already exists.
            dont_build: names of outputs to skip ('cover' or ebook format names);
                combined with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` to be falsy).
            remote_gallery_url: when given, illustrations are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono, logo_alt: when truthy, stored in ``extra_info``.
            can_sell, isbn_mp3: when not None, stored on the book.

        Returns:
            The saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        if can_sell is not None:
            book.can_sell = can_sell
        if isbn_mp3 is not None:
            book.isbn_mp3 = isbn_mp3
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()
        
        meta_tags = Tag.tags_from_info(book_info)

        # Entries without a relation name become plain tags; the shelves
        # ('set' tags) saved above are kept alongside them.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position in the list.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
822
823     def update_stats(self):
824         stats = self.wldocument2().get_statistics()['total']
825         self.pages = round(
826             stats.get('verses_with_fn', 0) / 30 +
827             stats.get('chars_out_verse_with_fn', 0) / 1800)
828         self.read_time = round(self.get_time())
829         self.save(update_fields=['pages', 'read_time'])
830         if self.parent is not None:
831             self.parent.update_stats()
832
833     def update_references(self):
834         Entity = apps.get_model('references', 'Entity')
835         doc = self.wldocument2()
836         doc._compat_assign_section_ids()
837         doc._compat_assign_ordered_ids()
838         refs = {}
839         for ref_elem in doc.references():
840             uri = ref_elem.attrib.get('href', '')
841             if not uri:
842                 continue
843             if uri in refs:
844                 ref = refs[uri]
845             else:
846                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
847                 if entity_created:
848                     try:
849                         entity.populate()
850                     except:
851                         pass
852                     else:
853                         entity.save()
854                 ref, ref_created = entity.reference_set.get_or_create(book=self)
855                 refs[uri] = ref
856                 if not ref_created:
857                     ref.occurence_set.all().delete()
858             sec = ref_elem.get_link()
859             m = re.match(r'sec(\d+)', sec)
860             assert m is not None
861             sec = int(m.group(1))
862             snippet = ref_elem.get_snippet()
863             b = builders['html-snippet']()
864             for s in snippet:
865                 s.html_build(b)
866             html = b.output().get_bytes().decode('utf-8')
867
868             ref.occurence_set.create(
869                 section=sec,
870                 html=html
871             )
872         self.reference_set.exclude(entity__uri__in=refs).delete()
873
874     @property
875     def references(self):
876         return self.reference_set.all().select_related('entity')
877
878     def update_has_audio(self):
879         self.has_audio = False
880         if self.media.filter(type='mp3').exists():
881             self.has_audio = True
882         if self.descendant.filter(has_audio=True).exists():
883             self.has_audio = True
884         self.save(update_fields=['has_audio'])
885         if self.parent is not None:
886             self.parent.update_has_audio()
887
888     def update_narrators(self):
889         narrator_names = set()
890         for bm in self.media.filter(type='mp3'):
891             narrator_names.update(set(
892                 a.strip() for a in re.split(r',|\si\s', bm.artist)
893             ))
894         narrators = []
895
896         for name in narrator_names:
897             if not name: continue
898             slug = slugify(name)
899             try:
900                 t = Tag.objects.get(category='author', slug=slug)
901             except Tag.DoesNotExist:
902                 sort_key = sortify(
903                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
904                 )
905                 t = Tag.objects.create(
906                     category='author',
907                     name_pl=name,
908                     slug=slug,
909                     sort_key=sort_key,
910                 )
911             narrators.append(t)
912         self.narrators.set(narrators)
913
914     def update_can_sell_mp3(self):
915         ret = True
916         for child in self.get_children():
917             child.update_can_sell_mp3()
918             if not child.can_sell_mp3:
919                 ret = False
920         if self.has_mp3_file():
921             audio_items = requests.get(f'https://audio.wolnelektury.pl/archive/book/{self.slug}.json').json()['items']
922             if not all(x['project']['can_sell'] for x in audio_items):
923                 ret = False
924         self.can_sell_mp3 = ret
925         self.save(update_fields=['can_sell_mp3'])
926
927     @classmethod
928     @transaction.atomic
929     def repopulate_ancestors(cls):
930         """Fixes the ancestry cache."""
931         # TODO: table names
932         cursor = connection.cursor()
933         if connection.vendor == 'postgres':
934             cursor.execute("TRUNCATE catalogue_book_ancestor")
935             cursor.execute("""
936                 WITH RECURSIVE ancestry AS (
937                     SELECT book.id, book.parent_id
938                     FROM catalogue_book AS book
939                     WHERE book.parent_id IS NOT NULL
940                     UNION
941                     SELECT ancestor.id, book.parent_id
942                     FROM ancestry AS ancestor, catalogue_book AS book
943                     WHERE ancestor.parent_id = book.id
944                         AND book.parent_id IS NOT NULL
945                     )
946                 INSERT INTO catalogue_book_ancestor
947                     (from_book_id, to_book_id)
948                     SELECT id, parent_id
949                     FROM ancestry
950                     ORDER BY id;
951                 """)
952         else:
953             cursor.execute("DELETE FROM catalogue_book_ancestor")
954             for b in cls.objects.exclude(parent=None):
955                 parent = b.parent
956                 while parent is not None:
957                     b.ancestor.add(parent)
958                     parent = parent.parent
959
960     @property
961     def ancestors(self):
962         if self.parent:
963             for anc in self.parent.ancestors:
964                 yield anc
965             yield self.parent
966         else:
967             return []
968
969     def clear_cache(self):
970         clear_cached_renders(self.mini_box)
971         clear_cached_renders(self.mini_box_nolink)
972
973     def cover_info(self, inherit=True):
974         """Returns a dictionary to serve as fallback for BookInfo.
975
976         For now, the only thing inherited is the cover image.
977         """
978         need = False
979         info = {}
980         for field in ('cover_url', 'cover_by', 'cover_source'):
981             val = self.get_extra_info_json().get(field)
982             if val:
983                 info[field] = val
984             else:
985                 need = True
986         if inherit and need and self.parent is not None:
987             parent_info = self.parent.cover_info()
988             parent_info.update(info)
989             info = parent_info
990         return info
991
992     def related_themes(self):
993         return Tag.objects.usage_for_queryset(
994             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
995             counts=True).filter(category='theme').order_by('-count')
996
997     def parent_cover_changed(self):
998         """Called when parent book's cover image is changed."""
999         if not self.cover_info(inherit=False):
1000             if 'cover' not in app_settings.DONT_BUILD:
1001                 self.cover.build_delay()
1002                 self.cover_clean.build_delay()
1003                 self.cover_thumb.build_delay()
1004                 self.cover_api_thumb.build_delay()
1005                 self.simple_cover.build_delay()
1006                 self.cover_ebookpoint.build_delay()
1007             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
1008                 if format_ not in app_settings.DONT_BUILD:
1009                     getattr(self, '%s_file' % format_).build_delay()
1010             for child in self.children.all():
1011                 child.parent_cover_changed()
1012
1013     def other_versions(self):
1014         """Find other versions (i.e. in other languages) of the book."""
1015         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
1016
1017     def parents(self):
1018         books = []
1019         parent = self.parent
1020         while parent is not None:
1021             books.insert(0, parent)
1022             parent = parent.parent
1023         return books
1024
1025     def pretty_title(self, html_links=False):
1026         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1027         books = self.parents() + [self]
1028         names.extend([(b.title, b.get_absolute_url()) for b in books])
1029
1030         if html_links:
1031             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1032         else:
1033             names = [tag[0] for tag in names]
1034         return ', '.join(names)
1035
1036     def publisher(self):
1037         publisher = self.get_extra_info_json()['publisher']
1038         if isinstance(publisher, str):
1039             return publisher
1040         elif isinstance(publisher, list):
1041             return ', '.join(publisher)
1042
1043     def get_recommended(self, limit=4):
1044         books_qs = type(self).objects.filter(findable=True)
1045         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1046         books = type(self).tagged.related_to(self, books_qs)[:limit]
1047         return books
1048
1049     @classmethod
1050     def tagged_top_level(cls, tags):
1051         """ Returns top-level books tagged with `tags`.
1052
1053         It only returns those books which don't have ancestors which are
1054         also tagged with those tags.
1055
1056         """
1057         objects = cls.tagged.with_all(tags)
1058         return objects.filter(findable=True).exclude(ancestor__in=objects)
1059
1060     @classmethod
1061     def book_list(cls, book_filter=None):
1062         """Generates a hierarchical listing of all books.
1063
1064         Books are optionally filtered with a test function.
1065
1066         """
1067
1068         books_by_parent = {}
1069         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1070         if book_filter:
1071             books = books.filter(book_filter).distinct()
1072
1073             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1074             for book in books.iterator():
1075                 parent = book.parent_id
1076                 if parent not in book_ids:
1077                     parent = None
1078                 books_by_parent.setdefault(parent, []).append(book)
1079         else:
1080             for book in books.iterator():
1081                 books_by_parent.setdefault(book.parent_id, []).append(book)
1082
1083         orphans = []
1084         books_by_author = OrderedDict()
1085         for tag in Tag.objects.filter(category='author').iterator():
1086             books_by_author[tag] = []
1087
1088         for book in books_by_parent.get(None, ()):
1089             authors = list(book.authors().only('pk'))
1090             if authors:
1091                 for author in authors:
1092                     books_by_author[author].append(book)
1093             else:
1094                 orphans.append(book)
1095
1096         return books_by_author, orphans, books_by_parent
1097
    # Maps an audience code from a book's extra info to a
    # (sort rank, Polish label) pair.  audiences_pl() sorts by the rank and
    # returns the labels; codes missing here get rank 99 there and are shown
    # verbatim.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),   # primary school
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),            # middle school
        "L": (3, "liceum"),               # high school
        "LP": (3, "liceum"),
    }
1108
1109     def audiences_pl(self):
1110         audiences = self.get_extra_info_json().get('audiences', [])
1111         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1112         return [a[1] for a in audiences]
1113
1114     def stage_note(self):
1115         stage = self.get_extra_info_json().get('stage')
1116         if stage and stage < '0.4':
1117             return (_('Ten utwór wymaga uwspółcześnienia'),
1118                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1119         else:
1120             return None, None
1121
1122     def choose_fragments(self, number):
1123         fragments = self.fragments.order_by()
1124         fragments_count = fragments.count()
1125         if not fragments_count and self.children.exists():
1126             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1127             fragments_count = fragments.count()
1128         if fragments_count:
1129             if fragments_count > number:
1130                 offset = randint(0, fragments_count - number)
1131             else:
1132                 offset = 0
1133             return fragments[offset : offset + number]
1134         elif self.parent:
1135             return self.parent.choose_fragments(number)
1136         else:
1137             return []
1138
1139     def choose_fragment(self):
1140         fragments = self.choose_fragments(1)
1141         if fragments:
1142             return fragments[0]
1143         else:
1144             return None
1145
1146     def fragment_data(self):
1147         fragment = self.choose_fragment()
1148         if fragment:
1149             return {
1150                 'title': fragment.book.pretty_title(),
1151                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1152             }
1153         else:
1154             return None
1155
1156     def update_popularity(self):
1157         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1158         try:
1159             pop = self.popularity
1160             pop.count = count
1161             pop.save()
1162         except BookPopularity.DoesNotExist:
1163             BookPopularity.objects.create(book=self, count=count)
1164
1165     def ridero_link(self):
1166         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1167
1168     def elevenreader_link(self):
1169         first_text = self.get_first_text()
1170         if first_text is None:
1171             return None
1172         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1173
1174     def content_warnings(self):
1175         warnings_def = {
1176             'wulgaryzmy': _('wulgaryzmy'),
1177         }
1178         warnings = self.get_extra_info_json().get('content_warnings', [])
1179         warnings = [
1180             warnings_def.get(w, w)
1181             for w in warnings
1182         ]
1183         warnings.sort()
1184         return warnings
1185
1186     def full_sort_key(self):
1187         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1188
1189     def cover_color(self):
1190         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1191
1192     @cached_render('catalogue/book_mini_box.html')
1193     def mini_box(self):
1194         return {
1195             'book': self
1196         }
1197
1198     @cached_render('catalogue/book_mini_box.html')
1199     def mini_box_nolink(self):
1200         return {
1201             'book': self,
1202             'no_link': True,
1203         }
1204
1205
class BookPopularity(models.Model):
    """Popularity counter of a single book, reachable as ``book.popularity``.

    ``Book.update_popularity()`` stores here the number of distinct users
    who have the book on one of their lists.
    """
    # One record per book; deleted together with the book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed in the database.
    count = models.IntegerField(default=0, db_index=True)