fix stats calc
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from slugify import slugify
11 from sortify import sortify
12 from urllib.request import urlretrieve
13 from django.apps import apps
14 from django.conf import settings
15 from django.db import connection, models, transaction
16 import django.dispatch
17 from django.contrib.contenttypes.fields import GenericRelation
18 from django.template.loader import render_to_string
19 from django.urls import reverse
20 from django.utils.translation import gettext_lazy as _, get_language
21 from fnpdjango.storage import BofhFileSystemStorage
22 from lxml import html
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from librarian.builders import builders
26 from newtagging import managers
27 from catalogue import constants
28 from catalogue import fields
29 from catalogue.models import Tag, Fragment, BookMedia
30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
31 from catalogue.models.tag import prefetched_relations
32 from catalogue import app_settings
33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
34
35 bofh_storage = BofhFileSystemStorage()
36
37
38 class Book(models.Model):
39     """Represents a book imported from WL-XML."""
    # --- Descriptive metadata (verbose names are Polish, user-facing) ---
    title = models.CharField('tytuł', max_length=32767)
    # Recomputed from the title on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    # Recomputed on every save() from the sort_key of the first author tag.
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta(): the `variant_of` slug when the metadata
    # has one, otherwise the book's own slug.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    # NOTE(review): 'data motyfikacji' looks like a typo for 'data modyfikacji';
    # it is a runtime string, so it is left untouched here.
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the parent's children (assigned in from_text_and_meta()).
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON document stored as text; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    # Preformatted string produced by format_audio_length().
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state: `preview_until` is set when a new book is created with
    # a non-zero `days`; `preview_key` is generated in save() when missing.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names backed by a "<name>_file" attribute on the model;
    # has_media()/get_media() treat every other type as a related media row.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # Book hierarchy: direct parent plus a many-to-many ancestor relation.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values are refreshed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    # Both values are written by update_stats().
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
110
111     class AlreadyExists(Exception):
112         pass
113
114     class Meta:
115         ordering = ('sort_key_author', 'sort_key')
116         verbose_name = 'książka'
117         verbose_name_plural = 'książki'
118         app_label = 'catalogue'
119
120     def __str__(self):
121         return self.title
122
123     def get_extra_info_json(self):
124         return json.loads(self.extra_info or '{}')
125
126     def get_initial(self):
127         try:
128             return re.search(r'\w', self.title, re.U).group(0)
129         except AttributeError:
130             return ''
131
132     def authors(self):
133         return self.tags.filter(category='author')
134
135     def epochs(self):
136         return self.tags.filter(category='epoch')
137
138     def genres(self):
139         return self.tags.filter(category='genre')
140
141     def kinds(self):
142         return self.tags.filter(category='kind')
143
144     def tag_unicode(self, category):
145         relations = prefetched_relations(self, category)
146         if relations:
147             return ', '.join(rel.tag.name for rel in relations)
148         else:
149             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
150
151     def tags_by_category(self):
152         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
153
154     def author_unicode(self):
155         return self.cached_author
156
157     def kind_unicode(self):
158         return self.tag_unicode('kind')
159
160     def epoch_unicode(self):
161         return self.tag_unicode('epoch')
162
163     def genre_unicode(self):
164         return self.tag_unicode('genre')
165
166     def translator(self):
167         translators = self.get_extra_info_json().get('translators')
168         if not translators:
169             return None
170         if len(translators) > 3:
171             translators = translators[:2]
172             others = ' i inni'
173         else:
174             others = ''
175         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
176
177     def cover_source(self):
178         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
179
180     @property
181     def isbn_pdf(self):
182         return self.get_extra_info_json().get('isbn_pdf')
183
184     @property
185     def isbn_epub(self):
186         return self.get_extra_info_json().get('isbn_epub')
187
188     @property
189     def isbn_mobi(self):
190         return self.get_extra_info_json().get('isbn_mobi')
191
192     @property
193     def redakcja(self):
194         return self.get_extra_info_json().get('about')
195     
196     def is_accessible_to(self, user):
197         if not self.preview:
198             return True
199         if not user.is_authenticated:
200             return False
201         Membership = apps.get_model('club', 'Membership')
202         if Membership.is_active_for(user):
203             return True
204         Funding = apps.get_model('funding', 'Funding')
205         if Funding.objects.filter(user=user, offer__book=self):
206             return True
207         return False
208
209     def save(self, force_insert=False, force_update=False, **kwargs):
210         from sortify import sortify
211
212         self.sort_key = sortify(self.title)[:120]
213         self.title = str(self.title)  # ???
214
215         try:
216             author = self.authors().first().sort_key
217         except AttributeError:
218             author = ''
219         self.sort_key_author = author
220
221         self.cached_author = self.tag_unicode('author')
222         self.has_audience = 'audience' in self.get_extra_info_json()
223
224         if self.preview and not self.preview_key:
225             self.preview_key = get_random_hash(self.slug)[:32]
226
227         ret = super(Book, self).save(force_insert, force_update, **kwargs)
228
229         return ret
230
231     def get_absolute_url(self):
232         return reverse('book_detail', args=[self.slug])
233
234     def gallery_path(self):
235         return gallery_path(self.slug)
236
237     def gallery_url(self):
238         return gallery_url(self.slug)
239
240     def get_first_text(self):
241         if self.html_file:
242             return self
243         child = self.children.all().order_by('parent_number').first()
244         if child is not None:
245             return child.get_first_text()
246
247     def get_last_text(self):
248         if self.html_file:
249             return self
250         child = self.children.all().order_by('parent_number').last()
251         if child is not None:
252             return child.get_last_text()
253
254     def get_prev_text(self):
255         if not self.parent:
256             return None
257         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
258         if sibling is not None:
259             return sibling.get_last_text()
260
261         if self.parent.html_file:
262             return self.parent
263
264         return self.parent.get_prev_text()
265
266     def get_next_text(self, inside=True):
267         if inside:
268             child = self.children.order_by('parent_number').first()
269             if child is not None:
270                 return child.get_first_text()
271
272         if not self.parent:
273             return None
274         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
275         if sibling is not None:
276             return sibling.get_first_text()
277         return self.parent.get_next_text(inside=False)
278
279     def get_siblings(self):
280         if not self.parent:
281             return []
282         return self.parent.children.all().order_by('parent_number')
283
284     def get_children(self):
285         return self.children.all().order_by('parent_number')
286
287     @property
288     def name(self):
289         return self.title
290
291     def language_code(self):
292         return constants.LANGUAGES_3TO2.get(self.language, self.language)
293
294     def language_name(self):
295         return dict(settings.LANGUAGES).get(self.language_code(), "")
296
297     def is_foreign(self):
298         return self.language_code() != settings.LANGUAGE_CODE
299
300     def set_audio_length(self):
301         length = self.get_audio_length()
302         if length > 0:
303             self.audio_length = self.format_audio_length(length)
304             self.save()
305
306     @staticmethod
307     def format_audio_length(seconds):
308         """
309         >>> Book.format_audio_length(1)
310         '0:01'
311         >>> Book.format_audio_length(3661)
312         '1:01:01'
313         """
314         if seconds < 60*60:
315             minutes = seconds // 60
316             seconds = seconds % 60
317             return '%d:%02d' % (minutes, seconds)
318         else:
319             hours = seconds // 3600
320             minutes = seconds % 3600 // 60
321             seconds = seconds % 60
322             return '%d:%02d:%02d' % (hours, minutes, seconds)
323
324     def get_audio_length(self):
325         total = 0
326         for media in self.get_mp3() or ():
327             total += app_settings.GET_MP3_LENGTH(media.file.path)
328         return int(total)
329
330     def get_time(self):
331         return round(self.xml_file.size / 1000 * 40)
332     
333     def has_media(self, type_):
334         if type_ in Book.formats:
335             return bool(getattr(self, "%s_file" % type_))
336         else:
337             return self.media.filter(type=type_).exists()
338
339     def get_media(self, type_):
340         if self.has_media(type_):
341             if type_ in Book.formats:
342                 return getattr(self, "%s_file" % type_)
343             else:
344                 return self.media.filter(type=type_)
345         else:
346             return None
347
348     def get_mp3(self):
349         return self.get_media("mp3")
350
351     def get_odt(self):
352         return self.get_media("odt")
353
354     def get_ogg(self):
355         return self.get_media("ogg")
356
357     def get_daisy(self):
358         return self.get_media("daisy")
359
360     def get_audio_epub(self):
361         return self.get_media("audio.epub")
362
363     def media_url(self, format_):
364         media = self.get_media(format_)
365         if media:
366             if self.preview:
367                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
368             else:
369                 return media.url
370         else:
371             return None
372
373     def html_url(self):
374         return self.media_url('html')
375
376     def html_nonotes_url(self):
377         return self.media_url('html_nonotes')
378
379     def pdf_url(self):
380         return self.media_url('pdf')
381
382     def epub_url(self):
383         return self.media_url('epub')
384
385     def mobi_url(self):
386         return self.media_url('mobi')
387
388     def txt_url(self):
389         return self.media_url('txt')
390
391     def fb2_url(self):
392         return self.media_url('fb2')
393
394     def xml_url(self):
395         return self.media_url('xml')
396
397     def has_description(self):
398         return len(self.description) > 0
399     has_description.short_description = 'opis'
400     has_description.boolean = True
401
402     def has_mp3_file(self):
403         return self.has_media("mp3")
404     has_mp3_file.short_description = 'MP3'
405     has_mp3_file.boolean = True
406
407     def has_ogg_file(self):
408         return self.has_media("ogg")
409     has_ogg_file.short_description = 'OGG'
410     has_ogg_file.boolean = True
411
412     def has_daisy_file(self):
413         return self.has_media("daisy")
414     has_daisy_file.short_description = 'DAISY'
415     has_daisy_file.boolean = True
416
417     def has_sync_file(self):
418         return settings.FEATURE_SYNCHRO and self.has_media("sync")
419
420     def build_sync_file(self):
421         from lxml import html
422         from django.core.files.base import ContentFile
423         with self.html_file.open('rb') as f:
424             h = html.fragment_fromstring(f.read().decode('utf-8'))
425
426         durations = [
427             m['mp3'].duration
428             for m in self.get_audiobooks()[0]
429         ]
430         if settings.MOCK_DURATIONS:
431             durations = settings.MOCK_DURATIONS
432
433         sync = []
434         ts = None
435         sid = 1
436         dirty = False
437         for elem in h.iter():
438             if elem.get('data-audio-ts'):
439                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
440                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
441                 # check if inside verse
442                 p = elem.getparent()
443                 while p is not None:
444                     # Workaround for missing ids.
445                     if 'verse' in p.get('class', ''):
446                         if not p.get('id'):
447                             p.set('id', f'syn{sid}')
448                             dirty = True
449                             sid += 1
450                         sync.append((ts, p.get('id')))
451                         ts = None
452                         break
453                     p = p.getparent()
454             elif ts:
455                 cls = elem.get('class', '')
456                 # Workaround for missing ids.
457                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
458                     if not elem.get('id'):
459                         elem.set('id', f'syn{sid}')
460                         dirty = True
461                         sid += 1
462                     sync.append((ts, elem.get('id')))
463                     ts = None
464         if dirty:
465             htext = html.tostring(h, encoding='utf-8')
466             with open(self.html_file.path, 'wb') as f:
467                 f.write(htext)
468         try:
469             bm = self.media.get(type='sync')
470         except:
471             bm = BookMedia(book=self, type='sync')
472         sync = (
473             '27\n' + '\n'.join(
474                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
475             )).encode('latin1')
476         bm.file.save(
477             None, ContentFile(sync)
478             )
479
480     def get_sync(self):
481         if not self.has_sync_file():
482             return []
483         with self.get_media('sync').first().file.open('r') as f:
484             sync = f.read().split('\n')
485         offset = float(sync[0])
486         items = []
487         for line in sync[1:]:
488             if not line:
489                 continue
490             start, end, elid = line.split()
491             items.append([elid, float(start) + offset])
492         return items
493
494     def sync_ts(self, ts):
495         elid = None
496         for cur_id, t in self.get_sync():
497             if ts >= t:
498                 elid = cur_id
499             else:
500                 break
501         return elid
502
503     def sync_elid(self, elid):
504         for cur_id, t in self.get_sync():
505             if cur_id == elid:
506                 return t
507
508     def has_audio_epub_file(self):
509         return self.has_media("audio.epub")
510
511     @property
512     def media_daisy(self):
513         return self.get_media('daisy')
514
515     @property
516     def media_audio_epub(self):
517         return self.get_media('audio.epub')
518
519     def get_audiobooks(self, with_children=False, processing=False):
520         ogg_files = {}
521         for m in self.media.filter(type='ogg').order_by().iterator():
522             ogg_files[m.name] = m
523
524         audiobooks = []
525         projects = set()
526         total_duration = 0
527         for mp3 in self.media.filter(type='mp3').iterator():
528             # ogg files are always from the same project
529             meta = mp3.get_extra_info_json()
530             project = meta.get('project')
531             if not project:
532                 # temporary fallback
533                 project = 'CzytamySłuchając'
534
535             projects.add((project, meta.get('funded_by', '')))
536             total_duration += mp3.duration or 0
537
538             media = {'mp3': mp3}
539
540             ogg = ogg_files.get(mp3.name)
541             if ogg:
542                 media['ogg'] = ogg
543             audiobooks.append(media)
544
545         if with_children:
546             for child in self.get_children():
547                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
548                     with_children=True, processing=True)
549                 audiobooks.append({'part': child})
550                 audiobooks += ch_audiobooks
551                 projects.update(ch_projects)
552                 total_duration += ch_duration
553
554         if not processing:
555             projects = sorted(projects)
556             total_duration = '%d:%02d' % (
557                 total_duration // 60,
558                 total_duration % 60
559             )
560
561         return audiobooks, projects, total_duration
562
563     def get_audiobooks_with_children(self):
564         return self.get_audiobooks(with_children=True)
565     
566     def wldocument(self, parse_dublincore=True, inherit=True):
567         from catalogue.import_utils import ORMDocProvider
568         from librarian.parser import WLDocument
569
570         if inherit and self.parent:
571             meta_fallbacks = self.parent.cover_info()
572         else:
573             meta_fallbacks = None
574
575         return WLDocument.from_file(
576             self.xml_file.path,
577             provider=ORMDocProvider(self),
578             parse_dublincore=parse_dublincore,
579             meta_fallbacks=meta_fallbacks)
580
581     def wldocument2(self):
582         from catalogue.import_utils import ORMDocProvider
583         from librarian.document import WLDocument
584         doc = WLDocument(
585             self.xml_file.path,
586             provider=ORMDocProvider(self)
587         )
588         doc.meta.update(self.cover_info())
589         return doc
590
591
592     @staticmethod
593     def zip_format(format_):
594         def pretty_file_name(book):
595             return "%s/%s.%s" % (
596                 book.get_extra_info_json()['author'],
597                 book.slug,
598                 format_)
599
600         field_name = "%s_file" % format_
601         field = getattr(Book, field_name)
602         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
603         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
604         return create_zip(paths, field.ZIP)
605
606     def zip_audiobooks(self, format_):
607         bm = BookMedia.objects.filter(book=self, type=format_)
608         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
609         licenses = set()
610         for m in bm:
611             license = constants.LICENSES.get(
612                 m.get_extra_info_json().get('license'), {}
613             ).get('locative')
614             if license:
615                 licenses.add(license)
616         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
617             'licenses': licenses,
618             'meta': self.wldocument2().meta,
619         })
620         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
621
622     def search_index(self, index=None):
623         if not self.findable:
624             return
625         from search.index import Index
626         Index.index_book(self)
627
    # NOTE: this will cause problems in conjunction with paid previews.
629     def download_pictures(self, remote_gallery_url):
630         # This is only needed for legacy relative image paths.
631         gallery_path = self.gallery_path()
632         # delete previous files, so we don't include old files in ebooks
633         if os.path.isdir(gallery_path):
634             for filename in os.listdir(gallery_path):
635                 file_path = os.path.join(gallery_path, filename)
636                 os.unlink(file_path)
637         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
638         if ilustr_elements:
639             makedirs(gallery_path)
640             for ilustr in ilustr_elements:
641                 ilustr_src = ilustr.get('src')
642                 if '/' in ilustr_src:
643                     continue
644                 ilustr_path = os.path.join(gallery_path, ilustr_src)
645                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
646
647     def load_abstract(self):
648         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
649         if abstract is not None:
650             self.abstract = transform_abstrakt(abstract)
651         else:
652             self.abstract = ''
653
654     def load_toc(self):
655         self.toc = ''
656         if self.html_file:
657             parser = html.HTMLParser(encoding='utf-8')
658             tree = html.parse(self.html_file.path, parser=parser)
659             toc = tree.find('//div[@id="toc"]/ol')
660             if toc is None or not len(toc):
661                 return
662             html_link = reverse('book_text', args=[self.slug])
663             for a in toc.findall('.//a'):
664                 a.attrib['href'] = html_link + a.attrib['href']
665             self.toc = html.tostring(toc, encoding='unicode')
666             # div#toc
667
668     @classmethod
669     def from_xml_file(cls, xml_file, **kwargs):
670         from django.core.files import File
671         from librarian import dcparser
672
673         # use librarian to parse meta-data
674         book_info = dcparser.parse(xml_file)
675
676         if not isinstance(xml_file, File):
677             xml_file = File(open(xml_file))
678
679         try:
680             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
681         finally:
682             xml_file.close()
683
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its ``url``,
                ``language``, ``title``, ``license``, ``variant_of`` and
                (when present) ``parts`` attributes and calls ``to_dict()``.
            overwrite: allow updating a book that already exists.
            dont_build: names of builds to skip ('cover' and/or ebook format
                names); merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being off).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview lasting that many days from today. It is not applied
                to a book that already exists.
            findable: stored on the book as ``findable``.
            logo, logo_mono, logo_alt: stored in ``extra_info`` when truthy.

        Returns:
            The created or updated book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        # Normalise dont_build to a set and add the globally disabled builds.
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        # extra_info holds the whole metadata dict plus the optional logos.
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name become plain tags; shelves ('set'
        # tags) saved from the previous version are kept.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        # Children that must be told about a cover change; the notification
        # itself happens after the builds are scheduled below.
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Let listeners know the book has been published.
        cls.published.send(sender=cls, instance=book)
        return book
814
815     def update_stats(self):
816         stats = self.wldocument2().get_statistics()['total']
817         self.pages = round(
818             stats.get('verses_with_fn', 0) / 30 +
819             stats.get('chars_out_verse_with_fn', 0) / 1800)
820         self.read_time = round(self.get_time())
821         self.save(update_fields=['pages', 'read_time'])
822         if self.parent is not None:
823             self.parent.update_stats()
824
825     def update_references(self):
826         Entity = apps.get_model('references', 'Entity')
827         doc = self.wldocument2()
828         doc._compat_assign_section_ids()
829         doc._compat_assign_ordered_ids()
830         refs = {}
831         for ref_elem in doc.references():
832             uri = ref_elem.attrib.get('href', '')
833             if not uri:
834                 continue
835             if uri in refs:
836                 ref = refs[uri]
837             else:
838                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
839                 if entity_created:
840                     try:
841                         entity.populate()
842                     except:
843                         pass
844                     else:
845                         entity.save()
846                 ref, ref_created = entity.reference_set.get_or_create(book=self)
847                 refs[uri] = ref
848                 if not ref_created:
849                     ref.occurence_set.all().delete()
850             sec = ref_elem.get_link()
851             m = re.match(r'sec(\d+)', sec)
852             assert m is not None
853             sec = int(m.group(1))
854             snippet = ref_elem.get_snippet()
855             b = builders['html-snippet']()
856             for s in snippet:
857                 s.html_build(b)
858             html = b.output().get_bytes().decode('utf-8')
859
860             ref.occurence_set.create(
861                 section=sec,
862                 html=html
863             )
864         self.reference_set.exclude(entity__uri__in=refs).delete()
865
866     @property
867     def references(self):
868         return self.reference_set.all().select_related('entity')
869
870     def update_has_audio(self):
871         self.has_audio = False
872         if self.media.filter(type='mp3').exists():
873             self.has_audio = True
874         if self.descendant.filter(has_audio=True).exists():
875             self.has_audio = True
876         self.save(update_fields=['has_audio'])
877         if self.parent is not None:
878             self.parent.update_has_audio()
879
880     def update_narrators(self):
881         narrator_names = set()
882         for bm in self.media.filter(type='mp3'):
883             narrator_names.update(set(
884                 a.strip() for a in re.split(r',|\si\s', bm.artist)
885             ))
886         narrators = []
887
888         for name in narrator_names:
889             if not name: continue
890             slug = slugify(name)
891             try:
892                 t = Tag.objects.get(category='author', slug=slug)
893             except Tag.DoesNotExist:
894                 sort_key = sortify(
895                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
896                 )
897                 t = Tag.objects.create(
898                     category='author',
899                     name_pl=name,
900                     slug=slug,
901                     sort_key=sort_key,
902                 )
903             narrators.append(t)
904         self.narrators.set(narrators)
905
906     @classmethod
907     @transaction.atomic
908     def repopulate_ancestors(cls):
909         """Fixes the ancestry cache."""
910         # TODO: table names
911         cursor = connection.cursor()
912         if connection.vendor == 'postgres':
913             cursor.execute("TRUNCATE catalogue_book_ancestor")
914             cursor.execute("""
915                 WITH RECURSIVE ancestry AS (
916                     SELECT book.id, book.parent_id
917                     FROM catalogue_book AS book
918                     WHERE book.parent_id IS NOT NULL
919                     UNION
920                     SELECT ancestor.id, book.parent_id
921                     FROM ancestry AS ancestor, catalogue_book AS book
922                     WHERE ancestor.parent_id = book.id
923                         AND book.parent_id IS NOT NULL
924                     )
925                 INSERT INTO catalogue_book_ancestor
926                     (from_book_id, to_book_id)
927                     SELECT id, parent_id
928                     FROM ancestry
929                     ORDER BY id;
930                 """)
931         else:
932             cursor.execute("DELETE FROM catalogue_book_ancestor")
933             for b in cls.objects.exclude(parent=None):
934                 parent = b.parent
935                 while parent is not None:
936                     b.ancestor.add(parent)
937                     parent = parent.parent
938
939     @property
940     def ancestors(self):
941         if self.parent:
942             for anc in self.parent.ancestors:
943                 yield anc
944             yield self.parent
945         else:
946             return []
947
948     def clear_cache(self):
949         clear_cached_renders(self.mini_box)
950         clear_cached_renders(self.mini_box_nolink)
951
952     def cover_info(self, inherit=True):
953         """Returns a dictionary to serve as fallback for BookInfo.
954
955         For now, the only thing inherited is the cover image.
956         """
957         need = False
958         info = {}
959         for field in ('cover_url', 'cover_by', 'cover_source'):
960             val = self.get_extra_info_json().get(field)
961             if val:
962                 info[field] = val
963             else:
964                 need = True
965         if inherit and need and self.parent is not None:
966             parent_info = self.parent.cover_info()
967             parent_info.update(info)
968             info = parent_info
969         return info
970
971     def related_themes(self):
972         return Tag.objects.usage_for_queryset(
973             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
974             counts=True).filter(category='theme').order_by('-count')
975
976     def parent_cover_changed(self):
977         """Called when parent book's cover image is changed."""
978         if not self.cover_info(inherit=False):
979             if 'cover' not in app_settings.DONT_BUILD:
980                 self.cover.build_delay()
981                 self.cover_clean.build_delay()
982                 self.cover_thumb.build_delay()
983                 self.cover_api_thumb.build_delay()
984                 self.simple_cover.build_delay()
985                 self.cover_ebookpoint.build_delay()
986             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
987                 if format_ not in app_settings.DONT_BUILD:
988                     getattr(self, '%s_file' % format_).build_delay()
989             for child in self.children.all():
990                 child.parent_cover_changed()
991
992     def other_versions(self):
993         """Find other versions (i.e. in other languages) of the book."""
994         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
995
996     def parents(self):
997         books = []
998         parent = self.parent
999         while parent is not None:
1000             books.insert(0, parent)
1001             parent = parent.parent
1002         return books
1003
1004     def pretty_title(self, html_links=False):
1005         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1006         books = self.parents() + [self]
1007         names.extend([(b.title, b.get_absolute_url()) for b in books])
1008
1009         if html_links:
1010             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1011         else:
1012             names = [tag[0] for tag in names]
1013         return ', '.join(names)
1014
1015     def publisher(self):
1016         publisher = self.get_extra_info_json()['publisher']
1017         if isinstance(publisher, str):
1018             return publisher
1019         elif isinstance(publisher, list):
1020             return ', '.join(publisher)
1021
1022     def get_recommended(self, limit=4):
1023         books_qs = type(self).objects.filter(findable=True)
1024         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1025         books = type(self).tagged.related_to(self, books_qs)[:limit]
1026         return books
1027
1028     @classmethod
1029     def tagged_top_level(cls, tags):
1030         """ Returns top-level books tagged with `tags`.
1031
1032         It only returns those books which don't have ancestors which are
1033         also tagged with those tags.
1034
1035         """
1036         objects = cls.tagged.with_all(tags)
1037         return objects.filter(findable=True).exclude(ancestor__in=objects)
1038
1039     @classmethod
1040     def book_list(cls, book_filter=None):
1041         """Generates a hierarchical listing of all books.
1042
1043         Books are optionally filtered with a test function.
1044
1045         """
1046
1047         books_by_parent = {}
1048         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1049         if book_filter:
1050             books = books.filter(book_filter).distinct()
1051
1052             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1053             for book in books.iterator():
1054                 parent = book.parent_id
1055                 if parent not in book_ids:
1056                     parent = None
1057                 books_by_parent.setdefault(parent, []).append(book)
1058         else:
1059             for book in books.iterator():
1060                 books_by_parent.setdefault(book.parent_id, []).append(book)
1061
1062         orphans = []
1063         books_by_author = OrderedDict()
1064         for tag in Tag.objects.filter(category='author').iterator():
1065             books_by_author[tag] = []
1066
1067         for book in books_by_parent.get(None, ()):
1068             authors = list(book.authors().only('pk'))
1069             if authors:
1070                 for author in authors:
1071                     books_by_author[author].append(book)
1072             else:
1073                 orphans.append(book)
1074
1075         return books_by_author, orphans, books_by_parent
1076
    # Maps an audience code found in the book's extra info to a
    # (sort rank, Polish label) pair.  audiences_pl() sorts by these pairs
    # and returns only the labels; several codes share one pair.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
1087
1088     def audiences_pl(self):
1089         audiences = self.get_extra_info_json().get('audiences', [])
1090         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1091         return [a[1] for a in audiences]
1092
1093     def stage_note(self):
1094         stage = self.get_extra_info_json().get('stage')
1095         if stage and stage < '0.4':
1096             return (_('Ten utwór wymaga uwspółcześnienia'),
1097                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1098         else:
1099             return None, None
1100
1101     def choose_fragments(self, number):
1102         fragments = self.fragments.order_by()
1103         fragments_count = fragments.count()
1104         if not fragments_count and self.children.exists():
1105             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1106             fragments_count = fragments.count()
1107         if fragments_count:
1108             if fragments_count > number:
1109                 offset = randint(0, fragments_count - number)
1110             else:
1111                 offset = 0
1112             return fragments[offset : offset + number]
1113         elif self.parent:
1114             return self.parent.choose_fragments(number)
1115         else:
1116             return []
1117
1118     def choose_fragment(self):
1119         fragments = self.choose_fragments(1)
1120         if fragments:
1121             return fragments[0]
1122         else:
1123             return None
1124
1125     def fragment_data(self):
1126         fragment = self.choose_fragment()
1127         if fragment:
1128             return {
1129                 'title': fragment.book.pretty_title(),
1130                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1131             }
1132         else:
1133             return None
1134
1135     def update_popularity(self):
1136         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1137         try:
1138             pop = self.popularity
1139             pop.count = count
1140             pop.save()
1141         except BookPopularity.DoesNotExist:
1142             BookPopularity.objects.create(book=self, count=count)
1143
1144     def ridero_link(self):
1145         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1146
1147     def elevenreader_link(self):
1148         first_text = self.get_first_text()
1149         if first_text is None:
1150             return None
1151         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1152
1153     def content_warnings(self):
1154         warnings_def = {
1155             'wulgaryzmy': _('wulgaryzmy'),
1156         }
1157         warnings = self.get_extra_info_json().get('content_warnings', [])
1158         warnings = [
1159             warnings_def.get(w, w)
1160             for w in warnings
1161         ]
1162         warnings.sort()
1163         return warnings
1164
1165     def full_sort_key(self):
1166         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1167
1168     def cover_color(self):
1169         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1170
1171     @cached_render('catalogue/book_mini_box.html')
1172     def mini_box(self):
1173         return {
1174             'book': self
1175         }
1176
1177     @cached_render('catalogue/book_mini_box.html')
1178     def mini_box_nolink(self):
1179         return {
1180             'book': self,
1181             'no_link': True,
1182         }
1183
1184
1185 class BookPopularity(models.Model):
1186     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1187     count = models.IntegerField(default=0, db_index=True)