Add can_sell
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from slugify import slugify
11 from sortify import sortify
12 from urllib.request import urlretrieve
13 from django.apps import apps
14 from django.conf import settings
15 from django.db import connection, models, transaction
16 import django.dispatch
17 from django.contrib.contenttypes.fields import GenericRelation
18 from django.template.loader import render_to_string
19 from django.urls import reverse
20 from django.utils.translation import gettext_lazy as _, get_language
21 from fnpdjango.storage import BofhFileSystemStorage
22 from lxml import html
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from librarian.builders import builders
26 from newtagging import managers
27 from catalogue import constants
28 from catalogue import fields
29 from catalogue.models import Tag, Fragment, BookMedia
30 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
31 from catalogue.models.tag import prefetched_relations
32 from catalogue import app_settings
33 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
34
# Shared storage instance passed as ``storage=`` to the generated-file fields
# declared on Book below.
bofh_storage = BofhFileSystemStorage()
36
37
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    # --- Basic metadata -----------------------------------------------------
    title = models.CharField('tytuł', max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, db_collation='C', editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, db_collation='C', editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    license = models.CharField('licencja', max_length=255, blank=True, db_index=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    # NOTE(review): 'motyfikacji' looks like a typo for 'modyfikacji'; it is a
    # runtime verbose name, so it is left untouched here.
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded dict; read through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # --- Preview state; save() fills preview_key when preview is set --------
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)
    can_sell = models.BooleanField('do sprzedaży', default=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    html_nonotes_file = fields.HtmlNonotesField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    # NOTE(review): 'mniaturka' looks like a typo for 'miniaturka'; runtime
    # string, left untouched here.
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names for which a "<format>_file" field exists on this model;
    # has_media() / get_media() treat any other type as a related media query.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml', 'html_nonotes']

    # --- Hierarchy: children are ordered by parent_number --------------------
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values are refreshed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model, related_query_name='tagged_book')
    translators = models.ManyToManyField(Tag, blank=True)
    narrators = models.ManyToManyField(Tag, blank=True, related_name='narrated')
    has_audio = models.BooleanField(default=False)
    # pages and read_time are written by update_stats().
    read_time = models.IntegerField(blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)

    # Class-level signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
111
112     class AlreadyExists(Exception):
113         pass
114
115     class Meta:
116         ordering = ('sort_key_author', 'sort_key')
117         verbose_name = 'książka'
118         verbose_name_plural = 'książki'
119         app_label = 'catalogue'
120
121     def __str__(self):
122         return self.title
123
124     def get_extra_info_json(self):
125         return json.loads(self.extra_info or '{}')
126
127     def get_initial(self):
128         try:
129             return re.search(r'\w', self.title, re.U).group(0)
130         except AttributeError:
131             return ''
132
133     def authors(self):
134         return self.tags.filter(category='author')
135
136     def epochs(self):
137         return self.tags.filter(category='epoch')
138
139     def genres(self):
140         return self.tags.filter(category='genre')
141
142     def kinds(self):
143         return self.tags.filter(category='kind')
144
145     def tag_unicode(self, category):
146         relations = prefetched_relations(self, category)
147         if relations:
148             return ', '.join(rel.tag.name for rel in relations)
149         else:
150             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
151
152     def tags_by_category(self):
153         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
154
155     def author_unicode(self):
156         return self.cached_author
157
158     def kind_unicode(self):
159         return self.tag_unicode('kind')
160
161     def epoch_unicode(self):
162         return self.tag_unicode('epoch')
163
164     def genre_unicode(self):
165         return self.tag_unicode('genre')
166
167     def translator(self):
168         translators = self.get_extra_info_json().get('translators')
169         if not translators:
170             return None
171         if len(translators) > 3:
172             translators = translators[:2]
173             others = ' i inni'
174         else:
175             others = ''
176         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
177
178     def cover_source(self):
179         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
180
181     @property
182     def isbn_pdf(self):
183         return self.get_extra_info_json().get('isbn_pdf')
184
185     @property
186     def isbn_epub(self):
187         return self.get_extra_info_json().get('isbn_epub')
188
189     @property
190     def isbn_mobi(self):
191         return self.get_extra_info_json().get('isbn_mobi')
192
193     @property
194     def redakcja(self):
195         return self.get_extra_info_json().get('about')
196     
197     def is_accessible_to(self, user):
198         if not self.preview:
199             return True
200         if not user.is_authenticated:
201             return False
202         Membership = apps.get_model('club', 'Membership')
203         if Membership.is_active_for(user):
204             return True
205         Funding = apps.get_model('funding', 'Funding')
206         if Funding.objects.filter(user=user, offer__book=self):
207             return True
208         return False
209
210     def save(self, force_insert=False, force_update=False, **kwargs):
211         from sortify import sortify
212
213         self.sort_key = sortify(self.title)[:120]
214         self.title = str(self.title)  # ???
215
216         try:
217             author = self.authors().first().sort_key
218         except AttributeError:
219             author = ''
220         self.sort_key_author = author
221
222         self.cached_author = self.tag_unicode('author')
223         self.has_audience = 'audience' in self.get_extra_info_json()
224
225         if self.preview and not self.preview_key:
226             self.preview_key = get_random_hash(self.slug)[:32]
227
228         ret = super(Book, self).save(force_insert, force_update, **kwargs)
229
230         return ret
231
232     def get_absolute_url(self):
233         return reverse('book_detail', args=[self.slug])
234
235     def gallery_path(self):
236         return gallery_path(self.slug)
237
238     def gallery_url(self):
239         return gallery_url(self.slug)
240
241     def get_first_text(self):
242         if self.html_file:
243             return self
244         child = self.children.all().order_by('parent_number').first()
245         if child is not None:
246             return child.get_first_text()
247
248     def get_last_text(self):
249         if self.html_file:
250             return self
251         child = self.children.all().order_by('parent_number').last()
252         if child is not None:
253             return child.get_last_text()
254
255     def get_prev_text(self):
256         if not self.parent:
257             return None
258         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
259         if sibling is not None:
260             return sibling.get_last_text()
261
262         if self.parent.html_file:
263             return self.parent
264
265         return self.parent.get_prev_text()
266
267     def get_next_text(self, inside=True):
268         if inside:
269             child = self.children.order_by('parent_number').first()
270             if child is not None:
271                 return child.get_first_text()
272
273         if not self.parent:
274             return None
275         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
276         if sibling is not None:
277             return sibling.get_first_text()
278         return self.parent.get_next_text(inside=False)
279
280     def get_siblings(self):
281         if not self.parent:
282             return []
283         return self.parent.children.all().order_by('parent_number')
284
285     def get_children(self):
286         return self.children.all().order_by('parent_number')
287
288     @property
289     def name(self):
290         return self.title
291
292     def language_code(self):
293         return constants.LANGUAGES_3TO2.get(self.language, self.language)
294
295     def language_name(self):
296         return dict(settings.LANGUAGES).get(self.language_code(), "")
297
298     def is_foreign(self):
299         return self.language_code() != settings.LANGUAGE_CODE
300
301     def set_audio_length(self):
302         length = self.get_audio_length()
303         if length > 0:
304             self.audio_length = self.format_audio_length(length)
305             self.save()
306
307     @staticmethod
308     def format_audio_length(seconds):
309         """
310         >>> Book.format_audio_length(1)
311         '0:01'
312         >>> Book.format_audio_length(3661)
313         '1:01:01'
314         """
315         if seconds < 60*60:
316             minutes = seconds // 60
317             seconds = seconds % 60
318             return '%d:%02d' % (minutes, seconds)
319         else:
320             hours = seconds // 3600
321             minutes = seconds % 3600 // 60
322             seconds = seconds % 60
323             return '%d:%02d:%02d' % (hours, minutes, seconds)
324
325     def get_audio_length(self):
326         total = 0
327         for media in self.get_mp3() or ():
328             total += app_settings.GET_MP3_LENGTH(media.file.path)
329         return int(total)
330
331     def get_time(self):
332         return round(self.xml_file.size / 1000 * 40)
333     
334     def has_media(self, type_):
335         if type_ in Book.formats:
336             return bool(getattr(self, "%s_file" % type_))
337         else:
338             return self.media.filter(type=type_).exists()
339
340     def get_media(self, type_):
341         if self.has_media(type_):
342             if type_ in Book.formats:
343                 return getattr(self, "%s_file" % type_)
344             else:
345                 return self.media.filter(type=type_)
346         else:
347             return None
348
349     def get_mp3(self):
350         return self.get_media("mp3")
351
352     def get_odt(self):
353         return self.get_media("odt")
354
355     def get_ogg(self):
356         return self.get_media("ogg")
357
358     def get_daisy(self):
359         return self.get_media("daisy")
360
361     def get_audio_epub(self):
362         return self.get_media("audio.epub")
363
364     def media_url(self, format_):
365         media = self.get_media(format_)
366         if media:
367             if self.preview:
368                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
369             else:
370                 return media.url
371         else:
372             return None
373
374     def html_url(self):
375         return self.media_url('html')
376
377     def html_nonotes_url(self):
378         return self.media_url('html_nonotes')
379
380     def pdf_url(self):
381         return self.media_url('pdf')
382
383     def epub_url(self):
384         return self.media_url('epub')
385
386     def mobi_url(self):
387         return self.media_url('mobi')
388
389     def txt_url(self):
390         return self.media_url('txt')
391
392     def fb2_url(self):
393         return self.media_url('fb2')
394
395     def xml_url(self):
396         return self.media_url('xml')
397
398     def has_description(self):
399         return len(self.description) > 0
400     has_description.short_description = 'opis'
401     has_description.boolean = True
402
403     def has_mp3_file(self):
404         return self.has_media("mp3")
405     has_mp3_file.short_description = 'MP3'
406     has_mp3_file.boolean = True
407
408     def has_ogg_file(self):
409         return self.has_media("ogg")
410     has_ogg_file.short_description = 'OGG'
411     has_ogg_file.boolean = True
412
413     def has_daisy_file(self):
414         return self.has_media("daisy")
415     has_daisy_file.short_description = 'DAISY'
416     has_daisy_file.boolean = True
417
418     def has_sync_file(self):
419         return settings.FEATURE_SYNCHRO and self.has_media("sync")
420
421     def build_sync_file(self):
422         from lxml import html
423         from django.core.files.base import ContentFile
424         with self.html_file.open('rb') as f:
425             h = html.fragment_fromstring(f.read().decode('utf-8'))
426
427         durations = [
428             m['mp3'].duration
429             for m in self.get_audiobooks()[0]
430         ]
431         if settings.MOCK_DURATIONS:
432             durations = settings.MOCK_DURATIONS
433
434         sync = []
435         ts = None
436         sid = 1
437         dirty = False
438         for elem in h.iter():
439             if elem.get('data-audio-ts'):
440                 part, ts = int(elem.get('data-audio-part')), float(elem.get('data-audio-ts'))
441                 ts = str(round(sum(durations[:part - 1]) + ts, 3))
442                 # check if inside verse
443                 p = elem.getparent()
444                 while p is not None:
445                     # Workaround for missing ids.
446                     if 'verse' in p.get('class', ''):
447                         if not p.get('id'):
448                             p.set('id', f'syn{sid}')
449                             dirty = True
450                             sid += 1
451                         sync.append((ts, p.get('id')))
452                         ts = None
453                         break
454                     p = p.getparent()
455             elif ts:
456                 cls = elem.get('class', '')
457                 # Workaround for missing ids.
458                 if 'paragraph' in cls or 'verse' in cls or elem.tag in ('h1', 'h2', 'h3', 'h4'):
459                     if not elem.get('id'):
460                         elem.set('id', f'syn{sid}')
461                         dirty = True
462                         sid += 1
463                     sync.append((ts, elem.get('id')))
464                     ts = None
465         if dirty:
466             htext = html.tostring(h, encoding='utf-8')
467             with open(self.html_file.path, 'wb') as f:
468                 f.write(htext)
469         try:
470             bm = self.media.get(type='sync')
471         except:
472             bm = BookMedia(book=self, type='sync')
473         sync = (
474             '27\n' + '\n'.join(
475                 f'{s[0]}\t{sync[i+1][0]}\t{s[1]}' for i, s in enumerate(sync[:-1])
476             )).encode('latin1')
477         bm.file.save(
478             None, ContentFile(sync)
479             )
480
481     def get_sync(self):
482         if not self.has_sync_file():
483             return []
484         with self.get_media('sync').first().file.open('r') as f:
485             sync = f.read().split('\n')
486         offset = float(sync[0])
487         items = []
488         for line in sync[1:]:
489             if not line:
490                 continue
491             start, end, elid = line.split()
492             items.append([elid, float(start) + offset])
493         return items
494
495     def sync_ts(self, ts):
496         elid = None
497         for cur_id, t in self.get_sync():
498             if ts >= t:
499                 elid = cur_id
500             else:
501                 break
502         return elid
503
504     def sync_elid(self, elid):
505         for cur_id, t in self.get_sync():
506             if cur_id == elid:
507                 return t
508
509     def has_audio_epub_file(self):
510         return self.has_media("audio.epub")
511
512     @property
513     def media_daisy(self):
514         return self.get_media('daisy')
515
516     @property
517     def media_audio_epub(self):
518         return self.get_media('audio.epub')
519
520     def get_audiobooks(self, with_children=False, processing=False):
521         ogg_files = {}
522         for m in self.media.filter(type='ogg').order_by().iterator():
523             ogg_files[m.name] = m
524
525         audiobooks = []
526         projects = set()
527         total_duration = 0
528         for mp3 in self.media.filter(type='mp3').iterator():
529             # ogg files are always from the same project
530             meta = mp3.get_extra_info_json()
531             project = meta.get('project')
532             if not project:
533                 # temporary fallback
534                 project = 'CzytamySłuchając'
535
536             projects.add((project, meta.get('funded_by', '')))
537             total_duration += mp3.duration or 0
538
539             media = {'mp3': mp3}
540
541             ogg = ogg_files.get(mp3.name)
542             if ogg:
543                 media['ogg'] = ogg
544             audiobooks.append(media)
545
546         if with_children:
547             for child in self.get_children():
548                 ch_audiobooks, ch_projects, ch_duration = child.get_audiobooks(
549                     with_children=True, processing=True)
550                 audiobooks.append({'part': child})
551                 audiobooks += ch_audiobooks
552                 projects.update(ch_projects)
553                 total_duration += ch_duration
554
555         if not processing:
556             projects = sorted(projects)
557             total_duration = '%d:%02d' % (
558                 total_duration // 60,
559                 total_duration % 60
560             )
561
562         return audiobooks, projects, total_duration
563
564     def get_audiobooks_with_children(self):
565         return self.get_audiobooks(with_children=True)
566     
567     def wldocument(self, parse_dublincore=True, inherit=True):
568         from catalogue.import_utils import ORMDocProvider
569         from librarian.parser import WLDocument
570
571         if inherit and self.parent:
572             meta_fallbacks = self.parent.cover_info()
573         else:
574             meta_fallbacks = None
575
576         return WLDocument.from_file(
577             self.xml_file.path,
578             provider=ORMDocProvider(self),
579             parse_dublincore=parse_dublincore,
580             meta_fallbacks=meta_fallbacks)
581
582     def wldocument2(self):
583         from catalogue.import_utils import ORMDocProvider
584         from librarian.document import WLDocument
585         doc = WLDocument(
586             self.xml_file.path,
587             provider=ORMDocProvider(self)
588         )
589         doc.meta.update(self.cover_info())
590         return doc
591
592
593     @staticmethod
594     def zip_format(format_):
595         def pretty_file_name(book):
596             return "%s/%s.%s" % (
597                 book.get_extra_info_json()['author'],
598                 book.slug,
599                 format_)
600
601         field_name = "%s_file" % format_
602         field = getattr(Book, field_name)
603         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
604         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
605         return create_zip(paths, field.ZIP)
606
607     def zip_audiobooks(self, format_):
608         bm = BookMedia.objects.filter(book=self, type=format_)
609         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
610         licenses = set()
611         for m in bm:
612             license = constants.LICENSES.get(
613                 m.get_extra_info_json().get('license'), {}
614             ).get('locative')
615             if license:
616                 licenses.add(license)
617         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
618             'licenses': licenses,
619             'meta': self.wldocument2().meta,
620         })
621         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
622
623     def search_index(self, index=None):
624         if not self.findable:
625             return
626         from search.index import Index
627         Index.index_book(self)
628
629     # will make problems in conjunction with paid previews
630     def download_pictures(self, remote_gallery_url):
631         # This is only needed for legacy relative image paths.
632         gallery_path = self.gallery_path()
633         # delete previous files, so we don't include old files in ebooks
634         if os.path.isdir(gallery_path):
635             for filename in os.listdir(gallery_path):
636                 file_path = os.path.join(gallery_path, filename)
637                 os.unlink(file_path)
638         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
639         if ilustr_elements:
640             makedirs(gallery_path)
641             for ilustr in ilustr_elements:
642                 ilustr_src = ilustr.get('src')
643                 if '/' in ilustr_src:
644                     continue
645                 ilustr_path = os.path.join(gallery_path, ilustr_src)
646                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
647
648     def load_abstract(self):
649         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
650         if abstract is not None:
651             self.abstract = transform_abstrakt(abstract)
652         else:
653             self.abstract = ''
654
655     def load_toc(self):
656         self.toc = ''
657         if self.html_file:
658             parser = html.HTMLParser(encoding='utf-8')
659             tree = html.parse(self.html_file.path, parser=parser)
660             toc = tree.find('//div[@id="toc"]/ol')
661             if toc is None or not len(toc):
662                 return
663             html_link = reverse('book_text', args=[self.slug])
664             for a in toc.findall('.//a'):
665                 a.attrib['href'] = html_link + a.attrib['href']
666             self.toc = html.tostring(toc, encoding='unicode')
667             # div#toc
668
669     @classmethod
670     def from_xml_file(cls, xml_file, **kwargs):
671         from django.core.files import File
672         from librarian import dcparser
673
674         # use librarian to parse meta-data
675         book_info = dcparser.parse(xml_file)
676
677         if not isinstance(xml_file, File):
678             xml_file = File(open(xml_file))
679
680         try:
681             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
682         finally:
683             xml_file.close()
684
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None, can_sell=None):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``license``, ``variant_of``,
                ``to_dict()`` and, if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip ('cover' or a format name);
                merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being falsy).
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono, logo_alt: stored in ``extra_info`` when truthy.
            can_sell: stored on the book unless None.

        Returns:
            The saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        book.license = book_info.license or ''
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        # None means "leave the current can_sell value untouched".
        if can_sell is not None:
            book.can_sell = can_sell
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        book.update_stats()

        meta_tags = Tag.tags_from_info(book_info)

        # Tags without a relation name are plain tags; shelves ('set' tags)
        # saved above are re-attached alongside them.
        just_tags = [t for (t, rel) in meta_tags if not rel]
        book.tags = set(just_tags + book_shelves)
        book.save()  # update sort_key_author

        book.translators.set([t for (t, rel) in meta_tags if rel == 'translator'])

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Re-attach the listed parts in order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()
        book.html_nonotes_file.build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
817
818     def update_stats(self):
819         stats = self.wldocument2().get_statistics()['total']
820         self.pages = round(
821             stats.get('verses_with_fn', 0) / 30 +
822             stats.get('chars_out_verse_with_fn', 0) / 1800)
823         self.read_time = round(self.get_time())
824         self.save(update_fields=['pages', 'read_time'])
825         if self.parent is not None:
826             self.parent.update_stats()
827
828     def update_references(self):
829         Entity = apps.get_model('references', 'Entity')
830         doc = self.wldocument2()
831         doc._compat_assign_section_ids()
832         doc._compat_assign_ordered_ids()
833         refs = {}
834         for ref_elem in doc.references():
835             uri = ref_elem.attrib.get('href', '')
836             if not uri:
837                 continue
838             if uri in refs:
839                 ref = refs[uri]
840             else:
841                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
842                 if entity_created:
843                     try:
844                         entity.populate()
845                     except:
846                         pass
847                     else:
848                         entity.save()
849                 ref, ref_created = entity.reference_set.get_or_create(book=self)
850                 refs[uri] = ref
851                 if not ref_created:
852                     ref.occurence_set.all().delete()
853             sec = ref_elem.get_link()
854             m = re.match(r'sec(\d+)', sec)
855             assert m is not None
856             sec = int(m.group(1))
857             snippet = ref_elem.get_snippet()
858             b = builders['html-snippet']()
859             for s in snippet:
860                 s.html_build(b)
861             html = b.output().get_bytes().decode('utf-8')
862
863             ref.occurence_set.create(
864                 section=sec,
865                 html=html
866             )
867         self.reference_set.exclude(entity__uri__in=refs).delete()
868
869     @property
870     def references(self):
871         return self.reference_set.all().select_related('entity')
872
873     def update_has_audio(self):
874         self.has_audio = False
875         if self.media.filter(type='mp3').exists():
876             self.has_audio = True
877         if self.descendant.filter(has_audio=True).exists():
878             self.has_audio = True
879         self.save(update_fields=['has_audio'])
880         if self.parent is not None:
881             self.parent.update_has_audio()
882
883     def update_narrators(self):
884         narrator_names = set()
885         for bm in self.media.filter(type='mp3'):
886             narrator_names.update(set(
887                 a.strip() for a in re.split(r',|\si\s', bm.artist)
888             ))
889         narrators = []
890
891         for name in narrator_names:
892             if not name: continue
893             slug = slugify(name)
894             try:
895                 t = Tag.objects.get(category='author', slug=slug)
896             except Tag.DoesNotExist:
897                 sort_key = sortify(
898                     ' '.join(name.rsplit(' ', 1)[::-1]).lower()
899                 )
900                 t = Tag.objects.create(
901                     category='author',
902                     name_pl=name,
903                     slug=slug,
904                     sort_key=sort_key,
905                 )
906             narrators.append(t)
907         self.narrators.set(narrators)
908
909     @classmethod
910     @transaction.atomic
911     def repopulate_ancestors(cls):
912         """Fixes the ancestry cache."""
913         # TODO: table names
914         cursor = connection.cursor()
915         if connection.vendor == 'postgres':
916             cursor.execute("TRUNCATE catalogue_book_ancestor")
917             cursor.execute("""
918                 WITH RECURSIVE ancestry AS (
919                     SELECT book.id, book.parent_id
920                     FROM catalogue_book AS book
921                     WHERE book.parent_id IS NOT NULL
922                     UNION
923                     SELECT ancestor.id, book.parent_id
924                     FROM ancestry AS ancestor, catalogue_book AS book
925                     WHERE ancestor.parent_id = book.id
926                         AND book.parent_id IS NOT NULL
927                     )
928                 INSERT INTO catalogue_book_ancestor
929                     (from_book_id, to_book_id)
930                     SELECT id, parent_id
931                     FROM ancestry
932                     ORDER BY id;
933                 """)
934         else:
935             cursor.execute("DELETE FROM catalogue_book_ancestor")
936             for b in cls.objects.exclude(parent=None):
937                 parent = b.parent
938                 while parent is not None:
939                     b.ancestor.add(parent)
940                     parent = parent.parent
941
942     @property
943     def ancestors(self):
944         if self.parent:
945             for anc in self.parent.ancestors:
946                 yield anc
947             yield self.parent
948         else:
949             return []
950
951     def clear_cache(self):
952         clear_cached_renders(self.mini_box)
953         clear_cached_renders(self.mini_box_nolink)
954
955     def cover_info(self, inherit=True):
956         """Returns a dictionary to serve as fallback for BookInfo.
957
958         For now, the only thing inherited is the cover image.
959         """
960         need = False
961         info = {}
962         for field in ('cover_url', 'cover_by', 'cover_source'):
963             val = self.get_extra_info_json().get(field)
964             if val:
965                 info[field] = val
966             else:
967                 need = True
968         if inherit and need and self.parent is not None:
969             parent_info = self.parent.cover_info()
970             parent_info.update(info)
971             info = parent_info
972         return info
973
974     def related_themes(self):
975         return Tag.objects.usage_for_queryset(
976             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
977             counts=True).filter(category='theme').order_by('-count')
978
979     def parent_cover_changed(self):
980         """Called when parent book's cover image is changed."""
981         if not self.cover_info(inherit=False):
982             if 'cover' not in app_settings.DONT_BUILD:
983                 self.cover.build_delay()
984                 self.cover_clean.build_delay()
985                 self.cover_thumb.build_delay()
986                 self.cover_api_thumb.build_delay()
987                 self.simple_cover.build_delay()
988                 self.cover_ebookpoint.build_delay()
989             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
990                 if format_ not in app_settings.DONT_BUILD:
991                     getattr(self, '%s_file' % format_).build_delay()
992             for child in self.children.all():
993                 child.parent_cover_changed()
994
995     def other_versions(self):
996         """Find other versions (i.e. in other languages) of the book."""
997         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
998
999     def parents(self):
1000         books = []
1001         parent = self.parent
1002         while parent is not None:
1003             books.insert(0, parent)
1004             parent = parent.parent
1005         return books
1006
1007     def pretty_title(self, html_links=False):
1008         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
1009         books = self.parents() + [self]
1010         names.extend([(b.title, b.get_absolute_url()) for b in books])
1011
1012         if html_links:
1013             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
1014         else:
1015             names = [tag[0] for tag in names]
1016         return ', '.join(names)
1017
1018     def publisher(self):
1019         publisher = self.get_extra_info_json()['publisher']
1020         if isinstance(publisher, str):
1021             return publisher
1022         elif isinstance(publisher, list):
1023             return ', '.join(publisher)
1024
1025     def get_recommended(self, limit=4):
1026         books_qs = type(self).objects.filter(findable=True)
1027         books_qs = books_qs.exclude(common_slug=self.common_slug).exclude(ancestor=self)
1028         books = type(self).tagged.related_to(self, books_qs)[:limit]
1029         return books
1030
1031     @classmethod
1032     def tagged_top_level(cls, tags):
1033         """ Returns top-level books tagged with `tags`.
1034
1035         It only returns those books which don't have ancestors which are
1036         also tagged with those tags.
1037
1038         """
1039         objects = cls.tagged.with_all(tags)
1040         return objects.filter(findable=True).exclude(ancestor__in=objects)
1041
1042     @classmethod
1043     def book_list(cls, book_filter=None):
1044         """Generates a hierarchical listing of all books.
1045
1046         Books are optionally filtered with a test function.
1047
1048         """
1049
1050         books_by_parent = {}
1051         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
1052         if book_filter:
1053             books = books.filter(book_filter).distinct()
1054
1055             book_ids = set(b['pk'] for b in books.values("pk").iterator())
1056             for book in books.iterator():
1057                 parent = book.parent_id
1058                 if parent not in book_ids:
1059                     parent = None
1060                 books_by_parent.setdefault(parent, []).append(book)
1061         else:
1062             for book in books.iterator():
1063                 books_by_parent.setdefault(book.parent_id, []).append(book)
1064
1065         orphans = []
1066         books_by_author = OrderedDict()
1067         for tag in Tag.objects.filter(category='author').iterator():
1068             books_by_author[tag] = []
1069
1070         for book in books_by_parent.get(None, ()):
1071             authors = list(book.authors().only('pk'))
1072             if authors:
1073                 for author in authors:
1074                     books_by_author[author].append(book)
1075             else:
1076                 orphans.append(book)
1077
1078         return books_by_author, orphans, books_by_parent
1079
1080     _audiences_pl = {
1081         "SP": (1, "szkoła podstawowa"),
1082         "SP1": (1, "szkoła podstawowa"),
1083         "SP2": (1, "szkoła podstawowa"),
1084         "SP3": (1, "szkoła podstawowa"),
1085         "P": (1, "szkoła podstawowa"),
1086         "G": (2, "gimnazjum"),
1087         "L": (3, "liceum"),
1088         "LP": (3, "liceum"),
1089     }
1090
1091     def audiences_pl(self):
1092         audiences = self.get_extra_info_json().get('audiences', [])
1093         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
1094         return [a[1] for a in audiences]
1095
1096     def stage_note(self):
1097         stage = self.get_extra_info_json().get('stage')
1098         if stage and stage < '0.4':
1099             return (_('Ten utwór wymaga uwspółcześnienia'),
1100                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
1101         else:
1102             return None, None
1103
1104     def choose_fragments(self, number):
1105         fragments = self.fragments.order_by()
1106         fragments_count = fragments.count()
1107         if not fragments_count and self.children.exists():
1108             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
1109             fragments_count = fragments.count()
1110         if fragments_count:
1111             if fragments_count > number:
1112                 offset = randint(0, fragments_count - number)
1113             else:
1114                 offset = 0
1115             return fragments[offset : offset + number]
1116         elif self.parent:
1117             return self.parent.choose_fragments(number)
1118         else:
1119             return []
1120
1121     def choose_fragment(self):
1122         fragments = self.choose_fragments(1)
1123         if fragments:
1124             return fragments[0]
1125         else:
1126             return None
1127
1128     def fragment_data(self):
1129         fragment = self.choose_fragment()
1130         if fragment:
1131             return {
1132                 'title': fragment.book.pretty_title(),
1133                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1134             }
1135         else:
1136             return None
1137
1138     def update_popularity(self):
1139         count = self.userlistitem_set.values('list__user').order_by('list__user').distinct().count()
1140         try:
1141             pop = self.popularity
1142             pop.count = count
1143             pop.save()
1144         except BookPopularity.DoesNotExist:
1145             BookPopularity.objects.create(book=self, count=count)
1146
1147     def ridero_link(self):
1148         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1149
1150     def elevenreader_link(self):
1151         first_text = self.get_first_text()
1152         if first_text is None:
1153             return None
1154         return 'https://elevenreader.io/audiobooks/wolnelektury:' + first_text.slug
1155
1156     def content_warnings(self):
1157         warnings_def = {
1158             'wulgaryzmy': _('wulgaryzmy'),
1159         }
1160         warnings = self.get_extra_info_json().get('content_warnings', [])
1161         warnings = [
1162             warnings_def.get(w, w)
1163             for w in warnings
1164         ]
1165         warnings.sort()
1166         return warnings
1167
1168     def full_sort_key(self):
1169         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1170
1171     def cover_color(self):
1172         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1173
1174     @cached_render('catalogue/book_mini_box.html')
1175     def mini_box(self):
1176         return {
1177             'book': self
1178         }
1179
1180     @cached_render('catalogue/book_mini_box.html')
1181     def mini_box_nolink(self):
1182         return {
1183             'book': self,
1184             'no_link': True,
1185         }
1186
1187
1188 class BookPopularity(models.Model):
1189     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1190     count = models.IntegerField(default=0, db_index=True)