Epub+mo support
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
34 bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath(object):
    """Callable that builds an upload path from a %-template and a slug.

    The template must contain a single ``%s`` placeholder, which is filled
    with ``instance.slug`` when the object is called.
    """

    def __init__(self, path):
        # %-format template, e.g. 'book/cover/%s.jpg'.
        self.path = path

    def __call__(self, instance, filename):
        """Return the template filled with ``instance.slug``.

        ``filename`` is accepted but not used.
        """
        template = self.path
        return template % instance.slug
44
45
# Upload-path builders for the individual cover variants.
# Each template's single %s placeholder is filled with the book's slug.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap ``upload_path`` (a %-template taking a slug) in an UploadToPath."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
60     title = models.CharField(_('title'), max_length=32767)
61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
62     sort_key_author = models.CharField(
63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
67     description = models.TextField(_('description'), blank=True)
68     abstract = models.TextField(_('abstract'), blank=True)
69     toc = models.TextField(_('toc'), blank=True)
70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
72     parent_number = models.IntegerField(_('parent number'), default=0)
73     extra_info = models.TextField(_('extra information'), default='{}')
74     gazeta_link = models.CharField(blank=True, max_length=240)
75     wiki_link = models.CharField(blank=True, max_length=240)
76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
77     recommended = models.BooleanField(_('recommended'), default=False)
78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
79     preview = models.BooleanField(_('preview'), default=False)
80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
81     preview_key = models.CharField(max_length=32, blank=True, null=True)
82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
84     # files generated during publication
85     cover = EbookField(
86         'cover', _('cover'),
87         null=True, blank=True,
88         upload_to=_cover_upload_to,
89         storage=bofh_storage, max_length=255)
90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
91     # Cleaner version of cover for thumbs
92     cover_clean = EbookField(
93         'cover_clean', _('clean cover'),
94         null=True, blank=True,
95         upload_to=_cover_clean_upload_to,
96         max_length=255
97     )
98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
99     cover_thumb = EbookField(
100         'cover_thumb', _('cover thumbnail'),
101         null=True, blank=True,
102         upload_to=_cover_thumb_upload_to,
103         max_length=255)
104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
105     cover_api_thumb = EbookField(
106         'cover_api_thumb', _('cover thumbnail for mobile app'),
107         null=True, blank=True,
108         upload_to=_cover_api_thumb_upload_to,
109         max_length=255)
110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
111     simple_cover = EbookField(
112         'simple_cover', _('cover for mobile app'),
113         null=True, blank=True,
114         upload_to=_simple_cover_upload_to,
115         max_length=255)
116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
117     cover_ebookpoint = EbookField(
118         'cover_ebookpoint', _('cover for Ebookpoint'),
119         null=True, blank=True,
120         upload_to=_cover_ebookpoint_upload_to,
121         max_length=255)
122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
123     ebook_formats = constants.EBOOK_FORMATS
124     formats = ebook_formats + ['html', 'xml']
125
126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
128
129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
130     has_audience = models.BooleanField(default=False)
131
132     objects = models.Manager()
133     tagged = managers.ModelTaggedItemManager(Tag)
134     tags = managers.TagDescriptor(Tag)
135     tag_relations = GenericRelation(Tag.intermediary_table_model)
136
137     html_built = django.dispatch.Signal()
138     published = django.dispatch.Signal()
139
140     SORT_KEY_SEP = '$'
141
142     is_book = True
143
144     class AlreadyExists(Exception):
145         pass
146
147     class Meta:
148         ordering = ('sort_key_author', 'sort_key')
149         verbose_name = _('book')
150         verbose_name_plural = _('books')
151         app_label = 'catalogue'
152
153     def __str__(self):
154         return self.title
155
156     def get_extra_info_json(self):
157         return json.loads(self.extra_info or '{}')
158
159     def get_initial(self):
160         try:
161             return re.search(r'\w', self.title, re.U).group(0)
162         except AttributeError:
163             return ''
164
165     def authors(self):
166         return self.tags.filter(category='author')
167
168     def epochs(self):
169         return self.tags.filter(category='epoch')
170
171     def genres(self):
172         return self.tags.filter(category='genre')
173
174     def kinds(self):
175         return self.tags.filter(category='kind')
176
177     def tag_unicode(self, category):
178         relations = prefetched_relations(self, category)
179         if relations:
180             return ', '.join(rel.tag.name for rel in relations)
181         else:
182             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
183
184     def tags_by_category(self):
185         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
186
187     def author_unicode(self):
188         return self.cached_author
189
190     def kind_unicode(self):
191         return self.tag_unicode('kind')
192
193     def epoch_unicode(self):
194         return self.tag_unicode('epoch')
195
196     def genre_unicode(self):
197         return self.tag_unicode('genre')
198
199     def translators(self):
200         translators = self.get_extra_info_json().get('translators') or []
201         return [
202             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
203         ]
204
205     def translator(self):
206         translators = self.get_extra_info_json().get('translators')
207         if not translators:
208             return None
209         if len(translators) > 3:
210             translators = translators[:2]
211             others = ' i inni'
212         else:
213             others = ''
214         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
215
216     def cover_source(self):
217         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
218
219     @property
220     def isbn_pdf(self):
221         return self.get_extra_info_json().get('isbn_pdf')
222
223     @property
224     def isbn_epub(self):
225         return self.get_extra_info_json().get('isbn_epub')
226
227     @property
228     def isbn_mobi(self):
229         return self.get_extra_info_json().get('isbn_mobi')
230
231     def is_accessible_to(self, user):
232         if not self.preview:
233             return True
234         if not user.is_authenticated:
235             return False
236         Membership = apps.get_model('club', 'Membership')
237         if Membership.is_active_for(user):
238             return True
239         Funding = apps.get_model('funding', 'Funding')
240         if Funding.objects.filter(user=user, offer__book=self):
241             return True
242         return False
243
244     def save(self, force_insert=False, force_update=False, **kwargs):
245         from sortify import sortify
246
247         self.sort_key = sortify(self.title)[:120]
248         self.title = str(self.title)  # ???
249
250         try:
251             author = self.authors().first().sort_key
252         except AttributeError:
253             author = ''
254         self.sort_key_author = author
255
256         self.cached_author = self.tag_unicode('author')
257         self.has_audience = 'audience' in self.get_extra_info_json()
258
259         if self.preview and not self.preview_key:
260             self.preview_key = get_random_hash(self.slug)[:32]
261
262         ret = super(Book, self).save(force_insert, force_update, **kwargs)
263
264         return ret
265
266     def get_absolute_url(self):
267         return reverse('book_detail', args=[self.slug])
268
269     def gallery_path(self):
270         return gallery_path(self.slug)
271
272     def gallery_url(self):
273         return gallery_url(self.slug)
274
275     def get_first_text(self):
276         if self.html_file:
277             return self
278         child = self.children.all().order_by('parent_number').first()
279         if child is not None:
280             return child.get_first_text()
281
282     def get_last_text(self):
283         if self.html_file:
284             return self
285         child = self.children.all().order_by('parent_number').last()
286         if child is not None:
287             return child.get_last_text()
288
289     def get_prev_text(self):
290         if not self.parent:
291             return None
292         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
293         if sibling is not None:
294             return sibling.get_last_text()
295
296         if self.parent.html_file:
297             return self.parent
298         
299         return self.parent.get_prev_text()
300
301     def get_next_text(self, inside=True):
302         if inside:
303             child = self.children.order_by('parent_number').first()
304             if child is not None:
305                 return child.get_first_text()
306
307         if not self.parent:
308             return None
309         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
310         if sibling is not None:
311             return sibling.get_first_text()
312         return self.parent.get_next_text(inside=False)
313
314     def get_child_audiobook(self):
315         BookMedia = apps.get_model('catalogue', 'BookMedia')
316         if not BookMedia.objects.filter(book__ancestor=self).exists():
317             return None
318         for child in self.children.order_by('parent_number').all():
319             if child.has_mp3_file():
320                 return child
321             child_sub = child.get_child_audiobook()
322             if child_sub is not None:
323                 return child_sub
324
325     def get_siblings(self):
326         if not self.parent:
327             return []
328         return self.parent.children.all().order_by('parent_number')
329
330     def get_children(self):
331         return self.children.all().order_by('parent_number')
332     
333     @property
334     def name(self):
335         return self.title
336
337     def language_code(self):
338         return constants.LANGUAGES_3TO2.get(self.language, self.language)
339
340     def language_name(self):
341         return dict(settings.LANGUAGES).get(self.language_code(), "")
342
343     def is_foreign(self):
344         return self.language_code() != settings.LANGUAGE_CODE
345
346     def set_audio_length(self):
347         length = self.get_audio_length()
348         if length > 0:
349             self.audio_length = self.format_audio_length(length)
350             self.save()
351
352     @staticmethod
353     def format_audio_length(seconds):
354         """
355         >>> Book.format_audio_length(1)
356         '0:01'
357         >>> Book.format_audio_length(3661)
358         '1:01:01'
359         """
360         if seconds < 60*60:
361             minutes = seconds // 60
362             seconds = seconds % 60
363             return '%d:%02d' % (minutes, seconds)
364         else:
365             hours = seconds // 3600
366             minutes = seconds % 3600 // 60
367             seconds = seconds % 60
368             return '%d:%02d:%02d' % (hours, minutes, seconds)
369
370     def get_audio_length(self):
371         total = 0
372         for media in self.get_mp3() or ():
373             total += app_settings.GET_MP3_LENGTH(media.file.path)
374         return int(total)
375
376     def has_media(self, type_):
377         if type_ in Book.formats:
378             return bool(getattr(self, "%s_file" % type_))
379         else:
380             return self.media.filter(type=type_).exists()
381
382     def has_audio(self):
383         return self.has_media('mp3')
384
385     def get_media(self, type_):
386         if self.has_media(type_):
387             if type_ in Book.formats:
388                 return getattr(self, "%s_file" % type_)
389             else:
390                 return self.media.filter(type=type_)
391         else:
392             return None
393
394     def get_mp3(self):
395         return self.get_media("mp3")
396
397     def get_odt(self):
398         return self.get_media("odt")
399
400     def get_ogg(self):
401         return self.get_media("ogg")
402
403     def get_daisy(self):
404         return self.get_media("daisy")
405
406     def get_audio_epub(self):
407         return self.get_media("audio.epub")
408
409     def media_url(self, format_):
410         media = self.get_media(format_)
411         if media:
412             if self.preview:
413                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
414             else:
415                 return media.url
416         else:
417             return None
418
419     def html_url(self):
420         return self.media_url('html')
421
422     def pdf_url(self):
423         return self.media_url('pdf')
424
425     def epub_url(self):
426         return self.media_url('epub')
427
428     def mobi_url(self):
429         return self.media_url('mobi')
430
431     def txt_url(self):
432         return self.media_url('txt')
433
434     def fb2_url(self):
435         return self.media_url('fb2')
436
437     def xml_url(self):
438         return self.media_url('xml')
439
440     def has_description(self):
441         return len(self.description) > 0
442     has_description.short_description = _('description')
443     has_description.boolean = True
444
445     def has_mp3_file(self):
446         return self.has_media("mp3")
447     has_mp3_file.short_description = 'MP3'
448     has_mp3_file.boolean = True
449
450     def has_ogg_file(self):
451         return self.has_media("ogg")
452     has_ogg_file.short_description = 'OGG'
453     has_ogg_file.boolean = True
454
455     def has_daisy_file(self):
456         return self.has_media("daisy")
457     has_daisy_file.short_description = 'DAISY'
458     has_daisy_file.boolean = True
459
460     def has_audio_epub_file(self):
461         return self.has_media("audio.epub")
462
463     @property
464     def media_daisy(self):
465         return self.get_media('daisy')
466     
467     @property
468     def media_audio_epub(self):
469         return self.get_media('audio.epub')
470
471     def get_audiobooks(self):
472         ogg_files = {}
473         for m in self.media.filter(type='ogg').order_by().iterator():
474             ogg_files[m.name] = m
475
476         audiobooks = []
477         projects = set()
478         total_duration = 0
479         for mp3 in self.media.filter(type='mp3').iterator():
480             # ogg files are always from the same project
481             meta = mp3.get_extra_info_json()
482             project = meta.get('project')
483             if not project:
484                 # temporary fallback
485                 project = 'CzytamySłuchając'
486
487             projects.add((project, meta.get('funded_by', '')))
488             total_duration += mp3.duration or 0
489
490             media = {'mp3': mp3}
491
492             ogg = ogg_files.get(mp3.name)
493             if ogg:
494                 media['ogg'] = ogg
495             audiobooks.append(media)
496
497         projects = sorted(projects)
498         total_duration = '%d:%02d' % (
499             total_duration // 60,
500             total_duration % 60
501         )
502         return audiobooks, projects, total_duration
503
504     def wldocument(self, parse_dublincore=True, inherit=True):
505         from catalogue.import_utils import ORMDocProvider
506         from librarian.parser import WLDocument
507
508         if inherit and self.parent:
509             meta_fallbacks = self.parent.cover_info()
510         else:
511             meta_fallbacks = None
512
513         return WLDocument.from_file(
514             self.xml_file.path,
515             provider=ORMDocProvider(self),
516             parse_dublincore=parse_dublincore,
517             meta_fallbacks=meta_fallbacks)
518
519     def wldocument2(self):
520         from catalogue.import_utils import ORMDocProvider
521         from librarian.document import WLDocument
522         doc = WLDocument(
523             self.xml_file.path,
524             provider=ORMDocProvider(self)
525         )
526         doc.meta.update(self.cover_info())
527         return doc
528
529
530     @staticmethod
531     def zip_format(format_):
532         def pretty_file_name(book):
533             return "%s/%s.%s" % (
534                 book.get_extra_info_json()['author'],
535                 book.slug,
536                 format_)
537
538         field_name = "%s_file" % format_
539         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
540         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
541         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
542
543     def zip_audiobooks(self, format_):
544         bm = BookMedia.objects.filter(book=self, type=format_)
545         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
546         licenses = set()
547         for m in bm:
548             license = constants.LICENSES.get(
549                 m.get_extra_info_json().get('license'), {}
550             ).get('locative')
551             if license:
552                 licenses.add(license)
553         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
554             'licenses': licenses,
555         })
556         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
557
558     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
559         if not self.findable:
560             return
561         if index is None:
562             from search.index import Index
563             index = Index()
564         try:
565             index.index_book(self, book_info)
566             if index_tags:
567                 index.index_tags()
568             if commit:
569                 index.index.commit()
570         except Exception as e:
571             index.index.rollback()
572             raise e
573
574     # will make problems in conjunction with paid previews
575     def download_pictures(self, remote_gallery_url):
576         # This is only needed for legacy relative image paths.
577         gallery_path = self.gallery_path()
578         # delete previous files, so we don't include old files in ebooks
579         if os.path.isdir(gallery_path):
580             for filename in os.listdir(gallery_path):
581                 file_path = os.path.join(gallery_path, filename)
582                 os.unlink(file_path)
583         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
584         if ilustr_elements:
585             makedirs(gallery_path)
586             for ilustr in ilustr_elements:
587                 ilustr_src = ilustr.get('src')
588                 if '/' in ilustr_src:
589                     continue
590                 ilustr_path = os.path.join(gallery_path, ilustr_src)
591                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
592
593     def load_abstract(self):
594         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
595         if abstract is not None:
596             self.abstract = transform_abstrakt(abstract)
597         else:
598             self.abstract = ''
599
600     def load_toc(self):
601         self.toc = ''
602         if self.html_file:
603             parser = html.HTMLParser(encoding='utf-8')
604             tree = html.parse(self.html_file.path, parser=parser)
605             toc = tree.find('//div[@id="toc"]/ol')
606             if toc is None or not len(toc):
607                 return
608             html_link = reverse('book_text', args=[self.slug])
609             for a in toc.findall('.//a'):
610                 a.attrib['href'] = html_link + a.attrib['href']
611             self.toc = html.tostring(toc, encoding='unicode')
612             # div#toc
613             
614     @classmethod
615     def from_xml_file(cls, xml_file, **kwargs):
616         from django.core.files import File
617         from librarian import dcparser
618
619         # use librarian to parse meta-data
620         book_info = dcparser.parse(xml_file)
621
622         if not isinstance(xml_file, File):
623             xml_file = File(open(xml_file))
624
625         try:
626             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
627         finally:
628             xml_file.close()
629
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; read here for ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of outputs to skip ('cover' or ebook formats);
                merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if set, pictures are downloaded from it.
            days: on creation, a non-zero value makes the book a preview
                until today plus that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover data to detect a cover change later.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Mark every tag used here as being in use for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; shelf ('set') tags are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Notify receivers of the ``published`` signal.
        cls.published.send(sender=cls, instance=book)
        return book
749
750     def get_master(self):
751         master_tags = [
752             'opowiadanie',
753             'powiesc',
754             'dramat_wierszowany_l',
755             'dramat_wierszowany_lp',
756             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
757             'wywiad',
758         ]
759         from librarian.parser import WLDocument
760         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
761         root = wld.edoc.getroot()
762         for master in root.iter():
763             if master.tag in master_tags:
764                 return master
765     
766     def update_references(self):
767         from references.models import Entity, Reference
768         master = self.get_master()
769         if master is None:
770             master = []
771         found = set()
772         for i, sec in enumerate(master):
773             for ref in sec.findall('.//ref'):
774                 href = ref.attrib.get('href', '')
775                 if not href or href in found:
776                     continue
777                 found.add(href)
778                 entity, created = Entity.objects.get_or_create(
779                     uri=href
780                 )
781                 ref, created = Reference.objects.get_or_create(
782                     book=self,
783                     entity=entity
784                 )
785                 ref.first_section = 'sec%d' % (i + 1)
786                 entity.populate()
787                 entity.save()
788         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
789     
790     @property
791     def references(self):
792         return self.reference_set.all().select_related('entity')
793
794     @classmethod
795     @transaction.atomic
796     def repopulate_ancestors(cls):
797         """Fixes the ancestry cache."""
798         # TODO: table names
799         cursor = connection.cursor()
800         if connection.vendor == 'postgres':
801             cursor.execute("TRUNCATE catalogue_book_ancestor")
802             cursor.execute("""
803                 WITH RECURSIVE ancestry AS (
804                     SELECT book.id, book.parent_id
805                     FROM catalogue_book AS book
806                     WHERE book.parent_id IS NOT NULL
807                     UNION
808                     SELECT ancestor.id, book.parent_id
809                     FROM ancestry AS ancestor, catalogue_book AS book
810                     WHERE ancestor.parent_id = book.id
811                         AND book.parent_id IS NOT NULL
812                     )
813                 INSERT INTO catalogue_book_ancestor
814                     (from_book_id, to_book_id)
815                     SELECT id, parent_id
816                     FROM ancestry
817                     ORDER BY id;
818                 """)
819         else:
820             cursor.execute("DELETE FROM catalogue_book_ancestor")
821             for b in cls.objects.exclude(parent=None):
822                 parent = b.parent
823                 while parent is not None:
824                     b.ancestor.add(parent)
825                     parent = parent.parent
826
827     @property
828     def ancestors(self):
829         if self.parent:
830             for anc in self.parent.ancestors:
831                 yield anc
832             yield self.parent
833         else:
834             return []
835                     
836     def clear_cache(self):
837         clear_cached_renders(self.mini_box)
838         clear_cached_renders(self.mini_box_nolink)
839
840     def cover_info(self, inherit=True):
841         """Returns a dictionary to serve as fallback for BookInfo.
842
843         For now, the only thing inherited is the cover image.
844         """
845         need = False
846         info = {}
847         for field in ('cover_url', 'cover_by', 'cover_source'):
848             val = self.get_extra_info_json().get(field)
849             if val:
850                 info[field] = val
851             else:
852                 need = True
853         if inherit and need and self.parent is not None:
854             parent_info = self.parent.cover_info()
855             parent_info.update(info)
856             info = parent_info
857         return info
858
859     def related_themes(self):
860         return Tag.objects.usage_for_queryset(
861             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
862             counts=True).filter(category='theme').order_by('-count')
863
864     def parent_cover_changed(self):
865         """Called when parent book's cover image is changed."""
866         if not self.cover_info(inherit=False):
867             if 'cover' not in app_settings.DONT_BUILD:
868                 self.cover.build_delay()
869                 self.cover_clean.build_delay()
870                 self.cover_thumb.build_delay()
871                 self.cover_api_thumb.build_delay()
872                 self.simple_cover.build_delay()
873                 self.cover_ebookpoint.build_delay()
874             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
875                 if format_ not in app_settings.DONT_BUILD:
876                     getattr(self, '%s_file' % format_).build_delay()
877             for child in self.children.all():
878                 child.parent_cover_changed()
879
880     def other_versions(self):
881         """Find other versions (i.e. in other languages) of the book."""
882         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
883
884     def parents(self):
885         books = []
886         parent = self.parent
887         while parent is not None:
888             books.insert(0, parent)
889             parent = parent.parent
890         return books
891
892     def pretty_title(self, html_links=False):
893         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
894         books = self.parents() + [self]
895         names.extend([(b.title, b.get_absolute_url()) for b in books])
896
897         if html_links:
898             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
899         else:
900             names = [tag[0] for tag in names]
901         return ', '.join(names)
902
903     def publisher(self):
904         publisher = self.get_extra_info_json()['publisher']
905         if isinstance(publisher, str):
906             return publisher
907         elif isinstance(publisher, list):
908             return ', '.join(publisher)
909
910     @classmethod
911     def tagged_top_level(cls, tags):
912         """ Returns top-level books tagged with `tags`.
913
914         It only returns those books which don't have ancestors which are
915         also tagged with those tags.
916
917         """
918         objects = cls.tagged.with_all(tags)
919         return objects.filter(findable=True).exclude(ancestor__in=objects)
920
921     @classmethod
922     def book_list(cls, book_filter=None):
923         """Generates a hierarchical listing of all books.
924
925         Books are optionally filtered with a test function.
926
927         """
928
929         books_by_parent = {}
930         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
931         if book_filter:
932             books = books.filter(book_filter).distinct()
933
934             book_ids = set(b['pk'] for b in books.values("pk").iterator())
935             for book in books.iterator():
936                 parent = book.parent_id
937                 if parent not in book_ids:
938                     parent = None
939                 books_by_parent.setdefault(parent, []).append(book)
940         else:
941             for book in books.iterator():
942                 books_by_parent.setdefault(book.parent_id, []).append(book)
943
944         orphans = []
945         books_by_author = OrderedDict()
946         for tag in Tag.objects.filter(category='author').iterator():
947             books_by_author[tag] = []
948
949         for book in books_by_parent.get(None, ()):
950             authors = list(book.authors().only('pk'))
951             if authors:
952                 for author in authors:
953                     books_by_author[author].append(book)
954             else:
955                 orphans.append(book)
956
957         return books_by_author, orphans, books_by_parent
958
    # Maps an audience code to a (sort rank, Polish label) pair.
    # audiences_pl() looks codes up here, de-duplicates the pairs and sorts
    # them, so codes sharing a label collapse into one entry and labels
    # come out ordered by rank.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
969
970     def audiences_pl(self):
971         audiences = self.get_extra_info_json().get('audiences', [])
972         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
973         return [a[1] for a in audiences]
974
975     def stage_note(self):
976         stage = self.get_extra_info_json().get('stage')
977         if stage and stage < '0.4':
978             return (_('This work needs modernisation'),
979                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
980         else:
981             return None, None
982
983     def choose_fragments(self, number):
984         fragments = self.fragments.order_by()
985         fragments_count = fragments.count()
986         if not fragments_count and self.children.exists():
987             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
988             fragments_count = fragments.count()
989         if fragments_count:
990             if fragments_count > number:
991                 offset = randint(0, fragments_count - number)
992             else:
993                 offset = 0
994             return fragments[offset : offset + number]
995         elif self.parent:
996             return self.parent.choose_fragments(number)
997         else:
998             return []
999
1000     def choose_fragment(self):
1001         fragments = self.choose_fragments(1)
1002         if fragments:
1003             return fragments[0]
1004         else:
1005             return None
1006         
1007     def fragment_data(self):
1008         fragment = self.choose_fragment()
1009         if fragment:
1010             return {
1011                 'title': fragment.book.pretty_title(),
1012                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1013             }
1014         else:
1015             return None
1016
1017     def update_popularity(self):
1018         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1019         try:
1020             pop = self.popularity
1021             pop.count = count
1022             pop.save()
1023         except BookPopularity.DoesNotExist:
1024             BookPopularity.objects.create(book=self, count=count)
1025
1026     def ridero_link(self):
1027         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1028
1029     def like(self, user):
1030         from social.utils import likes, get_set, set_sets
1031         if not likes(user, self):
1032             tag = get_set(user, '')
1033             set_sets(user, self, [tag])
1034
1035     def unlike(self, user):
1036         from social.utils import likes, set_sets
1037         if likes(user, self):
1038             set_sets(user, self, [])
1039
1040     def full_sort_key(self):
1041         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1042
1043     def cover_color(self):
1044         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1045
1046     @cached_render('catalogue/book_mini_box.html')
1047     def mini_box(self):
1048         return {
1049             'book': self
1050         }
1051
1052     @cached_render('catalogue/book_mini_box.html')
1053     def mini_box_nolink(self):
1054         return {
1055             'book': self,
1056             'no_link': True,
1057         }
1058
def add_file_fields():
    """Add a ``<format>_file`` EbookField to Book for each of Book.formats.

    Every format except 'xml' also gets a ``<format>_file_etag`` CharField.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        upload_to_name = '_%s_upload_to' % format_

        # This weird globals() assignment makes Django migrations comfortable.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_to_name
        globals()[upload_to_name] = _upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')
1077
1078
# Runs when the module is imported, so Book gets its per-format file fields.
add_file_fields()
1080
1081
1082 class BookPopularity(models.Model):
1083     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1084     count = models.IntegerField(default=0, db_index=True)