b624abcad683210b54f9e711e87343469b2ba278
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Module-wide storage instance; handed to the `cover` field of Book via `storage=`.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath:
    """Callable ``upload_to`` that fills a %-style path template with the instance's slug.

    The class is wrapped with Django's ``deconstructible`` decorator.
    """

    def __init__(self, path):
        # ``path`` is a %-style template with a single placeholder for the slug.
        self.path = path

    def __call__(self, instance, filename):
        """Return the template expanded with ``instance.slug``; ``filename`` is ignored."""
        template = self.path
        return template % instance.slug
44
45
# Upload-path builders for the cover variants; each stores its file as
# `book/<variant>/<slug>.jpg` (the `%s` is replaced with the book's slug).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap a %-style path template in an ``UploadToPath`` callable."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
    # --- Basic metadata ---
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set during import to the slug of the book this one is a variant of, or to its own slug.
    # NOTE(review): shares the verbose name 'slug' with the field above — confirm this is intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; used for ordering siblings.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON document stored as text; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # --- Preview (restricted access) state ---
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Formats backed by a `<format>_file` attribute on the book (see has_media/get_media).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # --- Hierarchy ---
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    # --- Managers and tagging ---
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # --- Signals ---
    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta().
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
143
    class AlreadyExists(Exception):
        """Raised on import when a book with the given slug exists and overwriting was not requested."""
        pass
146
    class Meta:
        # Default ordering: by author sort key first, then by the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
152
153     def __str__(self):
154         return self.title
155
156     def get_extra_info_json(self):
157         return json.loads(self.extra_info or '{}')
158
159     def get_initial(self):
160         try:
161             return re.search(r'\w', self.title, re.U).group(0)
162         except AttributeError:
163             return ''
164
165     def authors(self):
166         return self.tags.filter(category='author')
167
168     def epochs(self):
169         return self.tags.filter(category='epoch')
170
171     def genres(self):
172         return self.tags.filter(category='genre')
173
174     def kinds(self):
175         return self.tags.filter(category='kind')
176
177     def tag_unicode(self, category):
178         relations = prefetched_relations(self, category)
179         if relations:
180             return ', '.join(rel.tag.name for rel in relations)
181         else:
182             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
183
184     def tags_by_category(self):
185         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
186
187     def author_unicode(self):
188         return self.cached_author
189
190     def kind_unicode(self):
191         return self.tag_unicode('kind')
192
193     def epoch_unicode(self):
194         return self.tag_unicode('epoch')
195
196     def genre_unicode(self):
197         return self.tag_unicode('genre')
198
199     def translators(self):
200         translators = self.get_extra_info_json().get('translators') or []
201         return [
202             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
203         ]
204
205     def translator(self):
206         translators = self.get_extra_info_json().get('translators')
207         if not translators:
208             return None
209         if len(translators) > 3:
210             translators = translators[:2]
211             others = ' i inni'
212         else:
213             others = ''
214         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
215
216     def cover_source(self):
217         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
218
219     @property
220     def isbn_pdf(self):
221         return self.get_extra_info_json().get('isbn_pdf')
222
223     @property
224     def isbn_epub(self):
225         return self.get_extra_info_json().get('isbn_epub')
226
227     @property
228     def isbn_mobi(self):
229         return self.get_extra_info_json().get('isbn_mobi')
230
231     def is_accessible_to(self, user):
232         if not self.preview:
233             return True
234         if not user.is_authenticated:
235             return False
236         Membership = apps.get_model('club', 'Membership')
237         if Membership.is_active_for(user):
238             return True
239         Funding = apps.get_model('funding', 'Funding')
240         if Funding.objects.filter(user=user, offer__book=self):
241             return True
242         return False
243
244     def save(self, force_insert=False, force_update=False, **kwargs):
245         from sortify import sortify
246
247         self.sort_key = sortify(self.title)[:120]
248         self.title = str(self.title)  # ???
249
250         try:
251             author = self.authors().first().sort_key
252         except AttributeError:
253             author = ''
254         self.sort_key_author = author
255
256         self.cached_author = self.tag_unicode('author')
257         self.has_audience = 'audience' in self.get_extra_info_json()
258
259         if self.preview and not self.preview_key:
260             self.preview_key = get_random_hash(self.slug)[:32]
261
262         ret = super(Book, self).save(force_insert, force_update, **kwargs)
263
264         return ret
265
266     def get_absolute_url(self):
267         return reverse('book_detail', args=[self.slug])
268
269     def gallery_path(self):
270         return gallery_path(self.slug)
271
272     def gallery_url(self):
273         return gallery_url(self.slug)
274
275     def get_first_text(self):
276         if self.html_file:
277             return self
278         child = self.children.all().order_by('parent_number').first()
279         if child is not None:
280             return child.get_first_text()
281
282     def get_last_text(self):
283         if self.html_file:
284             return self
285         child = self.children.all().order_by('parent_number').last()
286         if child is not None:
287             return child.get_last_text()
288
289     def get_prev_text(self):
290         if not self.parent:
291             return None
292         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
293         if sibling is not None:
294             return sibling.get_last_text()
295
296         if self.parent.html_file:
297             return self.parent
298         
299         return self.parent.get_prev_text()
300
301     def get_next_text(self, inside=True):
302         if inside:
303             child = self.children.order_by('parent_number').first()
304             if child is not None:
305                 return child.get_first_text()
306
307         if not self.parent:
308             return None
309         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
310         if sibling is not None:
311             return sibling.get_first_text()
312         return self.parent.get_next_text(inside=False)
313
314     def get_child_audiobook(self):
315         BookMedia = apps.get_model('catalogue', 'BookMedia')
316         if not BookMedia.objects.filter(book__ancestor=self).exists():
317             return None
318         for child in self.children.order_by('parent_number').all():
319             if child.has_mp3_file():
320                 return child
321             child_sub = child.get_child_audiobook()
322             if child_sub is not None:
323                 return child_sub
324
325     def get_siblings(self):
326         if not self.parent:
327             return []
328         return self.parent.children.all().order_by('parent_number')
329
330     def get_children(self):
331         return self.children.all().order_by('parent_number')
332     
333     @property
334     def name(self):
335         return self.title
336
337     def language_code(self):
338         return constants.LANGUAGES_3TO2.get(self.language, self.language)
339
340     def language_name(self):
341         return dict(settings.LANGUAGES).get(self.language_code(), "")
342
343     def is_foreign(self):
344         return self.language_code() != settings.LANGUAGE_CODE
345
346     def set_audio_length(self):
347         length = self.get_audio_length()
348         if length > 0:
349             self.audio_length = self.format_audio_length(length)
350             self.save()
351
352     @staticmethod
353     def format_audio_length(seconds):
354         """
355         >>> Book.format_audio_length(1)
356         '0:01'
357         >>> Book.format_audio_length(3661)
358         '1:01:01'
359         """
360         if seconds < 60*60:
361             minutes = seconds // 60
362             seconds = seconds % 60
363             return '%d:%02d' % (minutes, seconds)
364         else:
365             hours = seconds // 3600
366             minutes = seconds % 3600 // 60
367             seconds = seconds % 60
368             return '%d:%02d:%02d' % (hours, minutes, seconds)
369
370     def get_audio_length(self):
371         total = 0
372         for media in self.get_mp3() or ():
373             total += app_settings.GET_MP3_LENGTH(media.file.path)
374         return int(total)
375
376     def has_media(self, type_):
377         if type_ in Book.formats:
378             return bool(getattr(self, "%s_file" % type_))
379         else:
380             return self.media.filter(type=type_).exists()
381
382     def has_audio(self):
383         return self.has_media('mp3')
384
385     def get_media(self, type_):
386         if self.has_media(type_):
387             if type_ in Book.formats:
388                 return getattr(self, "%s_file" % type_)
389             else:
390                 return self.media.filter(type=type_)
391         else:
392             return None
393
394     def get_mp3(self):
395         return self.get_media("mp3")
396
397     def get_odt(self):
398         return self.get_media("odt")
399
400     def get_ogg(self):
401         return self.get_media("ogg")
402
403     def get_daisy(self):
404         return self.get_media("daisy")
405
406     def media_url(self, format_):
407         media = self.get_media(format_)
408         if media:
409             if self.preview:
410                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
411             else:
412                 return media.url
413         else:
414             return None
415
416     def html_url(self):
417         return self.media_url('html')
418
419     def pdf_url(self):
420         return self.media_url('pdf')
421
422     def epub_url(self):
423         return self.media_url('epub')
424
425     def mobi_url(self):
426         return self.media_url('mobi')
427
428     def txt_url(self):
429         return self.media_url('txt')
430
431     def fb2_url(self):
432         return self.media_url('fb2')
433
434     def xml_url(self):
435         return self.media_url('xml')
436
437     def has_description(self):
438         return len(self.description) > 0
439     has_description.short_description = _('description')
440     has_description.boolean = True
441
442     def has_mp3_file(self):
443         return self.has_media("mp3")
444     has_mp3_file.short_description = 'MP3'
445     has_mp3_file.boolean = True
446
447     def has_ogg_file(self):
448         return self.has_media("ogg")
449     has_ogg_file.short_description = 'OGG'
450     has_ogg_file.boolean = True
451
452     def has_daisy_file(self):
453         return self.has_media("daisy")
454     has_daisy_file.short_description = 'DAISY'
455     has_daisy_file.boolean = True
456
457     @property
458     def media_daisy(self):
459         return self.get_media('daisy')
460     
461     def get_audiobooks(self):
462         ogg_files = {}
463         for m in self.media.filter(type='ogg').order_by().iterator():
464             ogg_files[m.name] = m
465
466         audiobooks = []
467         projects = set()
468         total_duration = 0
469         for mp3 in self.media.filter(type='mp3').iterator():
470             # ogg files are always from the same project
471             meta = mp3.get_extra_info_json()
472             project = meta.get('project')
473             if not project:
474                 # temporary fallback
475                 project = 'CzytamySłuchając'
476
477             projects.add((project, meta.get('funded_by', '')))
478             total_duration += mp3.duration or 0
479
480             media = {'mp3': mp3}
481
482             ogg = ogg_files.get(mp3.name)
483             if ogg:
484                 media['ogg'] = ogg
485             audiobooks.append(media)
486
487         projects = sorted(projects)
488         total_duration = '%d:%02d' % (
489             total_duration // 60,
490             total_duration % 60
491         )
492         return audiobooks, projects, total_duration
493
494     def wldocument(self, parse_dublincore=True, inherit=True):
495         from catalogue.import_utils import ORMDocProvider
496         from librarian.parser import WLDocument
497
498         if inherit and self.parent:
499             meta_fallbacks = self.parent.cover_info()
500         else:
501             meta_fallbacks = None
502
503         return WLDocument.from_file(
504             self.xml_file.path,
505             provider=ORMDocProvider(self),
506             parse_dublincore=parse_dublincore,
507             meta_fallbacks=meta_fallbacks)
508
509     def wldocument2(self):
510         from catalogue.import_utils import ORMDocProvider
511         from librarian.document import WLDocument
512         doc = WLDocument(
513             self.xml_file.path,
514             provider=ORMDocProvider(self)
515         )
516         doc.meta.update(self.cover_info())
517         return doc
518
519
520     @staticmethod
521     def zip_format(format_):
522         def pretty_file_name(book):
523             return "%s/%s.%s" % (
524                 book.get_extra_info_json()['author'],
525                 book.slug,
526                 format_)
527
528         field_name = "%s_file" % format_
529         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
530         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
531         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
532
533     def zip_audiobooks(self, format_):
534         bm = BookMedia.objects.filter(book=self, type=format_)
535         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
536         licenses = set()
537         for m in bm:
538             license = constants.LICENSES.get(
539                 m.get_extra_info_json().get('license'), {}
540             ).get('locative')
541             if license:
542                 licenses.add(license)
543         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
544             'licenses': licenses,
545         })
546         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
547
548     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
549         if not self.findable:
550             return
551         if index is None:
552             from search.index import Index
553             index = Index()
554         try:
555             index.index_book(self, book_info)
556             if index_tags:
557                 index.index_tags()
558             if commit:
559                 index.index.commit()
560         except Exception as e:
561             index.index.rollback()
562             raise e
563
564     # will make problems in conjunction with paid previews
565     def download_pictures(self, remote_gallery_url):
566         # This is only needed for legacy relative image paths.
567         gallery_path = self.gallery_path()
568         # delete previous files, so we don't include old files in ebooks
569         if os.path.isdir(gallery_path):
570             for filename in os.listdir(gallery_path):
571                 file_path = os.path.join(gallery_path, filename)
572                 os.unlink(file_path)
573         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
574         if ilustr_elements:
575             makedirs(gallery_path)
576             for ilustr in ilustr_elements:
577                 ilustr_src = ilustr.get('src')
578                 if '/' in ilustr_src:
579                     continue
580                 ilustr_path = os.path.join(gallery_path, ilustr_src)
581                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
582
583     def load_abstract(self):
584         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
585         if abstract is not None:
586             self.abstract = transform_abstrakt(abstract)
587         else:
588             self.abstract = ''
589
590     def load_toc(self):
591         self.toc = ''
592         if self.html_file:
593             parser = html.HTMLParser(encoding='utf-8')
594             tree = html.parse(self.html_file.path, parser=parser)
595             toc = tree.find('//div[@id="toc"]/ol')
596             if toc is None or not len(toc):
597                 return
598             html_link = reverse('book_text', args=[self.slug])
599             for a in toc.findall('.//a'):
600                 a.attrib['href'] = html_link + a.attrib['href']
601             self.toc = html.tostring(toc, encoding='unicode')
602             # div#toc
603             
604     @classmethod
605     def from_xml_file(cls, xml_file, **kwargs):
606         from django.core.files import File
607         from librarian import dcparser
608
609         # use librarian to parse meta-data
610         book_info = dcparser.parse(xml_file)
611
612         if not isinstance(xml_file, File):
613             xml_file = File(open(xml_file))
614
615         try:
616             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
617         finally:
618             xml_file.close()
619
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML content and parsed metadata.

        Parameters:
            raw_file: file object with the book's XML; saved as ``<slug>.xml``.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: allow updating a book that already exists.
            dont_build: iterable of build targets to skip; combined with
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview until today + ``days``.
            findable: stored on the book.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the cover info so a change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Mark every tag used here as applicable to books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags with the metadata tags plus the preserved shelves.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number follows their position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books without parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let affected children react to the cover change.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
739
740     def get_master(self):
741         master_tags = [
742             'opowiadanie',
743             'powiesc',
744             'dramat_wierszowany_l',
745             'dramat_wierszowany_lp',
746             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
747             'wywiad',
748         ]
749         from librarian.parser import WLDocument
750         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
751         root = wld.edoc.getroot()
752         for master in root.iter():
753             if master.tag in master_tags:
754                 return master
755     
756     def update_references(self):
757         from references.models import Entity, Reference
758         master = self.get_master()
759         if master is None:
760             master = []
761         found = set()
762         for i, sec in enumerate(master):
763             for ref in sec.findall('.//ref'):
764                 href = ref.attrib.get('href', '')
765                 if not href or href in found:
766                     continue
767                 found.add(href)
768                 entity, created = Entity.objects.get_or_create(
769                     uri=href
770                 )
771                 ref, created = Reference.objects.get_or_create(
772                     book=self,
773                     entity=entity
774                 )
775                 ref.first_section = 'sec%d' % (i + 1)
776                 entity.populate()
777                 entity.save()
778         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
779     
780     @property
781     def references(self):
782         return self.reference_set.all().select_related('entity')
783
784     @classmethod
785     @transaction.atomic
786     def repopulate_ancestors(cls):
787         """Fixes the ancestry cache."""
788         # TODO: table names
789         cursor = connection.cursor()
790         if connection.vendor == 'postgres':
791             cursor.execute("TRUNCATE catalogue_book_ancestor")
792             cursor.execute("""
793                 WITH RECURSIVE ancestry AS (
794                     SELECT book.id, book.parent_id
795                     FROM catalogue_book AS book
796                     WHERE book.parent_id IS NOT NULL
797                     UNION
798                     SELECT ancestor.id, book.parent_id
799                     FROM ancestry AS ancestor, catalogue_book AS book
800                     WHERE ancestor.parent_id = book.id
801                         AND book.parent_id IS NOT NULL
802                     )
803                 INSERT INTO catalogue_book_ancestor
804                     (from_book_id, to_book_id)
805                     SELECT id, parent_id
806                     FROM ancestry
807                     ORDER BY id;
808                 """)
809         else:
810             cursor.execute("DELETE FROM catalogue_book_ancestor")
811             for b in cls.objects.exclude(parent=None):
812                 parent = b.parent
813                 while parent is not None:
814                     b.ancestor.add(parent)
815                     parent = parent.parent
816
817     @property
818     def ancestors(self):
819         if self.parent:
820             for anc in self.parent.ancestors:
821                 yield anc
822             yield self.parent
823         else:
824             return []
825                     
826     def clear_cache(self):
827         clear_cached_renders(self.mini_box)
828         clear_cached_renders(self.mini_box_nolink)
829
830     def cover_info(self, inherit=True):
831         """Returns a dictionary to serve as fallback for BookInfo.
832
833         For now, the only thing inherited is the cover image.
834         """
835         need = False
836         info = {}
837         for field in ('cover_url', 'cover_by', 'cover_source'):
838             val = self.get_extra_info_json().get(field)
839             if val:
840                 info[field] = val
841             else:
842                 need = True
843         if inherit and need and self.parent is not None:
844             parent_info = self.parent.cover_info()
845             parent_info.update(info)
846             info = parent_info
847         return info
848
849     def related_themes(self):
850         return Tag.objects.usage_for_queryset(
851             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
852             counts=True).filter(category='theme').order_by('-count')
853
854     def parent_cover_changed(self):
855         """Called when parent book's cover image is changed."""
856         if not self.cover_info(inherit=False):
857             if 'cover' not in app_settings.DONT_BUILD:
858                 self.cover.build_delay()
859                 self.cover_clean.build_delay()
860                 self.cover_thumb.build_delay()
861                 self.cover_api_thumb.build_delay()
862                 self.simple_cover.build_delay()
863                 self.cover_ebookpoint.build_delay()
864             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
865                 if format_ not in app_settings.DONT_BUILD:
866                     getattr(self, '%s_file' % format_).build_delay()
867             for child in self.children.all():
868                 child.parent_cover_changed()
869
870     def other_versions(self):
871         """Find other versions (i.e. in other languages) of the book."""
872         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
873
874     def parents(self):
875         books = []
876         parent = self.parent
877         while parent is not None:
878             books.insert(0, parent)
879             parent = parent.parent
880         return books
881
882     def pretty_title(self, html_links=False):
883         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
884         books = self.parents() + [self]
885         names.extend([(b.title, b.get_absolute_url()) for b in books])
886
887         if html_links:
888             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
889         else:
890             names = [tag[0] for tag in names]
891         return ', '.join(names)
892
893     def publisher(self):
894         publisher = self.get_extra_info_json()['publisher']
895         if isinstance(publisher, str):
896             return publisher
897         elif isinstance(publisher, list):
898             return ', '.join(publisher)
899
900     @classmethod
901     def tagged_top_level(cls, tags):
902         """ Returns top-level books tagged with `tags`.
903
904         It only returns those books which don't have ancestors which are
905         also tagged with those tags.
906
907         """
908         objects = cls.tagged.with_all(tags)
909         return objects.filter(findable=True).exclude(ancestor__in=objects)
910
911     @classmethod
912     def book_list(cls, book_filter=None):
913         """Generates a hierarchical listing of all books.
914
915         Books are optionally filtered with a test function.
916
917         """
918
919         books_by_parent = {}
920         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
921         if book_filter:
922             books = books.filter(book_filter).distinct()
923
924             book_ids = set(b['pk'] for b in books.values("pk").iterator())
925             for book in books.iterator():
926                 parent = book.parent_id
927                 if parent not in book_ids:
928                     parent = None
929                 books_by_parent.setdefault(parent, []).append(book)
930         else:
931             for book in books.iterator():
932                 books_by_parent.setdefault(book.parent_id, []).append(book)
933
934         orphans = []
935         books_by_author = OrderedDict()
936         for tag in Tag.objects.filter(category='author').iterator():
937             books_by_author[tag] = []
938
939         for book in books_by_parent.get(None, ()):
940             authors = list(book.authors().only('pk'))
941             if authors:
942                 for author in authors:
943                     books_by_author[author].append(book)
944             else:
945                 orphans.append(book)
946
947         return books_by_author, orphans, books_by_parent
948
    # Audience codes from the book metadata mapped to (sort rank, Polish
    # label): "szkoła podstawowa" is primary school, "gimnazjum" is middle
    # school and "liceum" is high school.  Several codes share one entry, so
    # they collapse into a single label once duplicates are removed.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
959
960     def audiences_pl(self):
961         audiences = self.get_extra_info_json().get('audiences', [])
962         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
963         return [a[1] for a in audiences]
964
965     def stage_note(self):
966         stage = self.get_extra_info_json().get('stage')
967         if stage and stage < '0.4':
968             return (_('This work needs modernisation'),
969                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
970         else:
971             return None, None
972
973     def choose_fragments(self, number):
974         fragments = self.fragments.order_by()
975         fragments_count = fragments.count()
976         if not fragments_count and self.children.exists():
977             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
978             fragments_count = fragments.count()
979         if fragments_count:
980             if fragments_count > number:
981                 offset = randint(0, fragments_count - number)
982             else:
983                 offset = 0
984             return fragments[offset : offset + number]
985         elif self.parent:
986             return self.parent.choose_fragments(number)
987         else:
988             return []
989
990     def choose_fragment(self):
991         fragments = self.choose_fragments(1)
992         if fragments:
993             return fragments[0]
994         else:
995             return None
996         
997     def fragment_data(self):
998         fragment = self.choose_fragment()
999         if fragment:
1000             return {
1001                 'title': fragment.book.pretty_title(),
1002                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1003             }
1004         else:
1005             return None
1006
1007     def update_popularity(self):
1008         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1009         try:
1010             pop = self.popularity
1011             pop.count = count
1012             pop.save()
1013         except BookPopularity.DoesNotExist:
1014             BookPopularity.objects.create(book=self, count=count)
1015
1016     def ridero_link(self):
1017         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1018
1019     def like(self, user):
1020         from social.utils import likes, get_set, set_sets
1021         if not likes(user, self):
1022             tag = get_set(user, '')
1023             set_sets(user, self, [tag])
1024
1025     def unlike(self, user):
1026         from social.utils import likes, set_sets
1027         if likes(user, self):
1028             set_sets(user, self, [])
1029
1030     def full_sort_key(self):
1031         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1032
1033     def cover_color(self):
1034         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1035
1036     @cached_render('catalogue/book_mini_box.html')
1037     def mini_box(self):
1038         return {
1039             'book': self
1040         }
1041
1042     @cached_render('catalogue/book_mini_box.html')
1043     def mini_box_nolink(self):
1044         return {
1045             'book': self,
1046             'no_link': True,
1047         }
1048
def add_file_fields():
    """Attach a ``<format>_file`` field to Book for every format in ``Book.formats``.

    Every format except 'xml' additionally gets a ``<format>_file_etag``
    character field.
    """
    for fmt in Book.formats:
        file_attr = "%s_file" % fmt

        # This weird globals() assignment makes Django migrations comfortable.
        upload_name = '_%s_upload_to' % fmt
        uploader = _ebook_upload_to('book/%s/%%s.%s' % (fmt, fmt))
        uploader.__name__ = upload_name
        globals()[uploader.__name__] = uploader

        file_field = EbookField(
            fmt, _("%s file" % fmt.upper()),
            upload_to=uploader,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, file_attr)

        if fmt == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{file_attr}_etag')
1067
1068
# Runs at import time, so the per-format file fields are added to Book
# as soon as this module is loaded.
add_file_fields()
1070
1071
1072 class BookPopularity(models.Model):
1073     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1074     count = models.IntegerField(default=0, db_index=True)