Fix downloading images on publish.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Module-level storage instance; it is passed as ``storage=`` to the
# ``Book.cover`` field.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath:
    """Callable for ``upload_to`` that builds the path from the instance slug.

    ``path`` is a %-format template; it is filled with ``instance.slug`` and
    the ``filename`` argument is ignored.
    """

    def __init__(self, path):
        self.path = path

    def __call__(self, instance, filename):
        template = self.path
        return template % instance.slug
44
45
# ``upload_to`` callables for the cover variants; each template is filled
# with the slug of the instance being saved (see ``UploadToPath.__call__``).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Return an ``UploadToPath`` built from the ``upload_path`` template."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
60     title = models.CharField(_('title'), max_length=32767)
61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
62     sort_key_author = models.CharField(
63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
67     description = models.TextField(_('description'), blank=True)
68     abstract = models.TextField(_('abstract'), blank=True)
69     toc = models.TextField(_('toc'), blank=True)
70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
72     parent_number = models.IntegerField(_('parent number'), default=0)
73     extra_info = models.TextField(_('extra information'), default='{}')
74     gazeta_link = models.CharField(blank=True, max_length=240)
75     wiki_link = models.CharField(blank=True, max_length=240)
76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
77     recommended = models.BooleanField(_('recommended'), default=False)
78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
79     preview = models.BooleanField(_('preview'), default=False)
80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
81     preview_key = models.CharField(max_length=32, blank=True, null=True)
82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
84     # files generated during publication
85     cover = EbookField(
86         'cover', _('cover'),
87         null=True, blank=True,
88         upload_to=_cover_upload_to,
89         storage=bofh_storage, max_length=255)
90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
91     # Cleaner version of cover for thumbs
92     cover_clean = EbookField(
93         'cover_clean', _('clean cover'),
94         null=True, blank=True,
95         upload_to=_cover_clean_upload_to,
96         max_length=255
97     )
98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
99     cover_thumb = EbookField(
100         'cover_thumb', _('cover thumbnail'),
101         null=True, blank=True,
102         upload_to=_cover_thumb_upload_to,
103         max_length=255)
104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
105     cover_api_thumb = EbookField(
106         'cover_api_thumb', _('cover thumbnail for mobile app'),
107         null=True, blank=True,
108         upload_to=_cover_api_thumb_upload_to,
109         max_length=255)
110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
111     simple_cover = EbookField(
112         'simple_cover', _('cover for mobile app'),
113         null=True, blank=True,
114         upload_to=_simple_cover_upload_to,
115         max_length=255)
116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
117     cover_ebookpoint = EbookField(
118         'cover_ebookpoint', _('cover for Ebookpoint'),
119         null=True, blank=True,
120         upload_to=_cover_ebookpoint_upload_to,
121         max_length=255)
122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
123     ebook_formats = constants.EBOOK_FORMATS
124     formats = ebook_formats + ['html', 'xml']
125
126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
128
129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
130     has_audience = models.BooleanField(default=False)
131
132     objects = models.Manager()
133     tagged = managers.ModelTaggedItemManager(Tag)
134     tags = managers.TagDescriptor(Tag)
135     tag_relations = GenericRelation(Tag.intermediary_table_model)
136
137     html_built = django.dispatch.Signal()
138     published = django.dispatch.Signal()
139
140     SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
145     class Meta:
146         ordering = ('sort_key_author', 'sort_key')
147         verbose_name = _('book')
148         verbose_name_plural = _('books')
149         app_label = 'catalogue'
150
151     def __str__(self):
152         return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
163     def authors(self):
164         return self.tags.filter(category='author')
165
166     def epochs(self):
167         return self.tags.filter(category='epoch')
168
169     def genres(self):
170         return self.tags.filter(category='genre')
171
172     def kinds(self):
173         return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
185     def author_unicode(self):
186         return self.cached_author
187
188     def kind_unicode(self):
189         return self.tag_unicode('kind')
190
191     def epoch_unicode(self):
192         return self.tag_unicode('epoch')
193
194     def genre_unicode(self):
195         return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
217     @property
218     def isbn_pdf(self):
219         return self.get_extra_info_json().get('isbn_pdf')
220
221     @property
222     def isbn_epub(self):
223         return self.get_extra_info_json().get('isbn_epub')
224
225     @property
226     def isbn_mobi(self):
227         return self.get_extra_info_json().get('isbn_mobi')
228
229     def is_accessible_to(self, user):
230         if not self.preview:
231             return True
232         if not user.is_authenticated:
233             return False
234         Membership = apps.get_model('club', 'Membership')
235         if Membership.is_active_for(user):
236             return True
237         Funding = apps.get_model('funding', 'Funding')
238         if Funding.objects.filter(user=user, offer__book=self):
239             return True
240         return False
241
242     def save(self, force_insert=False, force_update=False, **kwargs):
243         from sortify import sortify
244
245         self.sort_key = sortify(self.title)[:120]
246         self.title = str(self.title)  # ???
247
248         try:
249             author = self.authors().first().sort_key
250         except AttributeError:
251             author = ''
252         self.sort_key_author = author
253
254         self.cached_author = self.tag_unicode('author')
255         self.has_audience = 'audience' in self.get_extra_info_json()
256
257         if self.preview and not self.preview_key:
258             self.preview_key = get_random_hash(self.slug)[:32]
259
260         ret = super(Book, self).save(force_insert, force_update, **kwargs)
261
262         return ret
263
264     def get_absolute_url(self):
265         return reverse('book_detail', args=[self.slug])
266
267     def gallery_path(self):
268         return gallery_path(self.slug)
269
270     def gallery_url(self):
271         return gallery_url(self.slug)
272
273     def get_first_text(self):
274         if self.html_file:
275             return self
276         child = self.children.all().order_by('parent_number').first()
277         if child is not None:
278             return child.get_first_text()
279
280     def get_last_text(self):
281         if self.html_file:
282             return self
283         child = self.children.all().order_by('parent_number').last()
284         if child is not None:
285             return child.get_last_text()
286
287     def get_prev_text(self):
288         if not self.parent:
289             return None
290         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
291         if sibling is not None:
292             return sibling.get_last_text()
293
294         if self.parent.html_file:
295             return self.parent
296         
297         return self.parent.get_prev_text()
298
299     def get_next_text(self, inside=True):
300         if inside:
301             child = self.children.order_by('parent_number').first()
302             if child is not None:
303                 return child.get_first_text()
304
305         if not self.parent:
306             return None
307         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
308         if sibling is not None:
309             return sibling.get_first_text()
310         return self.parent.get_next_text(inside=False)
311
312     def get_child_audiobook(self):
313         BookMedia = apps.get_model('catalogue', 'BookMedia')
314         if not BookMedia.objects.filter(book__ancestor=self).exists():
315             return None
316         for child in self.children.order_by('parent_number').all():
317             if child.has_mp3_file():
318                 return child
319             child_sub = child.get_child_audiobook()
320             if child_sub is not None:
321                 return child_sub
322
323     def get_siblings(self):
324         if not self.parent:
325             return []
326         return self.parent.children.all().order_by('parent_number')
327
328     def get_children(self):
329         return self.children.all().order_by('parent_number')
330     
331     @property
332     def name(self):
333         return self.title
334
335     def language_code(self):
336         return constants.LANGUAGES_3TO2.get(self.language, self.language)
337
338     def language_name(self):
339         return dict(settings.LANGUAGES).get(self.language_code(), "")
340
341     def is_foreign(self):
342         return self.language_code() != settings.LANGUAGE_CODE
343
344     def set_audio_length(self):
345         length = self.get_audio_length()
346         if length > 0:
347             self.audio_length = self.format_audio_length(length)
348             self.save()
349
350     @staticmethod
351     def format_audio_length(seconds):
352         """
353         >>> Book.format_audio_length(1)
354         '0:01'
355         >>> Book.format_audio_length(3661)
356         '1:01:01'
357         """
358         if seconds < 60*60:
359             minutes = seconds // 60
360             seconds = seconds % 60
361             return '%d:%02d' % (minutes, seconds)
362         else:
363             hours = seconds // 3600
364             minutes = seconds % 3600 // 60
365             seconds = seconds % 60
366             return '%d:%02d:%02d' % (hours, minutes, seconds)
367
368     def get_audio_length(self):
369         total = 0
370         for media in self.get_mp3() or ():
371             total += app_settings.GET_MP3_LENGTH(media.file.path)
372         return int(total)
373
374     def has_media(self, type_):
375         if type_ in Book.formats:
376             return bool(getattr(self, "%s_file" % type_))
377         else:
378             return self.media.filter(type=type_).exists()
379
380     def has_audio(self):
381         return self.has_media('mp3')
382
383     def get_media(self, type_):
384         if self.has_media(type_):
385             if type_ in Book.formats:
386                 return getattr(self, "%s_file" % type_)
387             else:
388                 return self.media.filter(type=type_)
389         else:
390             return None
391
392     def get_mp3(self):
393         return self.get_media("mp3")
394
395     def get_odt(self):
396         return self.get_media("odt")
397
398     def get_ogg(self):
399         return self.get_media("ogg")
400
401     def get_daisy(self):
402         return self.get_media("daisy")
403
404     def media_url(self, format_):
405         media = self.get_media(format_)
406         if media:
407             if self.preview:
408                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
409             else:
410                 return media.url
411         else:
412             return None
413
414     def html_url(self):
415         return self.media_url('html')
416
417     def pdf_url(self):
418         return self.media_url('pdf')
419
420     def epub_url(self):
421         return self.media_url('epub')
422
423     def mobi_url(self):
424         return self.media_url('mobi')
425
426     def txt_url(self):
427         return self.media_url('txt')
428
429     def fb2_url(self):
430         return self.media_url('fb2')
431
432     def xml_url(self):
433         return self.media_url('xml')
434
435     def has_description(self):
436         return len(self.description) > 0
437     has_description.short_description = _('description')
438     has_description.boolean = True
439
440     def has_mp3_file(self):
441         return self.has_media("mp3")
442     has_mp3_file.short_description = 'MP3'
443     has_mp3_file.boolean = True
444
445     def has_ogg_file(self):
446         return self.has_media("ogg")
447     has_ogg_file.short_description = 'OGG'
448     has_ogg_file.boolean = True
449
450     def has_daisy_file(self):
451         return self.has_media("daisy")
452     has_daisy_file.short_description = 'DAISY'
453     has_daisy_file.boolean = True
454
455     @property
456     def media_daisy(self):
457         return self.get_media('daisy')
458     
459     def get_audiobooks(self):
460         ogg_files = {}
461         for m in self.media.filter(type='ogg').order_by().iterator():
462             ogg_files[m.name] = m
463
464         audiobooks = []
465         projects = set()
466         total_duration = 0
467         for mp3 in self.media.filter(type='mp3').iterator():
468             # ogg files are always from the same project
469             meta = mp3.get_extra_info_json()
470             project = meta.get('project')
471             if not project:
472                 # temporary fallback
473                 project = 'CzytamySłuchając'
474
475             projects.add((project, meta.get('funded_by', '')))
476             total_duration += mp3.duration or 0
477
478             media = {'mp3': mp3}
479
480             ogg = ogg_files.get(mp3.name)
481             if ogg:
482                 media['ogg'] = ogg
483             audiobooks.append(media)
484
485         projects = sorted(projects)
486         total_duration = '%d:%02d' % (
487             total_duration // 60,
488             total_duration % 60
489         )
490         return audiobooks, projects, total_duration
491
492     def wldocument(self, parse_dublincore=True, inherit=True):
493         from catalogue.import_utils import ORMDocProvider
494         from librarian.parser import WLDocument
495
496         if inherit and self.parent:
497             meta_fallbacks = self.parent.cover_info()
498         else:
499             meta_fallbacks = None
500
501         return WLDocument.from_file(
502             self.xml_file.path,
503             provider=ORMDocProvider(self),
504             parse_dublincore=parse_dublincore,
505             meta_fallbacks=meta_fallbacks)
506
507     def wldocument2(self):
508         from catalogue.import_utils import ORMDocProvider
509         from librarian.document import WLDocument
510         doc = WLDocument(
511             self.xml_file.path,
512             provider=ORMDocProvider(self)
513         )
514         doc.meta.update(self.cover_info())
515         return doc
516
517
518     @staticmethod
519     def zip_format(format_):
520         def pretty_file_name(book):
521             return "%s/%s.%s" % (
522                 book.get_extra_info_json()['author'],
523                 book.slug,
524                 format_)
525
526         field_name = "%s_file" % format_
527         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
528         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
529         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
530
531     def zip_audiobooks(self, format_):
532         bm = BookMedia.objects.filter(book=self, type=format_)
533         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
534         licenses = set()
535         for m in bm:
536             license = constants.LICENSES.get(
537                 m.get_extra_info_json().get('license'), {}
538             ).get('locative')
539             if license:
540                 licenses.add(license)
541         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
542             'licenses': licenses,
543         })
544         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
545
546     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
547         if not self.findable:
548             return
549         if index is None:
550             from search.index import Index
551             index = Index()
552         try:
553             index.index_book(self, book_info)
554             if index_tags:
555                 index.index_tags()
556             if commit:
557                 index.index.commit()
558         except Exception as e:
559             index.index.rollback()
560             raise e
561
562     # will make problems in conjunction with paid previews
563     def download_pictures(self, remote_gallery_url):
564         # This is only needed for legacy relative image paths.
565         gallery_path = self.gallery_path()
566         # delete previous files, so we don't include old files in ebooks
567         if os.path.isdir(gallery_path):
568             for filename in os.listdir(gallery_path):
569                 file_path = os.path.join(gallery_path, filename)
570                 os.unlink(file_path)
571         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
572         if ilustr_elements:
573             makedirs(gallery_path)
574             for ilustr in ilustr_elements:
575                 ilustr_src = ilustr.get('src')
576                 if '/' in ilustr_src:
577                     continue
578                 ilustr_path = os.path.join(gallery_path, ilustr_src)
579                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
580
581     def load_abstract(self):
582         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
583         if abstract is not None:
584             self.abstract = transform_abstrakt(abstract)
585         else:
586             self.abstract = ''
587
588     def load_toc(self):
589         self.toc = ''
590         if self.html_file:
591             parser = html.HTMLParser(encoding='utf-8')
592             tree = html.parse(self.html_file.path, parser=parser)
593             toc = tree.find('//div[@id="toc"]/ol')
594             if toc is None or not len(toc):
595                 return
596             html_link = reverse('book_text', args=[self.slug])
597             for a in toc.findall('.//a'):
598                 a.attrib['href'] = html_link + a.attrib['href']
599             self.toc = html.tostring(toc, encoding='unicode')
600             # div#toc
601             
602     @classmethod
603     def from_xml_file(cls, xml_file, **kwargs):
604         from django.core.files import File
605         from librarian import dcparser
606
607         # use librarian to parse meta-data
608         book_info = dcparser.parse(xml_file)
609
610         if not isinstance(xml_file, File):
611             xml_file = File(open(xml_file))
612
613         try:
614             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
615         finally:
616             xml_file.close()
617
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object stored as the book's XML file.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip ('cover' or ebook format
                names); combined with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value turns on
                preview until today plus that many days.
            findable: stored on the book.

        Returns:
            The created or updated ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used here is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from the metadata plus the shelves ('set' tags) kept from before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        # Send the ``published`` signal with the finished book.
        cls.published.send(sender=cls, instance=book)
        return book
737
738     def get_master(self):
739         master_tags = [
740             'opowiadanie',
741             'powiesc',
742             'dramat_wierszowany_l',
743             'dramat_wierszowany_lp',
744             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
745             'wywiad',
746         ]
747         from librarian.parser import WLDocument
748         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
749         root = wld.edoc.getroot()
750         for master in root.iter():
751             if master.tag in master_tags:
752                 return master
753     
754     def update_references(self):
755         from references.models import Entity, Reference
756         master = self.get_master()
757         if master is None:
758             master = []
759         found = set()
760         for i, sec in enumerate(master):
761             for ref in sec.findall('.//ref'):
762                 href = ref.attrib.get('href', '')
763                 if not href or href in found:
764                     continue
765                 found.add(href)
766                 entity, created = Entity.objects.get_or_create(
767                     uri=href
768                 )
769                 ref, created = Reference.objects.get_or_create(
770                     book=self,
771                     entity=entity
772                 )
773                 ref.first_section = 'sec%d' % (i + 1)
774                 entity.populate()
775                 entity.save()
776         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
777     
778     @property
779     def references(self):
780         return self.reference_set.all().select_related('entity')
781
782     @classmethod
783     @transaction.atomic
784     def repopulate_ancestors(cls):
785         """Fixes the ancestry cache."""
786         # TODO: table names
787         cursor = connection.cursor()
788         if connection.vendor == 'postgres':
789             cursor.execute("TRUNCATE catalogue_book_ancestor")
790             cursor.execute("""
791                 WITH RECURSIVE ancestry AS (
792                     SELECT book.id, book.parent_id
793                     FROM catalogue_book AS book
794                     WHERE book.parent_id IS NOT NULL
795                     UNION
796                     SELECT ancestor.id, book.parent_id
797                     FROM ancestry AS ancestor, catalogue_book AS book
798                     WHERE ancestor.parent_id = book.id
799                         AND book.parent_id IS NOT NULL
800                     )
801                 INSERT INTO catalogue_book_ancestor
802                     (from_book_id, to_book_id)
803                     SELECT id, parent_id
804                     FROM ancestry
805                     ORDER BY id;
806                 """)
807         else:
808             cursor.execute("DELETE FROM catalogue_book_ancestor")
809             for b in cls.objects.exclude(parent=None):
810                 parent = b.parent
811                 while parent is not None:
812                     b.ancestor.add(parent)
813                     parent = parent.parent
814
815     @property
816     def ancestors(self):
817         if self.parent:
818             for anc in self.parent.ancestors:
819                 yield anc
820             yield self.parent
821         else:
822             return []
823                     
824     def clear_cache(self):
825         clear_cached_renders(self.mini_box)
826         clear_cached_renders(self.mini_box_nolink)
827
828     def cover_info(self, inherit=True):
829         """Returns a dictionary to serve as fallback for BookInfo.
830
831         For now, the only thing inherited is the cover image.
832         """
833         need = False
834         info = {}
835         for field in ('cover_url', 'cover_by', 'cover_source'):
836             val = self.get_extra_info_json().get(field)
837             if val:
838                 info[field] = val
839             else:
840                 need = True
841         if inherit and need and self.parent is not None:
842             parent_info = self.parent.cover_info()
843             parent_info.update(info)
844             info = parent_info
845         return info
846
847     def related_themes(self):
848         return Tag.objects.usage_for_queryset(
849             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
850             counts=True).filter(category='theme').order_by('-count')
851
852     def parent_cover_changed(self):
853         """Called when parent book's cover image is changed."""
854         if not self.cover_info(inherit=False):
855             if 'cover' not in app_settings.DONT_BUILD:
856                 self.cover.build_delay()
857                 self.cover_clean.build_delay()
858                 self.cover_thumb.build_delay()
859                 self.cover_api_thumb.build_delay()
860                 self.simple_cover.build_delay()
861                 self.cover_ebookpoint.build_delay()
862             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
863                 if format_ not in app_settings.DONT_BUILD:
864                     getattr(self, '%s_file' % format_).build_delay()
865             for child in self.children.all():
866                 child.parent_cover_changed()
867
868     def other_versions(self):
869         """Find other versions (i.e. in other languages) of the book."""
870         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
871
872     def parents(self):
873         books = []
874         parent = self.parent
875         while parent is not None:
876             books.insert(0, parent)
877             parent = parent.parent
878         return books
879
880     def pretty_title(self, html_links=False):
881         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
882         books = self.parents() + [self]
883         names.extend([(b.title, b.get_absolute_url()) for b in books])
884
885         if html_links:
886             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
887         else:
888             names = [tag[0] for tag in names]
889         return ', '.join(names)
890
891     def publisher(self):
892         publisher = self.get_extra_info_json()['publisher']
893         if isinstance(publisher, str):
894             return publisher
895         elif isinstance(publisher, list):
896             return ', '.join(publisher)
897
898     @classmethod
899     def tagged_top_level(cls, tags):
900         """ Returns top-level books tagged with `tags`.
901
902         It only returns those books which don't have ancestors which are
903         also tagged with those tags.
904
905         """
906         objects = cls.tagged.with_all(tags)
907         return objects.filter(findable=True).exclude(ancestor__in=objects)
908
909     @classmethod
910     def book_list(cls, book_filter=None):
911         """Generates a hierarchical listing of all books.
912
913         Books are optionally filtered with a test function.
914
915         """
916
917         books_by_parent = {}
918         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
919         if book_filter:
920             books = books.filter(book_filter).distinct()
921
922             book_ids = set(b['pk'] for b in books.values("pk").iterator())
923             for book in books.iterator():
924                 parent = book.parent_id
925                 if parent not in book_ids:
926                     parent = None
927                 books_by_parent.setdefault(parent, []).append(book)
928         else:
929             for book in books.iterator():
930                 books_by_parent.setdefault(book.parent_id, []).append(book)
931
932         orphans = []
933         books_by_author = OrderedDict()
934         for tag in Tag.objects.filter(category='author').iterator():
935             books_by_author[tag] = []
936
937         for book in books_by_parent.get(None, ()):
938             authors = list(book.authors().only('pk'))
939             if authors:
940                 for author in authors:
941                     books_by_author[author].append(book)
942             else:
943                 orphans.append(book)
944
945         return books_by_author, orphans, books_by_parent
946
947     _audiences_pl = {
948         "SP": (1, "szkoła podstawowa"),
949         "SP1": (1, "szkoła podstawowa"),
950         "SP2": (1, "szkoła podstawowa"),
951         "SP3": (1, "szkoła podstawowa"),
952         "P": (1, "szkoła podstawowa"),
953         "G": (2, "gimnazjum"),
954         "L": (3, "liceum"),
955         "LP": (3, "liceum"),
956     }
957
958     def audiences_pl(self):
959         audiences = self.get_extra_info_json().get('audiences', [])
960         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
961         return [a[1] for a in audiences]
962
963     def stage_note(self):
964         stage = self.get_extra_info_json().get('stage')
965         if stage and stage < '0.4':
966             return (_('This work needs modernisation'),
967                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
968         else:
969             return None, None
970
971     def choose_fragments(self, number):
972         fragments = self.fragments.order_by()
973         fragments_count = fragments.count()
974         if not fragments_count and self.children.exists():
975             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
976             fragments_count = fragments.count()
977         if fragments_count:
978             if fragments_count > number:
979                 offset = randint(0, fragments_count - number)
980             else:
981                 offset = 0
982             return fragments[offset : offset + number]
983         elif self.parent:
984             return self.parent.choose_fragments(number)
985         else:
986             return []
987
988     def choose_fragment(self):
989         fragments = self.choose_fragments(1)
990         if fragments:
991             return fragments[0]
992         else:
993             return None
994         
995     def fragment_data(self):
996         fragment = self.choose_fragment()
997         if fragment:
998             return {
999                 'title': fragment.book.pretty_title(),
1000                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
1001             }
1002         else:
1003             return None
1004
1005     def update_popularity(self):
1006         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1007         try:
1008             pop = self.popularity
1009             pop.count = count
1010             pop.save()
1011         except BookPopularity.DoesNotExist:
1012             BookPopularity.objects.create(book=self, count=count)
1013
1014     def ridero_link(self):
1015         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1016
1017     def like(self, user):
1018         from social.utils import likes, get_set, set_sets
1019         if not likes(user, self):
1020             tag = get_set(user, '')
1021             set_sets(user, self, [tag])
1022
1023     def unlike(self, user):
1024         from social.utils import likes, set_sets
1025         if likes(user, self):
1026             set_sets(user, self, [])
1027
1028     def full_sort_key(self):
1029         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1030
1031     def cover_color(self):
1032         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1033
1034     @cached_render('catalogue/book_mini_box.html')
1035     def mini_box(self):
1036         return {
1037             'book': self
1038         }
1039
1040     @cached_render('catalogue/book_mini_box.html')
1041     def mini_box_nolink(self):
1042         return {
1043             'book': self,
1044             'no_link': True,
1045         }
1046
def add_file_fields():
    """Add a ``<format>_file`` EbookField to Book for every format in ``Book.formats``.

    Every format except 'xml' also gets a ``<format>_file_etag`` CharField.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to.__name__ = '_%s_upload_to' % format_
        globals()[upload_to.__name__] = upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')
1065
1066
1067 add_file_fields()
1068
1069
1070 class BookPopularity(models.Model):
1071     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1072     count = models.IntegerField(default=0, db_index=True)