Almost ready.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Shared storage instance; passed as ``storage`` to the main ``cover`` field below.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath(object):
    """Callable ``upload_to`` that fills a path template with the instance slug."""

    def __init__(self, path):
        # ``path`` is a %-style template with a single placeholder.
        self.path = path

    def __call__(self, instance, filename):
        # ``filename`` is ignored; the result depends only on the slug.
        template = self.path
        return template % instance.slug
44
45
# Upload-path callables for the cover image fields of Book; each template
# receives the book slug.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Return an ``UploadToPath`` callable for the given path template."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
    # Basic metadata. title, language, common_slug, extra_info, abstract, toc
    # and findable are assigned by from_text_and_meta().
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Shared by different versions of the same book (see other_versions()).
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Preformatted duration string written by set_audio_length().
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Format names; has_media()/get_media() map a name in ``formats`` to the
    # "<format>_file" attribute.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Book hierarchy: ``parent`` is the direct container, ``ancestor`` is a
    # cache of all containers rebuilt by repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
145     class Meta:
146         ordering = ('sort_key_author', 'sort_key')
147         verbose_name = _('book')
148         verbose_name_plural = _('books')
149         app_label = 'catalogue'
150
151     def __str__(self):
152         return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
163     def authors(self):
164         return self.tags.filter(category='author')
165
166     def epochs(self):
167         return self.tags.filter(category='epoch')
168
169     def genres(self):
170         return self.tags.filter(category='genre')
171
172     def kinds(self):
173         return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
185     def author_unicode(self):
186         return self.cached_author
187
188     def kind_unicode(self):
189         return self.tag_unicode('kind')
190
191     def epoch_unicode(self):
192         return self.tag_unicode('epoch')
193
194     def genre_unicode(self):
195         return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
217     @property
218     def isbn_pdf(self):
219         return self.get_extra_info_json().get('isbn_pdf')
220
221     @property
222     def isbn_epub(self):
223         return self.get_extra_info_json().get('isbn_epub')
224
225     @property
226     def isbn_mobi(self):
227         return self.get_extra_info_json().get('isbn_mobi')
228
229
230     def save(self, force_insert=False, force_update=False, **kwargs):
231         from sortify import sortify
232
233         self.sort_key = sortify(self.title)[:120]
234         self.title = str(self.title)  # ???
235
236         try:
237             author = self.authors().first().sort_key
238         except AttributeError:
239             author = ''
240         self.sort_key_author = author
241
242         self.cached_author = self.tag_unicode('author')
243         self.has_audience = 'audience' in self.get_extra_info_json()
244
245         if self.preview and not self.preview_key:
246             self.preview_key = get_random_hash(self.slug)[:32]
247
248         ret = super(Book, self).save(force_insert, force_update, **kwargs)
249
250         return ret
251
252     def get_absolute_url(self):
253         return reverse('book_detail', args=[self.slug])
254
255     def gallery_path(self):
256         return gallery_path(self.slug)
257
258     def gallery_url(self):
259         return gallery_url(self.slug)
260
261     def get_first_text(self):
262         if self.html_file:
263             return self
264         child = self.children.all().order_by('parent_number').first()
265         if child is not None:
266             return child.get_first_text()
267
268     def get_last_text(self):
269         if self.html_file:
270             return self
271         child = self.children.all().order_by('parent_number').last()
272         if child is not None:
273             return child.get_last_text()
274
275     def get_prev_text(self):
276         if not self.parent:
277             return None
278         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
279         if sibling is not None:
280             return sibling.get_last_text()
281
282         if self.parent.html_file:
283             return self.parent
284         
285         return self.parent.get_prev_text()
286
287     def get_next_text(self):
288         child = self.children.order_by('parent_number').first()
289         if child is not None:
290             return child.get_first_text()
291
292         if not self.parent:
293             return None
294         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
295         if sibling is not None:
296             return sibling.get_first_text()
297         return self.parent.get_next_text()
298
299     def get_child_audiobook(self):
300         BookMedia = apps.get_model('catalogue', 'BookMedia')
301         if not BookMedia.objects.filter(book__ancestor=self).exists():
302             return None
303         for child in self.children.all():
304             if child.has_mp3_file():
305                 return child
306             child_sub = child.get_child_audiobook()
307             if child_sub is not None:
308                 return child_sub
309
310     def get_siblings(self):
311         if not self.parent:
312             return []
313         return self.parent.children.all().order_by('parent_number')
314
315     def get_children(self):
316         return self.children.all().order_by('parent_number')
317     
318     @property
319     def name(self):
320         return self.title
321
322     def language_code(self):
323         return constants.LANGUAGES_3TO2.get(self.language, self.language)
324
325     def language_name(self):
326         return dict(settings.LANGUAGES).get(self.language_code(), "")
327
328     def is_foreign(self):
329         return self.language_code() != settings.LANGUAGE_CODE
330
331     def set_audio_length(self):
332         length = self.get_audio_length()
333         if length > 0:
334             self.audio_length = self.format_audio_length(length)
335             self.save()
336
337     @staticmethod
338     def format_audio_length(seconds):
339         """
340         >>> Book.format_audio_length(1)
341         '0:01'
342         >>> Book.format_audio_length(3661)
343         '1:01:01'
344         """
345         if seconds < 60*60:
346             minutes = seconds // 60
347             seconds = seconds % 60
348             return '%d:%02d' % (minutes, seconds)
349         else:
350             hours = seconds // 3600
351             minutes = seconds % 3600 // 60
352             seconds = seconds % 60
353             return '%d:%02d:%02d' % (hours, minutes, seconds)
354
355     def get_audio_length(self):
356         total = 0
357         for media in self.get_mp3() or ():
358             total += app_settings.GET_MP3_LENGTH(media.file.path)
359         return int(total)
360
361     def has_media(self, type_):
362         if type_ in Book.formats:
363             return bool(getattr(self, "%s_file" % type_))
364         else:
365             return self.media.filter(type=type_).exists()
366
367     def has_audio(self):
368         return self.has_media('mp3')
369
370     def get_media(self, type_):
371         if self.has_media(type_):
372             if type_ in Book.formats:
373                 return getattr(self, "%s_file" % type_)
374             else:
375                 return self.media.filter(type=type_)
376         else:
377             return None
378
379     def get_mp3(self):
380         return self.get_media("mp3")
381
382     def get_odt(self):
383         return self.get_media("odt")
384
385     def get_ogg(self):
386         return self.get_media("ogg")
387
388     def get_daisy(self):
389         return self.get_media("daisy")
390
391     def media_url(self, format_):
392         media = self.get_media(format_)
393         if media:
394             if self.preview:
395                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
396             else:
397                 return media.url
398         else:
399             return None
400
401     def html_url(self):
402         return self.media_url('html')
403
404     def pdf_url(self):
405         return self.media_url('pdf')
406
407     def epub_url(self):
408         return self.media_url('epub')
409
410     def mobi_url(self):
411         return self.media_url('mobi')
412
413     def txt_url(self):
414         return self.media_url('txt')
415
416     def fb2_url(self):
417         return self.media_url('fb2')
418
419     def xml_url(self):
420         return self.media_url('xml')
421
422     def has_description(self):
423         return len(self.description) > 0
424     has_description.short_description = _('description')
425     has_description.boolean = True
426
427     def has_mp3_file(self):
428         return self.has_media("mp3")
429     has_mp3_file.short_description = 'MP3'
430     has_mp3_file.boolean = True
431
432     def has_ogg_file(self):
433         return self.has_media("ogg")
434     has_ogg_file.short_description = 'OGG'
435     has_ogg_file.boolean = True
436
437     def has_daisy_file(self):
438         return self.has_media("daisy")
439     has_daisy_file.short_description = 'DAISY'
440     has_daisy_file.boolean = True
441
442     @property
443     def media_daisy(self):
444         return self.get_media('daisy')
445     
446     def get_audiobooks(self):
447         ogg_files = {}
448         for m in self.media.filter(type='ogg').order_by().iterator():
449             ogg_files[m.name] = m
450
451         audiobooks = []
452         projects = set()
453         total_duration = 0
454         for mp3 in self.media.filter(type='mp3').iterator():
455             # ogg files are always from the same project
456             meta = mp3.get_extra_info_json()
457             project = meta.get('project')
458             if not project:
459                 # temporary fallback
460                 project = 'CzytamySłuchając'
461
462             projects.add((project, meta.get('funded_by', '')))
463             total_duration += mp3.duration or 0
464
465             media = {'mp3': mp3}
466
467             ogg = ogg_files.get(mp3.name)
468             if ogg:
469                 media['ogg'] = ogg
470             audiobooks.append(media)
471
472         projects = sorted(projects)
473         total_duration = '%d:%02d' % (
474             total_duration // 60,
475             total_duration % 60
476         )
477         return audiobooks, projects, total_duration
478
479     def wldocument(self, parse_dublincore=True, inherit=True):
480         from catalogue.import_utils import ORMDocProvider
481         from librarian.parser import WLDocument
482
483         if inherit and self.parent:
484             meta_fallbacks = self.parent.cover_info()
485         else:
486             meta_fallbacks = None
487
488         return WLDocument.from_file(
489             self.xml_file.path,
490             provider=ORMDocProvider(self),
491             parse_dublincore=parse_dublincore,
492             meta_fallbacks=meta_fallbacks)
493
494     def wldocument2(self):
495         from catalogue.import_utils import ORMDocProvider
496         from librarian.document import WLDocument
497         doc = WLDocument(
498             self.xml_file.path,
499             provider=ORMDocProvider(self)
500         )
501         doc.meta.update(self.cover_info())
502         return doc
503
504
505     @staticmethod
506     def zip_format(format_):
507         def pretty_file_name(book):
508             return "%s/%s.%s" % (
509                 book.get_extra_info_json()['author'],
510                 book.slug,
511                 format_)
512
513         field_name = "%s_file" % format_
514         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
515         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
516         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
517
518     def zip_audiobooks(self, format_):
519         bm = BookMedia.objects.filter(book=self, type=format_)
520         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
521         licenses = set()
522         for m in bm:
523             license = constants.LICENSES.get(
524                 m.get_extra_info_json().get('license'), {}
525             ).get('locative')
526             if license:
527                 licenses.add(license)
528         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
529             'licenses': licenses,
530         })
531         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
532
533     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
534         if not self.findable:
535             return
536         if index is None:
537             from search.index import Index
538             index = Index()
539         try:
540             index.index_book(self, book_info)
541             if index_tags:
542                 index.index_tags()
543             if commit:
544                 index.index.commit()
545         except Exception as e:
546             index.index.rollback()
547             raise e
548
549     # will make problems in conjunction with paid previews
550     def download_pictures(self, remote_gallery_url):
551         gallery_path = self.gallery_path()
552         # delete previous files, so we don't include old files in ebooks
553         if os.path.isdir(gallery_path):
554             for filename in os.listdir(gallery_path):
555                 file_path = os.path.join(gallery_path, filename)
556                 os.unlink(file_path)
557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
558         if ilustr_elements:
559             makedirs(gallery_path)
560             for ilustr in ilustr_elements:
561                 ilustr_src = ilustr.get('src')
562                 ilustr_path = os.path.join(gallery_path, ilustr_src)
563                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
564
565     def load_abstract(self):
566         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
567         if abstract is not None:
568             self.abstract = transform_abstrakt(abstract)
569         else:
570             self.abstract = ''
571
572     def load_toc(self):
573         self.toc = ''
574         if self.html_file:
575             parser = html.HTMLParser(encoding='utf-8')
576             tree = html.parse(self.html_file.path, parser=parser)
577             toc = tree.find('//div[@id="toc"]/ol')
578             if toc is None or not len(toc):
579                 return
580             html_link = reverse('book_text', args=[self.slug])
581             for a in toc.findall('.//a'):
582                 a.attrib['href'] = html_link + a.attrib['href']
583             self.toc = html.tostring(toc, encoding='unicode')
584             # div#toc
585             
586     @classmethod
587     def from_xml_file(cls, xml_file, **kwargs):
588         from django.core.files import File
589         from librarian import dcparser
590
591         # use librarian to parse meta-data
592         book_info = dcparser.parse(xml_file)
593
594         if not isinstance(xml_file, File):
595             xml_file = File(open(xml_file))
596
597         try:
598             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
599         finally:
600             xml_file.close()
601
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a Book from an XML file and its parsed metadata.

        Args:
            raw_file: file object stored as the book's XML file.
            book_info: parsed metadata; this method reads its ``url``,
                ``language``, ``title``, ``variant_of``, optional ``parts``
                and ``to_dict()``.
            overwrite: allow updating an existing book with the same slug.
            dont_build: names of formats (or 'cover') not to build; merged
                with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every metadata tag is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Metadata tags replace the old ones; previously saved shelves are kept.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the list position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Children that gained, lost or inherited a changed cover get notified.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
721
722     def get_master(self):
723         master_tags = [
724             'opowiadanie',
725             'powiesc',
726             'dramat_wierszowany_l',
727             'dramat_wierszowany_lp',
728             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
729             'wywiad',
730         ]
731         from librarian.parser import WLDocument
732         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
733         root = wld.edoc.getroot()
734         for master in root.iter():
735             if master.tag in master_tags:
736                 return master
737     
738     def update_references(self):
739         from references.models import Entity, Reference
740         master = self.get_master()
741         found = set()
742         for i, sec in enumerate(master):
743             for ref in sec.findall('.//ref'):
744                 href = ref.attrib.get('href', '')
745                 if not href or href in found:
746                     continue
747                 found.add(href)
748                 entity, created = Entity.objects.get_or_create(
749                     uri=href
750                 )
751                 ref, created = Reference.objects.get_or_create(
752                     book=self,
753                     entity=entity
754                 )
755                 ref.first_section = 'sec%d' % (i + 1)
756                 entity.populate()
757                 entity.save()
758         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
759     
760     @property
761     def references(self):
762         return self.reference_set.all().select_related('entity')
763
764     @classmethod
765     @transaction.atomic
766     def repopulate_ancestors(cls):
767         """Fixes the ancestry cache."""
768         # TODO: table names
769         cursor = connection.cursor()
770         if connection.vendor == 'postgres':
771             cursor.execute("TRUNCATE catalogue_book_ancestor")
772             cursor.execute("""
773                 WITH RECURSIVE ancestry AS (
774                     SELECT book.id, book.parent_id
775                     FROM catalogue_book AS book
776                     WHERE book.parent_id IS NOT NULL
777                     UNION
778                     SELECT ancestor.id, book.parent_id
779                     FROM ancestry AS ancestor, catalogue_book AS book
780                     WHERE ancestor.parent_id = book.id
781                         AND book.parent_id IS NOT NULL
782                     )
783                 INSERT INTO catalogue_book_ancestor
784                     (from_book_id, to_book_id)
785                     SELECT id, parent_id
786                     FROM ancestry
787                     ORDER BY id;
788                 """)
789         else:
790             cursor.execute("DELETE FROM catalogue_book_ancestor")
791             for b in cls.objects.exclude(parent=None):
792                 parent = b.parent
793                 while parent is not None:
794                     b.ancestor.add(parent)
795                     parent = parent.parent
796
797     @property
798     def ancestors(self):
799         if self.parent:
800             for anc in self.parent.ancestors:
801                 yield anc
802             yield self.parent
803         else:
804             return []
805                     
806     def clear_cache(self):
807         clear_cached_renders(self.mini_box)
808         clear_cached_renders(self.mini_box_nolink)
809
810     def cover_info(self, inherit=True):
811         """Returns a dictionary to serve as fallback for BookInfo.
812
813         For now, the only thing inherited is the cover image.
814         """
815         need = False
816         info = {}
817         for field in ('cover_url', 'cover_by', 'cover_source'):
818             val = self.get_extra_info_json().get(field)
819             if val:
820                 info[field] = val
821             else:
822                 need = True
823         if inherit and need and self.parent is not None:
824             parent_info = self.parent.cover_info()
825             parent_info.update(info)
826             info = parent_info
827         return info
828
829     def related_themes(self):
830         return Tag.objects.usage_for_queryset(
831             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
832             counts=True).filter(category='theme')
833
834     def parent_cover_changed(self):
835         """Called when parent book's cover image is changed."""
836         if not self.cover_info(inherit=False):
837             if 'cover' not in app_settings.DONT_BUILD:
838                 self.cover.build_delay()
839                 self.cover_clean.build_delay()
840                 self.cover_thumb.build_delay()
841                 self.cover_api_thumb.build_delay()
842                 self.simple_cover.build_delay()
843                 self.cover_ebookpoint.build_delay()
844             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
845                 if format_ not in app_settings.DONT_BUILD:
846                     getattr(self, '%s_file' % format_).build_delay()
847             for child in self.children.all():
848                 child.parent_cover_changed()
849
850     def other_versions(self):
851         """Find other versions (i.e. in other languages) of the book."""
852         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
853
854     def parents(self):
855         books = []
856         parent = self.parent
857         while parent is not None:
858             books.insert(0, parent)
859             parent = parent.parent
860         return books
861
862     def pretty_title(self, html_links=False):
863         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
864         books = self.parents() + [self]
865         names.extend([(b.title, b.get_absolute_url()) for b in books])
866
867         if html_links:
868             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
869         else:
870             names = [tag[0] for tag in names]
871         return ', '.join(names)
872
873     def publisher(self):
874         publisher = self.get_extra_info_json()['publisher']
875         if isinstance(publisher, str):
876             return publisher
877         elif isinstance(publisher, list):
878             return ', '.join(publisher)
879
880     @classmethod
881     def tagged_top_level(cls, tags):
882         """ Returns top-level books tagged with `tags`.
883
884         It only returns those books which don't have ancestors which are
885         also tagged with those tags.
886
887         """
888         objects = cls.tagged.with_all(tags)
889         return objects.filter(findable=True).exclude(ancestor__in=objects)
890
891     @classmethod
892     def book_list(cls, book_filter=None):
893         """Generates a hierarchical listing of all books.
894
895         Books are optionally filtered with a test function.
896
897         """
898
899         books_by_parent = {}
900         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
901         if book_filter:
902             books = books.filter(book_filter).distinct()
903
904             book_ids = set(b['pk'] for b in books.values("pk").iterator())
905             for book in books.iterator():
906                 parent = book.parent_id
907                 if parent not in book_ids:
908                     parent = None
909                 books_by_parent.setdefault(parent, []).append(book)
910         else:
911             for book in books.iterator():
912                 books_by_parent.setdefault(book.parent_id, []).append(book)
913
914         orphans = []
915         books_by_author = OrderedDict()
916         for tag in Tag.objects.filter(category='author').iterator():
917             books_by_author[tag] = []
918
919         for book in books_by_parent.get(None, ()):
920             authors = list(book.authors().only('pk'))
921             if authors:
922                 for author in authors:
923                     books_by_author[author].append(book)
924             else:
925                 orphans.append(book)
926
927         return books_by_author, orphans, books_by_parent
928
929     _audiences_pl = {
930         "SP": (1, "szkoła podstawowa"),
931         "SP1": (1, "szkoła podstawowa"),
932         "SP2": (1, "szkoła podstawowa"),
933         "SP3": (1, "szkoła podstawowa"),
934         "P": (1, "szkoła podstawowa"),
935         "G": (2, "gimnazjum"),
936         "L": (3, "liceum"),
937         "LP": (3, "liceum"),
938     }
939
940     def audiences_pl(self):
941         audiences = self.get_extra_info_json().get('audiences', [])
942         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
943         return [a[1] for a in audiences]
944
945     def stage_note(self):
946         stage = self.get_extra_info_json().get('stage')
947         if stage and stage < '0.4':
948             return (_('This work needs modernisation'),
949                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
950         else:
951             return None, None
952
953     def choose_fragments(self, number):
954         fragments = self.fragments.order_by()
955         fragments_count = fragments.count()
956         if not fragments_count and self.children.exists():
957             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
958             fragments_count = fragments.count()
959         if fragments_count:
960             if fragments_count > number:
961                 offset = randint(0, fragments_count - number)
962             else:
963                 offset = 0
964             return fragments[offset : offset + number]
965         elif self.parent:
966             return self.parent.choose_fragments(number)
967         else:
968             return []
969
970     def choose_fragment(self):
971         fragments = self.choose_fragments(1)
972         if fragments:
973             return fragments[0]
974         else:
975             return None
976         
977     def fragment_data(self):
978         fragment = self.choose_fragment()
979         if fragment:
980             return {
981                 'title': fragment.book.pretty_title(),
982                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
983             }
984         else:
985             return None
986
987     def update_popularity(self):
988         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
989         try:
990             pop = self.popularity
991             pop.count = count
992             pop.save()
993         except BookPopularity.DoesNotExist:
994             BookPopularity.objects.create(book=self, count=count)
995
996     def ridero_link(self):
997         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
998
999     def like(self, user):
1000         from social.utils import likes, get_set, set_sets
1001         if not likes(user, self):
1002             tag = get_set(user, '')
1003             set_sets(user, self, [tag])
1004
1005     def unlike(self, user):
1006         from social.utils import likes, set_sets
1007         if likes(user, self):
1008             set_sets(user, self, [])
1009
1010     def full_sort_key(self):
1011         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1012
1013     def cover_color(self):
1014         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1015
1016     @cached_render('catalogue/book_mini_box.html')
1017     def mini_box(self):
1018         return {
1019             'book': self
1020         }
1021
1022     @cached_render('catalogue/book_mini_box.html')
1023     def mini_box_nolink(self):
1024         return {
1025             'book': self,
1026             'no_link': True,
1027         }
1028
def add_file_fields():
    """Attach a ``<format>_file`` field to ``Book`` for each of ``Book.formats``.

    Every format except ``xml`` also gets an indexed, non-editable
    ``<format>_file_etag`` character field.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_

        # This weird globals() assignment makes Django migrations comfortable.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        file_field = EbookField(
            format_,
            _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default='',
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')


add_file_fields()
1050
1051
1052 class BookPopularity(models.Model):
1053     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1054     count = models.IntegerField(default=0, db_index=True)