Better experiments management.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Shared storage instance; passed as ``storage`` to the ``cover`` field below.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath:
    """Callable ``upload_to`` that fills a path template with the instance slug."""

    def __init__(self, path):
        # ``path`` is a %-style template with a single placeholder for the slug.
        self.path = path

    def __call__(self, instance, filename):
        # The name of the uploaded file is deliberately ignored.
        slug = instance.slug
        return self.path % slug
44
45
# Upload path builders for the cover variants; each template receives the
# book slug (see UploadToPath.__call__).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap a path template in an ``UploadToPath`` callable."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of the same book (see other_versions()).
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded dict; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview state; preview_key is generated in save() when it is missing.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    ebook_formats = constants.EBOOK_FORMATS
    # Formats looked up as "<format>_file" attributes (see has_media()).
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
    class Meta:
        # Default ordering: by author sort key first, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
150
151     def __str__(self):
152         return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
163     def authors(self):
164         return self.tags.filter(category='author')
165
166     def epochs(self):
167         return self.tags.filter(category='epoch')
168
169     def genres(self):
170         return self.tags.filter(category='genre')
171
172     def kinds(self):
173         return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
185     def author_unicode(self):
186         return self.cached_author
187
188     def kind_unicode(self):
189         return self.tag_unicode('kind')
190
191     def epoch_unicode(self):
192         return self.tag_unicode('epoch')
193
194     def genre_unicode(self):
195         return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
217     @property
218     def isbn_pdf(self):
219         return self.get_extra_info_json().get('isbn_pdf')
220
221     @property
222     def isbn_epub(self):
223         return self.get_extra_info_json().get('isbn_epub')
224
225     @property
226     def isbn_mobi(self):
227         return self.get_extra_info_json().get('isbn_mobi')
228
229     def is_accessible_to(self, user):
230         if not self.preview:
231             return True
232         Membership = apps.get_model('club', 'Membership')
233         return Membership.is_active_for(user)
234
235     def save(self, force_insert=False, force_update=False, **kwargs):
236         from sortify import sortify
237
238         self.sort_key = sortify(self.title)[:120]
239         self.title = str(self.title)  # ???
240
241         try:
242             author = self.authors().first().sort_key
243         except AttributeError:
244             author = ''
245         self.sort_key_author = author
246
247         self.cached_author = self.tag_unicode('author')
248         self.has_audience = 'audience' in self.get_extra_info_json()
249
250         if self.preview and not self.preview_key:
251             self.preview_key = get_random_hash(self.slug)[:32]
252
253         ret = super(Book, self).save(force_insert, force_update, **kwargs)
254
255         return ret
256
257     def get_absolute_url(self):
258         return reverse('book_detail', args=[self.slug])
259
260     def gallery_path(self):
261         return gallery_path(self.slug)
262
263     def gallery_url(self):
264         return gallery_url(self.slug)
265
266     def get_first_text(self):
267         if self.html_file:
268             return self
269         child = self.children.all().order_by('parent_number').first()
270         if child is not None:
271             return child.get_first_text()
272
273     def get_last_text(self):
274         if self.html_file:
275             return self
276         child = self.children.all().order_by('parent_number').last()
277         if child is not None:
278             return child.get_last_text()
279
280     def get_prev_text(self):
281         if not self.parent:
282             return None
283         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
284         if sibling is not None:
285             return sibling.get_last_text()
286
287         if self.parent.html_file:
288             return self.parent
289         
290         return self.parent.get_prev_text()
291
292     def get_next_text(self):
293         child = self.children.order_by('parent_number').first()
294         if child is not None:
295             return child.get_first_text()
296
297         if not self.parent:
298             return None
299         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
300         if sibling is not None:
301             return sibling.get_first_text()
302         return self.parent.get_next_text()
303
304     def get_child_audiobook(self):
305         BookMedia = apps.get_model('catalogue', 'BookMedia')
306         if not BookMedia.objects.filter(book__ancestor=self).exists():
307             return None
308         for child in self.children.all():
309             if child.has_mp3_file():
310                 return child
311             child_sub = child.get_child_audiobook()
312             if child_sub is not None:
313                 return child_sub
314
315     def get_siblings(self):
316         if not self.parent:
317             return []
318         return self.parent.children.all().order_by('parent_number')
319
320     def get_children(self):
321         return self.children.all().order_by('parent_number')
322     
323     @property
324     def name(self):
325         return self.title
326
327     def language_code(self):
328         return constants.LANGUAGES_3TO2.get(self.language, self.language)
329
330     def language_name(self):
331         return dict(settings.LANGUAGES).get(self.language_code(), "")
332
333     def is_foreign(self):
334         return self.language_code() != settings.LANGUAGE_CODE
335
336     def set_audio_length(self):
337         length = self.get_audio_length()
338         if length > 0:
339             self.audio_length = self.format_audio_length(length)
340             self.save()
341
342     @staticmethod
343     def format_audio_length(seconds):
344         """
345         >>> Book.format_audio_length(1)
346         '0:01'
347         >>> Book.format_audio_length(3661)
348         '1:01:01'
349         """
350         if seconds < 60*60:
351             minutes = seconds // 60
352             seconds = seconds % 60
353             return '%d:%02d' % (minutes, seconds)
354         else:
355             hours = seconds // 3600
356             minutes = seconds % 3600 // 60
357             seconds = seconds % 60
358             return '%d:%02d:%02d' % (hours, minutes, seconds)
359
360     def get_audio_length(self):
361         total = 0
362         for media in self.get_mp3() or ():
363             total += app_settings.GET_MP3_LENGTH(media.file.path)
364         return int(total)
365
366     def has_media(self, type_):
367         if type_ in Book.formats:
368             return bool(getattr(self, "%s_file" % type_))
369         else:
370             return self.media.filter(type=type_).exists()
371
372     def has_audio(self):
373         return self.has_media('mp3')
374
375     def get_media(self, type_):
376         if self.has_media(type_):
377             if type_ in Book.formats:
378                 return getattr(self, "%s_file" % type_)
379             else:
380                 return self.media.filter(type=type_)
381         else:
382             return None
383
384     def get_mp3(self):
385         return self.get_media("mp3")
386
387     def get_odt(self):
388         return self.get_media("odt")
389
390     def get_ogg(self):
391         return self.get_media("ogg")
392
393     def get_daisy(self):
394         return self.get_media("daisy")
395
396     def media_url(self, format_):
397         media = self.get_media(format_)
398         if media:
399             if self.preview:
400                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
401             else:
402                 return media.url
403         else:
404             return None
405
406     def html_url(self):
407         return self.media_url('html')
408
409     def pdf_url(self):
410         return self.media_url('pdf')
411
412     def epub_url(self):
413         return self.media_url('epub')
414
415     def mobi_url(self):
416         return self.media_url('mobi')
417
418     def txt_url(self):
419         return self.media_url('txt')
420
421     def fb2_url(self):
422         return self.media_url('fb2')
423
424     def xml_url(self):
425         return self.media_url('xml')
426
427     def has_description(self):
428         return len(self.description) > 0
429     has_description.short_description = _('description')
430     has_description.boolean = True
431
432     def has_mp3_file(self):
433         return self.has_media("mp3")
434     has_mp3_file.short_description = 'MP3'
435     has_mp3_file.boolean = True
436
437     def has_ogg_file(self):
438         return self.has_media("ogg")
439     has_ogg_file.short_description = 'OGG'
440     has_ogg_file.boolean = True
441
442     def has_daisy_file(self):
443         return self.has_media("daisy")
444     has_daisy_file.short_description = 'DAISY'
445     has_daisy_file.boolean = True
446
447     @property
448     def media_daisy(self):
449         return self.get_media('daisy')
450     
451     def get_audiobooks(self):
452         ogg_files = {}
453         for m in self.media.filter(type='ogg').order_by().iterator():
454             ogg_files[m.name] = m
455
456         audiobooks = []
457         projects = set()
458         total_duration = 0
459         for mp3 in self.media.filter(type='mp3').iterator():
460             # ogg files are always from the same project
461             meta = mp3.get_extra_info_json()
462             project = meta.get('project')
463             if not project:
464                 # temporary fallback
465                 project = 'CzytamySłuchając'
466
467             projects.add((project, meta.get('funded_by', '')))
468             total_duration += mp3.duration or 0
469
470             media = {'mp3': mp3}
471
472             ogg = ogg_files.get(mp3.name)
473             if ogg:
474                 media['ogg'] = ogg
475             audiobooks.append(media)
476
477         projects = sorted(projects)
478         total_duration = '%d:%02d' % (
479             total_duration // 60,
480             total_duration % 60
481         )
482         return audiobooks, projects, total_duration
483
484     def wldocument(self, parse_dublincore=True, inherit=True):
485         from catalogue.import_utils import ORMDocProvider
486         from librarian.parser import WLDocument
487
488         if inherit and self.parent:
489             meta_fallbacks = self.parent.cover_info()
490         else:
491             meta_fallbacks = None
492
493         return WLDocument.from_file(
494             self.xml_file.path,
495             provider=ORMDocProvider(self),
496             parse_dublincore=parse_dublincore,
497             meta_fallbacks=meta_fallbacks)
498
499     def wldocument2(self):
500         from catalogue.import_utils import ORMDocProvider
501         from librarian.document import WLDocument
502         doc = WLDocument(
503             self.xml_file.path,
504             provider=ORMDocProvider(self)
505         )
506         doc.meta.update(self.cover_info())
507         return doc
508
509
510     @staticmethod
511     def zip_format(format_):
512         def pretty_file_name(book):
513             return "%s/%s.%s" % (
514                 book.get_extra_info_json()['author'],
515                 book.slug,
516                 format_)
517
518         field_name = "%s_file" % format_
519         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
520         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
521         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
522
523     def zip_audiobooks(self, format_):
524         bm = BookMedia.objects.filter(book=self, type=format_)
525         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
526         licenses = set()
527         for m in bm:
528             license = constants.LICENSES.get(
529                 m.get_extra_info_json().get('license'), {}
530             ).get('locative')
531             if license:
532                 licenses.add(license)
533         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
534             'licenses': licenses,
535         })
536         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
537
538     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
539         if not self.findable:
540             return
541         if index is None:
542             from search.index import Index
543             index = Index()
544         try:
545             index.index_book(self, book_info)
546             if index_tags:
547                 index.index_tags()
548             if commit:
549                 index.index.commit()
550         except Exception as e:
551             index.index.rollback()
552             raise e
553
554     # will make problems in conjunction with paid previews
555     def download_pictures(self, remote_gallery_url):
556         gallery_path = self.gallery_path()
557         # delete previous files, so we don't include old files in ebooks
558         if os.path.isdir(gallery_path):
559             for filename in os.listdir(gallery_path):
560                 file_path = os.path.join(gallery_path, filename)
561                 os.unlink(file_path)
562         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
563         if ilustr_elements:
564             makedirs(gallery_path)
565             for ilustr in ilustr_elements:
566                 ilustr_src = ilustr.get('src')
567                 ilustr_path = os.path.join(gallery_path, ilustr_src)
568                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
569
570     def load_abstract(self):
571         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
572         if abstract is not None:
573             self.abstract = transform_abstrakt(abstract)
574         else:
575             self.abstract = ''
576
577     def load_toc(self):
578         self.toc = ''
579         if self.html_file:
580             parser = html.HTMLParser(encoding='utf-8')
581             tree = html.parse(self.html_file.path, parser=parser)
582             toc = tree.find('//div[@id="toc"]/ol')
583             if toc is None or not len(toc):
584                 return
585             html_link = reverse('book_text', args=[self.slug])
586             for a in toc.findall('.//a'):
587                 a.attrib['href'] = html_link + a.attrib['href']
588             self.toc = html.tostring(toc, encoding='unicode')
589             # div#toc
590             
591     @classmethod
592     def from_xml_file(cls, xml_file, **kwargs):
593         from django.core.files import File
594         from librarian import dcparser
595
596         # use librarian to parse meta-data
597         book_info = dcparser.parse(xml_file)
598
599         if not isinstance(xml_file, File):
600             xml_file = File(open(xml_file))
601
602         try:
603             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
604         finally:
605             xml_file.close()
606
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; its ``url.slug`` is the book's slug.
            overwrite: allow updating a book that already exists.
            dont_build: names of files not to build, in addition to
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires ``findable``
                and ``settings.NO_SEARCH_INDEX`` to be false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from there.
            days: for a newly created book, a non-zero value puts it in
                preview for that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than ``a-z0-9-``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Shelves ('set' tags) saved above are kept alongside the new tags.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Children whose effective cover may have changed rebuild their files.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
726
727     def get_master(self):
728         master_tags = [
729             'opowiadanie',
730             'powiesc',
731             'dramat_wierszowany_l',
732             'dramat_wierszowany_lp',
733             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
734             'wywiad',
735         ]
736         from librarian.parser import WLDocument
737         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
738         root = wld.edoc.getroot()
739         for master in root.iter():
740             if master.tag in master_tags:
741                 return master
742     
743     def update_references(self):
744         from references.models import Entity, Reference
745         master = self.get_master()
746         if master is None:
747             master = []
748         found = set()
749         for i, sec in enumerate(master):
750             for ref in sec.findall('.//ref'):
751                 href = ref.attrib.get('href', '')
752                 if not href or href in found:
753                     continue
754                 found.add(href)
755                 entity, created = Entity.objects.get_or_create(
756                     uri=href
757                 )
758                 ref, created = Reference.objects.get_or_create(
759                     book=self,
760                     entity=entity
761                 )
762                 ref.first_section = 'sec%d' % (i + 1)
763                 entity.populate()
764                 entity.save()
765         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
766     
767     @property
768     def references(self):
769         return self.reference_set.all().select_related('entity')
770
771     @classmethod
772     @transaction.atomic
773     def repopulate_ancestors(cls):
774         """Fixes the ancestry cache."""
775         # TODO: table names
776         cursor = connection.cursor()
777         if connection.vendor == 'postgres':
778             cursor.execute("TRUNCATE catalogue_book_ancestor")
779             cursor.execute("""
780                 WITH RECURSIVE ancestry AS (
781                     SELECT book.id, book.parent_id
782                     FROM catalogue_book AS book
783                     WHERE book.parent_id IS NOT NULL
784                     UNION
785                     SELECT ancestor.id, book.parent_id
786                     FROM ancestry AS ancestor, catalogue_book AS book
787                     WHERE ancestor.parent_id = book.id
788                         AND book.parent_id IS NOT NULL
789                     )
790                 INSERT INTO catalogue_book_ancestor
791                     (from_book_id, to_book_id)
792                     SELECT id, parent_id
793                     FROM ancestry
794                     ORDER BY id;
795                 """)
796         else:
797             cursor.execute("DELETE FROM catalogue_book_ancestor")
798             for b in cls.objects.exclude(parent=None):
799                 parent = b.parent
800                 while parent is not None:
801                     b.ancestor.add(parent)
802                     parent = parent.parent
803
804     @property
805     def ancestors(self):
806         if self.parent:
807             for anc in self.parent.ancestors:
808                 yield anc
809             yield self.parent
810         else:
811             return []
812                     
813     def clear_cache(self):
814         clear_cached_renders(self.mini_box)
815         clear_cached_renders(self.mini_box_nolink)
816
817     def cover_info(self, inherit=True):
818         """Returns a dictionary to serve as fallback for BookInfo.
819
820         For now, the only thing inherited is the cover image.
821         """
822         need = False
823         info = {}
824         for field in ('cover_url', 'cover_by', 'cover_source'):
825             val = self.get_extra_info_json().get(field)
826             if val:
827                 info[field] = val
828             else:
829                 need = True
830         if inherit and need and self.parent is not None:
831             parent_info = self.parent.cover_info()
832             parent_info.update(info)
833             info = parent_info
834         return info
835
836     def related_themes(self):
837         return Tag.objects.usage_for_queryset(
838             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
839             counts=True).filter(category='theme')
840
841     def parent_cover_changed(self):
842         """Called when parent book's cover image is changed."""
843         if not self.cover_info(inherit=False):
844             if 'cover' not in app_settings.DONT_BUILD:
845                 self.cover.build_delay()
846                 self.cover_clean.build_delay()
847                 self.cover_thumb.build_delay()
848                 self.cover_api_thumb.build_delay()
849                 self.simple_cover.build_delay()
850                 self.cover_ebookpoint.build_delay()
851             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
852                 if format_ not in app_settings.DONT_BUILD:
853                     getattr(self, '%s_file' % format_).build_delay()
854             for child in self.children.all():
855                 child.parent_cover_changed()
856
857     def other_versions(self):
858         """Find other versions (i.e. in other languages) of the book."""
859         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
860
861     def parents(self):
862         books = []
863         parent = self.parent
864         while parent is not None:
865             books.insert(0, parent)
866             parent = parent.parent
867         return books
868
869     def pretty_title(self, html_links=False):
870         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
871         books = self.parents() + [self]
872         names.extend([(b.title, b.get_absolute_url()) for b in books])
873
874         if html_links:
875             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
876         else:
877             names = [tag[0] for tag in names]
878         return ', '.join(names)
879
880     def publisher(self):
881         publisher = self.get_extra_info_json()['publisher']
882         if isinstance(publisher, str):
883             return publisher
884         elif isinstance(publisher, list):
885             return ', '.join(publisher)
886
887     @classmethod
888     def tagged_top_level(cls, tags):
889         """ Returns top-level books tagged with `tags`.
890
891         It only returns those books which don't have ancestors which are
892         also tagged with those tags.
893
894         """
895         objects = cls.tagged.with_all(tags)
896         return objects.filter(findable=True).exclude(ancestor__in=objects)
897
898     @classmethod
899     def book_list(cls, book_filter=None):
900         """Generates a hierarchical listing of all books.
901
902         Books are optionally filtered with a test function.
903
904         """
905
906         books_by_parent = {}
907         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
908         if book_filter:
909             books = books.filter(book_filter).distinct()
910
911             book_ids = set(b['pk'] for b in books.values("pk").iterator())
912             for book in books.iterator():
913                 parent = book.parent_id
914                 if parent not in book_ids:
915                     parent = None
916                 books_by_parent.setdefault(parent, []).append(book)
917         else:
918             for book in books.iterator():
919                 books_by_parent.setdefault(book.parent_id, []).append(book)
920
921         orphans = []
922         books_by_author = OrderedDict()
923         for tag in Tag.objects.filter(category='author').iterator():
924             books_by_author[tag] = []
925
926         for book in books_by_parent.get(None, ()):
927             authors = list(book.authors().only('pk'))
928             if authors:
929                 for author in authors:
930                     books_by_author[author].append(book)
931             else:
932                 orphans.append(book)
933
934         return books_by_author, orphans, books_by_parent
935
    # Maps an audience code found in a book's extra info to a pair of
    # (sort rank, Polish display name).  Several codes share one pair, so
    # they collapse into a single entry when audiences_pl() deduplicates.
    # The names mean: primary school, lower secondary school ("gimnazjum")
    # and secondary school ("liceum").
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
946
947     def audiences_pl(self):
948         audiences = self.get_extra_info_json().get('audiences', [])
949         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
950         return [a[1] for a in audiences]
951
952     def stage_note(self):
953         stage = self.get_extra_info_json().get('stage')
954         if stage and stage < '0.4':
955             return (_('This work needs modernisation'),
956                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
957         else:
958             return None, None
959
960     def choose_fragments(self, number):
961         fragments = self.fragments.order_by()
962         fragments_count = fragments.count()
963         if not fragments_count and self.children.exists():
964             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
965             fragments_count = fragments.count()
966         if fragments_count:
967             if fragments_count > number:
968                 offset = randint(0, fragments_count - number)
969             else:
970                 offset = 0
971             return fragments[offset : offset + number]
972         elif self.parent:
973             return self.parent.choose_fragments(number)
974         else:
975             return []
976
977     def choose_fragment(self):
978         fragments = self.choose_fragments(1)
979         if fragments:
980             return fragments[0]
981         else:
982             return None
983         
984     def fragment_data(self):
985         fragment = self.choose_fragment()
986         if fragment:
987             return {
988                 'title': fragment.book.pretty_title(),
989                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
990             }
991         else:
992             return None
993
994     def update_popularity(self):
995         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
996         try:
997             pop = self.popularity
998             pop.count = count
999             pop.save()
1000         except BookPopularity.DoesNotExist:
1001             BookPopularity.objects.create(book=self, count=count)
1002
1003     def ridero_link(self):
1004         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1005
1006     def like(self, user):
1007         from social.utils import likes, get_set, set_sets
1008         if not likes(user, self):
1009             tag = get_set(user, '')
1010             set_sets(user, self, [tag])
1011
1012     def unlike(self, user):
1013         from social.utils import likes, set_sets
1014         if likes(user, self):
1015             set_sets(user, self, [])
1016
1017     def full_sort_key(self):
1018         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1019
1020     def cover_color(self):
1021         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1022
1023     @cached_render('catalogue/book_mini_box.html')
1024     def mini_box(self):
1025         return {
1026             'book': self
1027         }
1028
1029     @cached_render('catalogue/book_mini_box.html')
1030     def mini_box_nolink(self):
1031         return {
1032             'book': self,
1033             'no_link': True,
1034         }
1035
def add_file_fields():
    """Attach the per-format file fields to the ``Book`` model.

    For every format in ``Book.formats`` an ``EbookField`` named
    ``<format>_file`` is contributed to ``Book``.  Every format except
    'xml' additionally gets an indexed, non-editable ``<format>_file_etag``
    ``CharField``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # NOTE(review): presumably migrations need to reference the upload_to
        # callable by a module-level name, hence the per-format __name__ and
        # the registration in globals() -- confirm before changing.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')


# Runs at import time, so the fields above are added to Book as soon as this
# module is loaded.
add_file_fields()
1057
1058
1059 class BookPopularity(models.Model):
1060     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1061     count = models.IntegerField(default=0, db_index=True)