fix
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Storage instance passed as ``storage=`` to the ``Book.cover`` field below.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath(object):
    """Callable for ``upload_to`` that derives the file path from the slug.

    ``path`` is a %-format template with one placeholder, which is filled
    in with ``instance.slug``. The uploaded file's own name is ignored.
    """

    def __init__(self, path):
        self.path = path

    def __call__(self, instance, filename):
        template = self.path
        return template % instance.slug
44
45
# ``upload_to`` callables for the cover image fields of Book; each one
# fills the book's slug into its own path template.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap the ``upload_path`` template in an ``UploadToPath`` callable."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # Derived from the title in save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    # Set in save() to the sort_key of the first author tag ('' if none).
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set on import to the slug of the book this one is a variant of,
    # or to the book's own slug.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # Filled by load_abstract().
    abstract = models.TextField(_('abstract'), blank=True)
    # Filled by load_toc().
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned on import.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded dict; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Formatted duration string; written by set_audio_length().
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview books are only accessible to some users; see is_accessible_to().
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when ``preview`` is set and no key exists yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
    # files generated during publication
    # Each cover variant below is paired with an ``*_etag`` char field.
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    ebook_formats = constants.EBOOK_FORMATS
    # Formats kept in ``<format>_file`` attributes; see has_media()/get_media().
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Cache of all ancestors of a book; rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both fields below are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with ``instance=book``.
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
145     class Meta:
146         ordering = ('sort_key_author', 'sort_key')
147         verbose_name = _('book')
148         verbose_name_plural = _('books')
149         app_label = 'catalogue'
150
151     def __str__(self):
152         return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
163     def authors(self):
164         return self.tags.filter(category='author')
165
166     def epochs(self):
167         return self.tags.filter(category='epoch')
168
169     def genres(self):
170         return self.tags.filter(category='genre')
171
172     def kinds(self):
173         return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
185     def author_unicode(self):
186         return self.cached_author
187
188     def kind_unicode(self):
189         return self.tag_unicode('kind')
190
191     def epoch_unicode(self):
192         return self.tag_unicode('epoch')
193
194     def genre_unicode(self):
195         return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
217     @property
218     def isbn_pdf(self):
219         return self.get_extra_info_json().get('isbn_pdf')
220
221     @property
222     def isbn_epub(self):
223         return self.get_extra_info_json().get('isbn_epub')
224
225     @property
226     def isbn_mobi(self):
227         return self.get_extra_info_json().get('isbn_mobi')
228
229     def is_accessible_to(self, user):
230         if not self.preview:
231             return True
232         if not user.is_authenticated:
233             return False
234         Membership = apps.get_model('club', 'Membership')
235         if Membership.is_active_for(user):
236             return True
237         Funding = apps.get_model('funding', 'Funding')
238         if Funding.objects.filter(user=user, offer__book=self):
239             return True
240         return False
241
242     def save(self, force_insert=False, force_update=False, **kwargs):
243         from sortify import sortify
244
245         self.sort_key = sortify(self.title)[:120]
246         self.title = str(self.title)  # ???
247
248         try:
249             author = self.authors().first().sort_key
250         except AttributeError:
251             author = ''
252         self.sort_key_author = author
253
254         self.cached_author = self.tag_unicode('author')
255         self.has_audience = 'audience' in self.get_extra_info_json()
256
257         if self.preview and not self.preview_key:
258             self.preview_key = get_random_hash(self.slug)[:32]
259
260         ret = super(Book, self).save(force_insert, force_update, **kwargs)
261
262         return ret
263
264     def get_absolute_url(self):
265         return reverse('book_detail', args=[self.slug])
266
267     def gallery_path(self):
268         return gallery_path(self.slug)
269
270     def gallery_url(self):
271         return gallery_url(self.slug)
272
273     def get_first_text(self):
274         if self.html_file:
275             return self
276         child = self.children.all().order_by('parent_number').first()
277         if child is not None:
278             return child.get_first_text()
279
280     def get_last_text(self):
281         if self.html_file:
282             return self
283         child = self.children.all().order_by('parent_number').last()
284         if child is not None:
285             return child.get_last_text()
286
287     def get_prev_text(self):
288         if not self.parent:
289             return None
290         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
291         if sibling is not None:
292             return sibling.get_last_text()
293
294         if self.parent.html_file:
295             return self.parent
296         
297         return self.parent.get_prev_text()
298
299     def get_next_text(self):
300         child = self.children.order_by('parent_number').first()
301         if child is not None:
302             return child.get_first_text()
303
304         if not self.parent:
305             return None
306         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
307         if sibling is not None:
308             return sibling.get_first_text()
309         return self.parent.get_next_text()
310
311     def get_child_audiobook(self):
312         BookMedia = apps.get_model('catalogue', 'BookMedia')
313         if not BookMedia.objects.filter(book__ancestor=self).exists():
314             return None
315         for child in self.children.all():
316             if child.has_mp3_file():
317                 return child
318             child_sub = child.get_child_audiobook()
319             if child_sub is not None:
320                 return child_sub
321
322     def get_siblings(self):
323         if not self.parent:
324             return []
325         return self.parent.children.all().order_by('parent_number')
326
327     def get_children(self):
328         return self.children.all().order_by('parent_number')
329     
330     @property
331     def name(self):
332         return self.title
333
334     def language_code(self):
335         return constants.LANGUAGES_3TO2.get(self.language, self.language)
336
337     def language_name(self):
338         return dict(settings.LANGUAGES).get(self.language_code(), "")
339
340     def is_foreign(self):
341         return self.language_code() != settings.LANGUAGE_CODE
342
343     def set_audio_length(self):
344         length = self.get_audio_length()
345         if length > 0:
346             self.audio_length = self.format_audio_length(length)
347             self.save()
348
349     @staticmethod
350     def format_audio_length(seconds):
351         """
352         >>> Book.format_audio_length(1)
353         '0:01'
354         >>> Book.format_audio_length(3661)
355         '1:01:01'
356         """
357         if seconds < 60*60:
358             minutes = seconds // 60
359             seconds = seconds % 60
360             return '%d:%02d' % (minutes, seconds)
361         else:
362             hours = seconds // 3600
363             minutes = seconds % 3600 // 60
364             seconds = seconds % 60
365             return '%d:%02d:%02d' % (hours, minutes, seconds)
366
367     def get_audio_length(self):
368         total = 0
369         for media in self.get_mp3() or ():
370             total += app_settings.GET_MP3_LENGTH(media.file.path)
371         return int(total)
372
373     def has_media(self, type_):
374         if type_ in Book.formats:
375             return bool(getattr(self, "%s_file" % type_))
376         else:
377             return self.media.filter(type=type_).exists()
378
379     def has_audio(self):
380         return self.has_media('mp3')
381
382     def get_media(self, type_):
383         if self.has_media(type_):
384             if type_ in Book.formats:
385                 return getattr(self, "%s_file" % type_)
386             else:
387                 return self.media.filter(type=type_)
388         else:
389             return None
390
391     def get_mp3(self):
392         return self.get_media("mp3")
393
394     def get_odt(self):
395         return self.get_media("odt")
396
397     def get_ogg(self):
398         return self.get_media("ogg")
399
400     def get_daisy(self):
401         return self.get_media("daisy")
402
403     def media_url(self, format_):
404         media = self.get_media(format_)
405         if media:
406             if self.preview:
407                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
408             else:
409                 return media.url
410         else:
411             return None
412
413     def html_url(self):
414         return self.media_url('html')
415
416     def pdf_url(self):
417         return self.media_url('pdf')
418
419     def epub_url(self):
420         return self.media_url('epub')
421
422     def mobi_url(self):
423         return self.media_url('mobi')
424
425     def txt_url(self):
426         return self.media_url('txt')
427
428     def fb2_url(self):
429         return self.media_url('fb2')
430
431     def xml_url(self):
432         return self.media_url('xml')
433
434     def has_description(self):
435         return len(self.description) > 0
436     has_description.short_description = _('description')
437     has_description.boolean = True
438
439     def has_mp3_file(self):
440         return self.has_media("mp3")
441     has_mp3_file.short_description = 'MP3'
442     has_mp3_file.boolean = True
443
444     def has_ogg_file(self):
445         return self.has_media("ogg")
446     has_ogg_file.short_description = 'OGG'
447     has_ogg_file.boolean = True
448
449     def has_daisy_file(self):
450         return self.has_media("daisy")
451     has_daisy_file.short_description = 'DAISY'
452     has_daisy_file.boolean = True
453
454     @property
455     def media_daisy(self):
456         return self.get_media('daisy')
457     
458     def get_audiobooks(self):
459         ogg_files = {}
460         for m in self.media.filter(type='ogg').order_by().iterator():
461             ogg_files[m.name] = m
462
463         audiobooks = []
464         projects = set()
465         total_duration = 0
466         for mp3 in self.media.filter(type='mp3').iterator():
467             # ogg files are always from the same project
468             meta = mp3.get_extra_info_json()
469             project = meta.get('project')
470             if not project:
471                 # temporary fallback
472                 project = 'CzytamySłuchając'
473
474             projects.add((project, meta.get('funded_by', '')))
475             total_duration += mp3.duration or 0
476
477             media = {'mp3': mp3}
478
479             ogg = ogg_files.get(mp3.name)
480             if ogg:
481                 media['ogg'] = ogg
482             audiobooks.append(media)
483
484         projects = sorted(projects)
485         total_duration = '%d:%02d' % (
486             total_duration // 60,
487             total_duration % 60
488         )
489         return audiobooks, projects, total_duration
490
491     def wldocument(self, parse_dublincore=True, inherit=True):
492         from catalogue.import_utils import ORMDocProvider
493         from librarian.parser import WLDocument
494
495         if inherit and self.parent:
496             meta_fallbacks = self.parent.cover_info()
497         else:
498             meta_fallbacks = None
499
500         return WLDocument.from_file(
501             self.xml_file.path,
502             provider=ORMDocProvider(self),
503             parse_dublincore=parse_dublincore,
504             meta_fallbacks=meta_fallbacks)
505
506     def wldocument2(self):
507         from catalogue.import_utils import ORMDocProvider
508         from librarian.document import WLDocument
509         doc = WLDocument(
510             self.xml_file.path,
511             provider=ORMDocProvider(self)
512         )
513         doc.meta.update(self.cover_info())
514         return doc
515
516
517     @staticmethod
518     def zip_format(format_):
519         def pretty_file_name(book):
520             return "%s/%s.%s" % (
521                 book.get_extra_info_json()['author'],
522                 book.slug,
523                 format_)
524
525         field_name = "%s_file" % format_
526         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
527         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
528         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
529
530     def zip_audiobooks(self, format_):
531         bm = BookMedia.objects.filter(book=self, type=format_)
532         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
533         licenses = set()
534         for m in bm:
535             license = constants.LICENSES.get(
536                 m.get_extra_info_json().get('license'), {}
537             ).get('locative')
538             if license:
539                 licenses.add(license)
540         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
541             'licenses': licenses,
542         })
543         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
544
545     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
546         if not self.findable:
547             return
548         if index is None:
549             from search.index import Index
550             index = Index()
551         try:
552             index.index_book(self, book_info)
553             if index_tags:
554                 index.index_tags()
555             if commit:
556                 index.index.commit()
557         except Exception as e:
558             index.index.rollback()
559             raise e
560
    # NOTE: this will cause problems in conjunction with paid previews
562     def download_pictures(self, remote_gallery_url):
563         gallery_path = self.gallery_path()
564         # delete previous files, so we don't include old files in ebooks
565         if os.path.isdir(gallery_path):
566             for filename in os.listdir(gallery_path):
567                 file_path = os.path.join(gallery_path, filename)
568                 os.unlink(file_path)
569         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
570         if ilustr_elements:
571             makedirs(gallery_path)
572             for ilustr in ilustr_elements:
573                 ilustr_src = ilustr.get('src')
574                 ilustr_path = os.path.join(gallery_path, ilustr_src)
575                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
576
577     def load_abstract(self):
578         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
579         if abstract is not None:
580             self.abstract = transform_abstrakt(abstract)
581         else:
582             self.abstract = ''
583
584     def load_toc(self):
585         self.toc = ''
586         if self.html_file:
587             parser = html.HTMLParser(encoding='utf-8')
588             tree = html.parse(self.html_file.path, parser=parser)
589             toc = tree.find('//div[@id="toc"]/ol')
590             if toc is None or not len(toc):
591                 return
592             html_link = reverse('book_text', args=[self.slug])
593             for a in toc.findall('.//a'):
594                 a.attrib['href'] = html_link + a.attrib['href']
595             self.toc = html.tostring(toc, encoding='unicode')
596             # div#toc
597             
598     @classmethod
599     def from_xml_file(cls, xml_file, **kwargs):
600         from django.core.files import File
601         from librarian import dcparser
602
603         # use librarian to parse meta-data
604         book_info = dcparser.parse(xml_file)
605
606         if not isinstance(xml_file, File):
607             xml_file = File(open(xml_file))
608
609         try:
610             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
611         finally:
612             xml_file.close()
613
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: formats to skip when building; merged with
                ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed on to the indexing task.
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview lasting that many days from today.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Shelf ('set') tags collected above are kept alongside the new tags.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
733
734     def get_master(self):
735         master_tags = [
736             'opowiadanie',
737             'powiesc',
738             'dramat_wierszowany_l',
739             'dramat_wierszowany_lp',
740             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
741             'wywiad',
742         ]
743         from librarian.parser import WLDocument
744         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
745         root = wld.edoc.getroot()
746         for master in root.iter():
747             if master.tag in master_tags:
748                 return master
749     
750     def update_references(self):
751         from references.models import Entity, Reference
752         master = self.get_master()
753         if master is None:
754             master = []
755         found = set()
756         for i, sec in enumerate(master):
757             for ref in sec.findall('.//ref'):
758                 href = ref.attrib.get('href', '')
759                 if not href or href in found:
760                     continue
761                 found.add(href)
762                 entity, created = Entity.objects.get_or_create(
763                     uri=href
764                 )
765                 ref, created = Reference.objects.get_or_create(
766                     book=self,
767                     entity=entity
768                 )
769                 ref.first_section = 'sec%d' % (i + 1)
770                 entity.populate()
771                 entity.save()
772         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
773     
774     @property
775     def references(self):
776         return self.reference_set.all().select_related('entity')
777
778     @classmethod
779     @transaction.atomic
780     def repopulate_ancestors(cls):
781         """Fixes the ancestry cache."""
782         # TODO: table names
783         cursor = connection.cursor()
784         if connection.vendor == 'postgres':
785             cursor.execute("TRUNCATE catalogue_book_ancestor")
786             cursor.execute("""
787                 WITH RECURSIVE ancestry AS (
788                     SELECT book.id, book.parent_id
789                     FROM catalogue_book AS book
790                     WHERE book.parent_id IS NOT NULL
791                     UNION
792                     SELECT ancestor.id, book.parent_id
793                     FROM ancestry AS ancestor, catalogue_book AS book
794                     WHERE ancestor.parent_id = book.id
795                         AND book.parent_id IS NOT NULL
796                     )
797                 INSERT INTO catalogue_book_ancestor
798                     (from_book_id, to_book_id)
799                     SELECT id, parent_id
800                     FROM ancestry
801                     ORDER BY id;
802                 """)
803         else:
804             cursor.execute("DELETE FROM catalogue_book_ancestor")
805             for b in cls.objects.exclude(parent=None):
806                 parent = b.parent
807                 while parent is not None:
808                     b.ancestor.add(parent)
809                     parent = parent.parent
810
811     @property
812     def ancestors(self):
813         if self.parent:
814             for anc in self.parent.ancestors:
815                 yield anc
816             yield self.parent
817         else:
818             return []
819                     
820     def clear_cache(self):
821         clear_cached_renders(self.mini_box)
822         clear_cached_renders(self.mini_box_nolink)
823
824     def cover_info(self, inherit=True):
825         """Returns a dictionary to serve as fallback for BookInfo.
826
827         For now, the only thing inherited is the cover image.
828         """
829         need = False
830         info = {}
831         for field in ('cover_url', 'cover_by', 'cover_source'):
832             val = self.get_extra_info_json().get(field)
833             if val:
834                 info[field] = val
835             else:
836                 need = True
837         if inherit and need and self.parent is not None:
838             parent_info = self.parent.cover_info()
839             parent_info.update(info)
840             info = parent_info
841         return info
842
843     def related_themes(self):
844         return Tag.objects.usage_for_queryset(
845             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
846             counts=True).filter(category='theme').order_by('-count')
847
848     def parent_cover_changed(self):
849         """Called when parent book's cover image is changed."""
850         if not self.cover_info(inherit=False):
851             if 'cover' not in app_settings.DONT_BUILD:
852                 self.cover.build_delay()
853                 self.cover_clean.build_delay()
854                 self.cover_thumb.build_delay()
855                 self.cover_api_thumb.build_delay()
856                 self.simple_cover.build_delay()
857                 self.cover_ebookpoint.build_delay()
858             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
859                 if format_ not in app_settings.DONT_BUILD:
860                     getattr(self, '%s_file' % format_).build_delay()
861             for child in self.children.all():
862                 child.parent_cover_changed()
863
864     def other_versions(self):
865         """Find other versions (i.e. in other languages) of the book."""
866         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
867
868     def parents(self):
869         books = []
870         parent = self.parent
871         while parent is not None:
872             books.insert(0, parent)
873             parent = parent.parent
874         return books
875
876     def pretty_title(self, html_links=False):
877         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
878         books = self.parents() + [self]
879         names.extend([(b.title, b.get_absolute_url()) for b in books])
880
881         if html_links:
882             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
883         else:
884             names = [tag[0] for tag in names]
885         return ', '.join(names)
886
887     def publisher(self):
888         publisher = self.get_extra_info_json()['publisher']
889         if isinstance(publisher, str):
890             return publisher
891         elif isinstance(publisher, list):
892             return ', '.join(publisher)
893
894     @classmethod
895     def tagged_top_level(cls, tags):
896         """ Returns top-level books tagged with `tags`.
897
898         It only returns those books which don't have ancestors which are
899         also tagged with those tags.
900
901         """
902         objects = cls.tagged.with_all(tags)
903         return objects.filter(findable=True).exclude(ancestor__in=objects)
904
905     @classmethod
906     def book_list(cls, book_filter=None):
907         """Generates a hierarchical listing of all books.
908
909         Books are optionally filtered with a test function.
910
911         """
912
913         books_by_parent = {}
914         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
915         if book_filter:
916             books = books.filter(book_filter).distinct()
917
918             book_ids = set(b['pk'] for b in books.values("pk").iterator())
919             for book in books.iterator():
920                 parent = book.parent_id
921                 if parent not in book_ids:
922                     parent = None
923                 books_by_parent.setdefault(parent, []).append(book)
924         else:
925             for book in books.iterator():
926                 books_by_parent.setdefault(book.parent_id, []).append(book)
927
928         orphans = []
929         books_by_author = OrderedDict()
930         for tag in Tag.objects.filter(category='author').iterator():
931             books_by_author[tag] = []
932
933         for book in books_by_parent.get(None, ()):
934             authors = list(book.authors().only('pk'))
935             if authors:
936                 for author in authors:
937                     books_by_author[author].append(book)
938             else:
939                 orphans.append(book)
940
941         return books_by_author, orphans, books_by_parent
942
    # Maps an audience code from the book's extra info to a pair of
    # (sort rank, Polish display label). Several codes share one label;
    # audiences_pl() sorts by these pairs and returns only the labels.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
953
954     def audiences_pl(self):
955         audiences = self.get_extra_info_json().get('audiences', [])
956         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
957         return [a[1] for a in audiences]
958
959     def stage_note(self):
960         stage = self.get_extra_info_json().get('stage')
961         if stage and stage < '0.4':
962             return (_('This work needs modernisation'),
963                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
964         else:
965             return None, None
966
967     def choose_fragments(self, number):
968         fragments = self.fragments.order_by()
969         fragments_count = fragments.count()
970         if not fragments_count and self.children.exists():
971             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
972             fragments_count = fragments.count()
973         if fragments_count:
974             if fragments_count > number:
975                 offset = randint(0, fragments_count - number)
976             else:
977                 offset = 0
978             return fragments[offset : offset + number]
979         elif self.parent:
980             return self.parent.choose_fragments(number)
981         else:
982             return []
983
984     def choose_fragment(self):
985         fragments = self.choose_fragments(1)
986         if fragments:
987             return fragments[0]
988         else:
989             return None
990         
991     def fragment_data(self):
992         fragment = self.choose_fragment()
993         if fragment:
994             return {
995                 'title': fragment.book.pretty_title(),
996                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
997             }
998         else:
999             return None
1000
1001     def update_popularity(self):
1002         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1003         try:
1004             pop = self.popularity
1005             pop.count = count
1006             pop.save()
1007         except BookPopularity.DoesNotExist:
1008             BookPopularity.objects.create(book=self, count=count)
1009
1010     def ridero_link(self):
1011         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1012
1013     def like(self, user):
1014         from social.utils import likes, get_set, set_sets
1015         if not likes(user, self):
1016             tag = get_set(user, '')
1017             set_sets(user, self, [tag])
1018
1019     def unlike(self, user):
1020         from social.utils import likes, set_sets
1021         if likes(user, self):
1022             set_sets(user, self, [])
1023
1024     def full_sort_key(self):
1025         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1026
1027     def cover_color(self):
1028         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1029
1030     @cached_render('catalogue/book_mini_box.html')
1031     def mini_box(self):
1032         return {
1033             'book': self
1034         }
1035
1036     @cached_render('catalogue/book_mini_box.html')
1037     def mini_box_nolink(self):
1038         return {
1039             'book': self,
1040             'no_link': True,
1041         }
1042
def add_file_fields():
    """Add a ``<format>_file`` ebook field to Book for every format in Book.formats.

    Every format except 'xml' also gets a ``<format>_file_etag`` char field.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_

        # This weird globals() assignment makes Django migrations comfortable.
        upload_to_name = '_%s_upload_to' % format_
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_to_name
        globals()[upload_to_name] = _upload_to

        ebook_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        ebook_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')


add_file_fields()
1064
1065
1066 class BookPopularity(models.Model):
1067     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1068     count = models.IntegerField(default=0, db_index=True)