9de495029582732e2a29663488a8164d76752893
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Shared storage instance; passed as ``storage=`` to Book.cover.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath:
    """Callable ``upload_to`` that fills a path template with the instance's slug.

    ``path`` is a %-format template such as ``'book/cover/%s.jpg'``.
    """

    def __init__(self, path):
        self.path = path

    def __call__(self, instance, filename):
        # The uploaded file's own name is ignored; only the slug is used.
        slug = instance.slug
        return self.path % slug
44
45
# ``upload_to`` callables for the cover image fields of Book.  Each one
# places the file in its own directory and names it after the book's slug.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap a path template in an ``UploadToPath`` callable."""
    upload_to = UploadToPath(upload_path)
    return upload_to
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
60     title = models.CharField(_('title'), max_length=32767)
61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
62     sort_key_author = models.CharField(
63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
67     description = models.TextField(_('description'), blank=True)
68     abstract = models.TextField(_('abstract'), blank=True)
69     toc = models.TextField(_('toc'), blank=True)
70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
72     parent_number = models.IntegerField(_('parent number'), default=0)
73     extra_info = models.TextField(_('extra information'), default='{}')
74     gazeta_link = models.CharField(blank=True, max_length=240)
75     wiki_link = models.CharField(blank=True, max_length=240)
76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
77     recommended = models.BooleanField(_('recommended'), default=False)
78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
79     preview = models.BooleanField(_('preview'), default=False)
80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
81     preview_key = models.CharField(max_length=32, blank=True, null=True)
82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
84     # files generated during publication
85     cover = EbookField(
86         'cover', _('cover'),
87         null=True, blank=True,
88         upload_to=_cover_upload_to,
89         storage=bofh_storage, max_length=255)
90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
91     # Cleaner version of cover for thumbs
92     cover_clean = EbookField(
93         'cover_clean', _('clean cover'),
94         null=True, blank=True,
95         upload_to=_cover_clean_upload_to,
96         max_length=255
97     )
98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
99     cover_thumb = EbookField(
100         'cover_thumb', _('cover thumbnail'),
101         null=True, blank=True,
102         upload_to=_cover_thumb_upload_to,
103         max_length=255)
104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
105     cover_api_thumb = EbookField(
106         'cover_api_thumb', _('cover thumbnail for mobile app'),
107         null=True, blank=True,
108         upload_to=_cover_api_thumb_upload_to,
109         max_length=255)
110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
111     simple_cover = EbookField(
112         'simple_cover', _('cover for mobile app'),
113         null=True, blank=True,
114         upload_to=_simple_cover_upload_to,
115         max_length=255)
116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
117     cover_ebookpoint = EbookField(
118         'cover_ebookpoint', _('cover for Ebookpoint'),
119         null=True, blank=True,
120         upload_to=_cover_ebookpoint_upload_to,
121         max_length=255)
122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
123     ebook_formats = constants.EBOOK_FORMATS
124     formats = ebook_formats + ['html', 'xml']
125
126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
128
129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
130     has_audience = models.BooleanField(default=False)
131
132     objects = models.Manager()
133     tagged = managers.ModelTaggedItemManager(Tag)
134     tags = managers.TagDescriptor(Tag)
135     tag_relations = GenericRelation(Tag.intermediary_table_model)
136
137     html_built = django.dispatch.Signal()
138     published = django.dispatch.Signal()
139
140     SORT_KEY_SEP = '$'
141
    class AlreadyExists(Exception):
        """Raised by ``from_text_and_meta`` when the slug exists and ``overwrite`` is false."""
        pass
144
    class Meta:
        # Default ordering: by author sort key, then by title sort key
        # (both are recomputed in save()).
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
150
    def __str__(self):
        """Return the book's title."""
        return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
    def authors(self):
        """Return this book's tags in the 'author' category."""
        return self.tags.filter(category='author')
165
    def epochs(self):
        """Return this book's tags in the 'epoch' category."""
        return self.tags.filter(category='epoch')
168
    def genres(self):
        """Return this book's tags in the 'genre' category."""
        return self.tags.filter(category='genre')
171
    def kinds(self):
        """Return this book's tags in the 'kind' category."""
        return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
    def author_unicode(self):
        """Return ``cached_author``, which save() fills from ``tag_unicode('author')``."""
        return self.cached_author
187
    def kind_unicode(self):
        """Return the names of this book's 'kind' tags as one comma-separated string."""
        return self.tag_unicode('kind')
190
    def epoch_unicode(self):
        """Return the names of this book's 'epoch' tags as one comma-separated string."""
        return self.tag_unicode('epoch')
193
    def genre_unicode(self):
        """Return the names of this book's 'genre' tags as one comma-separated string."""
        return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
    @property
    def isbn_pdf(self):
        """Value stored under 'isbn_pdf' in ``extra_info``, or None when absent."""
        return self.get_extra_info_json().get('isbn_pdf')
220
    @property
    def isbn_epub(self):
        """Value stored under 'isbn_epub' in ``extra_info``, or None when absent."""
        return self.get_extra_info_json().get('isbn_epub')
224
    @property
    def isbn_mobi(self):
        """Value stored under 'isbn_mobi' in ``extra_info``, or None when absent."""
        return self.get_extra_info_json().get('isbn_mobi')
228
229     def is_accessible_to(self, user):
230         if not self.preview:
231             return True
232         Membership = apps.get_model('club', 'Membership')
233         if Membership.is_active_for(user):
234             return True
235         Funding = apps.get_model('funding', 'Funding')
236         if Funding.objects.filter(user=user, offer__book=self):
237             return True
238         return False
239
240     def save(self, force_insert=False, force_update=False, **kwargs):
241         from sortify import sortify
242
243         self.sort_key = sortify(self.title)[:120]
244         self.title = str(self.title)  # ???
245
246         try:
247             author = self.authors().first().sort_key
248         except AttributeError:
249             author = ''
250         self.sort_key_author = author
251
252         self.cached_author = self.tag_unicode('author')
253         self.has_audience = 'audience' in self.get_extra_info_json()
254
255         if self.preview and not self.preview_key:
256             self.preview_key = get_random_hash(self.slug)[:32]
257
258         ret = super(Book, self).save(force_insert, force_update, **kwargs)
259
260         return ret
261
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' view for this book's slug."""
        return reverse('book_detail', args=[self.slug])
264
    def gallery_path(self):
        """Return the result of ``catalogue.utils.gallery_path`` for this book's slug."""
        return gallery_path(self.slug)
267
    def gallery_url(self):
        """Return the result of ``catalogue.utils.gallery_url`` for this book's slug."""
        return gallery_url(self.slug)
270
271     def get_first_text(self):
272         if self.html_file:
273             return self
274         child = self.children.all().order_by('parent_number').first()
275         if child is not None:
276             return child.get_first_text()
277
278     def get_last_text(self):
279         if self.html_file:
280             return self
281         child = self.children.all().order_by('parent_number').last()
282         if child is not None:
283             return child.get_last_text()
284
285     def get_prev_text(self):
286         if not self.parent:
287             return None
288         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
289         if sibling is not None:
290             return sibling.get_last_text()
291
292         if self.parent.html_file:
293             return self.parent
294         
295         return self.parent.get_prev_text()
296
297     def get_next_text(self):
298         child = self.children.order_by('parent_number').first()
299         if child is not None:
300             return child.get_first_text()
301
302         if not self.parent:
303             return None
304         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
305         if sibling is not None:
306             return sibling.get_first_text()
307         return self.parent.get_next_text()
308
309     def get_child_audiobook(self):
310         BookMedia = apps.get_model('catalogue', 'BookMedia')
311         if not BookMedia.objects.filter(book__ancestor=self).exists():
312             return None
313         for child in self.children.all():
314             if child.has_mp3_file():
315                 return child
316             child_sub = child.get_child_audiobook()
317             if child_sub is not None:
318                 return child_sub
319
320     def get_siblings(self):
321         if not self.parent:
322             return []
323         return self.parent.children.all().order_by('parent_number')
324
    def get_children(self):
        """Return this book's children ordered by ``parent_number``."""
        return self.children.all().order_by('parent_number')
327     
    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
331
    def language_code(self):
        """Map ``language`` through ``constants.LANGUAGES_3TO2``.

        Codes missing from that mapping are returned unchanged.
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
334
    def language_name(self):
        """Look up ``language_code()`` in ``settings.LANGUAGES``; ``""`` if it is not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
337
    def is_foreign(self):
        """Return True when ``language_code()`` differs from ``settings.LANGUAGE_CODE``."""
        return self.language_code() != settings.LANGUAGE_CODE
340
341     def set_audio_length(self):
342         length = self.get_audio_length()
343         if length > 0:
344             self.audio_length = self.format_audio_length(length)
345             self.save()
346
347     @staticmethod
348     def format_audio_length(seconds):
349         """
350         >>> Book.format_audio_length(1)
351         '0:01'
352         >>> Book.format_audio_length(3661)
353         '1:01:01'
354         """
355         if seconds < 60*60:
356             minutes = seconds // 60
357             seconds = seconds % 60
358             return '%d:%02d' % (minutes, seconds)
359         else:
360             hours = seconds // 3600
361             minutes = seconds % 3600 // 60
362             seconds = seconds % 60
363             return '%d:%02d:%02d' % (hours, minutes, seconds)
364
365     def get_audio_length(self):
366         total = 0
367         for media in self.get_mp3() or ():
368             total += app_settings.GET_MP3_LENGTH(media.file.path)
369         return int(total)
370
371     def has_media(self, type_):
372         if type_ in Book.formats:
373             return bool(getattr(self, "%s_file" % type_))
374         else:
375             return self.media.filter(type=type_).exists()
376
    def has_audio(self):
        """Shortcut for ``has_media('mp3')``."""
        return self.has_media('mp3')
379
380     def get_media(self, type_):
381         if self.has_media(type_):
382             if type_ in Book.formats:
383                 return getattr(self, "%s_file" % type_)
384             else:
385                 return self.media.filter(type=type_)
386         else:
387             return None
388
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
391
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
394
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
397
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
400
401     def media_url(self, format_):
402         media = self.get_media(format_)
403         if media:
404             if self.preview:
405                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
406             else:
407                 return media.url
408         else:
409             return None
410
    def html_url(self):
        """Shortcut for ``media_url('html')``."""
        return self.media_url('html')
413
    def pdf_url(self):
        """Shortcut for ``media_url('pdf')``."""
        return self.media_url('pdf')
416
    def epub_url(self):
        """Shortcut for ``media_url('epub')``."""
        return self.media_url('epub')
419
    def mobi_url(self):
        """Shortcut for ``media_url('mobi')``."""
        return self.media_url('mobi')
422
    def txt_url(self):
        """Shortcut for ``media_url('txt')``."""
        return self.media_url('txt')
425
    def fb2_url(self):
        """Shortcut for ``media_url('fb2')``."""
        return self.media_url('fb2')
428
    def xml_url(self):
        """Shortcut for ``media_url('xml')``."""
        return self.media_url('xml')
431
    def has_description(self):
        """Return True when ``description`` is non-empty."""
        return len(self.description) > 0
    # Display metadata attached to the method; presumably read by the
    # Django admin change list -- confirm against the admin configuration.
    has_description.short_description = _('description')
    has_description.boolean = True
436
    def has_mp3_file(self):
        """Shortcut for ``has_media("mp3")``."""
        return self.has_media("mp3")
    # Display metadata attached to the method (column label, boolean flag).
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
441
    def has_ogg_file(self):
        """Shortcut for ``has_media("ogg")``."""
        return self.has_media("ogg")
    # Display metadata attached to the method (column label, boolean flag).
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
446
    def has_daisy_file(self):
        """Shortcut for ``has_media("daisy")``."""
        return self.has_media("daisy")
    # Display metadata attached to the method (column label, boolean flag).
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
451
    @property
    def media_daisy(self):
        """Property form of ``get_media('daisy')``."""
        return self.get_media('daisy')
455     
456     def get_audiobooks(self):
457         ogg_files = {}
458         for m in self.media.filter(type='ogg').order_by().iterator():
459             ogg_files[m.name] = m
460
461         audiobooks = []
462         projects = set()
463         total_duration = 0
464         for mp3 in self.media.filter(type='mp3').iterator():
465             # ogg files are always from the same project
466             meta = mp3.get_extra_info_json()
467             project = meta.get('project')
468             if not project:
469                 # temporary fallback
470                 project = 'CzytamySłuchając'
471
472             projects.add((project, meta.get('funded_by', '')))
473             total_duration += mp3.duration or 0
474
475             media = {'mp3': mp3}
476
477             ogg = ogg_files.get(mp3.name)
478             if ogg:
479                 media['ogg'] = ogg
480             audiobooks.append(media)
481
482         projects = sorted(projects)
483         total_duration = '%d:%02d' % (
484             total_duration // 60,
485             total_duration % 60
486         )
487         return audiobooks, projects, total_duration
488
489     def wldocument(self, parse_dublincore=True, inherit=True):
490         from catalogue.import_utils import ORMDocProvider
491         from librarian.parser import WLDocument
492
493         if inherit and self.parent:
494             meta_fallbacks = self.parent.cover_info()
495         else:
496             meta_fallbacks = None
497
498         return WLDocument.from_file(
499             self.xml_file.path,
500             provider=ORMDocProvider(self),
501             parse_dublincore=parse_dublincore,
502             meta_fallbacks=meta_fallbacks)
503
504     def wldocument2(self):
505         from catalogue.import_utils import ORMDocProvider
506         from librarian.document import WLDocument
507         doc = WLDocument(
508             self.xml_file.path,
509             provider=ORMDocProvider(self)
510         )
511         doc.meta.update(self.cover_info())
512         return doc
513
514
515     @staticmethod
516     def zip_format(format_):
517         def pretty_file_name(book):
518             return "%s/%s.%s" % (
519                 book.get_extra_info_json()['author'],
520                 book.slug,
521                 format_)
522
523         field_name = "%s_file" % format_
524         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
525         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
526         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
527
528     def zip_audiobooks(self, format_):
529         bm = BookMedia.objects.filter(book=self, type=format_)
530         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
531         licenses = set()
532         for m in bm:
533             license = constants.LICENSES.get(
534                 m.get_extra_info_json().get('license'), {}
535             ).get('locative')
536             if license:
537                 licenses.add(license)
538         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
539             'licenses': licenses,
540         })
541         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
542
543     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
544         if not self.findable:
545             return
546         if index is None:
547             from search.index import Index
548             index = Index()
549         try:
550             index.index_book(self, book_info)
551             if index_tags:
552                 index.index_tags()
553             if commit:
554                 index.index.commit()
555         except Exception as e:
556             index.index.rollback()
557             raise e
558
559     # will make problems in conjunction with paid previews
    def download_pictures(self, remote_gallery_url):
        """Refresh the local gallery with the illustrations referenced by the book's XML.

        Every existing entry in the gallery directory is removed first; then
        each ``ilustr`` element's ``src`` is fetched from
        ``remote_gallery_url`` into the gallery directory.
        """
        gallery_path = self.gallery_path()
        # delete previous files, so we don't include old files in ebooks
        if os.path.isdir(gallery_path):
            for filename in os.listdir(gallery_path):
                file_path = os.path.join(gallery_path, filename)
                # NOTE(review): os.unlink() fails on subdirectories; assumes
                # the gallery directory is flat -- confirm.
                os.unlink(file_path)
        ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
        if ilustr_elements:
            makedirs(gallery_path)
            for ilustr in ilustr_elements:
                ilustr_src = ilustr.get('src')
                # NOTE(review): ``src`` comes straight from the XML and is
                # joined to the path unchecked; assumes trusted documents.
                ilustr_path = os.path.join(gallery_path, ilustr_src)
                urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
574
575     def load_abstract(self):
576         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
577         if abstract is not None:
578             self.abstract = transform_abstrakt(abstract)
579         else:
580             self.abstract = ''
581
582     def load_toc(self):
583         self.toc = ''
584         if self.html_file:
585             parser = html.HTMLParser(encoding='utf-8')
586             tree = html.parse(self.html_file.path, parser=parser)
587             toc = tree.find('//div[@id="toc"]/ol')
588             if toc is None or not len(toc):
589                 return
590             html_link = reverse('book_text', args=[self.slug])
591             for a in toc.findall('.//a'):
592                 a.attrib['href'] = html_link + a.attrib['href']
593             self.toc = html.tostring(toc, encoding='unicode')
594             # div#toc
595             
596     @classmethod
597     def from_xml_file(cls, xml_file, **kwargs):
598         from django.core.files import File
599         from librarian import dcparser
600
601         # use librarian to parse meta-data
602         book_info = dcparser.parse(xml_file)
603
604         if not isinstance(xml_file, File):
605             xml_file = File(open(xml_file))
606
607         try:
608             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
609         finally:
610             xml_file.close()
611
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a Book from its XML source and parsed metadata.

        Stores the XML, fills the model fields and tags from ``book_info``,
        rewires parent/child links, then schedules builds of covers, HTML and
        ebook files, plus indexing and reference updates.

        Args:
            raw_file: file object with the XML; saved as ``xml_file``.
            book_info: parsed metadata.  This method reads ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                when present, ``parts``.
            overwrite: when false, an existing slug raises ``AlreadyExists``.
            dont_build: names to skip when building; merged with
                ``app_settings.DONT_BUILD``.
            search_index: when false, the indexing task is not scheduled.
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: when non-zero, a newly created book is marked as preview
                until today plus that many days.
            findable: stored on the book; indexing is skipped when false.

        Returns:
            The saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters outside ``a-z0-9-``.
            Book.AlreadyExists: the slug exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags with the metadata tags plus the shelves
        # ('set' tags) it had before the update.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Must be computed before the loop below re-parents the new children.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # The HTML build is scheduled unconditionally; dont_build is not consulted.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
731
732     def get_master(self):
733         master_tags = [
734             'opowiadanie',
735             'powiesc',
736             'dramat_wierszowany_l',
737             'dramat_wierszowany_lp',
738             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
739             'wywiad',
740         ]
741         from librarian.parser import WLDocument
742         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
743         root = wld.edoc.getroot()
744         for master in root.iter():
745             if master.tag in master_tags:
746                 return master
747     
748     def update_references(self):
749         from references.models import Entity, Reference
750         master = self.get_master()
751         if master is None:
752             master = []
753         found = set()
754         for i, sec in enumerate(master):
755             for ref in sec.findall('.//ref'):
756                 href = ref.attrib.get('href', '')
757                 if not href or href in found:
758                     continue
759                 found.add(href)
760                 entity, created = Entity.objects.get_or_create(
761                     uri=href
762                 )
763                 ref, created = Reference.objects.get_or_create(
764                     book=self,
765                     entity=entity
766                 )
767                 ref.first_section = 'sec%d' % (i + 1)
768                 entity.populate()
769                 entity.save()
770         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
771     
    @property
    def references(self):
        """This book's references (``reference_set``), with ``entity`` select_related."""
        return self.reference_set.all().select_related('entity')
775
776     @classmethod
777     @transaction.atomic
778     def repopulate_ancestors(cls):
779         """Fixes the ancestry cache."""
780         # TODO: table names
781         cursor = connection.cursor()
782         if connection.vendor == 'postgres':
783             cursor.execute("TRUNCATE catalogue_book_ancestor")
784             cursor.execute("""
785                 WITH RECURSIVE ancestry AS (
786                     SELECT book.id, book.parent_id
787                     FROM catalogue_book AS book
788                     WHERE book.parent_id IS NOT NULL
789                     UNION
790                     SELECT ancestor.id, book.parent_id
791                     FROM ancestry AS ancestor, catalogue_book AS book
792                     WHERE ancestor.parent_id = book.id
793                         AND book.parent_id IS NOT NULL
794                     )
795                 INSERT INTO catalogue_book_ancestor
796                     (from_book_id, to_book_id)
797                     SELECT id, parent_id
798                     FROM ancestry
799                     ORDER BY id;
800                 """)
801         else:
802             cursor.execute("DELETE FROM catalogue_book_ancestor")
803             for b in cls.objects.exclude(parent=None):
804                 parent = b.parent
805                 while parent is not None:
806                     b.ancestor.add(parent)
807                     parent = parent.parent
808
809     @property
810     def ancestors(self):
811         if self.parent:
812             for anc in self.parent.ancestors:
813                 yield anc
814             yield self.parent
815         else:
816             return []
817                     
818     def clear_cache(self):
819         clear_cached_renders(self.mini_box)
820         clear_cached_renders(self.mini_box_nolink)
821
822     def cover_info(self, inherit=True):
823         """Returns a dictionary to serve as fallback for BookInfo.
824
825         For now, the only thing inherited is the cover image.
826         """
827         need = False
828         info = {}
829         for field in ('cover_url', 'cover_by', 'cover_source'):
830             val = self.get_extra_info_json().get(field)
831             if val:
832                 info[field] = val
833             else:
834                 need = True
835         if inherit and need and self.parent is not None:
836             parent_info = self.parent.cover_info()
837             parent_info.update(info)
838             info = parent_info
839         return info
840
841     def related_themes(self):
842         return Tag.objects.usage_for_queryset(
843             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
844             counts=True).filter(category='theme').order_by('-count')
845
846     def parent_cover_changed(self):
847         """Called when parent book's cover image is changed."""
848         if not self.cover_info(inherit=False):
849             if 'cover' not in app_settings.DONT_BUILD:
850                 self.cover.build_delay()
851                 self.cover_clean.build_delay()
852                 self.cover_thumb.build_delay()
853                 self.cover_api_thumb.build_delay()
854                 self.simple_cover.build_delay()
855                 self.cover_ebookpoint.build_delay()
856             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
857                 if format_ not in app_settings.DONT_BUILD:
858                     getattr(self, '%s_file' % format_).build_delay()
859             for child in self.children.all():
860                 child.parent_cover_changed()
861
862     def other_versions(self):
863         """Find other versions (i.e. in other languages) of the book."""
864         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
865
866     def parents(self):
867         books = []
868         parent = self.parent
869         while parent is not None:
870             books.insert(0, parent)
871             parent = parent.parent
872         return books
873
874     def pretty_title(self, html_links=False):
875         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
876         books = self.parents() + [self]
877         names.extend([(b.title, b.get_absolute_url()) for b in books])
878
879         if html_links:
880             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
881         else:
882             names = [tag[0] for tag in names]
883         return ', '.join(names)
884
885     def publisher(self):
886         publisher = self.get_extra_info_json()['publisher']
887         if isinstance(publisher, str):
888             return publisher
889         elif isinstance(publisher, list):
890             return ', '.join(publisher)
891
892     @classmethod
893     def tagged_top_level(cls, tags):
894         """ Returns top-level books tagged with `tags`.
895
896         It only returns those books which don't have ancestors which are
897         also tagged with those tags.
898
899         """
900         objects = cls.tagged.with_all(tags)
901         return objects.filter(findable=True).exclude(ancestor__in=objects)
902
903     @classmethod
904     def book_list(cls, book_filter=None):
905         """Generates a hierarchical listing of all books.
906
907         Books are optionally filtered with a test function.
908
909         """
910
911         books_by_parent = {}
912         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
913         if book_filter:
914             books = books.filter(book_filter).distinct()
915
916             book_ids = set(b['pk'] for b in books.values("pk").iterator())
917             for book in books.iterator():
918                 parent = book.parent_id
919                 if parent not in book_ids:
920                     parent = None
921                 books_by_parent.setdefault(parent, []).append(book)
922         else:
923             for book in books.iterator():
924                 books_by_parent.setdefault(book.parent_id, []).append(book)
925
926         orphans = []
927         books_by_author = OrderedDict()
928         for tag in Tag.objects.filter(category='author').iterator():
929             books_by_author[tag] = []
930
931         for book in books_by_parent.get(None, ()):
932             authors = list(book.authors().only('pk'))
933             if authors:
934                 for author in authors:
935                     books_by_author[author].append(book)
936             else:
937                 orphans.append(book)
938
939         return books_by_author, orphans, books_by_parent
940
    # Maps an audience code from a book's extra info to a
    # (sort rank, Polish label) pair; looked up by audiences_pl().
    # Labels: "szkoła podstawowa" = primary school, "gimnazjum" = middle
    # school, "liceum" = high school.  Several codes share one label.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
951
952     def audiences_pl(self):
953         audiences = self.get_extra_info_json().get('audiences', [])
954         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
955         return [a[1] for a in audiences]
956
957     def stage_note(self):
958         stage = self.get_extra_info_json().get('stage')
959         if stage and stage < '0.4':
960             return (_('This work needs modernisation'),
961                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
962         else:
963             return None, None
964
965     def choose_fragments(self, number):
966         fragments = self.fragments.order_by()
967         fragments_count = fragments.count()
968         if not fragments_count and self.children.exists():
969             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
970             fragments_count = fragments.count()
971         if fragments_count:
972             if fragments_count > number:
973                 offset = randint(0, fragments_count - number)
974             else:
975                 offset = 0
976             return fragments[offset : offset + number]
977         elif self.parent:
978             return self.parent.choose_fragments(number)
979         else:
980             return []
981
982     def choose_fragment(self):
983         fragments = self.choose_fragments(1)
984         if fragments:
985             return fragments[0]
986         else:
987             return None
988         
989     def fragment_data(self):
990         fragment = self.choose_fragment()
991         if fragment:
992             return {
993                 'title': fragment.book.pretty_title(),
994                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
995             }
996         else:
997             return None
998
999     def update_popularity(self):
1000         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1001         try:
1002             pop = self.popularity
1003             pop.count = count
1004             pop.save()
1005         except BookPopularity.DoesNotExist:
1006             BookPopularity.objects.create(book=self, count=count)
1007
1008     def ridero_link(self):
1009         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1010
1011     def like(self, user):
1012         from social.utils import likes, get_set, set_sets
1013         if not likes(user, self):
1014             tag = get_set(user, '')
1015             set_sets(user, self, [tag])
1016
1017     def unlike(self, user):
1018         from social.utils import likes, set_sets
1019         if likes(user, self):
1020             set_sets(user, self, [])
1021
1022     def full_sort_key(self):
1023         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1024
1025     def cover_color(self):
1026         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1027
1028     @cached_render('catalogue/book_mini_box.html')
1029     def mini_box(self):
1030         return {
1031             'book': self
1032         }
1033
1034     @cached_render('catalogue/book_mini_box.html')
1035     def mini_box_nolink(self):
1036         return {
1037             'book': self,
1038             'no_link': True,
1039         }
1040
def add_file_fields():
    """Attach the per-format file fields to the Book model.

    For every format in ``Book.formats`` an ``EbookField`` named
    ``<format>_file`` is added to ``Book``.  Every format except ``xml``
    additionally gets an indexed, non-editable ``<format>_file_etag``
    ``CharField``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # The upload-to callable is given a per-format name and published
        # under that name in this module's globals (presumably so that
        # migrations can reference it by module path -- confirm).
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            # Companion etag column for the file field; the xml format has none.
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')


# Called at import time, so the fields are added as soon as this module loads.
add_file_fields()
1062
1063
1064 class BookPopularity(models.Model):
1065     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1066     count = models.IntegerField(default=0, db_index=True)