Housekeeping.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
# Storage instance passed as ``storage=`` to the ``Book.cover`` field below.
bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath(object):
    """Callable ``upload_to`` target that derives the path from the slug.

    ``path`` is a ``%``-style template with one placeholder. When called,
    the placeholder is filled with ``instance.slug``; the uploaded
    ``filename`` argument is ignored.
    """

    def __init__(self, path):
        self.path = path

    def __call__(self, instance, filename):
        template = self.path
        return template % instance.slug
44
45
# Upload-path callables for the cover fields of ``Book``; each template is
# filled with the book's slug (see ``UploadToPath.__call__``).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Wrap a path template in an ``UploadToPath`` callable."""
    uploader = UploadToPath(upload_path)
    return uploader
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
    # Basic descriptive data.
    title = models.CharField(_('title'), max_length=32767)
    # Both sort keys are recomputed in save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set on import to the slug of the book this one is a variant of,
    # or to the book's own slug (see from_text_and_meta()).
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # Filled by load_abstract() and load_toc() respectively.
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned on import.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview settings; preview_key is generated in save() when preview is
    # set and no key exists yet.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
    # files generated during publication
    # Each cover variant below is paired with an ``*_etag`` column.
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Cleaner version of cover for thumbs
    cover_clean = EbookField(
        'cover_clean', _('clean cover'),
        null=True, blank=True,
        upload_to=_cover_clean_upload_to,
        max_length=255
    )
    cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_api_thumb = EbookField(
        'cover_api_thumb', _('cover thumbnail for mobile app'),
        null=True, blank=True,
        upload_to=_cover_api_thumb_upload_to,
        max_length=255)
    cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    simple_cover = EbookField(
        'simple_cover', _('cover for mobile app'),
        null=True, blank=True,
        upload_to=_simple_cover_upload_to,
        max_length=255)
    simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    cover_ebookpoint = EbookField(
        'cover_ebookpoint', _('cover for Ebookpoint'),
        null=True, blank=True,
        upload_to=_cover_ebookpoint_upload_to,
        max_length=255)
    cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
    # Formats looked up as ``<format>_file`` attributes by has_media() and
    # get_media(); any other media type is looked up in ``self.media``.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']
125
    # Direct parent in the book hierarchy; children are ordered by parent_number.
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Cache of all books reachable through the parent chain;
    # rebuilt by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
145     class Meta:
146         ordering = ('sort_key_author', 'sort_key')
147         verbose_name = _('book')
148         verbose_name_plural = _('books')
149         app_label = 'catalogue'
150
151     def __str__(self):
152         return self.title
153
154     def get_extra_info_json(self):
155         return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
163     def authors(self):
164         return self.tags.filter(category='author')
165
166     def epochs(self):
167         return self.tags.filter(category='epoch')
168
169     def genres(self):
170         return self.tags.filter(category='genre')
171
172     def kinds(self):
173         return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
182     def tags_by_category(self):
183         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
185     def author_unicode(self):
186         return self.cached_author
187
188     def kind_unicode(self):
189         return self.tag_unicode('kind')
190
191     def epoch_unicode(self):
192         return self.tag_unicode('epoch')
193
194     def genre_unicode(self):
195         return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
217     @property
218     def isbn_pdf(self):
219         return self.get_extra_info_json().get('isbn_pdf')
220
221     @property
222     def isbn_epub(self):
223         return self.get_extra_info_json().get('isbn_epub')
224
225     @property
226     def isbn_mobi(self):
227         return self.get_extra_info_json().get('isbn_mobi')
228
229
230     def save(self, force_insert=False, force_update=False, **kwargs):
231         from sortify import sortify
232
233         self.sort_key = sortify(self.title)[:120]
234         self.title = str(self.title)  # ???
235
236         try:
237             author = self.authors().first().sort_key
238         except AttributeError:
239             author = ''
240         self.sort_key_author = author
241
242         self.cached_author = self.tag_unicode('author')
243         self.has_audience = 'audience' in self.get_extra_info_json()
244
245         if self.preview and not self.preview_key:
246             self.preview_key = get_random_hash(self.slug)[:32]
247
248         ret = super(Book, self).save(force_insert, force_update, **kwargs)
249
250         return ret
251
252     def get_absolute_url(self):
253         return reverse('book_detail', args=[self.slug])
254
255     def gallery_path(self):
256         return gallery_path(self.slug)
257
258     def gallery_url(self):
259         return gallery_url(self.slug)
260
261     def get_first_text(self):
262         if self.html_file:
263             return self
264         child = self.children.all().order_by('parent_number').first()
265         if child is not None:
266             return child.get_first_text()
267
268     def get_last_text(self):
269         if self.html_file:
270             return self
271         child = self.children.all().order_by('parent_number').last()
272         if child is not None:
273             return child.get_last_text()
274
275     def get_prev_text(self):
276         if not self.parent:
277             return None
278         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
279         if sibling is not None:
280             return sibling.get_last_text()
281
282         if self.parent.html_file:
283             return self.parent
284         
285         return self.parent.get_prev_text()
286
287     def get_next_text(self):
288         child = self.children.order_by('parent_number').first()
289         if child is not None:
290             return child.get_first_text()
291
292         if not self.parent:
293             return None
294         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
295         if sibling is not None:
296             return sibling.get_first_text()
297         return self.parent.get_next_text()
298
299     def get_child_audiobook(self):
300         BookMedia = apps.get_model('catalogue', 'BookMedia')
301         if not BookMedia.objects.filter(book__ancestor=self).exists():
302             return None
303         for child in self.children.all():
304             if child.has_mp3_file():
305                 return child
306             child_sub = child.get_child_audiobook()
307             if child_sub is not None:
308                 return child_sub
309
310     def get_siblings(self):
311         if not self.parent:
312             return []
313         return self.parent.children.all().order_by('parent_number')
314
315     def get_children(self):
316         return self.children.all().order_by('parent_number')
317     
318     @property
319     def name(self):
320         return self.title
321
322     def language_code(self):
323         return constants.LANGUAGES_3TO2.get(self.language, self.language)
324
325     def language_name(self):
326         return dict(settings.LANGUAGES).get(self.language_code(), "")
327
328     def is_foreign(self):
329         return self.language_code() != settings.LANGUAGE_CODE
330
331     def set_audio_length(self):
332         length = self.get_audio_length()
333         if length > 0:
334             self.audio_length = self.format_audio_length(length)
335             self.save()
336
337     @staticmethod
338     def format_audio_length(seconds):
339         """
340         >>> Book.format_audio_length(1)
341         '0:01'
342         >>> Book.format_audio_length(3661)
343         '1:01:01'
344         """
345         if seconds < 60*60:
346             minutes = seconds // 60
347             seconds = seconds % 60
348             return '%d:%02d' % (minutes, seconds)
349         else:
350             hours = seconds // 3600
351             minutes = seconds % 3600 // 60
352             seconds = seconds % 60
353             return '%d:%02d:%02d' % (hours, minutes, seconds)
354
355     def get_audio_length(self):
356         total = 0
357         for media in self.get_mp3() or ():
358             total += app_settings.GET_MP3_LENGTH(media.file.path)
359         return int(total)
360
361     def has_media(self, type_):
362         if type_ in Book.formats:
363             return bool(getattr(self, "%s_file" % type_))
364         else:
365             return self.media.filter(type=type_).exists()
366
367     def has_audio(self):
368         return self.has_media('mp3')
369
370     def get_media(self, type_):
371         if self.has_media(type_):
372             if type_ in Book.formats:
373                 return getattr(self, "%s_file" % type_)
374             else:
375                 return self.media.filter(type=type_)
376         else:
377             return None
378
379     def get_mp3(self):
380         return self.get_media("mp3")
381
382     def get_odt(self):
383         return self.get_media("odt")
384
385     def get_ogg(self):
386         return self.get_media("ogg")
387
388     def get_daisy(self):
389         return self.get_media("daisy")
390
391     def media_url(self, format_):
392         media = self.get_media(format_)
393         if media:
394             if self.preview:
395                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
396             else:
397                 return media.url
398         else:
399             return None
400
401     def html_url(self):
402         return self.media_url('html')
403
404     def pdf_url(self):
405         return self.media_url('pdf')
406
407     def epub_url(self):
408         return self.media_url('epub')
409
410     def mobi_url(self):
411         return self.media_url('mobi')
412
413     def txt_url(self):
414         return self.media_url('txt')
415
416     def fb2_url(self):
417         return self.media_url('fb2')
418
419     def xml_url(self):
420         return self.media_url('xml')
421
422     def has_description(self):
423         return len(self.description) > 0
424     has_description.short_description = _('description')
425     has_description.boolean = True
426
427     def has_mp3_file(self):
428         return self.has_media("mp3")
429     has_mp3_file.short_description = 'MP3'
430     has_mp3_file.boolean = True
431
432     def has_ogg_file(self):
433         return self.has_media("ogg")
434     has_ogg_file.short_description = 'OGG'
435     has_ogg_file.boolean = True
436
437     def has_daisy_file(self):
438         return self.has_media("daisy")
439     has_daisy_file.short_description = 'DAISY'
440     has_daisy_file.boolean = True
441
442     @property
443     def media_daisy(self):
444         return self.get_media('daisy')
445     
446     def get_audiobooks(self):
447         ogg_files = {}
448         for m in self.media.filter(type='ogg').order_by().iterator():
449             ogg_files[m.name] = m
450
451         audiobooks = []
452         projects = set()
453         total_duration = 0
454         for mp3 in self.media.filter(type='mp3').iterator():
455             # ogg files are always from the same project
456             meta = mp3.get_extra_info_json()
457             project = meta.get('project')
458             if not project:
459                 # temporary fallback
460                 project = 'CzytamySłuchając'
461
462             projects.add((project, meta.get('funded_by', '')))
463             total_duration += mp3.duration or 0
464
465             media = {'mp3': mp3}
466
467             ogg = ogg_files.get(mp3.name)
468             if ogg:
469                 media['ogg'] = ogg
470             audiobooks.append(media)
471
472         projects = sorted(projects)
473         total_duration = '%d:%02d' % (
474             total_duration // 60,
475             total_duration % 60
476         )
477         return audiobooks, projects, total_duration
478
479     def wldocument(self, parse_dublincore=True, inherit=True):
480         from catalogue.import_utils import ORMDocProvider
481         from librarian.parser import WLDocument
482
483         if inherit and self.parent:
484             meta_fallbacks = self.parent.cover_info()
485         else:
486             meta_fallbacks = None
487
488         return WLDocument.from_file(
489             self.xml_file.path,
490             provider=ORMDocProvider(self),
491             parse_dublincore=parse_dublincore,
492             meta_fallbacks=meta_fallbacks)
493
494     def wldocument2(self):
495         from catalogue.import_utils import ORMDocProvider
496         from librarian.document import WLDocument
497         doc = WLDocument(
498             self.xml_file.path,
499             provider=ORMDocProvider(self)
500         )
501         doc.meta.update(self.cover_info())
502         return doc
503
504
505     @staticmethod
506     def zip_format(format_):
507         def pretty_file_name(book):
508             return "%s/%s.%s" % (
509                 book.get_extra_info_json()['author'],
510                 book.slug,
511                 format_)
512
513         field_name = "%s_file" % format_
514         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
515         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
516         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
517
518     def zip_audiobooks(self, format_):
519         bm = BookMedia.objects.filter(book=self, type=format_)
520         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
521         licenses = set()
522         for m in bm:
523             license = constants.LICENSES.get(
524                 m.get_extra_info_json().get('license'), {}
525             ).get('locative')
526             if license:
527                 licenses.add(license)
528         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
529             'licenses': licenses,
530         })
531         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
532
533     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
534         if not self.findable:
535             return
536         if index is None:
537             from search.index import Index
538             index = Index()
539         try:
540             index.index_book(self, book_info)
541             if index_tags:
542                 index.index_tags()
543             if commit:
544                 index.index.commit()
545         except Exception as e:
546             index.index.rollback()
547             raise e
548
549     # will make problems in conjunction with paid previews
550     def download_pictures(self, remote_gallery_url):
551         gallery_path = self.gallery_path()
552         # delete previous files, so we don't include old files in ebooks
553         if os.path.isdir(gallery_path):
554             for filename in os.listdir(gallery_path):
555                 file_path = os.path.join(gallery_path, filename)
556                 os.unlink(file_path)
557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
558         if ilustr_elements:
559             makedirs(gallery_path)
560             for ilustr in ilustr_elements:
561                 ilustr_src = ilustr.get('src')
562                 ilustr_path = os.path.join(gallery_path, ilustr_src)
563                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
564
565     def load_abstract(self):
566         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
567         if abstract is not None:
568             self.abstract = transform_abstrakt(abstract)
569         else:
570             self.abstract = ''
571
572     def load_toc(self):
573         self.toc = ''
574         if self.html_file:
575             parser = html.HTMLParser(encoding='utf-8')
576             tree = html.parse(self.html_file.path, parser=parser)
577             toc = tree.find('//div[@id="toc"]/ol')
578             if toc is None or not len(toc):
579                 return
580             html_link = reverse('book_text', args=[self.slug])
581             for a in toc.findall('.//a'):
582                 a.attrib['href'] = html_link + a.attrib['href']
583             self.toc = html.tostring(toc, encoding='unicode')
584             # div#toc
585             
586     @classmethod
587     def from_xml_file(cls, xml_file, **kwargs):
588         from django.core.files import File
589         from librarian import dcparser
590
591         # use librarian to parse meta-data
592         book_info = dcparser.parse(xml_file)
593
594         if not isinstance(xml_file, File):
595             xml_file = File(open(xml_file))
596
597         try:
598             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
599         finally:
600             xml_file.close()
601
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Saves the XML file, copies metadata into the model, assigns tags,
        re-links child books, rebuilds the ancestor cache and schedules
        cover/HTML/ebook builds and search indexing. Finally sends the
        ``published`` signal and returns the book.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; its ``url.slug`` identifies the book.
            overwrite: allow updating a book that already exists.
            dont_build: names of builds to skip ('cover' or format names);
                combined with ``app_settings.DONT_BUILD``.
            search_index: whether to schedule search indexing.
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if given, illustrations are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored on the book; also gates search indexing.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered to detect a cover change after the new metadata is stored.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used by this book is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set, keeping the shelves ('set' tags) saved above.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in listing order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are built only for books
        # that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Let affected children rebuild anything that depends on the parent's cover.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
721
722     def get_master(self):
723         master_tags = [
724             'opowiadanie',
725             'powiesc',
726             'dramat_wierszowany_l',
727             'dramat_wierszowany_lp',
728             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
729             'wywiad',
730         ]
731         from librarian.parser import WLDocument
732         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
733         root = wld.edoc.getroot()
734         for master in root.iter():
735             if master.tag in master_tags:
736                 return master
737     
738     def update_references(self):
739         from references.models import Entity, Reference
740         master = self.get_master()
741         if master is None:
742             master = []
743         found = set()
744         for i, sec in enumerate(master):
745             for ref in sec.findall('.//ref'):
746                 href = ref.attrib.get('href', '')
747                 if not href or href in found:
748                     continue
749                 found.add(href)
750                 entity, created = Entity.objects.get_or_create(
751                     uri=href
752                 )
753                 ref, created = Reference.objects.get_or_create(
754                     book=self,
755                     entity=entity
756                 )
757                 ref.first_section = 'sec%d' % (i + 1)
758                 entity.populate()
759                 entity.save()
760         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
761     
762     @property
763     def references(self):
764         return self.reference_set.all().select_related('entity')
765
766     @classmethod
767     @transaction.atomic
768     def repopulate_ancestors(cls):
769         """Fixes the ancestry cache."""
770         # TODO: table names
771         cursor = connection.cursor()
772         if connection.vendor == 'postgres':
773             cursor.execute("TRUNCATE catalogue_book_ancestor")
774             cursor.execute("""
775                 WITH RECURSIVE ancestry AS (
776                     SELECT book.id, book.parent_id
777                     FROM catalogue_book AS book
778                     WHERE book.parent_id IS NOT NULL
779                     UNION
780                     SELECT ancestor.id, book.parent_id
781                     FROM ancestry AS ancestor, catalogue_book AS book
782                     WHERE ancestor.parent_id = book.id
783                         AND book.parent_id IS NOT NULL
784                     )
785                 INSERT INTO catalogue_book_ancestor
786                     (from_book_id, to_book_id)
787                     SELECT id, parent_id
788                     FROM ancestry
789                     ORDER BY id;
790                 """)
791         else:
792             cursor.execute("DELETE FROM catalogue_book_ancestor")
793             for b in cls.objects.exclude(parent=None):
794                 parent = b.parent
795                 while parent is not None:
796                     b.ancestor.add(parent)
797                     parent = parent.parent
798
799     @property
800     def ancestors(self):
801         if self.parent:
802             for anc in self.parent.ancestors:
803                 yield anc
804             yield self.parent
805         else:
806             return []
807                     
808     def clear_cache(self):
809         clear_cached_renders(self.mini_box)
810         clear_cached_renders(self.mini_box_nolink)
811
812     def cover_info(self, inherit=True):
813         """Returns a dictionary to serve as fallback for BookInfo.
814
815         For now, the only thing inherited is the cover image.
816         """
817         need = False
818         info = {}
819         for field in ('cover_url', 'cover_by', 'cover_source'):
820             val = self.get_extra_info_json().get(field)
821             if val:
822                 info[field] = val
823             else:
824                 need = True
825         if inherit and need and self.parent is not None:
826             parent_info = self.parent.cover_info()
827             parent_info.update(info)
828             info = parent_info
829         return info
830
831     def related_themes(self):
832         return Tag.objects.usage_for_queryset(
833             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
834             counts=True).filter(category='theme')
835
836     def parent_cover_changed(self):
837         """Called when parent book's cover image is changed."""
838         if not self.cover_info(inherit=False):
839             if 'cover' not in app_settings.DONT_BUILD:
840                 self.cover.build_delay()
841                 self.cover_clean.build_delay()
842                 self.cover_thumb.build_delay()
843                 self.cover_api_thumb.build_delay()
844                 self.simple_cover.build_delay()
845                 self.cover_ebookpoint.build_delay()
846             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
847                 if format_ not in app_settings.DONT_BUILD:
848                     getattr(self, '%s_file' % format_).build_delay()
849             for child in self.children.all():
850                 child.parent_cover_changed()
851
852     def other_versions(self):
853         """Find other versions (i.e. in other languages) of the book."""
854         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
855
856     def parents(self):
857         books = []
858         parent = self.parent
859         while parent is not None:
860             books.insert(0, parent)
861             parent = parent.parent
862         return books
863
864     def pretty_title(self, html_links=False):
865         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
866         books = self.parents() + [self]
867         names.extend([(b.title, b.get_absolute_url()) for b in books])
868
869         if html_links:
870             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
871         else:
872             names = [tag[0] for tag in names]
873         return ', '.join(names)
874
875     def publisher(self):
876         publisher = self.get_extra_info_json()['publisher']
877         if isinstance(publisher, str):
878             return publisher
879         elif isinstance(publisher, list):
880             return ', '.join(publisher)
881
882     @classmethod
883     def tagged_top_level(cls, tags):
884         """ Returns top-level books tagged with `tags`.
885
886         It only returns those books which don't have ancestors which are
887         also tagged with those tags.
888
889         """
890         objects = cls.tagged.with_all(tags)
891         return objects.filter(findable=True).exclude(ancestor__in=objects)
892
893     @classmethod
894     def book_list(cls, book_filter=None):
895         """Generates a hierarchical listing of all books.
896
897         Books are optionally filtered with a test function.
898
899         """
900
901         books_by_parent = {}
902         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
903         if book_filter:
904             books = books.filter(book_filter).distinct()
905
906             book_ids = set(b['pk'] for b in books.values("pk").iterator())
907             for book in books.iterator():
908                 parent = book.parent_id
909                 if parent not in book_ids:
910                     parent = None
911                 books_by_parent.setdefault(parent, []).append(book)
912         else:
913             for book in books.iterator():
914                 books_by_parent.setdefault(book.parent_id, []).append(book)
915
916         orphans = []
917         books_by_author = OrderedDict()
918         for tag in Tag.objects.filter(category='author').iterator():
919             books_by_author[tag] = []
920
921         for book in books_by_parent.get(None, ()):
922             authors = list(book.authors().only('pk'))
923             if authors:
924                 for author in authors:
925                     books_by_author[author].append(book)
926             else:
927                 orphans.append(book)
928
929         return books_by_author, orphans, books_by_parent
930
    # Maps audience codes to (sort rank, Polish school-level name) pairs.
    # audiences_pl() sorts by these pairs and returns only the names, so
    # several codes sharing one pair collapse into a single entry.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
941
942     def audiences_pl(self):
943         audiences = self.get_extra_info_json().get('audiences', [])
944         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
945         return [a[1] for a in audiences]
946
947     def stage_note(self):
948         stage = self.get_extra_info_json().get('stage')
949         if stage and stage < '0.4':
950             return (_('This work needs modernisation'),
951                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
952         else:
953             return None, None
954
955     def choose_fragments(self, number):
956         fragments = self.fragments.order_by()
957         fragments_count = fragments.count()
958         if not fragments_count and self.children.exists():
959             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
960             fragments_count = fragments.count()
961         if fragments_count:
962             if fragments_count > number:
963                 offset = randint(0, fragments_count - number)
964             else:
965                 offset = 0
966             return fragments[offset : offset + number]
967         elif self.parent:
968             return self.parent.choose_fragments(number)
969         else:
970             return []
971
972     def choose_fragment(self):
973         fragments = self.choose_fragments(1)
974         if fragments:
975             return fragments[0]
976         else:
977             return None
978         
979     def fragment_data(self):
980         fragment = self.choose_fragment()
981         if fragment:
982             return {
983                 'title': fragment.book.pretty_title(),
984                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
985             }
986         else:
987             return None
988
989     def update_popularity(self):
990         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
991         try:
992             pop = self.popularity
993             pop.count = count
994             pop.save()
995         except BookPopularity.DoesNotExist:
996             BookPopularity.objects.create(book=self, count=count)
997
998     def ridero_link(self):
999         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1000
1001     def like(self, user):
1002         from social.utils import likes, get_set, set_sets
1003         if not likes(user, self):
1004             tag = get_set(user, '')
1005             set_sets(user, self, [tag])
1006
1007     def unlike(self, user):
1008         from social.utils import likes, set_sets
1009         if likes(user, self):
1010             set_sets(user, self, [])
1011
1012     def full_sort_key(self):
1013         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1014
1015     def cover_color(self):
1016         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1017
1018     @cached_render('catalogue/book_mini_box.html')
1019     def mini_box(self):
1020         return {
1021             'book': self
1022         }
1023
1024     @cached_render('catalogue/book_mini_box.html')
1025     def mini_box_nolink(self):
1026         return {
1027             'book': self,
1028             'no_link': True,
1029         }
1030
def add_file_fields():
    """Attach one file field per format in ``Book.formats`` to ``Book``.

    For each format an ``EbookField`` named ``<format>_file`` is added;
    every format except ``'xml'`` additionally gets a
    ``<format>_file_etag`` ``CharField``.  The upload-path callable built
    for each format is also stored in this module's globals as
    ``_<format>_upload_to``.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_
        # This weird globals() assignment makes Django migrations comfortable.
        # '%%s' survives the formatting below as a '%s' placeholder, leaving
        # a pattern of the form 'book/<format>/%s.<format>'.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = '_%s_upload_to' % format_
        globals()[_upload_to.__name__] = _upload_to

        EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        ).contribute_to_class(Book, field_name)
        if format_ != 'xml':
            # Non-editable, indexed companion column next to the file field.
            models.CharField(max_length=255, editable=False, default='', db_index=True).contribute_to_class(Book, f'{field_name}_etag')
1049
1050
# Runs when the module is imported, so Book gets its per-format file fields.
add_file_fields()
1052
1053
1054 class BookPopularity(models.Model):
1055     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1056     count = models.IntegerField(default=0, db_index=True)