Fixes
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from lxml import html
22 from librarian.cover import WLCover
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
33
34 bofh_storage = BofhFileSystemStorage()
35
36
@deconstructible
class UploadToPath:
    """Callable that builds an upload path from a template and the instance slug.

    The template given to the constructor is filled with ``instance.slug``
    using ``%`` formatting, e.g. ``'book/cover/%s.jpg'``.
    """

    def __init__(self, path):
        # Path template with a placeholder for the slug.
        self.path = path

    def __call__(self, instance, filename):
        """Return the path for ``instance``; ``filename`` is not used."""
        slug = instance.slug
        return self.path % slug
44
45
# Upload-path callables for the individual cover variants; the '%s'
# placeholder is filled with the book slug by UploadToPath.__call__.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_clean_upload_to = UploadToPath('book/cover_clean/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
52
53
def _ebook_upload_to(upload_path):
    """Return an ``UploadToPath`` callable for the ``upload_path`` template."""
    return UploadToPath(upload_path)
56
57
58 class Book(models.Model):
59     """Represents a book imported from WL-XML."""
60     title = models.CharField(_('title'), max_length=32767)
61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
62     sort_key_author = models.CharField(
63         _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
64     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
65     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
66     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
67     description = models.TextField(_('description'), blank=True)
68     abstract = models.TextField(_('abstract'), blank=True)
69     toc = models.TextField(_('toc'), blank=True)
70     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
71     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
72     parent_number = models.IntegerField(_('parent number'), default=0)
73     extra_info = models.TextField(_('extra information'), default='{}')
74     gazeta_link = models.CharField(blank=True, max_length=240)
75     wiki_link = models.CharField(blank=True, max_length=240)
76     print_on_demand = models.BooleanField(_('print on demand'), default=False)
77     recommended = models.BooleanField(_('recommended'), default=False)
78     audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
79     preview = models.BooleanField(_('preview'), default=False)
80     preview_until = models.DateField(_('preview until'), blank=True, null=True)
81     preview_key = models.CharField(max_length=32, blank=True, null=True)
82     findable = models.BooleanField(_('findable'), default=True, db_index=True)
83
84     # files generated during publication
85     cover = EbookField(
86         'cover', _('cover'),
87         null=True, blank=True,
88         upload_to=_cover_upload_to,
89         storage=bofh_storage, max_length=255)
90     cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
91     # Cleaner version of cover for thumbs
92     cover_clean = EbookField(
93         'cover_clean', _('clean cover'),
94         null=True, blank=True,
95         upload_to=_cover_clean_upload_to,
96         max_length=255
97     )
98     cover_clean_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
99     cover_thumb = EbookField(
100         'cover_thumb', _('cover thumbnail'),
101         null=True, blank=True,
102         upload_to=_cover_thumb_upload_to,
103         max_length=255)
104     cover_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
105     cover_api_thumb = EbookField(
106         'cover_api_thumb', _('cover thumbnail for mobile app'),
107         null=True, blank=True,
108         upload_to=_cover_api_thumb_upload_to,
109         max_length=255)
110     cover_api_thumb_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
111     simple_cover = EbookField(
112         'simple_cover', _('cover for mobile app'),
113         null=True, blank=True,
114         upload_to=_simple_cover_upload_to,
115         max_length=255)
116     simple_cover_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
117     cover_ebookpoint = EbookField(
118         'cover_ebookpoint', _('cover for Ebookpoint'),
119         null=True, blank=True,
120         upload_to=_cover_ebookpoint_upload_to,
121         max_length=255)
122     cover_ebookpoint_etag = models.CharField(max_length=255, editable=False, default='', db_index=True)
123     ebook_formats = constants.EBOOK_FORMATS
124     formats = ebook_formats + ['html', 'xml']
125
126     parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
127     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
128
129     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
130     has_audience = models.BooleanField(default=False)
131
132     objects = models.Manager()
133     tagged = managers.ModelTaggedItemManager(Tag)
134     tags = managers.TagDescriptor(Tag)
135     tag_relations = GenericRelation(Tag.intermediary_table_model)
136
137     html_built = django.dispatch.Signal()
138     published = django.dispatch.Signal()
139
140     SORT_KEY_SEP = '$'
141
142     class AlreadyExists(Exception):
143         pass
144
145     class Meta:
146         ordering = ('sort_key_author', 'sort_key')
147         verbose_name = _('book')
148         verbose_name_plural = _('books')
149         app_label = 'catalogue'
150
    def __str__(self):
        """Return the book title."""
        return self.title
153
    def get_extra_info_json(self):
        """Return ``extra_info`` parsed from JSON; an empty value yields ``{}``."""
        return json.loads(self.extra_info or '{}')
156
157     def get_initial(self):
158         try:
159             return re.search(r'\w', self.title, re.U).group(0)
160         except AttributeError:
161             return ''
162
    def authors(self):
        """Return this book's tags in the 'author' category."""
        return self.tags.filter(category='author')
165
    def epochs(self):
        """Return this book's tags in the 'epoch' category."""
        return self.tags.filter(category='epoch')
168
    def genres(self):
        """Return this book's tags in the 'genre' category."""
        return self.tags.filter(category='genre')
171
    def kinds(self):
        """Return this book's tags in the 'kind' category."""
        return self.tags.filter(category='kind')
174
175     def tag_unicode(self, category):
176         relations = prefetched_relations(self, category)
177         if relations:
178             return ', '.join(rel.tag.name for rel in relations)
179         else:
180             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
181
    def tags_by_category(self):
        """Return ``split_tags`` of this book's tags, leaving out 'set' and 'theme' tags."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
184
    def author_unicode(self):
        """Return the stored ``cached_author`` string."""
        return self.cached_author
187
    def kind_unicode(self):
        """Shortcut for ``tag_unicode('kind')``."""
        return self.tag_unicode('kind')
190
    def epoch_unicode(self):
        """Shortcut for ``tag_unicode('epoch')``."""
        return self.tag_unicode('epoch')
193
    def genre_unicode(self):
        """Shortcut for ``tag_unicode('genre')``."""
        return self.tag_unicode('genre')
196
197     def translators(self):
198         translators = self.get_extra_info_json().get('translators') or []
199         return [
200             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
201         ]
202
203     def translator(self):
204         translators = self.get_extra_info_json().get('translators')
205         if not translators:
206             return None
207         if len(translators) > 3:
208             translators = translators[:2]
209             others = ' i inni'
210         else:
211             others = ''
212         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
213
214     def cover_source(self):
215         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
216
    @property
    def isbn_pdf(self):
        """Value stored under 'isbn_pdf' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_pdf')
220
    @property
    def isbn_epub(self):
        """Value stored under 'isbn_epub' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_epub')
224
    @property
    def isbn_mobi(self):
        """Value stored under 'isbn_mobi' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_mobi')
228
229     def is_accessible_to(self, user):
230         if not self.preview:
231             return True
232         if not user.is_authenticated:
233             return False
234         Membership = apps.get_model('club', 'Membership')
235         if Membership.is_active_for(user):
236             return True
237         Funding = apps.get_model('funding', 'Funding')
238         if Funding.objects.filter(user=user, offer__book=self):
239             return True
240         return False
241
242     def save(self, force_insert=False, force_update=False, **kwargs):
243         from sortify import sortify
244
245         self.sort_key = sortify(self.title)[:120]
246         self.title = str(self.title)  # ???
247
248         try:
249             author = self.authors().first().sort_key
250         except AttributeError:
251             author = ''
252         self.sort_key_author = author
253
254         self.cached_author = self.tag_unicode('author')
255         self.has_audience = 'audience' in self.get_extra_info_json()
256
257         if self.preview and not self.preview_key:
258             self.preview_key = get_random_hash(self.slug)[:32]
259
260         ret = super(Book, self).save(force_insert, force_update, **kwargs)
261
262         return ret
263
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' route for this book's slug."""
        return reverse('book_detail', args=[self.slug])
266
    def gallery_path(self):
        """Return the module-level ``gallery_path`` helper's result for this slug."""
        return gallery_path(self.slug)
269
    def gallery_url(self):
        """Return the module-level ``gallery_url`` helper's result for this slug."""
        return gallery_url(self.slug)
272
273     def get_first_text(self):
274         if self.html_file:
275             return self
276         child = self.children.all().order_by('parent_number').first()
277         if child is not None:
278             return child.get_first_text()
279
280     def get_last_text(self):
281         if self.html_file:
282             return self
283         child = self.children.all().order_by('parent_number').last()
284         if child is not None:
285             return child.get_last_text()
286
287     def get_prev_text(self):
288         if not self.parent:
289             return None
290         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
291         if sibling is not None:
292             return sibling.get_last_text()
293
294         if self.parent.html_file:
295             return self.parent
296         
297         return self.parent.get_prev_text()
298
299     def get_next_text(self, inside=True):
300         if inside:
301             child = self.children.order_by('parent_number').first()
302             if child is not None:
303                 return child.get_first_text()
304
305         if not self.parent:
306             return None
307         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
308         if sibling is not None:
309             return sibling.get_first_text()
310         return self.parent.get_next_text(inside=False)
311
312     def get_child_audiobook(self):
313         BookMedia = apps.get_model('catalogue', 'BookMedia')
314         if not BookMedia.objects.filter(book__ancestor=self).exists():
315             return None
316         for child in self.children.order_by('parent_number').all():
317             if child.has_mp3_file():
318                 return child
319             child_sub = child.get_child_audiobook()
320             if child_sub is not None:
321                 return child_sub
322
323     def get_siblings(self):
324         if not self.parent:
325             return []
326         return self.parent.children.all().order_by('parent_number')
327
    def get_children(self):
        """Return this book's children ordered by ``parent_number``."""
        return self.children.all().order_by('parent_number')
330     
    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
334
    def language_code(self):
        """Map ``language`` through ``constants.LANGUAGES_3TO2``; unmapped codes are returned unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
337
    def language_name(self):
        """Return the ``settings.LANGUAGES`` entry for ``language_code()``, or "" if not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
340
    def is_foreign(self):
        """Tell whether ``language_code()`` differs from ``settings.LANGUAGE_CODE``."""
        return self.language_code() != settings.LANGUAGE_CODE
343
344     def set_audio_length(self):
345         length = self.get_audio_length()
346         if length > 0:
347             self.audio_length = self.format_audio_length(length)
348             self.save()
349
350     @staticmethod
351     def format_audio_length(seconds):
352         """
353         >>> Book.format_audio_length(1)
354         '0:01'
355         >>> Book.format_audio_length(3661)
356         '1:01:01'
357         """
358         if seconds < 60*60:
359             minutes = seconds // 60
360             seconds = seconds % 60
361             return '%d:%02d' % (minutes, seconds)
362         else:
363             hours = seconds // 3600
364             minutes = seconds % 3600 // 60
365             seconds = seconds % 60
366             return '%d:%02d:%02d' % (hours, minutes, seconds)
367
368     def get_audio_length(self):
369         total = 0
370         for media in self.get_mp3() or ():
371             total += app_settings.GET_MP3_LENGTH(media.file.path)
372         return int(total)
373
374     def has_media(self, type_):
375         if type_ in Book.formats:
376             return bool(getattr(self, "%s_file" % type_))
377         else:
378             return self.media.filter(type=type_).exists()
379
    def has_audio(self):
        """Shortcut for ``has_media('mp3')``."""
        return self.has_media('mp3')
382
383     def get_media(self, type_):
384         if self.has_media(type_):
385             if type_ in Book.formats:
386                 return getattr(self, "%s_file" % type_)
387             else:
388                 return self.media.filter(type=type_)
389         else:
390             return None
391
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
394
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
397
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
400
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
403
404     def media_url(self, format_):
405         media = self.get_media(format_)
406         if media:
407             if self.preview:
408                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
409             else:
410                 return media.url
411         else:
412             return None
413
    def html_url(self):
        """Shortcut for ``media_url('html')``."""
        return self.media_url('html')
416
    def pdf_url(self):
        """Shortcut for ``media_url('pdf')``."""
        return self.media_url('pdf')
419
    def epub_url(self):
        """Shortcut for ``media_url('epub')``."""
        return self.media_url('epub')
422
    def mobi_url(self):
        """Shortcut for ``media_url('mobi')``."""
        return self.media_url('mobi')
425
    def txt_url(self):
        """Shortcut for ``media_url('txt')``."""
        return self.media_url('txt')
428
    def fb2_url(self):
        """Shortcut for ``media_url('fb2')``."""
        return self.media_url('fb2')
431
    def xml_url(self):
        """Shortcut for ``media_url('xml')``."""
        return self.media_url('xml')
434
    def has_description(self):
        """Tell whether ``description`` is non-empty."""
        return len(self.description) > 0
    # Display metadata attached to the method object.
    has_description.short_description = _('description')
    has_description.boolean = True
439
    def has_mp3_file(self):
        """Shortcut for ``has_media("mp3")``."""
        return self.has_media("mp3")
    # Display metadata attached to the method object.
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
444
    def has_ogg_file(self):
        """Shortcut for ``has_media("ogg")``."""
        return self.has_media("ogg")
    # Display metadata attached to the method object.
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
449
    def has_daisy_file(self):
        """Shortcut for ``has_media("daisy")``."""
        return self.has_media("daisy")
    # Display metadata attached to the method object.
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
454
    @property
    def media_daisy(self):
        """Property form of ``get_media('daisy')``."""
        return self.get_media('daisy')
458     
459     def get_audiobooks(self):
460         ogg_files = {}
461         for m in self.media.filter(type='ogg').order_by().iterator():
462             ogg_files[m.name] = m
463
464         audiobooks = []
465         projects = set()
466         total_duration = 0
467         for mp3 in self.media.filter(type='mp3').iterator():
468             # ogg files are always from the same project
469             meta = mp3.get_extra_info_json()
470             project = meta.get('project')
471             if not project:
472                 # temporary fallback
473                 project = 'CzytamySłuchając'
474
475             projects.add((project, meta.get('funded_by', '')))
476             total_duration += mp3.duration or 0
477
478             media = {'mp3': mp3}
479
480             ogg = ogg_files.get(mp3.name)
481             if ogg:
482                 media['ogg'] = ogg
483             audiobooks.append(media)
484
485         projects = sorted(projects)
486         total_duration = '%d:%02d' % (
487             total_duration // 60,
488             total_duration % 60
489         )
490         return audiobooks, projects, total_duration
491
492     def wldocument(self, parse_dublincore=True, inherit=True):
493         from catalogue.import_utils import ORMDocProvider
494         from librarian.parser import WLDocument
495
496         if inherit and self.parent:
497             meta_fallbacks = self.parent.cover_info()
498         else:
499             meta_fallbacks = None
500
501         return WLDocument.from_file(
502             self.xml_file.path,
503             provider=ORMDocProvider(self),
504             parse_dublincore=parse_dublincore,
505             meta_fallbacks=meta_fallbacks)
506
507     def wldocument2(self):
508         from catalogue.import_utils import ORMDocProvider
509         from librarian.document import WLDocument
510         doc = WLDocument(
511             self.xml_file.path,
512             provider=ORMDocProvider(self)
513         )
514         doc.meta.update(self.cover_info())
515         return doc
516
517
518     @staticmethod
519     def zip_format(format_):
520         def pretty_file_name(book):
521             return "%s/%s.%s" % (
522                 book.get_extra_info_json()['author'],
523                 book.slug,
524                 format_)
525
526         field_name = "%s_file" % format_
527         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
528         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
529         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
530
531     def zip_audiobooks(self, format_):
532         bm = BookMedia.objects.filter(book=self, type=format_)
533         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
534         licenses = set()
535         for m in bm:
536             license = constants.LICENSES.get(
537                 m.get_extra_info_json().get('license'), {}
538             ).get('locative')
539             if license:
540                 licenses.add(license)
541         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
542             'licenses': licenses,
543         })
544         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
545
546     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
547         if not self.findable:
548             return
549         if index is None:
550             from search.index import Index
551             index = Index()
552         try:
553             index.index_book(self, book_info)
554             if index_tags:
555                 index.index_tags()
556             if commit:
557                 index.index.commit()
558         except Exception as e:
559             index.index.rollback()
560             raise e
561
562     # will make problems in conjunction with paid previews
563     def download_pictures(self, remote_gallery_url):
564         gallery_path = self.gallery_path()
565         # delete previous files, so we don't include old files in ebooks
566         if os.path.isdir(gallery_path):
567             for filename in os.listdir(gallery_path):
568                 file_path = os.path.join(gallery_path, filename)
569                 os.unlink(file_path)
570         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
571         if ilustr_elements:
572             makedirs(gallery_path)
573             for ilustr in ilustr_elements:
574                 ilustr_src = ilustr.get('src')
575                 ilustr_path = os.path.join(gallery_path, ilustr_src)
576                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
577
578     def load_abstract(self):
579         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
580         if abstract is not None:
581             self.abstract = transform_abstrakt(abstract)
582         else:
583             self.abstract = ''
584
585     def load_toc(self):
586         self.toc = ''
587         if self.html_file:
588             parser = html.HTMLParser(encoding='utf-8')
589             tree = html.parse(self.html_file.path, parser=parser)
590             toc = tree.find('//div[@id="toc"]/ol')
591             if toc is None or not len(toc):
592                 return
593             html_link = reverse('book_text', args=[self.slug])
594             for a in toc.findall('.//a'):
595                 a.attrib['href'] = html_link + a.attrib['href']
596             self.toc = html.tostring(toc, encoding='unicode')
597             # div#toc
598             
    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Parse the metadata of ``xml_file`` and import it as a book.

        ``xml_file`` is either a Django ``File`` or something ``open()``
        accepts. Remaining keyword arguments are passed on to
        ``from_text_and_meta``, whose result is returned.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            # Closed in every case, including a File passed in by the caller.
            xml_file.close()
614
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; its ``url.slug`` identifies the book.
            overwrite: allow updating a book that already exists.
            dont_build: names of outputs ('cover' or ebook formats) not to
                build; merged with ``app_settings.DONT_BUILD``.
            search_index: schedule indexing of the book (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value marks it as a
                preview until today plus that many days.
            findable: stored in the book's ``findable`` field.

        Returns the saved ``Book``.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Flag every tag used here as a book tag.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Shelf ('set') tags collected above are kept alongside the metadata tags.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number is the position in the list.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
734
735     def get_master(self):
736         master_tags = [
737             'opowiadanie',
738             'powiesc',
739             'dramat_wierszowany_l',
740             'dramat_wierszowany_lp',
741             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
742             'wywiad',
743         ]
744         from librarian.parser import WLDocument
745         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
746         root = wld.edoc.getroot()
747         for master in root.iter():
748             if master.tag in master_tags:
749                 return master
750     
751     def update_references(self):
752         from references.models import Entity, Reference
753         master = self.get_master()
754         if master is None:
755             master = []
756         found = set()
757         for i, sec in enumerate(master):
758             for ref in sec.findall('.//ref'):
759                 href = ref.attrib.get('href', '')
760                 if not href or href in found:
761                     continue
762                 found.add(href)
763                 entity, created = Entity.objects.get_or_create(
764                     uri=href
765                 )
766                 ref, created = Reference.objects.get_or_create(
767                     book=self,
768                     entity=entity
769                 )
770                 ref.first_section = 'sec%d' % (i + 1)
771                 entity.populate()
772                 entity.save()
773         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
774     
    @property
    def references(self):
        """All of this book's references, with ``entity`` joined via ``select_related``."""
        return self.reference_set.all().select_related('entity')
778
779     @classmethod
780     @transaction.atomic
781     def repopulate_ancestors(cls):
782         """Fixes the ancestry cache."""
783         # TODO: table names
784         cursor = connection.cursor()
785         if connection.vendor == 'postgres':
786             cursor.execute("TRUNCATE catalogue_book_ancestor")
787             cursor.execute("""
788                 WITH RECURSIVE ancestry AS (
789                     SELECT book.id, book.parent_id
790                     FROM catalogue_book AS book
791                     WHERE book.parent_id IS NOT NULL
792                     UNION
793                     SELECT ancestor.id, book.parent_id
794                     FROM ancestry AS ancestor, catalogue_book AS book
795                     WHERE ancestor.parent_id = book.id
796                         AND book.parent_id IS NOT NULL
797                     )
798                 INSERT INTO catalogue_book_ancestor
799                     (from_book_id, to_book_id)
800                     SELECT id, parent_id
801                     FROM ancestry
802                     ORDER BY id;
803                 """)
804         else:
805             cursor.execute("DELETE FROM catalogue_book_ancestor")
806             for b in cls.objects.exclude(parent=None):
807                 parent = b.parent
808                 while parent is not None:
809                     b.ancestor.add(parent)
810                     parent = parent.parent
811
812     @property
813     def ancestors(self):
814         if self.parent:
815             for anc in self.parent.ancestors:
816                 yield anc
817             yield self.parent
818         else:
819             return []
820                     
821     def clear_cache(self):
822         clear_cached_renders(self.mini_box)
823         clear_cached_renders(self.mini_box_nolink)
824
825     def cover_info(self, inherit=True):
826         """Returns a dictionary to serve as fallback for BookInfo.
827
828         For now, the only thing inherited is the cover image.
829         """
830         need = False
831         info = {}
832         for field in ('cover_url', 'cover_by', 'cover_source'):
833             val = self.get_extra_info_json().get(field)
834             if val:
835                 info[field] = val
836             else:
837                 need = True
838         if inherit and need and self.parent is not None:
839             parent_info = self.parent.cover_info()
840             parent_info.update(info)
841             info = parent_info
842         return info
843
844     def related_themes(self):
845         return Tag.objects.usage_for_queryset(
846             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
847             counts=True).filter(category='theme').order_by('-count')
848
849     def parent_cover_changed(self):
850         """Called when parent book's cover image is changed."""
851         if not self.cover_info(inherit=False):
852             if 'cover' not in app_settings.DONT_BUILD:
853                 self.cover.build_delay()
854                 self.cover_clean.build_delay()
855                 self.cover_thumb.build_delay()
856                 self.cover_api_thumb.build_delay()
857                 self.simple_cover.build_delay()
858                 self.cover_ebookpoint.build_delay()
859             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
860                 if format_ not in app_settings.DONT_BUILD:
861                     getattr(self, '%s_file' % format_).build_delay()
862             for child in self.children.all():
863                 child.parent_cover_changed()
864
865     def other_versions(self):
866         """Find other versions (i.e. in other languages) of the book."""
867         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
868
869     def parents(self):
870         books = []
871         parent = self.parent
872         while parent is not None:
873             books.insert(0, parent)
874             parent = parent.parent
875         return books
876
877     def pretty_title(self, html_links=False):
878         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
879         books = self.parents() + [self]
880         names.extend([(b.title, b.get_absolute_url()) for b in books])
881
882         if html_links:
883             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
884         else:
885             names = [tag[0] for tag in names]
886         return ', '.join(names)
887
888     def publisher(self):
889         publisher = self.get_extra_info_json()['publisher']
890         if isinstance(publisher, str):
891             return publisher
892         elif isinstance(publisher, list):
893             return ', '.join(publisher)
894
895     @classmethod
896     def tagged_top_level(cls, tags):
897         """ Returns top-level books tagged with `tags`.
898
899         It only returns those books which don't have ancestors which are
900         also tagged with those tags.
901
902         """
903         objects = cls.tagged.with_all(tags)
904         return objects.filter(findable=True).exclude(ancestor__in=objects)
905
906     @classmethod
907     def book_list(cls, book_filter=None):
908         """Generates a hierarchical listing of all books.
909
910         Books are optionally filtered with a test function.
911
912         """
913
914         books_by_parent = {}
915         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
916         if book_filter:
917             books = books.filter(book_filter).distinct()
918
919             book_ids = set(b['pk'] for b in books.values("pk").iterator())
920             for book in books.iterator():
921                 parent = book.parent_id
922                 if parent not in book_ids:
923                     parent = None
924                 books_by_parent.setdefault(parent, []).append(book)
925         else:
926             for book in books.iterator():
927                 books_by_parent.setdefault(book.parent_id, []).append(book)
928
929         orphans = []
930         books_by_author = OrderedDict()
931         for tag in Tag.objects.filter(category='author').iterator():
932             books_by_author[tag] = []
933
934         for book in books_by_parent.get(None, ()):
935             authors = list(book.authors().only('pk'))
936             if authors:
937                 for author in authors:
938                     books_by_author[author].append(book)
939             else:
940                 orphans.append(book)
941
942         return books_by_author, orphans, books_by_parent
943
    # Maps an audience code from a book's extra info to a pair of
    # (sort rank, Polish label); audiences_pl() sorts by the pair and
    # returns only the labels.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
954
955     def audiences_pl(self):
956         audiences = self.get_extra_info_json().get('audiences', [])
957         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
958         return [a[1] for a in audiences]
959
960     def stage_note(self):
961         stage = self.get_extra_info_json().get('stage')
962         if stage and stage < '0.4':
963             return (_('This work needs modernisation'),
964                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
965         else:
966             return None, None
967
968     def choose_fragments(self, number):
969         fragments = self.fragments.order_by()
970         fragments_count = fragments.count()
971         if not fragments_count and self.children.exists():
972             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
973             fragments_count = fragments.count()
974         if fragments_count:
975             if fragments_count > number:
976                 offset = randint(0, fragments_count - number)
977             else:
978                 offset = 0
979             return fragments[offset : offset + number]
980         elif self.parent:
981             return self.parent.choose_fragments(number)
982         else:
983             return []
984
985     def choose_fragment(self):
986         fragments = self.choose_fragments(1)
987         if fragments:
988             return fragments[0]
989         else:
990             return None
991         
992     def fragment_data(self):
993         fragment = self.choose_fragment()
994         if fragment:
995             return {
996                 'title': fragment.book.pretty_title(),
997                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
998             }
999         else:
1000             return None
1001
1002     def update_popularity(self):
1003         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
1004         try:
1005             pop = self.popularity
1006             pop.count = count
1007             pop.save()
1008         except BookPopularity.DoesNotExist:
1009             BookPopularity.objects.create(book=self, count=count)
1010
1011     def ridero_link(self):
1012         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1013
1014     def like(self, user):
1015         from social.utils import likes, get_set, set_sets
1016         if not likes(user, self):
1017             tag = get_set(user, '')
1018             set_sets(user, self, [tag])
1019
1020     def unlike(self, user):
1021         from social.utils import likes, set_sets
1022         if likes(user, self):
1023             set_sets(user, self, [])
1024
1025     def full_sort_key(self):
1026         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1027
1028     def cover_color(self):
1029         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1030
1031     @cached_render('catalogue/book_mini_box.html')
1032     def mini_box(self):
1033         return {
1034             'book': self
1035         }
1036
1037     @cached_render('catalogue/book_mini_box.html')
1038     def mini_box_nolink(self):
1039         return {
1040             'book': self,
1041             'no_link': True,
1042         }
1043
def add_file_fields():
    """Add a ``<format>_file`` EbookField to Book for every format in Book.formats.

    Every format except 'xml' also gets a ``<format>_file_etag`` CharField.
    """
    for format_ in Book.formats:
        field_name = "%s_file" % format_

        # This weird globals() assignment makes Django migrations comfortable.
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        upload_to_name = '_%s_upload_to' % format_
        _upload_to.__name__ = upload_to_name
        globals()[upload_to_name] = _upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, field_name)

        if format_ == 'xml':
            continue
        etag_field = models.CharField(max_length=255, editable=False, default='', db_index=True)
        etag_field.contribute_to_class(Book, f'{field_name}_etag')
1062
1063
# Runs at import time, so Book gets its per-format file fields as soon as
# this module is loaded.
add_file_fields()
1065
1066
1067 class BookPopularity(models.Model):
1068     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1069     count = models.IntegerField(default=0, db_index=True)