10e9d22e33414c1e08eb485a1b27c557111aeee2
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import gettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from librarian.builders import builders
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue import fields
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32
# Storage instance shared by the Book file fields that pass `storage=bofh_storage`.
bofh_storage = BofhFileSystemStorage()
34
35
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    # Descriptive metadata. sort_key, sort_key_author, cached_author and
    # has_audience are recomputed on every save().
    title = models.CharField('tytuł', max_length=32767)
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    # NOTE(review): the label reads 'motyfikacji', likely a typo for 'modyfikacji';
    # it is a runtime string, so it is left untouched here.
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded dictionary; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state: preview_key is filled in by save() when preview is set.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    # NOTE(review): the label reads 'mniaturka', likely a typo for 'miniaturka';
    # it is a runtime string, so it is left untouched here.
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Format names that map to a "<format>_file" field on this model
    # (see has_media() / get_media()).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent in the book hierarchy, and the set of all ancestors,
    # which is rebuilt by repopulate_ancestors().
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals declared on the model; `published` is sent at the end of
    # from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
110
111     def __str__(self):
112         return self.title
113
114     def get_extra_info_json(self):
115         return json.loads(self.extra_info or '{}')
116
117     def get_initial(self):
118         try:
119             return re.search(r'\w', self.title, re.U).group(0)
120         except AttributeError:
121             return ''
122
123     def authors(self):
124         return self.tags.filter(category='author')
125
126     def epochs(self):
127         return self.tags.filter(category='epoch')
128
129     def genres(self):
130         return self.tags.filter(category='genre')
131
132     def kinds(self):
133         return self.tags.filter(category='kind')
134
135     def tag_unicode(self, category):
136         relations = prefetched_relations(self, category)
137         if relations:
138             return ', '.join(rel.tag.name for rel in relations)
139         else:
140             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
141
142     def tags_by_category(self):
143         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
144
145     def author_unicode(self):
146         return self.cached_author
147
148     def kind_unicode(self):
149         return self.tag_unicode('kind')
150
151     def epoch_unicode(self):
152         return self.tag_unicode('epoch')
153
154     def genre_unicode(self):
155         return self.tag_unicode('genre')
156
157     def translators(self):
158         translators = self.get_extra_info_json().get('translators') or []
159         return [
160             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
161         ]
162
163     def translator(self):
164         translators = self.get_extra_info_json().get('translators')
165         if not translators:
166             return None
167         if len(translators) > 3:
168             translators = translators[:2]
169             others = ' i inni'
170         else:
171             others = ''
172         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
173
174     def cover_source(self):
175         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
176
177     @property
178     def isbn_pdf(self):
179         return self.get_extra_info_json().get('isbn_pdf')
180
181     @property
182     def isbn_epub(self):
183         return self.get_extra_info_json().get('isbn_epub')
184
185     @property
186     def isbn_mobi(self):
187         return self.get_extra_info_json().get('isbn_mobi')
188
189     def is_accessible_to(self, user):
190         if not self.preview:
191             return True
192         if not user.is_authenticated:
193             return False
194         Membership = apps.get_model('club', 'Membership')
195         if Membership.is_active_for(user):
196             return True
197         Funding = apps.get_model('funding', 'Funding')
198         if Funding.objects.filter(user=user, offer__book=self):
199             return True
200         return False
201
202     def save(self, force_insert=False, force_update=False, **kwargs):
203         from sortify import sortify
204
205         self.sort_key = sortify(self.title)[:120]
206         self.title = str(self.title)  # ???
207
208         try:
209             author = self.authors().first().sort_key
210         except AttributeError:
211             author = ''
212         self.sort_key_author = author
213
214         self.cached_author = self.tag_unicode('author')
215         self.has_audience = 'audience' in self.get_extra_info_json()
216
217         if self.preview and not self.preview_key:
218             self.preview_key = get_random_hash(self.slug)[:32]
219
220         ret = super(Book, self).save(force_insert, force_update, **kwargs)
221
222         return ret
223
224     def get_absolute_url(self):
225         return reverse('book_detail', args=[self.slug])
226
227     def gallery_path(self):
228         return gallery_path(self.slug)
229
230     def gallery_url(self):
231         return gallery_url(self.slug)
232
233     def get_first_text(self):
234         if self.html_file:
235             return self
236         child = self.children.all().order_by('parent_number').first()
237         if child is not None:
238             return child.get_first_text()
239
240     def get_last_text(self):
241         if self.html_file:
242             return self
243         child = self.children.all().order_by('parent_number').last()
244         if child is not None:
245             return child.get_last_text()
246
247     def get_prev_text(self):
248         if not self.parent:
249             return None
250         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
251         if sibling is not None:
252             return sibling.get_last_text()
253
254         if self.parent.html_file:
255             return self.parent
256
257         return self.parent.get_prev_text()
258
259     def get_next_text(self, inside=True):
260         if inside:
261             child = self.children.order_by('parent_number').first()
262             if child is not None:
263                 return child.get_first_text()
264
265         if not self.parent:
266             return None
267         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
268         if sibling is not None:
269             return sibling.get_first_text()
270         return self.parent.get_next_text(inside=False)
271
272     def get_child_audiobook(self):
273         BookMedia = apps.get_model('catalogue', 'BookMedia')
274         if not BookMedia.objects.filter(book__ancestor=self).exists():
275             return None
276         for child in self.children.order_by('parent_number').all():
277             if child.has_mp3_file():
278                 return child
279             child_sub = child.get_child_audiobook()
280             if child_sub is not None:
281                 return child_sub
282
283     def get_siblings(self):
284         if not self.parent:
285             return []
286         return self.parent.children.all().order_by('parent_number')
287
288     def get_children(self):
289         return self.children.all().order_by('parent_number')
290
291     @property
292     def name(self):
293         return self.title
294
295     def language_code(self):
296         return constants.LANGUAGES_3TO2.get(self.language, self.language)
297
298     def language_name(self):
299         return dict(settings.LANGUAGES).get(self.language_code(), "")
300
301     def is_foreign(self):
302         return self.language_code() != settings.LANGUAGE_CODE
303
304     def set_audio_length(self):
305         length = self.get_audio_length()
306         if length > 0:
307             self.audio_length = self.format_audio_length(length)
308             self.save()
309
310     @staticmethod
311     def format_audio_length(seconds):
312         """
313         >>> Book.format_audio_length(1)
314         '0:01'
315         >>> Book.format_audio_length(3661)
316         '1:01:01'
317         """
318         if seconds < 60*60:
319             minutes = seconds // 60
320             seconds = seconds % 60
321             return '%d:%02d' % (minutes, seconds)
322         else:
323             hours = seconds // 3600
324             minutes = seconds % 3600 // 60
325             seconds = seconds % 60
326             return '%d:%02d:%02d' % (hours, minutes, seconds)
327
328     def get_audio_length(self):
329         total = 0
330         for media in self.get_mp3() or ():
331             total += app_settings.GET_MP3_LENGTH(media.file.path)
332         return int(total)
333
334     def get_time(self):
335         return round(self.xml_file.size / 1000 * 40)
336     
337     def has_media(self, type_):
338         if type_ in Book.formats:
339             return bool(getattr(self, "%s_file" % type_))
340         else:
341             return self.media.filter(type=type_).exists()
342
343     def has_audio(self):
344         return self.has_media('mp3')
345
346     def get_media(self, type_):
347         if self.has_media(type_):
348             if type_ in Book.formats:
349                 return getattr(self, "%s_file" % type_)
350             else:
351                 return self.media.filter(type=type_)
352         else:
353             return None
354
355     def get_mp3(self):
356         return self.get_media("mp3")
357
358     def get_odt(self):
359         return self.get_media("odt")
360
361     def get_ogg(self):
362         return self.get_media("ogg")
363
364     def get_daisy(self):
365         return self.get_media("daisy")
366
367     def get_audio_epub(self):
368         return self.get_media("audio.epub")
369
370     def media_url(self, format_):
371         media = self.get_media(format_)
372         if media:
373             if self.preview:
374                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
375             else:
376                 return media.url
377         else:
378             return None
379
380     def html_url(self):
381         return self.media_url('html')
382
383     def pdf_url(self):
384         return self.media_url('pdf')
385
386     def epub_url(self):
387         return self.media_url('epub')
388
389     def mobi_url(self):
390         return self.media_url('mobi')
391
392     def txt_url(self):
393         return self.media_url('txt')
394
395     def fb2_url(self):
396         return self.media_url('fb2')
397
398     def xml_url(self):
399         return self.media_url('xml')
400
401     def has_description(self):
402         return len(self.description) > 0
403     has_description.short_description = 'opis'
404     has_description.boolean = True
405
406     def has_mp3_file(self):
407         return self.has_media("mp3")
408     has_mp3_file.short_description = 'MP3'
409     has_mp3_file.boolean = True
410
411     def has_ogg_file(self):
412         return self.has_media("ogg")
413     has_ogg_file.short_description = 'OGG'
414     has_ogg_file.boolean = True
415
416     def has_daisy_file(self):
417         return self.has_media("daisy")
418     has_daisy_file.short_description = 'DAISY'
419     has_daisy_file.boolean = True
420
421     def has_sync_file(self):
422         return self.has_media("sync")
423
424     def get_sync(self):
425         with self.get_media('sync').first().file.open('r') as f:
426             sync = f.read().split('\n')
427         offset = float(sync[0])
428         items = []
429         for line in sync[1:]:
430             if not line:
431                 continue
432             start, end, elid = line.split()
433             items.append([elid, float(start) + offset])
434         return json.dumps(items)
435     
436     def has_audio_epub_file(self):
437         return self.has_media("audio.epub")
438
439     @property
440     def media_daisy(self):
441         return self.get_media('daisy')
442
443     @property
444     def media_audio_epub(self):
445         return self.get_media('audio.epub')
446
447     def get_audiobooks(self):
448         ogg_files = {}
449         for m in self.media.filter(type='ogg').order_by().iterator():
450             ogg_files[m.name] = m
451
452         audiobooks = []
453         projects = set()
454         total_duration = 0
455         for mp3 in self.media.filter(type='mp3').iterator():
456             # ogg files are always from the same project
457             meta = mp3.get_extra_info_json()
458             project = meta.get('project')
459             if not project:
460                 # temporary fallback
461                 project = 'CzytamySłuchając'
462
463             projects.add((project, meta.get('funded_by', '')))
464             total_duration += mp3.duration or 0
465
466             media = {'mp3': mp3}
467
468             ogg = ogg_files.get(mp3.name)
469             if ogg:
470                 media['ogg'] = ogg
471             audiobooks.append(media)
472
473         projects = sorted(projects)
474         total_duration = '%d:%02d' % (
475             total_duration // 60,
476             total_duration % 60
477         )
478         return audiobooks, projects, total_duration
479
480     def wldocument(self, parse_dublincore=True, inherit=True):
481         from catalogue.import_utils import ORMDocProvider
482         from librarian.parser import WLDocument
483
484         if inherit and self.parent:
485             meta_fallbacks = self.parent.cover_info()
486         else:
487             meta_fallbacks = None
488
489         return WLDocument.from_file(
490             self.xml_file.path,
491             provider=ORMDocProvider(self),
492             parse_dublincore=parse_dublincore,
493             meta_fallbacks=meta_fallbacks)
494
495     def wldocument2(self):
496         from catalogue.import_utils import ORMDocProvider
497         from librarian.document import WLDocument
498         doc = WLDocument(
499             self.xml_file.path,
500             provider=ORMDocProvider(self)
501         )
502         doc.meta.update(self.cover_info())
503         return doc
504
505
506     @staticmethod
507     def zip_format(format_):
508         def pretty_file_name(book):
509             return "%s/%s.%s" % (
510                 book.get_extra_info_json()['author'],
511                 book.slug,
512                 format_)
513
514         field_name = "%s_file" % format_
515         field = getattr(Book, field_name)
516         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
517         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
518         return create_zip(paths, field.ZIP)
519
520     def zip_audiobooks(self, format_):
521         bm = BookMedia.objects.filter(book=self, type=format_)
522         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
523         licenses = set()
524         for m in bm:
525             license = constants.LICENSES.get(
526                 m.get_extra_info_json().get('license'), {}
527             ).get('locative')
528             if license:
529                 licenses.add(license)
530         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
531             'licenses': licenses,
532             'meta': self.wldocument2().meta,
533         })
534         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
535
536     def search_index(self, index=None):
537         if not self.findable:
538             return
539         from search.index import Index
540         Index.index_book(self)
541
542     # will make problems in conjunction with paid previews
543     def download_pictures(self, remote_gallery_url):
544         # This is only needed for legacy relative image paths.
545         gallery_path = self.gallery_path()
546         # delete previous files, so we don't include old files in ebooks
547         if os.path.isdir(gallery_path):
548             for filename in os.listdir(gallery_path):
549                 file_path = os.path.join(gallery_path, filename)
550                 os.unlink(file_path)
551         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
552         if ilustr_elements:
553             makedirs(gallery_path)
554             for ilustr in ilustr_elements:
555                 ilustr_src = ilustr.get('src')
556                 if '/' in ilustr_src:
557                     continue
558                 ilustr_path = os.path.join(gallery_path, ilustr_src)
559                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
560
561     def load_abstract(self):
562         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
563         if abstract is not None:
564             self.abstract = transform_abstrakt(abstract)
565         else:
566             self.abstract = ''
567
568     def load_toc(self):
569         self.toc = ''
570         if self.html_file:
571             parser = html.HTMLParser(encoding='utf-8')
572             tree = html.parse(self.html_file.path, parser=parser)
573             toc = tree.find('//div[@id="toc"]/ol')
574             if toc is None or not len(toc):
575                 return
576             html_link = reverse('book_text', args=[self.slug])
577             for a in toc.findall('.//a'):
578                 a.attrib['href'] = html_link + a.attrib['href']
579             self.toc = html.tostring(toc, encoding='unicode')
580             # div#toc
581
582     @classmethod
583     def from_xml_file(cls, xml_file, **kwargs):
584         from django.core.files import File
585         from librarian import dcparser
586
587         # use librarian to parse meta-data
588         book_info = dcparser.parse(xml_file)
589
590         if not isinstance(xml_file, File):
591             xml_file = File(open(xml_file))
592
593         try:
594             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
595         finally:
596             xml_file.close()
597
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None, logo_alt=None):
        """Create or update the book described by `book_info` and schedule its builds.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and, when
                present, `parts`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of build names to skip, merged with
                `app_settings.DONT_BUILD`.
            search_index: when false, the indexing task is not queued.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: when truthy and the book is newly created, the book becomes
                a preview until today plus that many days.
            findable: stored on the book; also gates search indexing.
            logo, logo_mono, logo_alt: stored in extra info when truthy.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the slug exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remember the current cover info to detect a change further down.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        if logo_alt:
            extra['logo_alt'] = logo_alt
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used by this book is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Tags from metadata plus the shelves ('set' tags) kept from before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Some formats are only built for books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
726
727     def update_references(self):
728         Entity = apps.get_model('references', 'Entity')
729         doc = self.wldocument2()
730         doc._compat_assign_section_ids()
731         doc._compat_assign_ordered_ids()
732         refs = {}
733         for ref_elem in doc.references():
734             uri = ref_elem.attrib.get('href', '')
735             if not uri:
736                 continue
737             if uri in refs:
738                 ref = refs[uri]
739             else:
740                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
741                 if entity_created:
742                     try:
743                         entity.populate()
744                     except:
745                         pass
746                     else:
747                         entity.save()
748                 ref, ref_created = entity.reference_set.get_or_create(book=self)
749                 refs[uri] = ref
750                 if not ref_created:
751                     ref.occurence_set.all().delete()
752             sec = ref_elem.get_link()
753             m = re.match(r'sec(\d+)', sec)
754             assert m is not None
755             sec = int(m.group(1))
756             snippet = ref_elem.get_snippet()
757             b = builders['html-snippet']()
758             for s in snippet:
759                 s.html_build(b)
760             html = b.output().get_bytes().decode('utf-8')
761
762             ref.occurence_set.create(
763                 section=sec,
764                 html=html
765             )
766         self.reference_set.exclude(entity__uri__in=refs).delete()
767
768     @property
769     def references(self):
770         return self.reference_set.all().select_related('entity')
771
772     @classmethod
773     @transaction.atomic
774     def repopulate_ancestors(cls):
775         """Fixes the ancestry cache."""
776         # TODO: table names
777         cursor = connection.cursor()
778         if connection.vendor == 'postgres':
779             cursor.execute("TRUNCATE catalogue_book_ancestor")
780             cursor.execute("""
781                 WITH RECURSIVE ancestry AS (
782                     SELECT book.id, book.parent_id
783                     FROM catalogue_book AS book
784                     WHERE book.parent_id IS NOT NULL
785                     UNION
786                     SELECT ancestor.id, book.parent_id
787                     FROM ancestry AS ancestor, catalogue_book AS book
788                     WHERE ancestor.parent_id = book.id
789                         AND book.parent_id IS NOT NULL
790                     )
791                 INSERT INTO catalogue_book_ancestor
792                     (from_book_id, to_book_id)
793                     SELECT id, parent_id
794                     FROM ancestry
795                     ORDER BY id;
796                 """)
797         else:
798             cursor.execute("DELETE FROM catalogue_book_ancestor")
799             for b in cls.objects.exclude(parent=None):
800                 parent = b.parent
801                 while parent is not None:
802                     b.ancestor.add(parent)
803                     parent = parent.parent
804
805     @property
806     def ancestors(self):
807         if self.parent:
808             for anc in self.parent.ancestors:
809                 yield anc
810             yield self.parent
811         else:
812             return []
813
814     def clear_cache(self):
815         clear_cached_renders(self.mini_box)
816         clear_cached_renders(self.mini_box_nolink)
817
818     def cover_info(self, inherit=True):
819         """Returns a dictionary to serve as fallback for BookInfo.
820
821         For now, the only thing inherited is the cover image.
822         """
823         need = False
824         info = {}
825         for field in ('cover_url', 'cover_by', 'cover_source'):
826             val = self.get_extra_info_json().get(field)
827             if val:
828                 info[field] = val
829             else:
830                 need = True
831         if inherit and need and self.parent is not None:
832             parent_info = self.parent.cover_info()
833             parent_info.update(info)
834             info = parent_info
835         return info
836
837     def related_themes(self):
838         return Tag.objects.usage_for_queryset(
839             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
840             counts=True).filter(category='theme').order_by('-count')
841
842     def parent_cover_changed(self):
843         """Called when parent book's cover image is changed."""
844         if not self.cover_info(inherit=False):
845             if 'cover' not in app_settings.DONT_BUILD:
846                 self.cover.build_delay()
847                 self.cover_clean.build_delay()
848                 self.cover_thumb.build_delay()
849                 self.cover_api_thumb.build_delay()
850                 self.simple_cover.build_delay()
851                 self.cover_ebookpoint.build_delay()
852             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
853                 if format_ not in app_settings.DONT_BUILD:
854                     getattr(self, '%s_file' % format_).build_delay()
855             for child in self.children.all():
856                 child.parent_cover_changed()
857
858     def other_versions(self):
859         """Find other versions (i.e. in other languages) of the book."""
860         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
861
862     def parents(self):
863         books = []
864         parent = self.parent
865         while parent is not None:
866             books.insert(0, parent)
867             parent = parent.parent
868         return books
869
870     def pretty_title(self, html_links=False):
871         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
872         books = self.parents() + [self]
873         names.extend([(b.title, b.get_absolute_url()) for b in books])
874
875         if html_links:
876             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
877         else:
878             names = [tag[0] for tag in names]
879         return ', '.join(names)
880
881     def publisher(self):
882         publisher = self.get_extra_info_json()['publisher']
883         if isinstance(publisher, str):
884             return publisher
885         elif isinstance(publisher, list):
886             return ', '.join(publisher)
887
888     @classmethod
889     def tagged_top_level(cls, tags):
890         """ Returns top-level books tagged with `tags`.
891
892         It only returns those books which don't have ancestors which are
893         also tagged with those tags.
894
895         """
896         objects = cls.tagged.with_all(tags)
897         return objects.filter(findable=True).exclude(ancestor__in=objects)
898
899     @classmethod
900     def book_list(cls, book_filter=None):
901         """Generates a hierarchical listing of all books.
902
903         Books are optionally filtered with a test function.
904
905         """
906
907         books_by_parent = {}
908         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
909         if book_filter:
910             books = books.filter(book_filter).distinct()
911
912             book_ids = set(b['pk'] for b in books.values("pk").iterator())
913             for book in books.iterator():
914                 parent = book.parent_id
915                 if parent not in book_ids:
916                     parent = None
917                 books_by_parent.setdefault(parent, []).append(book)
918         else:
919             for book in books.iterator():
920                 books_by_parent.setdefault(book.parent_id, []).append(book)
921
922         orphans = []
923         books_by_author = OrderedDict()
924         for tag in Tag.objects.filter(category='author').iterator():
925             books_by_author[tag] = []
926
927         for book in books_by_parent.get(None, ()):
928             authors = list(book.authors().only('pk'))
929             if authors:
930                 for author in authors:
931                     books_by_author[author].append(book)
932             else:
933                 orphans.append(book)
934
935         return books_by_author, orphans, books_by_parent
936
937     _audiences_pl = {
938         "SP": (1, "szkoła podstawowa"),
939         "SP1": (1, "szkoła podstawowa"),
940         "SP2": (1, "szkoła podstawowa"),
941         "SP3": (1, "szkoła podstawowa"),
942         "P": (1, "szkoła podstawowa"),
943         "G": (2, "gimnazjum"),
944         "L": (3, "liceum"),
945         "LP": (3, "liceum"),
946     }
947
948     def audiences_pl(self):
949         audiences = self.get_extra_info_json().get('audiences', [])
950         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
951         return [a[1] for a in audiences]
952
953     def stage_note(self):
954         stage = self.get_extra_info_json().get('stage')
955         if stage and stage < '0.4':
956             return (_('Ten utwór wymaga uwspółcześnienia'),
957                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
958         else:
959             return None, None
960
961     def choose_fragments(self, number):
962         fragments = self.fragments.order_by()
963         fragments_count = fragments.count()
964         if not fragments_count and self.children.exists():
965             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
966             fragments_count = fragments.count()
967         if fragments_count:
968             if fragments_count > number:
969                 offset = randint(0, fragments_count - number)
970             else:
971                 offset = 0
972             return fragments[offset : offset + number]
973         elif self.parent:
974             return self.parent.choose_fragments(number)
975         else:
976             return []
977
978     def choose_fragment(self):
979         fragments = self.choose_fragments(1)
980         if fragments:
981             return fragments[0]
982         else:
983             return None
984
985     def fragment_data(self):
986         fragment = self.choose_fragment()
987         if fragment:
988             return {
989                 'title': fragment.book.pretty_title(),
990                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
991             }
992         else:
993             return None
994
995     def update_popularity(self):
996         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
997         try:
998             pop = self.popularity
999             pop.count = count
1000             pop.save()
1001         except BookPopularity.DoesNotExist:
1002             BookPopularity.objects.create(book=self, count=count)
1003
1004     def ridero_link(self):
1005         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1006
1007     def like(self, user):
1008         from social.utils import likes, get_set, set_sets
1009         if not likes(user, self):
1010             tag = get_set(user, '')
1011             set_sets(user, self, [tag])
1012
1013     def unlike(self, user):
1014         from social.utils import likes, set_sets
1015         if likes(user, self):
1016             set_sets(user, self, [])
1017
1018     def full_sort_key(self):
1019         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1020
1021     def cover_color(self):
1022         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1023
1024     @cached_render('catalogue/book_mini_box.html')
1025     def mini_box(self):
1026         return {
1027             'book': self
1028         }
1029
1030     @cached_render('catalogue/book_mini_box.html')
1031     def mini_box_nolink(self):
1032         return {
1033             'book': self,
1034             'no_link': True,
1035         }
1036
1037
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one relation with a Book."""
    # The counted book; reachable from the book side as `book.popularity`.
    # Deleting the book deletes this record too (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Popularity value, indexed; Book.update_popularity() sets it to the
    # number of distinct users having a 'set' tag on the book.
    count = models.IntegerField(default=0, db_index=True)