b14269f8d2b99692b85e6fdbc07585fe4c52bb9f
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import gettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from librarian.builders import builders
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue import fields
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32
33 bofh_storage = BofhFileSystemStorage()
34
35
36 class Book(models.Model):
37     """Represents a book imported from WL-XML."""
    # Descriptive metadata. sort_key and sort_key_author are recomputed in save().
    title = models.CharField('tytuł', max_length=32767)
    sort_key = models.CharField('klucz sortowania', max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        'klucz sortowania wg autora', max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField('slug', max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta() to the slug of the `variant_of` book, or to
    # the book's own slug; other_versions() matches books on this value.
    common_slug = models.SlugField('wspólny slug', max_length=120, db_index=True)
    language = models.CharField('kod języka', max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField('opis', blank=True)
    abstract = models.TextField('abstrakt', blank=True)
    toc = models.TextField('spis treści', blank=True)
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField('data motyfikacji', auto_now=True, db_index=True)
    # Position among the children of `parent`.
    parent_number = models.IntegerField('numer w ramach rodzica', default=0)
    # JSON-encoded text; read it through get_extra_info_json().
    extra_info = models.TextField('dodatkowe informacje', default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField('druk na żądanie', default=False)
    recommended = models.BooleanField('polecane', default=False)
    audio_length = models.CharField('długość audio', blank=True, max_length=8)
    # Preview state; save() fills in preview_key for a preview book without one.
    preview = models.BooleanField('prapremiera', default=False)
    preview_until = models.DateField('prapremiera do', blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField('wyszukiwalna', default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField('okładka', storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField('czysta okładka')
    cover_thumb = fields.CoverThumbField('miniatura okładki')
    cover_api_thumb = fields.CoverApiThumbField(
        'mniaturka okładki dla aplikacji')
    simple_cover = fields.SimpleCoverField('okładka dla aplikacji')
    cover_ebookpoint = fields.CoverEbookpointField(
        'okładka dla Ebookpoint')

    # Formats stored in "<format>_file" fields; has_media() and get_media()
    # treat every other media type as a BookMedia lookup.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values recomputed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() for an existing slug when overwrite is off."""
        pass

    class Meta:
        # Default ordering: by author sort key, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = 'książka'
        verbose_name_plural = 'książki'
        app_label = 'catalogue'
110
    def __str__(self):
        """Return the book's title."""
        return self.title
113
114     def get_extra_info_json(self):
115         return json.loads(self.extra_info or '{}')
116
117     def get_initial(self):
118         try:
119             return re.search(r'\w', self.title, re.U).group(0)
120         except AttributeError:
121             return ''
122
    def authors(self):
        """Return this book's tags of category 'author'."""
        return self.tags.filter(category='author')

    def epochs(self):
        """Return this book's tags of category 'epoch'."""
        return self.tags.filter(category='epoch')

    def genres(self):
        """Return this book's tags of category 'genre'."""
        return self.tags.filter(category='genre')

    def kinds(self):
        """Return this book's tags of category 'kind'."""
        return self.tags.filter(category='kind')
134
135     def tag_unicode(self, category):
136         relations = prefetched_relations(self, category)
137         if relations:
138             return ', '.join(rel.tag.name for rel in relations)
139         else:
140             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
141
    def tags_by_category(self):
        """Return split_tags() of this book's tags, leaving out 'set' and 'theme' tags."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))

    def author_unicode(self):
        """Return the cached author string (`cached_author`, refreshed in save())."""
        return self.cached_author

    def kind_unicode(self):
        """Return the comma-separated names of the book's 'kind' tags."""
        return self.tag_unicode('kind')

    def epoch_unicode(self):
        """Return the comma-separated names of the book's 'epoch' tags."""
        return self.tag_unicode('epoch')

    def genre_unicode(self):
        """Return the comma-separated names of the book's 'genre' tags."""
        return self.tag_unicode('genre')
156
157     def translators(self):
158         translators = self.get_extra_info_json().get('translators') or []
159         return [
160             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
161         ]
162
163     def translator(self):
164         translators = self.get_extra_info_json().get('translators')
165         if not translators:
166             return None
167         if len(translators) > 3:
168             translators = translators[:2]
169             others = ' i inni'
170         else:
171             others = ''
172         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
173
174     def cover_source(self):
175         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
176
    @property
    def isbn_pdf(self):
        """Value of 'isbn_pdf' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_pdf')

    @property
    def isbn_epub(self):
        """Value of 'isbn_epub' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_epub')

    @property
    def isbn_mobi(self):
        """Value of 'isbn_mobi' in extra info, or None when absent."""
        return self.get_extra_info_json().get('isbn_mobi')
188
189     def is_accessible_to(self, user):
190         if not self.preview:
191             return True
192         if not user.is_authenticated:
193             return False
194         Membership = apps.get_model('club', 'Membership')
195         if Membership.is_active_for(user):
196             return True
197         Funding = apps.get_model('funding', 'Funding')
198         if Funding.objects.filter(user=user, offer__book=self):
199             return True
200         return False
201
202     def save(self, force_insert=False, force_update=False, **kwargs):
203         from sortify import sortify
204
205         self.sort_key = sortify(self.title)[:120]
206         self.title = str(self.title)  # ???
207
208         try:
209             author = self.authors().first().sort_key
210         except AttributeError:
211             author = ''
212         self.sort_key_author = author
213
214         self.cached_author = self.tag_unicode('author')
215         self.has_audience = 'audience' in self.get_extra_info_json()
216
217         if self.preview and not self.preview_key:
218             self.preview_key = get_random_hash(self.slug)[:32]
219
220         ret = super(Book, self).save(force_insert, force_update, **kwargs)
221
222         return ret
223
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' view for this book's slug."""
        return reverse('book_detail', args=[self.slug])

    def gallery_path(self):
        """Return the gallery path for this book's slug (see catalogue.utils.gallery_path)."""
        return gallery_path(self.slug)

    def gallery_url(self):
        """Return the gallery URL for this book's slug (see catalogue.utils.gallery_url)."""
        return gallery_url(self.slug)
232
233     def get_first_text(self):
234         if self.html_file:
235             return self
236         child = self.children.all().order_by('parent_number').first()
237         if child is not None:
238             return child.get_first_text()
239
240     def get_last_text(self):
241         if self.html_file:
242             return self
243         child = self.children.all().order_by('parent_number').last()
244         if child is not None:
245             return child.get_last_text()
246
247     def get_prev_text(self):
248         if not self.parent:
249             return None
250         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
251         if sibling is not None:
252             return sibling.get_last_text()
253
254         if self.parent.html_file:
255             return self.parent
256
257         return self.parent.get_prev_text()
258
259     def get_next_text(self, inside=True):
260         if inside:
261             child = self.children.order_by('parent_number').first()
262             if child is not None:
263                 return child.get_first_text()
264
265         if not self.parent:
266             return None
267         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
268         if sibling is not None:
269             return sibling.get_first_text()
270         return self.parent.get_next_text(inside=False)
271
272     def get_child_audiobook(self):
273         BookMedia = apps.get_model('catalogue', 'BookMedia')
274         if not BookMedia.objects.filter(book__ancestor=self).exists():
275             return None
276         for child in self.children.order_by('parent_number').all():
277             if child.has_mp3_file():
278                 return child
279             child_sub = child.get_child_audiobook()
280             if child_sub is not None:
281                 return child_sub
282
    def get_siblings(self):
        """Return the parent's children ordered by `parent_number`.

        The result is not filtered, so it includes this book. A book without
        a parent gets an empty list.
        """
        if not self.parent:
            return []
        return self.parent.children.all().order_by('parent_number')

    def get_children(self):
        """Return this book's children ordered by `parent_number`."""
        return self.children.all().order_by('parent_number')
290
    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def language_code(self):
        """Map `language` through constants.LANGUAGES_3TO2.

        A code with no entry in the mapping is returned unchanged.
        """
        return constants.LANGUAGES_3TO2.get(self.language, self.language)

    def language_name(self):
        """Return the settings.LANGUAGES name for language_code(), or "" if unknown."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")

    def is_foreign(self):
        """Tell whether language_code() differs from settings.LANGUAGE_CODE."""
        return self.language_code() != settings.LANGUAGE_CODE
303
304     def set_audio_length(self):
305         length = self.get_audio_length()
306         if length > 0:
307             self.audio_length = self.format_audio_length(length)
308             self.save()
309
310     @staticmethod
311     def format_audio_length(seconds):
312         """
313         >>> Book.format_audio_length(1)
314         '0:01'
315         >>> Book.format_audio_length(3661)
316         '1:01:01'
317         """
318         if seconds < 60*60:
319             minutes = seconds // 60
320             seconds = seconds % 60
321             return '%d:%02d' % (minutes, seconds)
322         else:
323             hours = seconds // 3600
324             minutes = seconds % 3600 // 60
325             seconds = seconds % 60
326             return '%d:%02d:%02d' % (hours, minutes, seconds)
327
328     def get_audio_length(self):
329         total = 0
330         for media in self.get_mp3() or ():
331             total += app_settings.GET_MP3_LENGTH(media.file.path)
332         return int(total)
333
334     def has_media(self, type_):
335         if type_ in Book.formats:
336             return bool(getattr(self, "%s_file" % type_))
337         else:
338             return self.media.filter(type=type_).exists()
339
340     def has_audio(self):
341         return self.has_media('mp3')
342
343     def get_media(self, type_):
344         if self.has_media(type_):
345             if type_ in Book.formats:
346                 return getattr(self, "%s_file" % type_)
347             else:
348                 return self.media.filter(type=type_)
349         else:
350             return None
351
    def get_mp3(self):
        """Return the book's 'mp3' media, or None (see get_media())."""
        return self.get_media("mp3")

    def get_odt(self):
        """Return the book's 'odt' media, or None (see get_media())."""
        return self.get_media("odt")

    def get_ogg(self):
        """Return the book's 'ogg' media, or None (see get_media())."""
        return self.get_media("ogg")

    def get_daisy(self):
        """Return the book's 'daisy' media, or None (see get_media())."""
        return self.get_media("daisy")

    def get_audio_epub(self):
        """Return the book's 'audio.epub' media, or None (see get_media())."""
        return self.get_media("audio.epub")
366
367     def media_url(self, format_):
368         media = self.get_media(format_)
369         if media:
370             if self.preview:
371                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
372             else:
373                 return media.url
374         else:
375             return None
376
    def html_url(self):
        """Return media_url() for the 'html' format."""
        return self.media_url('html')

    def pdf_url(self):
        """Return media_url() for the 'pdf' format."""
        return self.media_url('pdf')

    def epub_url(self):
        """Return media_url() for the 'epub' format."""
        return self.media_url('epub')

    def mobi_url(self):
        """Return media_url() for the 'mobi' format."""
        return self.media_url('mobi')

    def txt_url(self):
        """Return media_url() for the 'txt' format."""
        return self.media_url('txt')

    def fb2_url(self):
        """Return media_url() for the 'fb2' format."""
        return self.media_url('fb2')

    def xml_url(self):
        """Return media_url() for the 'xml' format."""
        return self.media_url('xml')
397
    def has_description(self):
        """Tell whether `description` is non-empty."""
        return len(self.description) > 0
    # short_description / boolean are function attributes set for display
    # purposes; presumably consumed by the Django admin — confirm there.
    has_description.short_description = 'opis'
    has_description.boolean = True

    def has_mp3_file(self):
        """Tell whether the book has 'mp3' media."""
        return self.has_media("mp3")
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        """Tell whether the book has 'ogg' media."""
        return self.has_media("ogg")
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        """Tell whether the book has 'daisy' media."""
        return self.has_media("daisy")
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def has_sync_file(self):
        """Tell whether the book has 'sync' media."""
        return self.has_media("sync")
420
421     def get_sync(self):
422         with self.get_media('sync').first().file.open('r') as f:
423             sync = f.read().split('\n')
424         offset = float(sync[0])
425         items = []
426         for line in sync[1:]:
427             if not line:
428                 continue
429             start, end, elid = line.split()
430             items.append([elid, float(start) + offset])
431         return json.dumps(items)
432     
    def has_audio_epub_file(self):
        """Tell whether the book has 'audio.epub' media."""
        return self.has_media("audio.epub")

    @property
    def media_daisy(self):
        """The book's 'daisy' media, or None (see get_media())."""
        return self.get_media('daisy')

    @property
    def media_audio_epub(self):
        """The book's 'audio.epub' media, or None (see get_media())."""
        return self.get_media('audio.epub')
443
444     def get_audiobooks(self):
445         ogg_files = {}
446         for m in self.media.filter(type='ogg').order_by().iterator():
447             ogg_files[m.name] = m
448
449         audiobooks = []
450         projects = set()
451         total_duration = 0
452         for mp3 in self.media.filter(type='mp3').iterator():
453             # ogg files are always from the same project
454             meta = mp3.get_extra_info_json()
455             project = meta.get('project')
456             if not project:
457                 # temporary fallback
458                 project = 'CzytamySłuchając'
459
460             projects.add((project, meta.get('funded_by', '')))
461             total_duration += mp3.duration or 0
462
463             media = {'mp3': mp3}
464
465             ogg = ogg_files.get(mp3.name)
466             if ogg:
467                 media['ogg'] = ogg
468             audiobooks.append(media)
469
470         projects = sorted(projects)
471         total_duration = '%d:%02d' % (
472             total_duration // 60,
473             total_duration % 60
474         )
475         return audiobooks, projects, total_duration
476
477     def wldocument(self, parse_dublincore=True, inherit=True):
478         from catalogue.import_utils import ORMDocProvider
479         from librarian.parser import WLDocument
480
481         if inherit and self.parent:
482             meta_fallbacks = self.parent.cover_info()
483         else:
484             meta_fallbacks = None
485
486         return WLDocument.from_file(
487             self.xml_file.path,
488             provider=ORMDocProvider(self),
489             parse_dublincore=parse_dublincore,
490             meta_fallbacks=meta_fallbacks)
491
492     def wldocument2(self):
493         from catalogue.import_utils import ORMDocProvider
494         from librarian.document import WLDocument
495         doc = WLDocument(
496             self.xml_file.path,
497             provider=ORMDocProvider(self)
498         )
499         doc.meta.update(self.cover_info())
500         return doc
501
502
503     @staticmethod
504     def zip_format(format_):
505         def pretty_file_name(book):
506             return "%s/%s.%s" % (
507                 book.get_extra_info_json()['author'],
508                 book.slug,
509                 format_)
510
511         field_name = "%s_file" % format_
512         field = getattr(Book, field_name)
513         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
514         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
515         return create_zip(paths, field.ZIP)
516
517     def zip_audiobooks(self, format_):
518         bm = BookMedia.objects.filter(book=self, type=format_)
519         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
520         licenses = set()
521         for m in bm:
522             license = constants.LICENSES.get(
523                 m.get_extra_info_json().get('license'), {}
524             ).get('locative')
525             if license:
526                 licenses.add(license)
527         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
528             'licenses': licenses,
529             'meta': self.wldocument2().meta,
530         })
531         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
532
533     def search_index(self, index=None):
534         if not self.findable:
535             return
536         from search.index import Index
537         Index.index_book(self)
538
539     # will make problems in conjunction with paid previews
540     def download_pictures(self, remote_gallery_url):
541         # This is only needed for legacy relative image paths.
542         gallery_path = self.gallery_path()
543         # delete previous files, so we don't include old files in ebooks
544         if os.path.isdir(gallery_path):
545             for filename in os.listdir(gallery_path):
546                 file_path = os.path.join(gallery_path, filename)
547                 os.unlink(file_path)
548         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
549         if ilustr_elements:
550             makedirs(gallery_path)
551             for ilustr in ilustr_elements:
552                 ilustr_src = ilustr.get('src')
553                 if '/' in ilustr_src:
554                     continue
555                 ilustr_path = os.path.join(gallery_path, ilustr_src)
556                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
557
558     def load_abstract(self):
559         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
560         if abstract is not None:
561             self.abstract = transform_abstrakt(abstract)
562         else:
563             self.abstract = ''
564
565     def load_toc(self):
566         self.toc = ''
567         if self.html_file:
568             parser = html.HTMLParser(encoding='utf-8')
569             tree = html.parse(self.html_file.path, parser=parser)
570             toc = tree.find('//div[@id="toc"]/ol')
571             if toc is None or not len(toc):
572                 return
573             html_link = reverse('book_text', args=[self.slug])
574             for a in toc.findall('.//a'):
575                 a.attrib['href'] = html_link + a.attrib['href']
576             self.toc = html.tostring(toc, encoding='unicode')
577             # div#toc
578
579     @classmethod
580     def from_xml_file(cls, xml_file, **kwargs):
581         from django.core.files import File
582         from librarian import dcparser
583
584         # use librarian to parse meta-data
585         book_info = dcparser.parse(xml_file)
586
587         if not isinstance(xml_file, File):
588             xml_file = File(open(xml_file))
589
590         try:
591             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
592         finally:
593             xml_file.close()
594
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True, logo=None, logo_mono=None):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's `xml_file`.
            book_info: parsed metadata; this method reads its `url.slug`,
                `language`, `title`, `variant_of`, optional `parts` and
                `to_dict()`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: iterable of build names to skip ('cover' or format
                names); merged with app_settings.DONT_BUILD.
            search_index: schedule search indexing (also requires `findable`
                and settings.NO_SEARCH_INDEX being false).
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.
            logo, logo_mono: if given, stored in the book's extra info.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the slug exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist('Książka "%s" nie istnieje.' % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview state is only set here, for a newly created book.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists('Książka %s już istnieje' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        extra = book_info.to_dict()
        if logo:
            extra['logo'] = logo
        if logo_mono:
            extra['logo_mono'] = logo_mono
        book.extra_info = json.dumps(extra)
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Make sure every tag used by this book is flagged as used for books.
        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags with the metadata tags plus the shelves
        # ('set' tags) it had before the update.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts as children, numbered in the listed order.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        # Children that gained, lost or share a changed parent cover may need
        # their own covers rebuilt.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
721
722     def update_references(self):
723         Entity = apps.get_model('references', 'Entity')
724         doc = self.wldocument2()
725         doc._compat_assign_section_ids()
726         doc._compat_assign_ordered_ids()
727         refs = {}
728         for ref_elem in doc.references():
729             uri = ref_elem.attrib.get('href', '')
730             if not uri:
731                 continue
732             if uri in refs:
733                 ref = refs[uri]
734             else:
735                 entity, entity_created = Entity.objects.get_or_create(uri=uri)
736                 if entity_created:
737                     try:
738                         entity.populate()
739                     except:
740                         pass
741                     else:
742                         entity.save()
743                 ref, ref_created = entity.reference_set.get_or_create(book=self)
744                 refs[uri] = ref
745                 if not ref_created:
746                     ref.occurence_set.all().delete()
747             sec = ref_elem.get_link()
748             m = re.match(r'sec(\d+)', sec)
749             assert m is not None
750             sec = int(m.group(1))
751             snippet = ref_elem.get_snippet()
752             b = builders['html-snippet']()
753             for s in snippet:
754                 s.html_build(b)
755             html = b.output().get_bytes().decode('utf-8')
756
757             ref.occurence_set.create(
758                 section=sec,
759                 html=html
760             )
761         self.reference_set.exclude(entity__uri__in=refs).delete()
762
    @property
    def references(self):
        """All references of this book, with their entities selected in the same query."""
        return self.reference_set.all().select_related('entity')
766
767     @classmethod
768     @transaction.atomic
769     def repopulate_ancestors(cls):
770         """Fixes the ancestry cache."""
771         # TODO: table names
772         cursor = connection.cursor()
773         if connection.vendor == 'postgres':
774             cursor.execute("TRUNCATE catalogue_book_ancestor")
775             cursor.execute("""
776                 WITH RECURSIVE ancestry AS (
777                     SELECT book.id, book.parent_id
778                     FROM catalogue_book AS book
779                     WHERE book.parent_id IS NOT NULL
780                     UNION
781                     SELECT ancestor.id, book.parent_id
782                     FROM ancestry AS ancestor, catalogue_book AS book
783                     WHERE ancestor.parent_id = book.id
784                         AND book.parent_id IS NOT NULL
785                     )
786                 INSERT INTO catalogue_book_ancestor
787                     (from_book_id, to_book_id)
788                     SELECT id, parent_id
789                     FROM ancestry
790                     ORDER BY id;
791                 """)
792         else:
793             cursor.execute("DELETE FROM catalogue_book_ancestor")
794             for b in cls.objects.exclude(parent=None):
795                 parent = b.parent
796                 while parent is not None:
797                     b.ancestor.add(parent)
798                     parent = parent.parent
799
800     @property
801     def ancestors(self):
802         if self.parent:
803             for anc in self.parent.ancestors:
804                 yield anc
805             yield self.parent
806         else:
807             return []
808
    def clear_cache(self):
        """Clear the cached renders of `mini_box` and `mini_box_nolink`."""
        clear_cached_renders(self.mini_box)
        clear_cached_renders(self.mini_box_nolink)
812
813     def cover_info(self, inherit=True):
814         """Returns a dictionary to serve as fallback for BookInfo.
815
816         For now, the only thing inherited is the cover image.
817         """
818         need = False
819         info = {}
820         for field in ('cover_url', 'cover_by', 'cover_source'):
821             val = self.get_extra_info_json().get(field)
822             if val:
823                 info[field] = val
824             else:
825                 need = True
826         if inherit and need and self.parent is not None:
827             parent_info = self.parent.cover_info()
828             parent_info.update(info)
829             info = parent_info
830         return info
831
832     def related_themes(self):
833         return Tag.objects.usage_for_queryset(
834             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
835             counts=True).filter(category='theme').order_by('-count')
836
837     def parent_cover_changed(self):
838         """Called when parent book's cover image is changed."""
839         if not self.cover_info(inherit=False):
840             if 'cover' not in app_settings.DONT_BUILD:
841                 self.cover.build_delay()
842                 self.cover_clean.build_delay()
843                 self.cover_thumb.build_delay()
844                 self.cover_api_thumb.build_delay()
845                 self.simple_cover.build_delay()
846                 self.cover_ebookpoint.build_delay()
847             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
848                 if format_ not in app_settings.DONT_BUILD:
849                     getattr(self, '%s_file' % format_).build_delay()
850             for child in self.children.all():
851                 child.parent_cover_changed()
852
853     def other_versions(self):
854         """Find other versions (i.e. in other languages) of the book."""
855         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
856
857     def parents(self):
858         books = []
859         parent = self.parent
860         while parent is not None:
861             books.insert(0, parent)
862             parent = parent.parent
863         return books
864
865     def pretty_title(self, html_links=False):
866         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
867         books = self.parents() + [self]
868         names.extend([(b.title, b.get_absolute_url()) for b in books])
869
870         if html_links:
871             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
872         else:
873             names = [tag[0] for tag in names]
874         return ', '.join(names)
875
876     def publisher(self):
877         publisher = self.get_extra_info_json()['publisher']
878         if isinstance(publisher, str):
879             return publisher
880         elif isinstance(publisher, list):
881             return ', '.join(publisher)
882
883     @classmethod
884     def tagged_top_level(cls, tags):
885         """ Returns top-level books tagged with `tags`.
886
887         It only returns those books which don't have ancestors which are
888         also tagged with those tags.
889
890         """
891         objects = cls.tagged.with_all(tags)
892         return objects.filter(findable=True).exclude(ancestor__in=objects)
893
894     @classmethod
895     def book_list(cls, book_filter=None):
896         """Generates a hierarchical listing of all books.
897
898         Books are optionally filtered with a test function.
899
900         """
901
902         books_by_parent = {}
903         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
904         if book_filter:
905             books = books.filter(book_filter).distinct()
906
907             book_ids = set(b['pk'] for b in books.values("pk").iterator())
908             for book in books.iterator():
909                 parent = book.parent_id
910                 if parent not in book_ids:
911                     parent = None
912                 books_by_parent.setdefault(parent, []).append(book)
913         else:
914             for book in books.iterator():
915                 books_by_parent.setdefault(book.parent_id, []).append(book)
916
917         orphans = []
918         books_by_author = OrderedDict()
919         for tag in Tag.objects.filter(category='author').iterator():
920             books_by_author[tag] = []
921
922         for book in books_by_parent.get(None, ()):
923             authors = list(book.authors().only('pk'))
924             if authors:
925                 for author in authors:
926                     books_by_author[author].append(book)
927             else:
928                 orphans.append(book)
929
930         return books_by_author, orphans, books_by_parent
931
    # Maps audience codes found in a book's extra info to a pair of
    # (sort rank, Polish display label). Used by audiences_pl(), which
    # sorts by the pair and returns the labels; several codes share
    # a label, so they collapse into a single entry there.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
942
943     def audiences_pl(self):
944         audiences = self.get_extra_info_json().get('audiences', [])
945         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
946         return [a[1] for a in audiences]
947
948     def stage_note(self):
949         stage = self.get_extra_info_json().get('stage')
950         if stage and stage < '0.4':
951             return (_('Ten utwór wymaga uwspółcześnienia'),
952                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
953         else:
954             return None, None
955
956     def choose_fragments(self, number):
957         fragments = self.fragments.order_by()
958         fragments_count = fragments.count()
959         if not fragments_count and self.children.exists():
960             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
961             fragments_count = fragments.count()
962         if fragments_count:
963             if fragments_count > number:
964                 offset = randint(0, fragments_count - number)
965             else:
966                 offset = 0
967             return fragments[offset : offset + number]
968         elif self.parent:
969             return self.parent.choose_fragments(number)
970         else:
971             return []
972
973     def choose_fragment(self):
974         fragments = self.choose_fragments(1)
975         if fragments:
976             return fragments[0]
977         else:
978             return None
979
980     def fragment_data(self):
981         fragment = self.choose_fragment()
982         if fragment:
983             return {
984                 'title': fragment.book.pretty_title(),
985                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
986             }
987         else:
988             return None
989
990     def update_popularity(self):
991         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
992         try:
993             pop = self.popularity
994             pop.count = count
995             pop.save()
996         except BookPopularity.DoesNotExist:
997             BookPopularity.objects.create(book=self, count=count)
998
999     def ridero_link(self):
1000         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1001
1002     def like(self, user):
1003         from social.utils import likes, get_set, set_sets
1004         if not likes(user, self):
1005             tag = get_set(user, '')
1006             set_sets(user, self, [tag])
1007
1008     def unlike(self, user):
1009         from social.utils import likes, set_sets
1010         if likes(user, self):
1011             set_sets(user, self, [])
1012
1013     def full_sort_key(self):
1014         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1015
1016     def cover_color(self):
1017         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1018
1019     @cached_render('catalogue/book_mini_box.html')
1020     def mini_box(self):
1021         return {
1022             'book': self
1023         }
1024
1025     @cached_render('catalogue/book_mini_box.html')
1026     def mini_box_nolink(self):
1027         return {
1028             'book': self,
1029             'no_link': True,
1030         }
1031
1032
1033 class BookPopularity(models.Model):
1034     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1035     count = models.IntegerField(default=0, db_index=True)