Moving forward.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import gettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue import fields
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
31
# Storage instance passed as ``storage=`` to Book's generated-file fields and main cover field.
bofh_storage = BofhFileSystemStorage()
33
34
35 class Book(models.Model):
36     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Books sharing a common_slug are reported as other versions of each other
    # (see other_versions()).
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; children are ordered by this value.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON document stored as text; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview books are access-restricted (see is_accessible_to()) and their
    # media URLs go through the 'embargo_link' view using preview_key.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Formats stored in "<format>_file" fields; any other media type is looked
    # up in the related ``media`` set (see has_media()).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache, rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both fields below are derived values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # ``published`` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
100
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass
103
    class Meta:
        # Default ordering: by author sort key first, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
109
110     def __str__(self):
111         return self.title
112
113     def get_extra_info_json(self):
114         return json.loads(self.extra_info or '{}')
115
116     def get_initial(self):
117         try:
118             return re.search(r'\w', self.title, re.U).group(0)
119         except AttributeError:
120             return ''
121
122     def authors(self):
123         return self.tags.filter(category='author')
124
125     def epochs(self):
126         return self.tags.filter(category='epoch')
127
128     def genres(self):
129         return self.tags.filter(category='genre')
130
131     def kinds(self):
132         return self.tags.filter(category='kind')
133
134     def tag_unicode(self, category):
135         relations = prefetched_relations(self, category)
136         if relations:
137             return ', '.join(rel.tag.name for rel in relations)
138         else:
139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
140
141     def tags_by_category(self):
142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
143
144     def author_unicode(self):
145         return self.cached_author
146
147     def kind_unicode(self):
148         return self.tag_unicode('kind')
149
150     def epoch_unicode(self):
151         return self.tag_unicode('epoch')
152
153     def genre_unicode(self):
154         return self.tag_unicode('genre')
155
156     def translators(self):
157         translators = self.get_extra_info_json().get('translators') or []
158         return [
159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
160         ]
161
162     def translator(self):
163         translators = self.get_extra_info_json().get('translators')
164         if not translators:
165             return None
166         if len(translators) > 3:
167             translators = translators[:2]
168             others = ' i inni'
169         else:
170             others = ''
171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
172
173     def cover_source(self):
174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
175
176     @property
177     def isbn_pdf(self):
178         return self.get_extra_info_json().get('isbn_pdf')
179
180     @property
181     def isbn_epub(self):
182         return self.get_extra_info_json().get('isbn_epub')
183
184     @property
185     def isbn_mobi(self):
186         return self.get_extra_info_json().get('isbn_mobi')
187
188     def is_accessible_to(self, user):
189         if not self.preview:
190             return True
191         if not user.is_authenticated:
192             return False
193         Membership = apps.get_model('club', 'Membership')
194         if Membership.is_active_for(user):
195             return True
196         Funding = apps.get_model('funding', 'Funding')
197         if Funding.objects.filter(user=user, offer__book=self):
198             return True
199         return False
200
201     def save(self, force_insert=False, force_update=False, **kwargs):
202         from sortify import sortify
203
204         self.sort_key = sortify(self.title)[:120]
205         self.title = str(self.title)  # ???
206
207         try:
208             author = self.authors().first().sort_key
209         except AttributeError:
210             author = ''
211         self.sort_key_author = author
212
213         self.cached_author = self.tag_unicode('author')
214         self.has_audience = 'audience' in self.get_extra_info_json()
215
216         if self.preview and not self.preview_key:
217             self.preview_key = get_random_hash(self.slug)[:32]
218
219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
220
221         return ret
222
223     def get_absolute_url(self):
224         return reverse('book_detail', args=[self.slug])
225
226     def gallery_path(self):
227         return gallery_path(self.slug)
228
229     def gallery_url(self):
230         return gallery_url(self.slug)
231
232     def get_first_text(self):
233         if self.html_file:
234             return self
235         child = self.children.all().order_by('parent_number').first()
236         if child is not None:
237             return child.get_first_text()
238
239     def get_last_text(self):
240         if self.html_file:
241             return self
242         child = self.children.all().order_by('parent_number').last()
243         if child is not None:
244             return child.get_last_text()
245
246     def get_prev_text(self):
247         if not self.parent:
248             return None
249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
250         if sibling is not None:
251             return sibling.get_last_text()
252
253         if self.parent.html_file:
254             return self.parent
255
256         return self.parent.get_prev_text()
257
258     def get_next_text(self, inside=True):
259         if inside:
260             child = self.children.order_by('parent_number').first()
261             if child is not None:
262                 return child.get_first_text()
263
264         if not self.parent:
265             return None
266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
267         if sibling is not None:
268             return sibling.get_first_text()
269         return self.parent.get_next_text(inside=False)
270
271     def get_child_audiobook(self):
272         BookMedia = apps.get_model('catalogue', 'BookMedia')
273         if not BookMedia.objects.filter(book__ancestor=self).exists():
274             return None
275         for child in self.children.order_by('parent_number').all():
276             if child.has_mp3_file():
277                 return child
278             child_sub = child.get_child_audiobook()
279             if child_sub is not None:
280                 return child_sub
281
282     def get_siblings(self):
283         if not self.parent:
284             return []
285         return self.parent.children.all().order_by('parent_number')
286
287     def get_children(self):
288         return self.children.all().order_by('parent_number')
289
290     @property
291     def name(self):
292         return self.title
293
294     def language_code(self):
295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
296
297     def language_name(self):
298         return dict(settings.LANGUAGES).get(self.language_code(), "")
299
300     def is_foreign(self):
301         return self.language_code() != settings.LANGUAGE_CODE
302
303     def set_audio_length(self):
304         length = self.get_audio_length()
305         if length > 0:
306             self.audio_length = self.format_audio_length(length)
307             self.save()
308
309     @staticmethod
310     def format_audio_length(seconds):
311         """
312         >>> Book.format_audio_length(1)
313         '0:01'
314         >>> Book.format_audio_length(3661)
315         '1:01:01'
316         """
317         if seconds < 60*60:
318             minutes = seconds // 60
319             seconds = seconds % 60
320             return '%d:%02d' % (minutes, seconds)
321         else:
322             hours = seconds // 3600
323             minutes = seconds % 3600 // 60
324             seconds = seconds % 60
325             return '%d:%02d:%02d' % (hours, minutes, seconds)
326
327     def get_audio_length(self):
328         total = 0
329         for media in self.get_mp3() or ():
330             total += app_settings.GET_MP3_LENGTH(media.file.path)
331         return int(total)
332
333     def has_media(self, type_):
334         if type_ in Book.formats:
335             return bool(getattr(self, "%s_file" % type_))
336         else:
337             return self.media.filter(type=type_).exists()
338
339     def has_audio(self):
340         return self.has_media('mp3')
341
342     def get_media(self, type_):
343         if self.has_media(type_):
344             if type_ in Book.formats:
345                 return getattr(self, "%s_file" % type_)
346             else:
347                 return self.media.filter(type=type_)
348         else:
349             return None
350
351     def get_mp3(self):
352         return self.get_media("mp3")
353
354     def get_odt(self):
355         return self.get_media("odt")
356
357     def get_ogg(self):
358         return self.get_media("ogg")
359
360     def get_daisy(self):
361         return self.get_media("daisy")
362
363     def get_audio_epub(self):
364         return self.get_media("audio.epub")
365
366     def media_url(self, format_):
367         media = self.get_media(format_)
368         if media:
369             if self.preview:
370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
371             else:
372                 return media.url
373         else:
374             return None
375
376     def html_url(self):
377         return self.media_url('html')
378
379     def pdf_url(self):
380         return self.media_url('pdf')
381
382     def epub_url(self):
383         return self.media_url('epub')
384
385     def mobi_url(self):
386         return self.media_url('mobi')
387
388     def txt_url(self):
389         return self.media_url('txt')
390
391     def fb2_url(self):
392         return self.media_url('fb2')
393
394     def xml_url(self):
395         return self.media_url('xml')
396
397     def has_description(self):
398         return len(self.description) > 0
399     has_description.short_description = _('description')
400     has_description.boolean = True
401
402     def has_mp3_file(self):
403         return self.has_media("mp3")
404     has_mp3_file.short_description = 'MP3'
405     has_mp3_file.boolean = True
406
407     def has_ogg_file(self):
408         return self.has_media("ogg")
409     has_ogg_file.short_description = 'OGG'
410     has_ogg_file.boolean = True
411
412     def has_daisy_file(self):
413         return self.has_media("daisy")
414     has_daisy_file.short_description = 'DAISY'
415     has_daisy_file.boolean = True
416
417     def has_sync_file(self):
418         return self.has_media("sync")
419
420     def get_sync(self):
421         with self.get_media('sync').first().file.open('r') as f:
422             sync = f.read().split('\n')
423         offset = float(sync[0])
424         items = []
425         for line in sync[1:]:
426             if not line:
427                 continue
428             start, end, elid = line.split()
429             items.append([elid, float(start) + offset])
430         return json.dumps(items)
431     
432     def has_audio_epub_file(self):
433         return self.has_media("audio.epub")
434
435     @property
436     def media_daisy(self):
437         return self.get_media('daisy')
438
439     @property
440     def media_audio_epub(self):
441         return self.get_media('audio.epub')
442
443     def get_audiobooks(self):
444         ogg_files = {}
445         for m in self.media.filter(type='ogg').order_by().iterator():
446             ogg_files[m.name] = m
447
448         audiobooks = []
449         projects = set()
450         total_duration = 0
451         for mp3 in self.media.filter(type='mp3').iterator():
452             # ogg files are always from the same project
453             meta = mp3.get_extra_info_json()
454             project = meta.get('project')
455             if not project:
456                 # temporary fallback
457                 project = 'CzytamySłuchając'
458
459             projects.add((project, meta.get('funded_by', '')))
460             total_duration += mp3.duration or 0
461
462             media = {'mp3': mp3}
463
464             ogg = ogg_files.get(mp3.name)
465             if ogg:
466                 media['ogg'] = ogg
467             audiobooks.append(media)
468
469         projects = sorted(projects)
470         total_duration = '%d:%02d' % (
471             total_duration // 60,
472             total_duration % 60
473         )
474         return audiobooks, projects, total_duration
475
476     def wldocument(self, parse_dublincore=True, inherit=True):
477         from catalogue.import_utils import ORMDocProvider
478         from librarian.parser import WLDocument
479
480         if inherit and self.parent:
481             meta_fallbacks = self.parent.cover_info()
482         else:
483             meta_fallbacks = None
484
485         return WLDocument.from_file(
486             self.xml_file.path,
487             provider=ORMDocProvider(self),
488             parse_dublincore=parse_dublincore,
489             meta_fallbacks=meta_fallbacks)
490
491     def wldocument2(self):
492         from catalogue.import_utils import ORMDocProvider
493         from librarian.document import WLDocument
494         doc = WLDocument(
495             self.xml_file.path,
496             provider=ORMDocProvider(self)
497         )
498         doc.meta.update(self.cover_info())
499         return doc
500
501
502     @staticmethod
503     def zip_format(format_):
504         def pretty_file_name(book):
505             return "%s/%s.%s" % (
506                 book.get_extra_info_json()['author'],
507                 book.slug,
508                 format_)
509
510         field_name = "%s_file" % format_
511         field = getattr(Book, field_name)
512         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
513         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
514         return create_zip(paths, field.ZIP)
515
516     def zip_audiobooks(self, format_):
517         bm = BookMedia.objects.filter(book=self, type=format_)
518         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
519         licenses = set()
520         for m in bm:
521             license = constants.LICENSES.get(
522                 m.get_extra_info_json().get('license'), {}
523             ).get('locative')
524             if license:
525                 licenses.add(license)
526         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
527             'licenses': licenses,
528             'meta': self.wldocument2().meta,
529         })
530         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
531
532     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
533         if not self.findable:
534             return
535         if index is None:
536             from search.index import Index
537             index = Index()
538         try:
539             index.index_book(self, book_info)
540             if index_tags:
541                 index.index_tags()
542             if commit:
543                 index.index.commit()
544         except Exception as e:
545             index.index.rollback()
546             raise e
547
548     # will make problems in conjunction with paid previews
549     def download_pictures(self, remote_gallery_url):
550         # This is only needed for legacy relative image paths.
551         gallery_path = self.gallery_path()
552         # delete previous files, so we don't include old files in ebooks
553         if os.path.isdir(gallery_path):
554             for filename in os.listdir(gallery_path):
555                 file_path = os.path.join(gallery_path, filename)
556                 os.unlink(file_path)
557         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
558         if ilustr_elements:
559             makedirs(gallery_path)
560             for ilustr in ilustr_elements:
561                 ilustr_src = ilustr.get('src')
562                 if '/' in ilustr_src:
563                     continue
564                 ilustr_path = os.path.join(gallery_path, ilustr_src)
565                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
566
567     def load_abstract(self):
568         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
569         if abstract is not None:
570             self.abstract = transform_abstrakt(abstract)
571         else:
572             self.abstract = ''
573
574     def load_toc(self):
575         self.toc = ''
576         if self.html_file:
577             parser = html.HTMLParser(encoding='utf-8')
578             tree = html.parse(self.html_file.path, parser=parser)
579             toc = tree.find('//div[@id="toc"]/ol')
580             if toc is None or not len(toc):
581                 return
582             html_link = reverse('book_text', args=[self.slug])
583             for a in toc.findall('.//a'):
584                 a.attrib['href'] = html_link + a.attrib['href']
585             self.toc = html.tostring(toc, encoding='unicode')
586             # div#toc
587
588     @classmethod
589     def from_xml_file(cls, xml_file, **kwargs):
590         from django.core.files import File
591         from librarian import dcparser
592
593         # use librarian to parse meta-data
594         book_info = dcparser.parse(xml_file)
595
596         if not isinstance(xml_file, File):
597             xml_file = File(open(xml_file))
598
599         try:
600             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
601         finally:
602             xml_file.close()
603
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads ``url.slug``,
                ``parts`` (if present), ``language``, ``title``,
                ``variant_of`` and ``to_dict()``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of outputs to skip building ('cover' or a
                format name); merged with app_settings.DONT_BUILD.
            search_index: schedule search indexing of the book.
            search_index_tags: passed to the indexing task as ``index_tags``.
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview lasting that many days from today.
            findable: stored on the book; also gates search indexing.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the slug exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags with the metadata tags plus the shelves
        # ('set' tags) it had before the update.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats from EBOOK_FORMATS_WITHOUT_CHILDREN are only built for
        # books that have no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        # Children whose effective parent cover may have changed rebuild theirs.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
725
726     def get_master(self):
727         master_tags = [
728             'opowiadanie',
729             'powiesc',
730             'dramat_wierszowany_l',
731             'dramat_wierszowany_lp',
732             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
733             'wywiad',
734         ]
735         from librarian.parser import WLDocument
736         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
737         root = wld.edoc.getroot()
738         for master in root.iter():
739             if master.tag in master_tags:
740                 return master
741
742     def update_references(self):
743         from references.models import Entity, Reference
744         master = self.get_master()
745         if master is None:
746             master = []
747         found = set()
748         for i, sec in enumerate(master):
749             for ref in sec.findall('.//ref'):
750                 href = ref.attrib.get('href', '')
751                 if not href or href in found:
752                     continue
753                 found.add(href)
754                 entity, created = Entity.objects.get_or_create(
755                     uri=href
756                 )
757                 ref, created = Reference.objects.get_or_create(
758                     book=self,
759                     entity=entity
760                 )
761                 ref.first_section = 'sec%d' % (i + 1)
762                 entity.populate()
763                 entity.save()
764         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
765
766     @property
767     def references(self):
768         return self.reference_set.all().select_related('entity')
769
770     @classmethod
771     @transaction.atomic
772     def repopulate_ancestors(cls):
773         """Fixes the ancestry cache."""
774         # TODO: table names
775         cursor = connection.cursor()
776         if connection.vendor == 'postgres':
777             cursor.execute("TRUNCATE catalogue_book_ancestor")
778             cursor.execute("""
779                 WITH RECURSIVE ancestry AS (
780                     SELECT book.id, book.parent_id
781                     FROM catalogue_book AS book
782                     WHERE book.parent_id IS NOT NULL
783                     UNION
784                     SELECT ancestor.id, book.parent_id
785                     FROM ancestry AS ancestor, catalogue_book AS book
786                     WHERE ancestor.parent_id = book.id
787                         AND book.parent_id IS NOT NULL
788                     )
789                 INSERT INTO catalogue_book_ancestor
790                     (from_book_id, to_book_id)
791                     SELECT id, parent_id
792                     FROM ancestry
793                     ORDER BY id;
794                 """)
795         else:
796             cursor.execute("DELETE FROM catalogue_book_ancestor")
797             for b in cls.objects.exclude(parent=None):
798                 parent = b.parent
799                 while parent is not None:
800                     b.ancestor.add(parent)
801                     parent = parent.parent
802
803     @property
804     def ancestors(self):
805         if self.parent:
806             for anc in self.parent.ancestors:
807                 yield anc
808             yield self.parent
809         else:
810             return []
811
812     def clear_cache(self):
813         clear_cached_renders(self.mini_box)
814         clear_cached_renders(self.mini_box_nolink)
815
816     def cover_info(self, inherit=True):
817         """Returns a dictionary to serve as fallback for BookInfo.
818
819         For now, the only thing inherited is the cover image.
820         """
821         need = False
822         info = {}
823         for field in ('cover_url', 'cover_by', 'cover_source'):
824             val = self.get_extra_info_json().get(field)
825             if val:
826                 info[field] = val
827             else:
828                 need = True
829         if inherit and need and self.parent is not None:
830             parent_info = self.parent.cover_info()
831             parent_info.update(info)
832             info = parent_info
833         return info
834
835     def related_themes(self):
836         return Tag.objects.usage_for_queryset(
837             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
838             counts=True).filter(category='theme').order_by('-count')
839
840     def parent_cover_changed(self):
841         """Called when parent book's cover image is changed."""
842         if not self.cover_info(inherit=False):
843             if 'cover' not in app_settings.DONT_BUILD:
844                 self.cover.build_delay()
845                 self.cover_clean.build_delay()
846                 self.cover_thumb.build_delay()
847                 self.cover_api_thumb.build_delay()
848                 self.simple_cover.build_delay()
849                 self.cover_ebookpoint.build_delay()
850             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
851                 if format_ not in app_settings.DONT_BUILD:
852                     getattr(self, '%s_file' % format_).build_delay()
853             for child in self.children.all():
854                 child.parent_cover_changed()
855
856     def other_versions(self):
857         """Find other versions (i.e. in other languages) of the book."""
858         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
859
860     def parents(self):
861         books = []
862         parent = self.parent
863         while parent is not None:
864             books.insert(0, parent)
865             parent = parent.parent
866         return books
867
868     def pretty_title(self, html_links=False):
869         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
870         books = self.parents() + [self]
871         names.extend([(b.title, b.get_absolute_url()) for b in books])
872
873         if html_links:
874             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
875         else:
876             names = [tag[0] for tag in names]
877         return ', '.join(names)
878
879     def publisher(self):
880         publisher = self.get_extra_info_json()['publisher']
881         if isinstance(publisher, str):
882             return publisher
883         elif isinstance(publisher, list):
884             return ', '.join(publisher)
885
886     @classmethod
887     def tagged_top_level(cls, tags):
888         """ Returns top-level books tagged with `tags`.
889
890         It only returns those books which don't have ancestors which are
891         also tagged with those tags.
892
893         """
894         objects = cls.tagged.with_all(tags)
895         return objects.filter(findable=True).exclude(ancestor__in=objects)
896
897     @classmethod
898     def book_list(cls, book_filter=None):
899         """Generates a hierarchical listing of all books.
900
901         Books are optionally filtered with a test function.
902
903         """
904
905         books_by_parent = {}
906         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
907         if book_filter:
908             books = books.filter(book_filter).distinct()
909
910             book_ids = set(b['pk'] for b in books.values("pk").iterator())
911             for book in books.iterator():
912                 parent = book.parent_id
913                 if parent not in book_ids:
914                     parent = None
915                 books_by_parent.setdefault(parent, []).append(book)
916         else:
917             for book in books.iterator():
918                 books_by_parent.setdefault(book.parent_id, []).append(book)
919
920         orphans = []
921         books_by_author = OrderedDict()
922         for tag in Tag.objects.filter(category='author').iterator():
923             books_by_author[tag] = []
924
925         for book in books_by_parent.get(None, ()):
926             authors = list(book.authors().only('pk'))
927             if authors:
928                 for author in authors:
929                     books_by_author[author].append(book)
930             else:
931                 orphans.append(book)
932
933         return books_by_author, orphans, books_by_parent
934
    # Maps an audience code found in the book's extra info to a pair
    # (sort rank, Polish display name).  Several codes share one pair,
    # so audiences_pl() can collapse them into a single entry.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
945
946     def audiences_pl(self):
947         audiences = self.get_extra_info_json().get('audiences', [])
948         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
949         return [a[1] for a in audiences]
950
951     def stage_note(self):
952         stage = self.get_extra_info_json().get('stage')
953         if stage and stage < '0.4':
954             return (_('This work needs modernisation'),
955                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
956         else:
957             return None, None
958
959     def choose_fragments(self, number):
960         fragments = self.fragments.order_by()
961         fragments_count = fragments.count()
962         if not fragments_count and self.children.exists():
963             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
964             fragments_count = fragments.count()
965         if fragments_count:
966             if fragments_count > number:
967                 offset = randint(0, fragments_count - number)
968             else:
969                 offset = 0
970             return fragments[offset : offset + number]
971         elif self.parent:
972             return self.parent.choose_fragments(number)
973         else:
974             return []
975
976     def choose_fragment(self):
977         fragments = self.choose_fragments(1)
978         if fragments:
979             return fragments[0]
980         else:
981             return None
982
983     def fragment_data(self):
984         fragment = self.choose_fragment()
985         if fragment:
986             return {
987                 'title': fragment.book.pretty_title(),
988                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
989             }
990         else:
991             return None
992
993     def update_popularity(self):
994         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
995         try:
996             pop = self.popularity
997             pop.count = count
998             pop.save()
999         except BookPopularity.DoesNotExist:
1000             BookPopularity.objects.create(book=self, count=count)
1001
1002     def ridero_link(self):
1003         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
1004
1005     def like(self, user):
1006         from social.utils import likes, get_set, set_sets
1007         if not likes(user, self):
1008             tag = get_set(user, '')
1009             set_sets(user, self, [tag])
1010
1011     def unlike(self, user):
1012         from social.utils import likes, set_sets
1013         if likes(user, self):
1014             set_sets(user, self, [])
1015
1016     def full_sort_key(self):
1017         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1018
1019     def cover_color(self):
1020         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1021
1022     @cached_render('catalogue/book_mini_box.html')
1023     def mini_box(self):
1024         return {
1025             'book': self
1026         }
1027
1028     @cached_render('catalogue/book_mini_box.html')
1029     def mini_box_nolink(self):
1030         return {
1031             'book': self,
1032             'no_link': True,
1033         }
1034
1035
class BookPopularity(models.Model):
    """Popularity counter stored in a separate row, one per book.

    ``Book.update_popularity()`` writes here the number of distinct users
    found among the book's tags of category 'set'.
    """
    # Reachable from the book as ``book.popularity``; deleted with the book.
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed in the database.
    count = models.IntegerField(default=0, db_index=True)