f002158a3be29cfa18ff9459fc70cc50d20d897f
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import gettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue import fields
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
31
32 bofh_storage = BofhFileSystemStorage()
33
34
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Books form a tree: `parent` points at the containing book and
    `parent_number` orders the children of one parent. Metadata taken from
    the source document is kept as JSON text in `extra_info`.
    """
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Slug shared by all versions of a book; other_versions() matches on it.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # Filled by load_abstract() / load_toc().
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among the children of its parent.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Text produced by format_audio_length(), e.g. '1:01:01'.
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview books are accessible only to selected users (see is_accessible_to()).
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Formats stored in "<format>_file" fields on the book itself; any other
    # media type is looked up through the related `media` objects.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache, rebuilt from `parent` links by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Both values below are refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals; `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
109
110     def __str__(self):
111         return self.title
112
113     def get_extra_info_json(self):
114         return json.loads(self.extra_info or '{}')
115
116     def get_initial(self):
117         try:
118             return re.search(r'\w', self.title, re.U).group(0)
119         except AttributeError:
120             return ''
121
122     def authors(self):
123         return self.tags.filter(category='author')
124
125     def epochs(self):
126         return self.tags.filter(category='epoch')
127
128     def genres(self):
129         return self.tags.filter(category='genre')
130
131     def kinds(self):
132         return self.tags.filter(category='kind')
133
134     def tag_unicode(self, category):
135         relations = prefetched_relations(self, category)
136         if relations:
137             return ', '.join(rel.tag.name for rel in relations)
138         else:
139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
140
141     def tags_by_category(self):
142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
143
144     def author_unicode(self):
145         return self.cached_author
146
147     def kind_unicode(self):
148         return self.tag_unicode('kind')
149
150     def epoch_unicode(self):
151         return self.tag_unicode('epoch')
152
153     def genre_unicode(self):
154         return self.tag_unicode('genre')
155
156     def translators(self):
157         translators = self.get_extra_info_json().get('translators') or []
158         return [
159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
160         ]
161
162     def translator(self):
163         translators = self.get_extra_info_json().get('translators')
164         if not translators:
165             return None
166         if len(translators) > 3:
167             translators = translators[:2]
168             others = ' i inni'
169         else:
170             others = ''
171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
172
173     def cover_source(self):
174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
175
176     @property
177     def isbn_pdf(self):
178         return self.get_extra_info_json().get('isbn_pdf')
179
180     @property
181     def isbn_epub(self):
182         return self.get_extra_info_json().get('isbn_epub')
183
184     @property
185     def isbn_mobi(self):
186         return self.get_extra_info_json().get('isbn_mobi')
187
188     def is_accessible_to(self, user):
189         if not self.preview:
190             return True
191         if not user.is_authenticated:
192             return False
193         Membership = apps.get_model('club', 'Membership')
194         if Membership.is_active_for(user):
195             return True
196         Funding = apps.get_model('funding', 'Funding')
197         if Funding.objects.filter(user=user, offer__book=self):
198             return True
199         return False
200
201     def save(self, force_insert=False, force_update=False, **kwargs):
202         from sortify import sortify
203
204         self.sort_key = sortify(self.title)[:120]
205         self.title = str(self.title)  # ???
206
207         try:
208             author = self.authors().first().sort_key
209         except AttributeError:
210             author = ''
211         self.sort_key_author = author
212
213         self.cached_author = self.tag_unicode('author')
214         self.has_audience = 'audience' in self.get_extra_info_json()
215
216         if self.preview and not self.preview_key:
217             self.preview_key = get_random_hash(self.slug)[:32]
218
219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
220
221         return ret
222
223     def get_absolute_url(self):
224         return reverse('book_detail', args=[self.slug])
225
226     def gallery_path(self):
227         return gallery_path(self.slug)
228
229     def gallery_url(self):
230         return gallery_url(self.slug)
231
232     def get_first_text(self):
233         if self.html_file:
234             return self
235         child = self.children.all().order_by('parent_number').first()
236         if child is not None:
237             return child.get_first_text()
238
239     def get_last_text(self):
240         if self.html_file:
241             return self
242         child = self.children.all().order_by('parent_number').last()
243         if child is not None:
244             return child.get_last_text()
245
246     def get_prev_text(self):
247         if not self.parent:
248             return None
249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
250         if sibling is not None:
251             return sibling.get_last_text()
252
253         if self.parent.html_file:
254             return self.parent
255
256         return self.parent.get_prev_text()
257
258     def get_next_text(self, inside=True):
259         if inside:
260             child = self.children.order_by('parent_number').first()
261             if child is not None:
262                 return child.get_first_text()
263
264         if not self.parent:
265             return None
266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
267         if sibling is not None:
268             return sibling.get_first_text()
269         return self.parent.get_next_text(inside=False)
270
271     def get_child_audiobook(self):
272         BookMedia = apps.get_model('catalogue', 'BookMedia')
273         if not BookMedia.objects.filter(book__ancestor=self).exists():
274             return None
275         for child in self.children.order_by('parent_number').all():
276             if child.has_mp3_file():
277                 return child
278             child_sub = child.get_child_audiobook()
279             if child_sub is not None:
280                 return child_sub
281
282     def get_siblings(self):
283         if not self.parent:
284             return []
285         return self.parent.children.all().order_by('parent_number')
286
287     def get_children(self):
288         return self.children.all().order_by('parent_number')
289
290     @property
291     def name(self):
292         return self.title
293
294     def language_code(self):
295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
296
297     def language_name(self):
298         return dict(settings.LANGUAGES).get(self.language_code(), "")
299
300     def is_foreign(self):
301         return self.language_code() != settings.LANGUAGE_CODE
302
303     def set_audio_length(self):
304         length = self.get_audio_length()
305         if length > 0:
306             self.audio_length = self.format_audio_length(length)
307             self.save()
308
309     @staticmethod
310     def format_audio_length(seconds):
311         """
312         >>> Book.format_audio_length(1)
313         '0:01'
314         >>> Book.format_audio_length(3661)
315         '1:01:01'
316         """
317         if seconds < 60*60:
318             minutes = seconds // 60
319             seconds = seconds % 60
320             return '%d:%02d' % (minutes, seconds)
321         else:
322             hours = seconds // 3600
323             minutes = seconds % 3600 // 60
324             seconds = seconds % 60
325             return '%d:%02d:%02d' % (hours, minutes, seconds)
326
327     def get_audio_length(self):
328         total = 0
329         for media in self.get_mp3() or ():
330             total += app_settings.GET_MP3_LENGTH(media.file.path)
331         return int(total)
332
333     def has_media(self, type_):
334         if type_ in Book.formats:
335             return bool(getattr(self, "%s_file" % type_))
336         else:
337             return self.media.filter(type=type_).exists()
338
339     def has_audio(self):
340         return self.has_media('mp3')
341
342     def get_media(self, type_):
343         if self.has_media(type_):
344             if type_ in Book.formats:
345                 return getattr(self, "%s_file" % type_)
346             else:
347                 return self.media.filter(type=type_)
348         else:
349             return None
350
351     def get_mp3(self):
352         return self.get_media("mp3")
353
354     def get_odt(self):
355         return self.get_media("odt")
356
357     def get_ogg(self):
358         return self.get_media("ogg")
359
360     def get_daisy(self):
361         return self.get_media("daisy")
362
363     def get_audio_epub(self):
364         return self.get_media("audio.epub")
365
366     def media_url(self, format_):
367         media = self.get_media(format_)
368         if media:
369             if self.preview:
370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
371             else:
372                 return media.url
373         else:
374             return None
375
376     def html_url(self):
377         return self.media_url('html')
378
379     def pdf_url(self):
380         return self.media_url('pdf')
381
382     def epub_url(self):
383         return self.media_url('epub')
384
385     def mobi_url(self):
386         return self.media_url('mobi')
387
388     def txt_url(self):
389         return self.media_url('txt')
390
391     def fb2_url(self):
392         return self.media_url('fb2')
393
394     def xml_url(self):
395         return self.media_url('xml')
396
397     def has_description(self):
398         return len(self.description) > 0
399     has_description.short_description = _('description')
400     has_description.boolean = True
401
402     def has_mp3_file(self):
403         return self.has_media("mp3")
404     has_mp3_file.short_description = 'MP3'
405     has_mp3_file.boolean = True
406
407     def has_ogg_file(self):
408         return self.has_media("ogg")
409     has_ogg_file.short_description = 'OGG'
410     has_ogg_file.boolean = True
411
412     def has_daisy_file(self):
413         return self.has_media("daisy")
414     has_daisy_file.short_description = 'DAISY'
415     has_daisy_file.boolean = True
416
417     def has_audio_epub_file(self):
418         return self.has_media("audio.epub")
419
420     @property
421     def media_daisy(self):
422         return self.get_media('daisy')
423
424     @property
425     def media_audio_epub(self):
426         return self.get_media('audio.epub')
427
428     def get_audiobooks(self):
429         ogg_files = {}
430         for m in self.media.filter(type='ogg').order_by().iterator():
431             ogg_files[m.name] = m
432
433         audiobooks = []
434         projects = set()
435         total_duration = 0
436         for mp3 in self.media.filter(type='mp3').iterator():
437             # ogg files are always from the same project
438             meta = mp3.get_extra_info_json()
439             project = meta.get('project')
440             if not project:
441                 # temporary fallback
442                 project = 'CzytamySłuchając'
443
444             projects.add((project, meta.get('funded_by', '')))
445             total_duration += mp3.duration or 0
446
447             media = {'mp3': mp3}
448
449             ogg = ogg_files.get(mp3.name)
450             if ogg:
451                 media['ogg'] = ogg
452             audiobooks.append(media)
453
454         projects = sorted(projects)
455         total_duration = '%d:%02d' % (
456             total_duration // 60,
457             total_duration % 60
458         )
459         return audiobooks, projects, total_duration
460
461     def wldocument(self, parse_dublincore=True, inherit=True):
462         from catalogue.import_utils import ORMDocProvider
463         from librarian.parser import WLDocument
464
465         if inherit and self.parent:
466             meta_fallbacks = self.parent.cover_info()
467         else:
468             meta_fallbacks = None
469
470         return WLDocument.from_file(
471             self.xml_file.path,
472             provider=ORMDocProvider(self),
473             parse_dublincore=parse_dublincore,
474             meta_fallbacks=meta_fallbacks)
475
476     def wldocument2(self):
477         from catalogue.import_utils import ORMDocProvider
478         from librarian.document import WLDocument
479         doc = WLDocument(
480             self.xml_file.path,
481             provider=ORMDocProvider(self)
482         )
483         doc.meta.update(self.cover_info())
484         return doc
485
486
487     @staticmethod
488     def zip_format(format_):
489         def pretty_file_name(book):
490             return "%s/%s.%s" % (
491                 book.get_extra_info_json()['author'],
492                 book.slug,
493                 format_)
494
495         field_name = "%s_file" % format_
496         field = getattr(Book, field_name)
497         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
498         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
499         return create_zip(paths, field.ZIP)
500
501     def zip_audiobooks(self, format_):
502         bm = BookMedia.objects.filter(book=self, type=format_)
503         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
504         licenses = set()
505         for m in bm:
506             license = constants.LICENSES.get(
507                 m.get_extra_info_json().get('license'), {}
508             ).get('locative')
509             if license:
510                 licenses.add(license)
511         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
512             'licenses': licenses,
513             'meta': self.wldocument2().meta,
514         })
515         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
516
517     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
518         if not self.findable:
519             return
520         if index is None:
521             from search.index import Index
522             index = Index()
523         try:
524             index.index_book(self, book_info)
525             if index_tags:
526                 index.index_tags()
527             if commit:
528                 index.index.commit()
529         except Exception as e:
530             index.index.rollback()
531             raise e
532
533     # will make problems in conjunction with paid previews
534     def download_pictures(self, remote_gallery_url):
535         # This is only needed for legacy relative image paths.
536         gallery_path = self.gallery_path()
537         # delete previous files, so we don't include old files in ebooks
538         if os.path.isdir(gallery_path):
539             for filename in os.listdir(gallery_path):
540                 file_path = os.path.join(gallery_path, filename)
541                 os.unlink(file_path)
542         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
543         if ilustr_elements:
544             makedirs(gallery_path)
545             for ilustr in ilustr_elements:
546                 ilustr_src = ilustr.get('src')
547                 if '/' in ilustr_src:
548                     continue
549                 ilustr_path = os.path.join(gallery_path, ilustr_src)
550                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
551
552     def load_abstract(self):
553         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
554         if abstract is not None:
555             self.abstract = transform_abstrakt(abstract)
556         else:
557             self.abstract = ''
558
559     def load_toc(self):
560         self.toc = ''
561         if self.html_file:
562             parser = html.HTMLParser(encoding='utf-8')
563             tree = html.parse(self.html_file.path, parser=parser)
564             toc = tree.find('//div[@id="toc"]/ol')
565             if toc is None or not len(toc):
566                 return
567             html_link = reverse('book_text', args=[self.slug])
568             for a in toc.findall('.//a'):
569                 a.attrib['href'] = html_link + a.attrib['href']
570             self.toc = html.tostring(toc, encoding='unicode')
571             # div#toc
572
573     @classmethod
574     def from_xml_file(cls, xml_file, **kwargs):
575         from django.core.files import File
576         from librarian import dcparser
577
578         # use librarian to parse meta-data
579         book_info = dcparser.parse(xml_file)
580
581         if not isinstance(xml_file, File):
582             xml_file = File(open(xml_file))
583
584         try:
585             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
586         finally:
587             xml_file.close()
588
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; this method reads `url.slug`,
                `language`, `title`, `variant_of`, `to_dict()` and, when
                present, `parts`.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of outputs ('cover' or format names) not to
                build; merged with `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (also requires `findable`
                and `settings.NO_SEARCH_INDEX` being false).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: when set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview until today + `days`.
            findable: stored on the book.

        Returns:
            The created or updated Book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug has characters other than a-z, 0-9 and '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is decided only when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            # Remembered so a cover change can be detected after the update.
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set: metadata tags plus the shelves saved above.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        # Current children that are no longer listed as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        # NOTE(review): the HTML build is not subject to dont_build.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
710
711     def get_master(self):
712         master_tags = [
713             'opowiadanie',
714             'powiesc',
715             'dramat_wierszowany_l',
716             'dramat_wierszowany_lp',
717             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
718             'wywiad',
719         ]
720         from librarian.parser import WLDocument
721         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
722         root = wld.edoc.getroot()
723         for master in root.iter():
724             if master.tag in master_tags:
725                 return master
726
727     def update_references(self):
728         from references.models import Entity, Reference
729         master = self.get_master()
730         if master is None:
731             master = []
732         found = set()
733         for i, sec in enumerate(master):
734             for ref in sec.findall('.//ref'):
735                 href = ref.attrib.get('href', '')
736                 if not href or href in found:
737                     continue
738                 found.add(href)
739                 entity, created = Entity.objects.get_or_create(
740                     uri=href
741                 )
742                 ref, created = Reference.objects.get_or_create(
743                     book=self,
744                     entity=entity
745                 )
746                 ref.first_section = 'sec%d' % (i + 1)
747                 entity.populate()
748                 entity.save()
749         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
750
751     @property
752     def references(self):
753         return self.reference_set.all().select_related('entity')
754
755     @classmethod
756     @transaction.atomic
757     def repopulate_ancestors(cls):
758         """Fixes the ancestry cache."""
759         # TODO: table names
760         cursor = connection.cursor()
761         if connection.vendor == 'postgres':
762             cursor.execute("TRUNCATE catalogue_book_ancestor")
763             cursor.execute("""
764                 WITH RECURSIVE ancestry AS (
765                     SELECT book.id, book.parent_id
766                     FROM catalogue_book AS book
767                     WHERE book.parent_id IS NOT NULL
768                     UNION
769                     SELECT ancestor.id, book.parent_id
770                     FROM ancestry AS ancestor, catalogue_book AS book
771                     WHERE ancestor.parent_id = book.id
772                         AND book.parent_id IS NOT NULL
773                     )
774                 INSERT INTO catalogue_book_ancestor
775                     (from_book_id, to_book_id)
776                     SELECT id, parent_id
777                     FROM ancestry
778                     ORDER BY id;
779                 """)
780         else:
781             cursor.execute("DELETE FROM catalogue_book_ancestor")
782             for b in cls.objects.exclude(parent=None):
783                 parent = b.parent
784                 while parent is not None:
785                     b.ancestor.add(parent)
786                     parent = parent.parent
787
788     @property
789     def ancestors(self):
790         if self.parent:
791             for anc in self.parent.ancestors:
792                 yield anc
793             yield self.parent
794         else:
795             return []
796
797     def clear_cache(self):
798         clear_cached_renders(self.mini_box)
799         clear_cached_renders(self.mini_box_nolink)
800
801     def cover_info(self, inherit=True):
802         """Returns a dictionary to serve as fallback for BookInfo.
803
804         For now, the only thing inherited is the cover image.
805         """
806         need = False
807         info = {}
808         for field in ('cover_url', 'cover_by', 'cover_source'):
809             val = self.get_extra_info_json().get(field)
810             if val:
811                 info[field] = val
812             else:
813                 need = True
814         if inherit and need and self.parent is not None:
815             parent_info = self.parent.cover_info()
816             parent_info.update(info)
817             info = parent_info
818         return info
819
820     def related_themes(self):
821         return Tag.objects.usage_for_queryset(
822             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
823             counts=True).filter(category='theme').order_by('-count')
824
825     def parent_cover_changed(self):
826         """Called when parent book's cover image is changed."""
827         if not self.cover_info(inherit=False):
828             if 'cover' not in app_settings.DONT_BUILD:
829                 self.cover.build_delay()
830                 self.cover_clean.build_delay()
831                 self.cover_thumb.build_delay()
832                 self.cover_api_thumb.build_delay()
833                 self.simple_cover.build_delay()
834                 self.cover_ebookpoint.build_delay()
835             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
836                 if format_ not in app_settings.DONT_BUILD:
837                     getattr(self, '%s_file' % format_).build_delay()
838             for child in self.children.all():
839                 child.parent_cover_changed()
840
841     def other_versions(self):
842         """Find other versions (i.e. in other languages) of the book."""
843         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
844
845     def parents(self):
846         books = []
847         parent = self.parent
848         while parent is not None:
849             books.insert(0, parent)
850             parent = parent.parent
851         return books
852
853     def pretty_title(self, html_links=False):
854         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
855         books = self.parents() + [self]
856         names.extend([(b.title, b.get_absolute_url()) for b in books])
857
858         if html_links:
859             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
860         else:
861             names = [tag[0] for tag in names]
862         return ', '.join(names)
863
864     def publisher(self):
865         publisher = self.get_extra_info_json()['publisher']
866         if isinstance(publisher, str):
867             return publisher
868         elif isinstance(publisher, list):
869             return ', '.join(publisher)
870
871     @classmethod
872     def tagged_top_level(cls, tags):
873         """ Returns top-level books tagged with `tags`.
874
875         It only returns those books which don't have ancestors which are
876         also tagged with those tags.
877
878         """
879         objects = cls.tagged.with_all(tags)
880         return objects.filter(findable=True).exclude(ancestor__in=objects)
881
882     @classmethod
883     def book_list(cls, book_filter=None):
884         """Generates a hierarchical listing of all books.
885
886         Books are optionally filtered with a test function.
887
888         """
889
890         books_by_parent = {}
891         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
892         if book_filter:
893             books = books.filter(book_filter).distinct()
894
895             book_ids = set(b['pk'] for b in books.values("pk").iterator())
896             for book in books.iterator():
897                 parent = book.parent_id
898                 if parent not in book_ids:
899                     parent = None
900                 books_by_parent.setdefault(parent, []).append(book)
901         else:
902             for book in books.iterator():
903                 books_by_parent.setdefault(book.parent_id, []).append(book)
904
905         orphans = []
906         books_by_author = OrderedDict()
907         for tag in Tag.objects.filter(category='author').iterator():
908             books_by_author[tag] = []
909
910         for book in books_by_parent.get(None, ()):
911             authors = list(book.authors().only('pk'))
912             if authors:
913                 for author in authors:
914                     books_by_author[author].append(book)
915             else:
916                 orphans.append(book)
917
918         return books_by_author, orphans, books_by_parent
919
920     _audiences_pl = {
921         "SP": (1, "szkoła podstawowa"),
922         "SP1": (1, "szkoła podstawowa"),
923         "SP2": (1, "szkoła podstawowa"),
924         "SP3": (1, "szkoła podstawowa"),
925         "P": (1, "szkoła podstawowa"),
926         "G": (2, "gimnazjum"),
927         "L": (3, "liceum"),
928         "LP": (3, "liceum"),
929     }
930
931     def audiences_pl(self):
932         audiences = self.get_extra_info_json().get('audiences', [])
933         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
934         return [a[1] for a in audiences]
935
936     def stage_note(self):
937         stage = self.get_extra_info_json().get('stage')
938         if stage and stage < '0.4':
939             return (_('This work needs modernisation'),
940                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
941         else:
942             return None, None
943
944     def choose_fragments(self, number):
945         fragments = self.fragments.order_by()
946         fragments_count = fragments.count()
947         if not fragments_count and self.children.exists():
948             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
949             fragments_count = fragments.count()
950         if fragments_count:
951             if fragments_count > number:
952                 offset = randint(0, fragments_count - number)
953             else:
954                 offset = 0
955             return fragments[offset : offset + number]
956         elif self.parent:
957             return self.parent.choose_fragments(number)
958         else:
959             return []
960
961     def choose_fragment(self):
962         fragments = self.choose_fragments(1)
963         if fragments:
964             return fragments[0]
965         else:
966             return None
967
968     def fragment_data(self):
969         fragment = self.choose_fragment()
970         if fragment:
971             return {
972                 'title': fragment.book.pretty_title(),
973                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
974             }
975         else:
976             return None
977
978     def update_popularity(self):
979         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
980         try:
981             pop = self.popularity
982             pop.count = count
983             pop.save()
984         except BookPopularity.DoesNotExist:
985             BookPopularity.objects.create(book=self, count=count)
986
987     def ridero_link(self):
988         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
989
990     def like(self, user):
991         from social.utils import likes, get_set, set_sets
992         if not likes(user, self):
993             tag = get_set(user, '')
994             set_sets(user, self, [tag])
995
996     def unlike(self, user):
997         from social.utils import likes, set_sets
998         if likes(user, self):
999             set_sets(user, self, [])
1000
1001     def full_sort_key(self):
1002         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1003
1004     def cover_color(self):
1005         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1006
1007     @cached_render('catalogue/book_mini_box.html')
1008     def mini_box(self):
1009         return {
1010             'book': self
1011         }
1012
1013     @cached_render('catalogue/book_mini_box.html')
1014     def mini_box_nolink(self):
1015         return {
1016             'book': self,
1017             'no_link': True,
1018         }
1019
1020
class BookPopularity(models.Model):
    """Popularity counter of a single book, kept in a one-to-one row.

    Reachable from a book as ``book.popularity``; written by
    ``Book.update_popularity()``.
    """
    book = models.OneToOneField(
        Book,
        on_delete=models.CASCADE,
        related_name='popularity',
    )
    count = models.IntegerField(default=0, db_index=True)