bcbefeaa1313687c4626109ba77ddf8abad0f2ab
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import gettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue import fields
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
31
# Shared storage instance passed as ``storage=`` to the generated-file
# fields and the main cover field of Book below.
bofh_storage = BofhFileSystemStorage()
33
34
35 class Book(models.Model):
36     """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are recomputed on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set by from_text_and_meta() to the slug of the book this one is a
    # variant of, or to the book's own slug; other_versions() matches on it.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    # abstract and toc are filled in by load_abstract() and load_toc().
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; the navigation helpers order by it.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded metadata; read it through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    # Preformatted by format_audio_length(); written by set_audio_length().
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    # Generated in save() when the book is a preview and has no key yet.
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Format names for which has_media()/get_media() look at the
    # ``<format>_file`` field instead of the related media objects.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt wholesale by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # cached_author and has_audience are recomputed on every save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta() with ``instance=book``.
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True
100
101     class AlreadyExists(Exception):
102         pass
103
    class Meta:
        # Default ordering: by the author sort key, then by the title sort key.
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
109
110     def __str__(self):
111         return self.title
112
113     def get_extra_info_json(self):
114         return json.loads(self.extra_info or '{}')
115
116     def get_initial(self):
117         try:
118             return re.search(r'\w', self.title, re.U).group(0)
119         except AttributeError:
120             return ''
121
122     def authors(self):
123         return self.tags.filter(category='author')
124
125     def epochs(self):
126         return self.tags.filter(category='epoch')
127
128     def genres(self):
129         return self.tags.filter(category='genre')
130
131     def kinds(self):
132         return self.tags.filter(category='kind')
133
134     def tag_unicode(self, category):
135         relations = prefetched_relations(self, category)
136         if relations:
137             return ', '.join(rel.tag.name for rel in relations)
138         else:
139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
140
141     def tags_by_category(self):
142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
143
144     def author_unicode(self):
145         return self.cached_author
146
147     def kind_unicode(self):
148         return self.tag_unicode('kind')
149
150     def epoch_unicode(self):
151         return self.tag_unicode('epoch')
152
153     def genre_unicode(self):
154         return self.tag_unicode('genre')
155
156     def translators(self):
157         translators = self.get_extra_info_json().get('translators') or []
158         return [
159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
160         ]
161
162     def translator(self):
163         translators = self.get_extra_info_json().get('translators')
164         if not translators:
165             return None
166         if len(translators) > 3:
167             translators = translators[:2]
168             others = ' i inni'
169         else:
170             others = ''
171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
172
173     def cover_source(self):
174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
175
176     @property
177     def isbn_pdf(self):
178         return self.get_extra_info_json().get('isbn_pdf')
179
180     @property
181     def isbn_epub(self):
182         return self.get_extra_info_json().get('isbn_epub')
183
184     @property
185     def isbn_mobi(self):
186         return self.get_extra_info_json().get('isbn_mobi')
187
188     def is_accessible_to(self, user):
189         if not self.preview:
190             return True
191         if not user.is_authenticated:
192             return False
193         Membership = apps.get_model('club', 'Membership')
194         if Membership.is_active_for(user):
195             return True
196         Funding = apps.get_model('funding', 'Funding')
197         if Funding.objects.filter(user=user, offer__book=self):
198             return True
199         return False
200
201     def save(self, force_insert=False, force_update=False, **kwargs):
202         from sortify import sortify
203
204         self.sort_key = sortify(self.title)[:120]
205         self.title = str(self.title)  # ???
206
207         try:
208             author = self.authors().first().sort_key
209         except AttributeError:
210             author = ''
211         self.sort_key_author = author
212
213         self.cached_author = self.tag_unicode('author')
214         self.has_audience = 'audience' in self.get_extra_info_json()
215
216         if self.preview and not self.preview_key:
217             self.preview_key = get_random_hash(self.slug)[:32]
218
219         ret = super(Book, self).save(force_insert, force_update, **kwargs)
220
221         return ret
222
223     def get_absolute_url(self):
224         return reverse('book_detail', args=[self.slug])
225
226     def gallery_path(self):
227         return gallery_path(self.slug)
228
229     def gallery_url(self):
230         return gallery_url(self.slug)
231
232     def get_first_text(self):
233         if self.html_file:
234             return self
235         child = self.children.all().order_by('parent_number').first()
236         if child is not None:
237             return child.get_first_text()
238
239     def get_last_text(self):
240         if self.html_file:
241             return self
242         child = self.children.all().order_by('parent_number').last()
243         if child is not None:
244             return child.get_last_text()
245
246     def get_prev_text(self):
247         if not self.parent:
248             return None
249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
250         if sibling is not None:
251             return sibling.get_last_text()
252
253         if self.parent.html_file:
254             return self.parent
255
256         return self.parent.get_prev_text()
257
258     def get_next_text(self, inside=True):
259         if inside:
260             child = self.children.order_by('parent_number').first()
261             if child is not None:
262                 return child.get_first_text()
263
264         if not self.parent:
265             return None
266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
267         if sibling is not None:
268             return sibling.get_first_text()
269         return self.parent.get_next_text(inside=False)
270
271     def get_child_audiobook(self):
272         BookMedia = apps.get_model('catalogue', 'BookMedia')
273         if not BookMedia.objects.filter(book__ancestor=self).exists():
274             return None
275         for child in self.children.order_by('parent_number').all():
276             if child.has_mp3_file():
277                 return child
278             child_sub = child.get_child_audiobook()
279             if child_sub is not None:
280                 return child_sub
281
282     def get_siblings(self):
283         if not self.parent:
284             return []
285         return self.parent.children.all().order_by('parent_number')
286
287     def get_children(self):
288         return self.children.all().order_by('parent_number')
289
290     @property
291     def name(self):
292         return self.title
293
294     def language_code(self):
295         return constants.LANGUAGES_3TO2.get(self.language, self.language)
296
297     def language_name(self):
298         return dict(settings.LANGUAGES).get(self.language_code(), "")
299
300     def is_foreign(self):
301         return self.language_code() != settings.LANGUAGE_CODE
302
303     def set_audio_length(self):
304         length = self.get_audio_length()
305         if length > 0:
306             self.audio_length = self.format_audio_length(length)
307             self.save()
308
309     @staticmethod
310     def format_audio_length(seconds):
311         """
312         >>> Book.format_audio_length(1)
313         '0:01'
314         >>> Book.format_audio_length(3661)
315         '1:01:01'
316         """
317         if seconds < 60*60:
318             minutes = seconds // 60
319             seconds = seconds % 60
320             return '%d:%02d' % (minutes, seconds)
321         else:
322             hours = seconds // 3600
323             minutes = seconds % 3600 // 60
324             seconds = seconds % 60
325             return '%d:%02d:%02d' % (hours, minutes, seconds)
326
327     def get_audio_length(self):
328         total = 0
329         for media in self.get_mp3() or ():
330             total += app_settings.GET_MP3_LENGTH(media.file.path)
331         return int(total)
332
333     def has_media(self, type_):
334         if type_ in Book.formats:
335             return bool(getattr(self, "%s_file" % type_))
336         else:
337             return self.media.filter(type=type_).exists()
338
339     def has_audio(self):
340         return self.has_media('mp3')
341
342     def get_media(self, type_):
343         if self.has_media(type_):
344             if type_ in Book.formats:
345                 return getattr(self, "%s_file" % type_)
346             else:
347                 return self.media.filter(type=type_)
348         else:
349             return None
350
351     def get_mp3(self):
352         return self.get_media("mp3")
353
354     def get_odt(self):
355         return self.get_media("odt")
356
357     def get_ogg(self):
358         return self.get_media("ogg")
359
360     def get_daisy(self):
361         return self.get_media("daisy")
362
363     def get_audio_epub(self):
364         return self.get_media("audio.epub")
365
366     def media_url(self, format_):
367         media = self.get_media(format_)
368         if media:
369             if self.preview:
370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
371             else:
372                 return media.url
373         else:
374             return None
375
376     def html_url(self):
377         return self.media_url('html')
378
379     def pdf_url(self):
380         return self.media_url('pdf')
381
382     def epub_url(self):
383         return self.media_url('epub')
384
385     def mobi_url(self):
386         return self.media_url('mobi')
387
388     def txt_url(self):
389         return self.media_url('txt')
390
391     def fb2_url(self):
392         return self.media_url('fb2')
393
394     def xml_url(self):
395         return self.media_url('xml')
396
397     def has_description(self):
398         return len(self.description) > 0
399     has_description.short_description = _('description')
400     has_description.boolean = True
401
402     def has_mp3_file(self):
403         return self.has_media("mp3")
404     has_mp3_file.short_description = 'MP3'
405     has_mp3_file.boolean = True
406
407     def has_ogg_file(self):
408         return self.has_media("ogg")
409     has_ogg_file.short_description = 'OGG'
410     has_ogg_file.boolean = True
411
412     def has_daisy_file(self):
413         return self.has_media("daisy")
414     has_daisy_file.short_description = 'DAISY'
415     has_daisy_file.boolean = True
416
417     def has_sync_file(self):
418         return self.has_media("sync")
419
420     def get_sync(self):
421         with self.get_media('sync').first().file.open('r') as f:
422             sync = f.read().split('\n')
423         offset = float(sync[0])
424         items = []
425         for line in sync[1:]:
426             if not line:
427                 continue
428             start, end, elid = line.split()
429             items.append([elid, float(start) + offset])
430         return json.dumps(items)
431     
432     def has_audio_epub_file(self):
433         return self.has_media("audio.epub")
434
435     @property
436     def media_daisy(self):
437         return self.get_media('daisy')
438
439     @property
440     def media_audio_epub(self):
441         return self.get_media('audio.epub')
442
443     def get_audiobooks(self):
444         ogg_files = {}
445         for m in self.media.filter(type='ogg').order_by().iterator():
446             ogg_files[m.name] = m
447
448         audiobooks = []
449         projects = set()
450         total_duration = 0
451         for mp3 in self.media.filter(type='mp3').iterator():
452             # ogg files are always from the same project
453             meta = mp3.get_extra_info_json()
454             project = meta.get('project')
455             if not project:
456                 # temporary fallback
457                 project = 'CzytamySłuchając'
458
459             projects.add((project, meta.get('funded_by', '')))
460             total_duration += mp3.duration or 0
461
462             media = {'mp3': mp3}
463
464             ogg = ogg_files.get(mp3.name)
465             if ogg:
466                 media['ogg'] = ogg
467             audiobooks.append(media)
468
469         projects = sorted(projects)
470         total_duration = '%d:%02d' % (
471             total_duration // 60,
472             total_duration % 60
473         )
474         return audiobooks, projects, total_duration
475
476     def wldocument(self, parse_dublincore=True, inherit=True):
477         from catalogue.import_utils import ORMDocProvider
478         from librarian.parser import WLDocument
479
480         if inherit and self.parent:
481             meta_fallbacks = self.parent.cover_info()
482         else:
483             meta_fallbacks = None
484
485         return WLDocument.from_file(
486             self.xml_file.path,
487             provider=ORMDocProvider(self),
488             parse_dublincore=parse_dublincore,
489             meta_fallbacks=meta_fallbacks)
490
491     def wldocument2(self):
492         from catalogue.import_utils import ORMDocProvider
493         from librarian.document import WLDocument
494         doc = WLDocument(
495             self.xml_file.path,
496             provider=ORMDocProvider(self)
497         )
498         doc.meta.update(self.cover_info())
499         return doc
500
501
502     @staticmethod
503     def zip_format(format_):
504         def pretty_file_name(book):
505             return "%s/%s.%s" % (
506                 book.get_extra_info_json()['author'],
507                 book.slug,
508                 format_)
509
510         field_name = "%s_file" % format_
511         field = getattr(Book, field_name)
512         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
513         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
514         return create_zip(paths, field.ZIP)
515
516     def zip_audiobooks(self, format_):
517         bm = BookMedia.objects.filter(book=self, type=format_)
518         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
519         licenses = set()
520         for m in bm:
521             license = constants.LICENSES.get(
522                 m.get_extra_info_json().get('license'), {}
523             ).get('locative')
524             if license:
525                 licenses.add(license)
526         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
527             'licenses': licenses,
528             'meta': self.wldocument2().meta,
529         })
530         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
531
532     def search_index(self, index=None):
533         if not self.findable:
534             return
535         from search.index import Index
536         Index.index_book(self)
537
538     # will make problems in conjunction with paid previews
539     def download_pictures(self, remote_gallery_url):
540         # This is only needed for legacy relative image paths.
541         gallery_path = self.gallery_path()
542         # delete previous files, so we don't include old files in ebooks
543         if os.path.isdir(gallery_path):
544             for filename in os.listdir(gallery_path):
545                 file_path = os.path.join(gallery_path, filename)
546                 os.unlink(file_path)
547         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
548         if ilustr_elements:
549             makedirs(gallery_path)
550             for ilustr in ilustr_elements:
551                 ilustr_src = ilustr.get('src')
552                 if '/' in ilustr_src:
553                     continue
554                 ilustr_path = os.path.join(gallery_path, ilustr_src)
555                 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
556
557     def load_abstract(self):
558         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
559         if abstract is not None:
560             self.abstract = transform_abstrakt(abstract)
561         else:
562             self.abstract = ''
563
564     def load_toc(self):
565         self.toc = ''
566         if self.html_file:
567             parser = html.HTMLParser(encoding='utf-8')
568             tree = html.parse(self.html_file.path, parser=parser)
569             toc = tree.find('//div[@id="toc"]/ol')
570             if toc is None or not len(toc):
571                 return
572             html_link = reverse('book_text', args=[self.slug])
573             for a in toc.findall('.//a'):
574                 a.attrib['href'] = html_link + a.attrib['href']
575             self.toc = html.tostring(toc, encoding='unicode')
576             # div#toc
577
578     @classmethod
579     def from_xml_file(cls, xml_file, **kwargs):
580         from django.core.files import File
581         from librarian import dcparser
582
583         # use librarian to parse meta-data
584         book_info = dcparser.parse(xml_file)
585
586         if not isinstance(xml_file, File):
587             xml_file = File(open(xml_file))
588
589         try:
590             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
591         finally:
592             xml_file.close()
593
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's ``xml_file``.
            book_info: parsed metadata; the code reads its ``url.slug``,
                ``language``, ``title``, ``variant_of``, ``to_dict()`` and,
                if present, ``parts``.
            overwrite: allow updating a book whose slug already exists.
            dont_build: names of builds to skip ('cover' or format names);
                merged with ``app_settings.DONT_BUILD``.
            search_index: schedule search indexing (also requires
                ``findable`` and ``settings.NO_SEARCH_INDEX`` being false).
            remote_gallery_url: if set, pictures are downloaded from it.
            days: for a newly created book, a non-zero value makes it a
                preview until today plus that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            # Preview status is only decided when the book is first created.
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the tag set with the metadata tags plus the preserved shelves.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        # Attach the listed parts in order; parent_number follows list position.
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id)

        # Children that gained this parent, lost it, or inherit a changed cover.
        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
715
716     def get_master(self):
717         master_tags = [
718             'opowiadanie',
719             'powiesc',
720             'dramat_wierszowany_l',
721             'dramat_wierszowany_lp',
722             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
723             'wywiad',
724         ]
725         from librarian.parser import WLDocument
726         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
727         root = wld.edoc.getroot()
728         for master in root.iter():
729             if master.tag in master_tags:
730                 return master
731
732     def update_references(self):
733         from references.models import Entity, Reference
734         master = self.get_master()
735         if master is None:
736             master = []
737         found = set()
738         for i, sec in enumerate(master):
739             for ref in sec.findall('.//ref'):
740                 href = ref.attrib.get('href', '')
741                 if not href or href in found:
742                     continue
743                 found.add(href)
744                 entity, created = Entity.objects.get_or_create(
745                     uri=href
746                 )
747                 ref, created = Reference.objects.get_or_create(
748                     book=self,
749                     entity=entity
750                 )
751                 ref.first_section = 'sec%d' % (i + 1)
752                 entity.populate()
753                 entity.save()
754         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
755
756     @property
757     def references(self):
758         return self.reference_set.all().select_related('entity')
759
760     @classmethod
761     @transaction.atomic
762     def repopulate_ancestors(cls):
763         """Fixes the ancestry cache."""
764         # TODO: table names
765         cursor = connection.cursor()
766         if connection.vendor == 'postgres':
767             cursor.execute("TRUNCATE catalogue_book_ancestor")
768             cursor.execute("""
769                 WITH RECURSIVE ancestry AS (
770                     SELECT book.id, book.parent_id
771                     FROM catalogue_book AS book
772                     WHERE book.parent_id IS NOT NULL
773                     UNION
774                     SELECT ancestor.id, book.parent_id
775                     FROM ancestry AS ancestor, catalogue_book AS book
776                     WHERE ancestor.parent_id = book.id
777                         AND book.parent_id IS NOT NULL
778                     )
779                 INSERT INTO catalogue_book_ancestor
780                     (from_book_id, to_book_id)
781                     SELECT id, parent_id
782                     FROM ancestry
783                     ORDER BY id;
784                 """)
785         else:
786             cursor.execute("DELETE FROM catalogue_book_ancestor")
787             for b in cls.objects.exclude(parent=None):
788                 parent = b.parent
789                 while parent is not None:
790                     b.ancestor.add(parent)
791                     parent = parent.parent
792
793     @property
794     def ancestors(self):
795         if self.parent:
796             for anc in self.parent.ancestors:
797                 yield anc
798             yield self.parent
799         else:
800             return []
801
802     def clear_cache(self):
803         clear_cached_renders(self.mini_box)
804         clear_cached_renders(self.mini_box_nolink)
805
806     def cover_info(self, inherit=True):
807         """Returns a dictionary to serve as fallback for BookInfo.
808
809         For now, the only thing inherited is the cover image.
810         """
811         need = False
812         info = {}
813         for field in ('cover_url', 'cover_by', 'cover_source'):
814             val = self.get_extra_info_json().get(field)
815             if val:
816                 info[field] = val
817             else:
818                 need = True
819         if inherit and need and self.parent is not None:
820             parent_info = self.parent.cover_info()
821             parent_info.update(info)
822             info = parent_info
823         return info
824
825     def related_themes(self):
826         return Tag.objects.usage_for_queryset(
827             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
828             counts=True).filter(category='theme').order_by('-count')
829
830     def parent_cover_changed(self):
831         """Called when parent book's cover image is changed."""
832         if not self.cover_info(inherit=False):
833             if 'cover' not in app_settings.DONT_BUILD:
834                 self.cover.build_delay()
835                 self.cover_clean.build_delay()
836                 self.cover_thumb.build_delay()
837                 self.cover_api_thumb.build_delay()
838                 self.simple_cover.build_delay()
839                 self.cover_ebookpoint.build_delay()
840             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
841                 if format_ not in app_settings.DONT_BUILD:
842                     getattr(self, '%s_file' % format_).build_delay()
843             for child in self.children.all():
844                 child.parent_cover_changed()
845
846     def other_versions(self):
847         """Find other versions (i.e. in other languages) of the book."""
848         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
849
850     def parents(self):
851         books = []
852         parent = self.parent
853         while parent is not None:
854             books.insert(0, parent)
855             parent = parent.parent
856         return books
857
858     def pretty_title(self, html_links=False):
859         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
860         books = self.parents() + [self]
861         names.extend([(b.title, b.get_absolute_url()) for b in books])
862
863         if html_links:
864             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
865         else:
866             names = [tag[0] for tag in names]
867         return ', '.join(names)
868
869     def publisher(self):
870         publisher = self.get_extra_info_json()['publisher']
871         if isinstance(publisher, str):
872             return publisher
873         elif isinstance(publisher, list):
874             return ', '.join(publisher)
875
876     @classmethod
877     def tagged_top_level(cls, tags):
878         """ Returns top-level books tagged with `tags`.
879
880         It only returns those books which don't have ancestors which are
881         also tagged with those tags.
882
883         """
884         objects = cls.tagged.with_all(tags)
885         return objects.filter(findable=True).exclude(ancestor__in=objects)
886
887     @classmethod
888     def book_list(cls, book_filter=None):
889         """Generates a hierarchical listing of all books.
890
891         Books are optionally filtered with a test function.
892
893         """
894
895         books_by_parent = {}
896         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
897         if book_filter:
898             books = books.filter(book_filter).distinct()
899
900             book_ids = set(b['pk'] for b in books.values("pk").iterator())
901             for book in books.iterator():
902                 parent = book.parent_id
903                 if parent not in book_ids:
904                     parent = None
905                 books_by_parent.setdefault(parent, []).append(book)
906         else:
907             for book in books.iterator():
908                 books_by_parent.setdefault(book.parent_id, []).append(book)
909
910         orphans = []
911         books_by_author = OrderedDict()
912         for tag in Tag.objects.filter(category='author').iterator():
913             books_by_author[tag] = []
914
915         for book in books_by_parent.get(None, ()):
916             authors = list(book.authors().only('pk'))
917             if authors:
918                 for author in authors:
919                     books_by_author[author].append(book)
920             else:
921                 orphans.append(book)
922
923         return books_by_author, orphans, books_by_parent
924
925     _audiences_pl = {
926         "SP": (1, "szkoła podstawowa"),
927         "SP1": (1, "szkoła podstawowa"),
928         "SP2": (1, "szkoła podstawowa"),
929         "SP3": (1, "szkoła podstawowa"),
930         "P": (1, "szkoła podstawowa"),
931         "G": (2, "gimnazjum"),
932         "L": (3, "liceum"),
933         "LP": (3, "liceum"),
934     }
935
936     def audiences_pl(self):
937         audiences = self.get_extra_info_json().get('audiences', [])
938         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
939         return [a[1] for a in audiences]
940
941     def stage_note(self):
942         stage = self.get_extra_info_json().get('stage')
943         if stage and stage < '0.4':
944             return (_('This work needs modernisation'),
945                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
946         else:
947             return None, None
948
949     def choose_fragments(self, number):
950         fragments = self.fragments.order_by()
951         fragments_count = fragments.count()
952         if not fragments_count and self.children.exists():
953             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
954             fragments_count = fragments.count()
955         if fragments_count:
956             if fragments_count > number:
957                 offset = randint(0, fragments_count - number)
958             else:
959                 offset = 0
960             return fragments[offset : offset + number]
961         elif self.parent:
962             return self.parent.choose_fragments(number)
963         else:
964             return []
965
966     def choose_fragment(self):
967         fragments = self.choose_fragments(1)
968         if fragments:
969             return fragments[0]
970         else:
971             return None
972
973     def fragment_data(self):
974         fragment = self.choose_fragment()
975         if fragment:
976             return {
977                 'title': fragment.book.pretty_title(),
978                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
979             }
980         else:
981             return None
982
983     def update_popularity(self):
984         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
985         try:
986             pop = self.popularity
987             pop.count = count
988             pop.save()
989         except BookPopularity.DoesNotExist:
990             BookPopularity.objects.create(book=self, count=count)
991
992     def ridero_link(self):
993         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
994
995     def like(self, user):
996         from social.utils import likes, get_set, set_sets
997         if not likes(user, self):
998             tag = get_set(user, '')
999             set_sets(user, self, [tag])
1000
1001     def unlike(self, user):
1002         from social.utils import likes, set_sets
1003         if likes(user, self):
1004             set_sets(user, self, [])
1005
1006     def full_sort_key(self):
1007         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1008
1009     def cover_color(self):
1010         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1011
1012     @cached_render('catalogue/book_mini_box.html')
1013     def mini_box(self):
1014         return {
1015             'book': self
1016         }
1017
1018     @cached_render('catalogue/book_mini_box.html')
1019     def mini_box_nolink(self):
1020         return {
1021             'book': self,
1022             'no_link': True,
1023         }
1024
1025
1026 class BookPopularity(models.Model):
1027     book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
1028     count = models.IntegerField(default=0, db_index=True)