Housekeeping: reorganize format fields, simplify the building tasks.
[wolnelektury.git] / src / catalogue / models / book.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from collections import OrderedDict
5 import json
6 from datetime import date, timedelta
7 from random import randint
8 import os.path
9 import re
10 from urllib.request import urlretrieve
11 from django.apps import apps
12 from django.conf import settings
13 from django.db import connection, models, transaction
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.template.loader import render_to_string
17 from django.urls import reverse
18 from django.utils.translation import ugettext_lazy as _, get_language
19 from fnpdjango.storage import BofhFileSystemStorage
20 from lxml import html
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue import fields
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
31
# Single storage instance passed as `storage=` to the file fields declared on Book.
bofh_storage = BofhFileSystemStorage()
33
34
class Book(models.Model):
    """Represents a book imported from WL-XML.

    Besides the catalogue metadata, the model keeps the files generated
    during publication (text formats and cover variants) and the
    parent/child hierarchy between books.
    """
    # Basic metadata. `sort_key` and `sort_key_author` are recomputed in save().
    title = models.CharField(_('title'), max_length=32767)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # NOTE(review): verbose name is 'slug', same as for `slug` above -- confirm this is intended.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    abstract = models.TextField(_('abstract'), blank=True)
    toc = models.TextField(_('toc'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position of this book among its parent's children.
    parent_number = models.IntegerField(_('parent number'), default=0)
    # JSON-encoded text; read through get_extra_info_json().
    extra_info = models.TextField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)
    audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
    # Preview (embargo) state; `preview_key` is generated in save() when missing.
    preview = models.BooleanField(_('preview'), default=False)
    preview_until = models.DateField(_('preview until'), blank=True, null=True)
    preview_key = models.CharField(max_length=32, blank=True, null=True)
    findable = models.BooleanField(_('findable'), default=True, db_index=True)

    # files generated during publication
    xml_file = fields.XmlField(storage=bofh_storage, with_etag=False)
    html_file = fields.HtmlField(storage=bofh_storage)
    fb2_file = fields.Fb2Field(storage=bofh_storage)
    txt_file = fields.TxtField(storage=bofh_storage)
    epub_file = fields.EpubField(storage=bofh_storage)
    mobi_file = fields.MobiField(storage=bofh_storage)
    pdf_file = fields.PdfField(storage=bofh_storage)

    cover = fields.CoverField(_('cover'), storage=bofh_storage)
    # Cleaner version of cover for thumbs
    cover_clean = fields.CoverCleanField(_('clean cover'))
    cover_thumb = fields.CoverThumbField(_('cover thumbnail'))
    cover_api_thumb = fields.CoverApiThumbField(
        _('cover thumbnail for mobile app'))
    simple_cover = fields.SimpleCoverField(_('cover for mobile app'))
    cover_ebookpoint = fields.CoverEbookpointField(
        _('cover for Ebookpoint'))

    # Format names that map onto the `<format>_file` fields above
    # (see has_media() / get_media()).
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    # Direct parent and the cached transitive ancestry (rebuilt by repopulate_ancestors()).
    parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    # Denormalized values refreshed in save().
    cached_author = models.CharField(blank=True, max_length=240, db_index=True)
    has_audience = models.BooleanField(default=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # `published` is sent at the end of from_text_and_meta().
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    SORT_KEY_SEP = '$'

    is_book = True

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the book exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
109
    def __str__(self):
        """Return the book's title."""
        return self.title
112
    def get_extra_info_json(self):
        """Return `extra_info` parsed from JSON; an empty value parses as `{}`."""
        return json.loads(self.extra_info or '{}')
115
116     def get_initial(self):
117         try:
118             return re.search(r'\w', self.title, re.U).group(0)
119         except AttributeError:
120             return ''
121
    def authors(self):
        """Return this book's tags of category 'author'."""
        return self.tags.filter(category='author')
124
    def epochs(self):
        """Return this book's tags of category 'epoch'."""
        return self.tags.filter(category='epoch')
127
    def genres(self):
        """Return this book's tags of category 'genre'."""
        return self.tags.filter(category='genre')
130
    def kinds(self):
        """Return this book's tags of category 'kind'."""
        return self.tags.filter(category='kind')
133
134     def tag_unicode(self, category):
135         relations = prefetched_relations(self, category)
136         if relations:
137             return ', '.join(rel.tag.name for rel in relations)
138         else:
139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
140
    def tags_by_category(self):
        """Return the result of split_tags() on this book's tags, leaving out 'set' and 'theme' tags."""
        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
143
    def author_unicode(self):
        """Return the cached author string (`cached_author`, refreshed in save())."""
        return self.cached_author
146
    def kind_unicode(self):
        """Return the names of the book's 'kind' tags as one comma-separated string."""
        return self.tag_unicode('kind')
149
    def epoch_unicode(self):
        """Return the names of the book's 'epoch' tags as one comma-separated string."""
        return self.tag_unicode('epoch')
152
    def genre_unicode(self):
        """Return the names of the book's 'genre' tags as one comma-separated string."""
        return self.tag_unicode('genre')
155
156     def translators(self):
157         translators = self.get_extra_info_json().get('translators') or []
158         return [
159             '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
160         ]
161
162     def translator(self):
163         translators = self.get_extra_info_json().get('translators')
164         if not translators:
165             return None
166         if len(translators) > 3:
167             translators = translators[:2]
168             others = ' i inni'
169         else:
170             others = ''
171         return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
172
173     def cover_source(self):
174         return self.get_extra_info_json().get('cover_source', self.parent.cover_source() if self.parent else '')
175
    @property
    def isbn_pdf(self):
        """The 'isbn_pdf' entry of extra info, or None if absent."""
        return self.get_extra_info_json().get('isbn_pdf')
179
    @property
    def isbn_epub(self):
        """The 'isbn_epub' entry of extra info, or None if absent."""
        return self.get_extra_info_json().get('isbn_epub')
183
    @property
    def isbn_mobi(self):
        """The 'isbn_mobi' entry of extra info, or None if absent."""
        return self.get_extra_info_json().get('isbn_mobi')
187
188     def is_accessible_to(self, user):
189         if not self.preview:
190             return True
191         if not user.is_authenticated:
192             return False
193         Membership = apps.get_model('club', 'Membership')
194         if Membership.is_active_for(user):
195             return True
196         Funding = apps.get_model('funding', 'Funding')
197         if Funding.objects.filter(user=user, offer__book=self):
198             return True
199         return False
200
    def save(self, force_insert=False, force_update=False, **kwargs):
        """Refresh the derived fields and save the book.

        Recomputes `sort_key`, `sort_key_author`, `cached_author` and
        `has_audience`, and generates `preview_key` for a preview book
        that does not have one yet.
        """
        from sortify import sortify

        self.sort_key = sortify(self.title)[:120]
        self.title = str(self.title)  # ???

        try:
            # first() returns None when there is no author tag, which makes
            # the attribute access raise AttributeError.
            author = self.authors().first().sort_key
        except AttributeError:
            author = ''
        self.sort_key_author = author

        self.cached_author = self.tag_unicode('author')
        self.has_audience = 'audience' in self.get_extra_info_json()

        if self.preview and not self.preview_key:
            self.preview_key = get_random_hash(self.slug)[:32]

        ret = super(Book, self).save(force_insert, force_update, **kwargs)

        return ret
222
    def get_absolute_url(self):
        """Return the URL of the 'book_detail' view for this book's slug."""
        return reverse('book_detail', args=[self.slug])
225
    def gallery_path(self):
        """Return the gallery path computed by `catalogue.utils.gallery_path` for this slug."""
        return gallery_path(self.slug)
228
    def gallery_url(self):
        """Return the gallery URL computed by `catalogue.utils.gallery_url` for this slug."""
        return gallery_url(self.slug)
231
232     def get_first_text(self):
233         if self.html_file:
234             return self
235         child = self.children.all().order_by('parent_number').first()
236         if child is not None:
237             return child.get_first_text()
238
239     def get_last_text(self):
240         if self.html_file:
241             return self
242         child = self.children.all().order_by('parent_number').last()
243         if child is not None:
244             return child.get_last_text()
245
246     def get_prev_text(self):
247         if not self.parent:
248             return None
249         sibling = self.parent.children.filter(parent_number__lt=self.parent_number).order_by('-parent_number').first()
250         if sibling is not None:
251             return sibling.get_last_text()
252
253         if self.parent.html_file:
254             return self.parent
255
256         return self.parent.get_prev_text()
257
258     def get_next_text(self, inside=True):
259         if inside:
260             child = self.children.order_by('parent_number').first()
261             if child is not None:
262                 return child.get_first_text()
263
264         if not self.parent:
265             return None
266         sibling = self.parent.children.filter(parent_number__gt=self.parent_number).order_by('parent_number').first()
267         if sibling is not None:
268             return sibling.get_first_text()
269         return self.parent.get_next_text(inside=False)
270
271     def get_child_audiobook(self):
272         BookMedia = apps.get_model('catalogue', 'BookMedia')
273         if not BookMedia.objects.filter(book__ancestor=self).exists():
274             return None
275         for child in self.children.order_by('parent_number').all():
276             if child.has_mp3_file():
277                 return child
278             child_sub = child.get_child_audiobook()
279             if child_sub is not None:
280                 return child_sub
281
282     def get_siblings(self):
283         if not self.parent:
284             return []
285         return self.parent.children.all().order_by('parent_number')
286
    def get_children(self):
        """Return this book's children ordered by `parent_number`."""
        return self.children.all().order_by('parent_number')
289
    @property
    def name(self):
        """Alias for `title`."""
        return self.title
293
    def language_code(self):
        """Map `language` through `constants.LANGUAGES_3TO2`; unmapped codes are returned unchanged."""
        return constants.LANGUAGES_3TO2.get(self.language, self.language)
296
    def language_name(self):
        """Look up `language_code()` in `settings.LANGUAGES`; return "" if it is not listed."""
        return dict(settings.LANGUAGES).get(self.language_code(), "")
299
    def is_foreign(self):
        """Tell whether the book's language code differs from `settings.LANGUAGE_CODE`."""
        return self.language_code() != settings.LANGUAGE_CODE
302
303     def set_audio_length(self):
304         length = self.get_audio_length()
305         if length > 0:
306             self.audio_length = self.format_audio_length(length)
307             self.save()
308
309     @staticmethod
310     def format_audio_length(seconds):
311         """
312         >>> Book.format_audio_length(1)
313         '0:01'
314         >>> Book.format_audio_length(3661)
315         '1:01:01'
316         """
317         if seconds < 60*60:
318             minutes = seconds // 60
319             seconds = seconds % 60
320             return '%d:%02d' % (minutes, seconds)
321         else:
322             hours = seconds // 3600
323             minutes = seconds % 3600 // 60
324             seconds = seconds % 60
325             return '%d:%02d:%02d' % (hours, minutes, seconds)
326
327     def get_audio_length(self):
328         total = 0
329         for media in self.get_mp3() or ():
330             total += app_settings.GET_MP3_LENGTH(media.file.path)
331         return int(total)
332
333     def has_media(self, type_):
334         if type_ in Book.formats:
335             return bool(getattr(self, "%s_file" % type_))
336         else:
337             return self.media.filter(type=type_).exists()
338
    def has_audio(self):
        """Tell whether the book has any 'mp3' media."""
        return self.has_media('mp3')
341
342     def get_media(self, type_):
343         if self.has_media(type_):
344             if type_ in Book.formats:
345                 return getattr(self, "%s_file" % type_)
346             else:
347                 return self.media.filter(type=type_)
348         else:
349             return None
350
    def get_mp3(self):
        """Return the book's "mp3" media (see get_media), or None."""
        return self.get_media("mp3")
353
    def get_odt(self):
        """Return the book's "odt" media (see get_media), or None."""
        return self.get_media("odt")
356
    def get_ogg(self):
        """Return the book's "ogg" media (see get_media), or None."""
        return self.get_media("ogg")
359
    def get_daisy(self):
        """Return the book's "daisy" media (see get_media), or None."""
        return self.get_media("daisy")
362
    def get_audio_epub(self):
        """Return the book's "audio.epub" media (see get_media), or None."""
        return self.get_media("audio.epub")
365
366     def media_url(self, format_):
367         media = self.get_media(format_)
368         if media:
369             if self.preview:
370                 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
371             else:
372                 return media.url
373         else:
374             return None
375
    def html_url(self):
        """Return media_url() for the 'html' format."""
        return self.media_url('html')
378
    def pdf_url(self):
        """Return media_url() for the 'pdf' format."""
        return self.media_url('pdf')
381
    def epub_url(self):
        """Return media_url() for the 'epub' format."""
        return self.media_url('epub')
384
    def mobi_url(self):
        """Return media_url() for the 'mobi' format."""
        return self.media_url('mobi')
387
    def txt_url(self):
        """Return media_url() for the 'txt' format."""
        return self.media_url('txt')
390
    def fb2_url(self):
        """Return media_url() for the 'fb2' format."""
        return self.media_url('fb2')
393
    def xml_url(self):
        """Return media_url() for the 'xml' format."""
        return self.media_url('xml')
396
    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    # Display metadata attached to the method (label and boolean rendering).
    has_description.short_description = _('description')
    has_description.boolean = True
401
    def has_mp3_file(self):
        """Tell whether the book has any "mp3" media."""
        return self.has_media("mp3")
    # Display metadata attached to the method (label and boolean rendering).
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
406
    def has_ogg_file(self):
        """Tell whether the book has any "ogg" media."""
        return self.has_media("ogg")
    # Display metadata attached to the method (label and boolean rendering).
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
411
    def has_daisy_file(self):
        """Tell whether the book has any "daisy" media."""
        return self.has_media("daisy")
    # Display metadata attached to the method (label and boolean rendering).
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
416
    def has_audio_epub_file(self):
        """Tell whether the book has any "audio.epub" media."""
        return self.has_media("audio.epub")
419
    @property
    def media_daisy(self):
        """The book's 'daisy' media (see get_media), or None."""
        return self.get_media('daisy')
423
    @property
    def media_audio_epub(self):
        """The book's 'audio.epub' media (see get_media), or None."""
        return self.get_media('audio.epub')
427
428     def get_audiobooks(self):
429         ogg_files = {}
430         for m in self.media.filter(type='ogg').order_by().iterator():
431             ogg_files[m.name] = m
432
433         audiobooks = []
434         projects = set()
435         total_duration = 0
436         for mp3 in self.media.filter(type='mp3').iterator():
437             # ogg files are always from the same project
438             meta = mp3.get_extra_info_json()
439             project = meta.get('project')
440             if not project:
441                 # temporary fallback
442                 project = 'CzytamySłuchając'
443
444             projects.add((project, meta.get('funded_by', '')))
445             total_duration += mp3.duration or 0
446
447             media = {'mp3': mp3}
448
449             ogg = ogg_files.get(mp3.name)
450             if ogg:
451                 media['ogg'] = ogg
452             audiobooks.append(media)
453
454         projects = sorted(projects)
455         total_duration = '%d:%02d' % (
456             total_duration // 60,
457             total_duration % 60
458         )
459         return audiobooks, projects, total_duration
460
461     def wldocument(self, parse_dublincore=True, inherit=True):
462         from catalogue.import_utils import ORMDocProvider
463         from librarian.parser import WLDocument
464
465         if inherit and self.parent:
466             meta_fallbacks = self.parent.cover_info()
467         else:
468             meta_fallbacks = None
469
470         return WLDocument.from_file(
471             self.xml_file.path,
472             provider=ORMDocProvider(self),
473             parse_dublincore=parse_dublincore,
474             meta_fallbacks=meta_fallbacks)
475
    def wldocument2(self):
        """Load the book's XML file as a `librarian.document.WLDocument`.

        The document's metadata is updated with this book's cover_info().
        """
        from catalogue.import_utils import ORMDocProvider
        from librarian.document import WLDocument
        doc = WLDocument(
            self.xml_file.path,
            provider=ORMDocProvider(self)
        )
        doc.meta.update(self.cover_info())
        return doc
485
486
487     @staticmethod
488     def zip_format(format_):
489         def pretty_file_name(book):
490             return "%s/%s.%s" % (
491                 book.get_extra_info_json()['author'],
492                 book.slug,
493                 format_)
494
495         field_name = "%s_file" % format_
496         field = getattr(Book, field_name)
497         books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
498         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
499         return create_zip(paths, field.ZIP)
500
501     def zip_audiobooks(self, format_):
502         bm = BookMedia.objects.filter(book=self, type=format_)
503         paths = map(lambda bm: (bm.get_nice_filename(), bm.file.path), bm)
504         licenses = set()
505         for m in bm:
506             license = constants.LICENSES.get(
507                 m.get_extra_info_json().get('license'), {}
508             ).get('locative')
509             if license:
510                 licenses.add(license)
511         readme = render_to_string('catalogue/audiobook_zip_readme.txt', {
512             'licenses': licenses,
513         })
514         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
515
516     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
517         if not self.findable:
518             return
519         if index is None:
520             from search.index import Index
521             index = Index()
522         try:
523             index.index_book(self, book_info)
524             if index_tags:
525                 index.index_tags()
526             if commit:
527                 index.index.commit()
528         except Exception as e:
529             index.index.rollback()
530             raise e
531
532     # will make problems in conjunction with paid previews
    def download_pictures(self, remote_gallery_url):
        """Refresh the book's local gallery from `remote_gallery_url`.

        Existing files in the gallery directory are removed first; then
        every `ilustr` element whose `src` contains no '/' is downloaded
        from '<remote_gallery_url>/<src>' into the gallery directory.
        """
        # This is only needed for legacy relative image paths.
        gallery_path = self.gallery_path()
        # delete previous files, so we don't include old files in ebooks
        if os.path.isdir(gallery_path):
            for filename in os.listdir(gallery_path):
                file_path = os.path.join(gallery_path, filename)
                os.unlink(file_path)
        ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
        if ilustr_elements:
            makedirs(gallery_path)
            for ilustr in ilustr_elements:
                ilustr_src = ilustr.get('src')
                # Sources containing a slash are not bare file names; skip them.
                if '/' in ilustr_src:
                    continue
                ilustr_path = os.path.join(gallery_path, ilustr_src)
                urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
550
551     def load_abstract(self):
552         abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
553         if abstract is not None:
554             self.abstract = transform_abstrakt(abstract)
555         else:
556             self.abstract = ''
557
    def load_toc(self):
        """Set `toc` to the table of contents extracted from the HTML file.

        `toc` is reset to '' first and stays empty when there is no HTML
        file or the `div#toc > ol` list is missing or empty. Link targets
        are prefixed with the book's 'book_text' URL. The book is not saved.
        """
        self.toc = ''
        if self.html_file:
            parser = html.HTMLParser(encoding='utf-8')
            tree = html.parse(self.html_file.path, parser=parser)
            toc = tree.find('//div[@id="toc"]/ol')
            if toc is None or not len(toc):
                return
            html_link = reverse('book_text', args=[self.slug])
            for a in toc.findall('.//a'):
                # Prefix each href with the URL of the book's text view.
                a.attrib['href'] = html_link + a.attrib['href']
            self.toc = html.tostring(toc, encoding='unicode')
570             # div#toc
571
    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Publish a book from an XML file.

        `xml_file` is either a Django `File` or something `open()` accepts.
        Metadata is parsed with `librarian.dcparser` and the work is handed
        to from_text_and_meta(), which receives `kwargs`. The file is
        closed afterwards, including a `File` passed in by the caller.
        """
        from django.core.files import File
        from librarian import dcparser

        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()
587
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
        """Create or update a book from its XML file and parsed metadata.

        Parameters:
            raw_file: file object saved as the book's XML file.
            book_info: parsed metadata; its `url.slug` is the book's slug.
            overwrite: allow updating a book that already exists.
            dont_build: names ('cover' or format names) to skip when
                scheduling builds; merged with `app_settings.DONT_BUILD`.
            search_index: schedule search indexing (also requires
                `findable` and `settings.NO_SEARCH_INDEX` being false).
            search_index_tags: passed to the indexing task as `index_tags`.
            remote_gallery_url: if given, pictures are downloaded from it.
            days: for a newly created book, a non-zero value puts it in
                preview until today plus that many days.
            findable: stored on the book.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters other than a-z, 0-9, '-'.
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        from catalogue import tasks

        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
            book.preview = bool(days)
            if book.preview:
                book.preview_until = date.today() + timedelta(days)
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
        if book.preview:
            book.xml_file.set_readable(False)

        book.findable = findable
        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = json.dumps(book_info.to_dict())
        book.load_abstract()
        book.load_toc()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        for tag in meta_tags:
            if not tag.for_books:
                tag.for_books = True
                tag.save()

        # Replace the book's tags, keeping the shelves ('set' tags) it had before.
        book.tags = set(meta_tags + book_shelves)
        book.save()  # update sort_key_author

        cover_changed = old_cover != book.cover_info()
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point.

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_clean.build_delay()
            book.cover_thumb.build_delay()
            book.cover_api_thumb.build_delay()
            book.simple_cover.build_delay()
            book.cover_ebookpoint.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        # Formats in EBOOK_FORMATS_WITHOUT_CHILDREN are only built for books with no parts.
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index and findable:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.update_popularity()
        tasks.update_references.delay(book.id)

        cls.published.send(sender=cls, instance=book)
        return book
709
710     def get_master(self):
711         master_tags = [
712             'opowiadanie',
713             'powiesc',
714             'dramat_wierszowany_l',
715             'dramat_wierszowany_lp',
716             'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
717             'wywiad',
718         ]
719         from librarian.parser import WLDocument
720         wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
721         root = wld.edoc.getroot()
722         for master in root.iter():
723             if master.tag in master_tags:
724                 return master
725
726     def update_references(self):
727         from references.models import Entity, Reference
728         master = self.get_master()
729         if master is None:
730             master = []
731         found = set()
732         for i, sec in enumerate(master):
733             for ref in sec.findall('.//ref'):
734                 href = ref.attrib.get('href', '')
735                 if not href or href in found:
736                     continue
737                 found.add(href)
738                 entity, created = Entity.objects.get_or_create(
739                     uri=href
740                 )
741                 ref, created = Reference.objects.get_or_create(
742                     book=self,
743                     entity=entity
744                 )
745                 ref.first_section = 'sec%d' % (i + 1)
746                 entity.populate()
747                 entity.save()
748         Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
749
    @property
    def references(self):
        """All references of this book, with their entities loaded via select_related."""
        return self.reference_set.all().select_related('entity')
753
754     @classmethod
755     @transaction.atomic
756     def repopulate_ancestors(cls):
757         """Fixes the ancestry cache."""
758         # TODO: table names
759         cursor = connection.cursor()
760         if connection.vendor == 'postgres':
761             cursor.execute("TRUNCATE catalogue_book_ancestor")
762             cursor.execute("""
763                 WITH RECURSIVE ancestry AS (
764                     SELECT book.id, book.parent_id
765                     FROM catalogue_book AS book
766                     WHERE book.parent_id IS NOT NULL
767                     UNION
768                     SELECT ancestor.id, book.parent_id
769                     FROM ancestry AS ancestor, catalogue_book AS book
770                     WHERE ancestor.parent_id = book.id
771                         AND book.parent_id IS NOT NULL
772                     )
773                 INSERT INTO catalogue_book_ancestor
774                     (from_book_id, to_book_id)
775                     SELECT id, parent_id
776                     FROM ancestry
777                     ORDER BY id;
778                 """)
779         else:
780             cursor.execute("DELETE FROM catalogue_book_ancestor")
781             for b in cls.objects.exclude(parent=None):
782                 parent = b.parent
783                 while parent is not None:
784                     b.ancestor.add(parent)
785                     parent = parent.parent
786
787     @property
788     def ancestors(self):
789         if self.parent:
790             for anc in self.parent.ancestors:
791                 yield anc
792             yield self.parent
793         else:
794             return []
795
    def clear_cache(self):
        """Clear the cached renders of `mini_box` and `mini_box_nolink`.

        NOTE(review): both are expected to be defined elsewhere on the model -- confirm.
        """
        clear_cached_renders(self.mini_box)
        clear_cached_renders(self.mini_box_nolink)
799
800     def cover_info(self, inherit=True):
801         """Returns a dictionary to serve as fallback for BookInfo.
802
803         For now, the only thing inherited is the cover image.
804         """
805         need = False
806         info = {}
807         for field in ('cover_url', 'cover_by', 'cover_source'):
808             val = self.get_extra_info_json().get(field)
809             if val:
810                 info[field] = val
811             else:
812                 need = True
813         if inherit and need and self.parent is not None:
814             parent_info = self.parent.cover_info()
815             parent_info.update(info)
816             info = parent_info
817         return info
818
    def related_themes(self):
        """Return 'theme' tags with usage counts, most used first.

        The counts cover fragments of this book and of any book that has
        this book as an ancestor.
        """
        return Tag.objects.usage_for_queryset(
            Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
            counts=True).filter(category='theme').order_by('-count')
823
824     def parent_cover_changed(self):
825         """Called when parent book's cover image is changed."""
826         if not self.cover_info(inherit=False):
827             if 'cover' not in app_settings.DONT_BUILD:
828                 self.cover.build_delay()
829                 self.cover_clean.build_delay()
830                 self.cover_thumb.build_delay()
831                 self.cover_api_thumb.build_delay()
832                 self.simple_cover.build_delay()
833                 self.cover_ebookpoint.build_delay()
834             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
835                 if format_ not in app_settings.DONT_BUILD:
836                     getattr(self, '%s_file' % format_).build_delay()
837             for child in self.children.all():
838                 child.parent_cover_changed()
839
840     def other_versions(self):
841         """Find other versions (i.e. in other languages) of the book."""
842         return type(self).objects.filter(common_slug=self.common_slug, findable=True).exclude(pk=self.pk)
843
844     def parents(self):
845         books = []
846         parent = self.parent
847         while parent is not None:
848             books.insert(0, parent)
849             parent = parent.parent
850         return books
851
852     def pretty_title(self, html_links=False):
853         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
854         books = self.parents() + [self]
855         names.extend([(b.title, b.get_absolute_url()) for b in books])
856
857         if html_links:
858             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
859         else:
860             names = [tag[0] for tag in names]
861         return ', '.join(names)
862
863     def publisher(self):
864         publisher = self.get_extra_info_json()['publisher']
865         if isinstance(publisher, str):
866             return publisher
867         elif isinstance(publisher, list):
868             return ', '.join(publisher)
869
870     @classmethod
871     def tagged_top_level(cls, tags):
872         """ Returns top-level books tagged with `tags`.
873
874         It only returns those books which don't have ancestors which are
875         also tagged with those tags.
876
877         """
878         objects = cls.tagged.with_all(tags)
879         return objects.filter(findable=True).exclude(ancestor__in=objects)
880
881     @classmethod
882     def book_list(cls, book_filter=None):
883         """Generates a hierarchical listing of all books.
884
885         Books are optionally filtered with a test function.
886
887         """
888
889         books_by_parent = {}
890         books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
891         if book_filter:
892             books = books.filter(book_filter).distinct()
893
894             book_ids = set(b['pk'] for b in books.values("pk").iterator())
895             for book in books.iterator():
896                 parent = book.parent_id
897                 if parent not in book_ids:
898                     parent = None
899                 books_by_parent.setdefault(parent, []).append(book)
900         else:
901             for book in books.iterator():
902                 books_by_parent.setdefault(book.parent_id, []).append(book)
903
904         orphans = []
905         books_by_author = OrderedDict()
906         for tag in Tag.objects.filter(category='author').iterator():
907             books_by_author[tag] = []
908
909         for book in books_by_parent.get(None, ()):
910             authors = list(book.authors().only('pk'))
911             if authors:
912                 for author in authors:
913                     books_by_author[author].append(book)
914             else:
915                 orphans.append(book)
916
917         return books_by_author, orphans, books_by_parent
918
    # Maps audience codes to (sort rank, Polish display label) pairs.
    # audiences_pl() sorts by the rank and shows the label; several codes
    # share one pair, so they collapse into a single entry there.
    # Labels: "szkoła podstawowa" = primary school, "gimnazjum" = middle
    # school, "liceum" = high school.
    _audiences_pl = {
        "SP": (1, "szkoła podstawowa"),
        "SP1": (1, "szkoła podstawowa"),
        "SP2": (1, "szkoła podstawowa"),
        "SP3": (1, "szkoła podstawowa"),
        "P": (1, "szkoła podstawowa"),
        "G": (2, "gimnazjum"),
        "L": (3, "liceum"),
        "LP": (3, "liceum"),
    }
929
930     def audiences_pl(self):
931         audiences = self.get_extra_info_json().get('audiences', [])
932         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
933         return [a[1] for a in audiences]
934
935     def stage_note(self):
936         stage = self.get_extra_info_json().get('stage')
937         if stage and stage < '0.4':
938             return (_('This work needs modernisation'),
939                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
940         else:
941             return None, None
942
943     def choose_fragments(self, number):
944         fragments = self.fragments.order_by()
945         fragments_count = fragments.count()
946         if not fragments_count and self.children.exists():
947             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
948             fragments_count = fragments.count()
949         if fragments_count:
950             if fragments_count > number:
951                 offset = randint(0, fragments_count - number)
952             else:
953                 offset = 0
954             return fragments[offset : offset + number]
955         elif self.parent:
956             return self.parent.choose_fragments(number)
957         else:
958             return []
959
960     def choose_fragment(self):
961         fragments = self.choose_fragments(1)
962         if fragments:
963             return fragments[0]
964         else:
965             return None
966
967     def fragment_data(self):
968         fragment = self.choose_fragment()
969         if fragment:
970             return {
971                 'title': fragment.book.pretty_title(),
972                 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
973             }
974         else:
975             return None
976
977     def update_popularity(self):
978         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
979         try:
980             pop = self.popularity
981             pop.count = count
982             pop.save()
983         except BookPopularity.DoesNotExist:
984             BookPopularity.objects.create(book=self, count=count)
985
986     def ridero_link(self):
987         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
988
989     def like(self, user):
990         from social.utils import likes, get_set, set_sets
991         if not likes(user, self):
992             tag = get_set(user, '')
993             set_sets(user, self, [tag])
994
995     def unlike(self, user):
996         from social.utils import likes, set_sets
997         if likes(user, self):
998             set_sets(user, self, [])
999
1000     def full_sort_key(self):
1001         return self.SORT_KEY_SEP.join((self.sort_key_author, self.sort_key, str(self.id)))
1002
1003     def cover_color(self):
1004         return WLCover.epoch_colors.get(self.get_extra_info_json().get('epoch'), '#000000')
1005
1006     @cached_render('catalogue/book_mini_box.html')
1007     def mini_box(self):
1008         return {
1009             'book': self
1010         }
1011
1012     @cached_render('catalogue/book_mini_box.html')
1013     def mini_box_nolink(self):
1014         return {
1015             'book': self,
1016             'no_link': True,
1017         }
1018
1019
class BookPopularity(models.Model):
    """Popularity counter attached one-to-one to a Book."""
    # The book being counted; reachable from the book as `book.popularity`.
    # Deleting the book deletes this row as well (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Popularity value; Book.update_popularity() stores here the number of
    # distinct users having the book in a 'set' tag.  Indexed.
    count = models.IntegerField(default=0, db_index=True)