1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
# Single storage instance shared by the book file fields declared in this module.
bofh_storage = BofhFileSystemStorage()
@deconstructible
class UploadToPath(object):
    """Callable ``upload_to`` that builds a file path from the instance's slug.

    Decorated with ``deconstructible`` so that instances passed as field
    arguments can be serialized when Django writes migrations.
    """

    def __init__(self, path):
        """Store *path*, a ``%``-format template with one slot for the slug."""
        # __call__ reads self.path, so it has to be assigned here.
        self.path = path

    def __call__(self, instance, filename):
        """Return the upload path for *instance*; *filename* is not used."""
        return self.path % instance.slug
# upload_to callables for the cover image variants; each one fills the
# book's slug into its path template.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap the *upload_path* template in an ``UploadToPath`` callable."""
    upload_to = UploadToPath(upload_path)
    return upload_to
55 class Book(models.Model):
56 """Represents a book imported from WL-XML."""
57 title = models.CharField(_('title'), max_length=32767)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
59 sort_key_author = models.CharField(
60 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
61 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
62 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
63 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
64 description = models.TextField(_('description'), blank=True)
65 abstract = models.TextField(_('abstract'), blank=True)
66 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
67 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
68 parent_number = models.IntegerField(_('parent number'), default=0)
69 extra_info = models.TextField(_('extra information'), default='{}')
70 gazeta_link = models.CharField(blank=True, max_length=240)
71 wiki_link = models.CharField(blank=True, max_length=240)
72 print_on_demand = models.BooleanField(_('print on demand'), default=False)
73 recommended = models.BooleanField(_('recommended'), default=False)
74 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
75 preview = models.BooleanField(_('preview'), default=False)
76 preview_until = models.DateField(_('preview until'), blank=True, null=True)
77 preview_key = models.CharField(max_length=32, blank=True, null=True)
78 findable = models.BooleanField(_('findable'), default=True, db_index=True)
80 # files generated during publication
83 null=True, blank=True,
84 upload_to=_cover_upload_to,
85 storage=bofh_storage, max_length=255)
86 # Cleaner version of cover for thumbs
87 cover_thumb = EbookField(
88 'cover_thumb', _('cover thumbnail'),
89 null=True, blank=True,
90 upload_to=_cover_thumb_upload_to,
92 cover_api_thumb = EbookField(
93 'cover_api_thumb', _('cover thumbnail for mobile app'),
94 null=True, blank=True,
95 upload_to=_cover_api_thumb_upload_to,
97 simple_cover = EbookField(
98 'simple_cover', _('cover for mobile app'),
99 null=True, blank=True,
100 upload_to=_simple_cover_upload_to,
102 cover_ebookpoint = EbookField(
103 'cover_ebookpoint', _('cover for Ebookpoint'),
104 null=True, blank=True,
105 upload_to=_cover_ebookpoint_upload_to,
107 ebook_formats = constants.EBOOK_FORMATS
108 formats = ebook_formats + ['html', 'xml']
110 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
111 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
113 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
114 has_audience = models.BooleanField(default=False)
116 objects = models.Manager()
117 tagged = managers.ModelTaggedItemManager(Tag)
118 tags = managers.TagDescriptor(Tag)
119 tag_relations = GenericRelation(Tag.intermediary_table_model)
121 html_built = django.dispatch.Signal()
122 published = django.dispatch.Signal()
126 class AlreadyExists(Exception):
130 ordering = ('sort_key_author', 'sort_key')
131 verbose_name = _('book')
132 verbose_name_plural = _('books')
133 app_label = 'catalogue'
def get_extra_info_json(self):
    """Return ``extra_info`` decoded from JSON.

    An empty (falsy) ``extra_info`` is decoded as an empty JSON object.
    """
    raw = self.extra_info
    if not raw:
        raw = '{}'
    return json.loads(raw)
141 def get_initial(self):
143 return re.search(r'\w', self.title, re.U).group(0)
144 except AttributeError:
148 return self.tags.filter(category='author')
151 return self.tags.filter(category='epoch')
154 return self.tags.filter(category='genre')
157 return self.tags.filter(category='kind')
159 def tag_unicode(self, category):
160 relations = prefetched_relations(self, category)
162 return ', '.join(rel.tag.name for rel in relations)
164 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, except 'set' and 'theme' ones, passed through ``split_tags``."""
    excluded_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=excluded_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in ``cached_author``."""
    author = self.cached_author
    return author
def kind_unicode(self):
    """Return the names of the book's 'kind' tags, as joined by ``tag_unicode``."""
    return self.tag_unicode(category='kind')
def epoch_unicode(self):
    """Return the names of the book's 'epoch' tags, as joined by ``tag_unicode``."""
    return self.tag_unicode(category='epoch')
def genre_unicode(self):
    """Return the names of the book's 'genre' tags, as joined by ``tag_unicode``."""
    return self.tag_unicode(category='genre')
181 def translators(self):
182 translators = self.get_extra_info_json().get('translators') or []
184 '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
187 def translator(self):
188 translators = self.get_extra_info_json().get('translators')
191 if len(translators) > 3:
192 translators = translators[:2]
196 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for the book.

    The value comes from the ``cover_source`` key of the book's extra info.
    When the key is absent, the parent's cover source is used, and an empty
    string when there is no parent either.
    """
    info = self.get_extra_info_json()
    if 'cover_source' in info:
        # A stored value (even an empty one) wins; the parent chain is
        # consulted only when the key is missing, not on every call.
        return info['cover_source']
    if self.parent:
        return self.parent.cover_source()
    return ''
201 def save(self, force_insert=False, force_update=False, **kwargs):
202 from sortify import sortify
204 self.sort_key = sortify(self.title)[:120]
205 self.title = str(self.title) # ???
208 author = self.authors().first().sort_key
209 except AttributeError:
211 self.sort_key_author = author
213 self.cached_author = self.tag_unicode('author')
214 self.has_audience = 'audience' in self.get_extra_info_json()
216 if self.preview and not self.preview_key:
217 self.preview_key = get_random_hash(self.slug)[:32]
219 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL of the 'book_detail' view for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the gallery path computed for the book's slug."""
    slug = self.slug
    # Inside the class body this name resolves to the helper imported from
    # catalogue.utils, not to this method.
    return gallery_path(slug)
def gallery_url(self):
    """Return the gallery URL computed for the book's slug."""
    slug = self.slug
    # Inside the class body this name resolves to the helper imported from
    # catalogue.utils, not to this method.
    return gallery_url(slug)
def language_code(self):
    """Translate the book's language through ``constants.LANGUAGES_3TO2``.

    A language with no entry in that mapping is returned unchanged.
    """
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Return the ``settings.LANGUAGES`` name for the book's language code, or ""."""
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    code = self.language_code()
    return code != settings.LANGUAGE_CODE
245 def set_audio_length(self):
246 length = self.get_audio_length()
248 self.audio_length = self.format_audio_length(length)
252 def format_audio_length(seconds):
254 >>> Book.format_audio_length(1)
256 >>> Book.format_audio_length(3661)
260 minutes = seconds // 60
261 seconds = seconds % 60
262 return '%d:%02d' % (minutes, seconds)
264 hours = seconds // 3600
265 minutes = seconds % 3600 // 60
266 seconds = seconds % 60
267 return '%d:%02d:%02d' % (hours, minutes, seconds)
269 def get_audio_length(self):
271 for media in self.get_mp3() or ():
272 total += app_settings.GET_MP3_LENGTH(media.file.path)
def has_media(self, type_):
    """Tell whether the book has a file of the given type.

    Types listed in ``Book.formats`` are checked on the book's own
    ``<type_>_file`` field; any other type is looked up among the related
    ``media`` objects.
    """
    if type_ not in Book.formats:
        return self.media.filter(type=type_).exists()
    return bool(getattr(self, "%s_file" % type_))
282 return self.has_media('mp3')
284 def get_media(self, type_):
285 if self.has_media(type_):
286 if type_ in Book.formats:
287 return getattr(self, "%s_file" % type_)
289 return self.media.filter(type=type_)
294 return self.get_media("mp3")
297 return self.get_media("odt")
300 return self.get_media("ogg")
303 return self.get_media("daisy")
305 def media_url(self, format_):
306 media = self.get_media(format_)
309 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
316 return self.media_url('html')
319 return self.media_url('pdf')
322 return self.media_url('epub')
325 return self.media_url('mobi')
328 return self.media_url('txt')
331 return self.media_url('fb2')
334 return self.media_url('xml')
def has_description(self):
    """Tell whether the book's description is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Display metadata attached to the method (presumably read by the Django admin).
has_description.boolean = True
has_description.short_description = _('description')
def has_mp3_file(self):
    """Tell whether the book has any "mp3" media."""
    return self.has_media(type_="mp3")
# Display metadata attached to the method (presumably read by the Django admin).
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether the book has any "ogg" media."""
    return self.has_media(type_="ogg")
# Display metadata attached to the method (presumably read by the Django admin).
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether the book has any "daisy" media."""
    return self.has_media(type_="daisy")
# Display metadata attached to the method (presumably read by the Django admin).
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
356 def get_audiobooks(self):
358 for m in self.media.filter(type='ogg').order_by().iterator():
359 ogg_files[m.name] = m
363 for mp3 in self.media.filter(type='mp3').iterator():
364 # ogg files are always from the same project
365 meta = mp3.get_extra_info_json()
366 project = meta.get('project')
369 project = 'CzytamySłuchając'
371 projects.add((project, meta.get('funded_by', '')))
375 ogg = ogg_files.get(mp3.name)
378 audiobooks.append(media)
380 projects = sorted(projects)
381 return audiobooks, projects
383 def wldocument(self, parse_dublincore=True, inherit=True):
384 from catalogue.import_utils import ORMDocProvider
385 from librarian.parser import WLDocument
387 if inherit and self.parent:
388 meta_fallbacks = self.parent.cover_info()
390 meta_fallbacks = None
392 return WLDocument.from_file(
394 provider=ORMDocProvider(self),
395 parse_dublincore=parse_dublincore,
396 meta_fallbacks=meta_fallbacks)
399 def zip_format(format_):
400 def pretty_file_name(book):
401 return "%s/%s.%s" % (
402 book.get_extra_info_json()['author'],
406 field_name = "%s_file" % format_
407 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
408 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
409 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pass the book's media files of type *format_* to ``create_zip``.

    Each file is handed over as a ``(None, path)`` pair, together with
    ``"<slug>_<format_>"`` as the second argument; the result of
    ``create_zip`` is returned.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Kept lazy: the pairs are produced only as create_zip consumes them.
    paths = ((None, media.file.path) for media in media_files)
    zip_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, zip_name)
416 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
417 if not self.findable:
420 from search.index import Index
423 index.index_book(self, book_info)
428 except Exception as e:
429 index.index.rollback()
# NOTE: this will cause problems in conjunction with paid previews.
433 def download_pictures(self, remote_gallery_url):
434 gallery_path = self.gallery_path()
435 # delete previous files, so we don't include old files in ebooks
436 if os.path.isdir(gallery_path):
437 for filename in os.listdir(gallery_path):
438 file_path = os.path.join(gallery_path, filename)
440 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
442 makedirs(gallery_path)
443 for ilustr in ilustr_elements:
444 ilustr_src = ilustr.get('src')
445 ilustr_path = os.path.join(gallery_path, ilustr_src)
446 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
448 def load_abstract(self):
449 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
450 if abstract is not None:
451 self.abstract = transform_abstrakt(abstract)
456 def from_xml_file(cls, xml_file, **kwargs):
457 from django.core.files import File
458 from librarian import dcparser
460 # use librarian to parse meta-data
461 book_info = dcparser.parse(xml_file)
463 if not isinstance(xml_file, File):
464 xml_file = File(open(xml_file))
467 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
472 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
473 search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
474 if dont_build is None:
476 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
478 # check for parts before we do anything
480 if hasattr(book_info, 'parts'):
481 for part_url in book_info.parts:
483 children.append(Book.objects.get(slug=part_url.slug))
484 except Book.DoesNotExist:
485 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
488 book_slug = book_info.url.slug
489 if re.search(r'[^a-z0-9-]', book_slug):
490 raise ValueError('Invalid characters in slug')
491 book, created = Book.objects.get_or_create(slug=book_slug)
496 book.preview = bool(days)
498 book.preview_until = date.today() + timedelta(days)
501 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
502 # Save shelves for this book
503 book_shelves = list(book.tags.filter(category='set'))
504 old_cover = book.cover_info()
507 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
509 book.xml_file.set_readable(False)
511 book.findable = findable
512 book.language = book_info.language
513 book.title = book_info.title
514 if book_info.variant_of:
515 book.common_slug = book_info.variant_of.slug
517 book.common_slug = book.slug
518 book.extra_info = json.dumps(book_info.to_dict())
522 meta_tags = Tag.tags_from_info(book_info)
524 for tag in meta_tags:
525 if not tag.for_books:
529 book.tags = set(meta_tags + book_shelves)
530 book.save() # update sort_key_author
532 cover_changed = old_cover != book.cover_info()
533 obsolete_children = set(b for b in book.children.all()
534 if b not in children)
535 notify_cover_changed = []
536 for n, child_book in enumerate(children):
537 new_child = child_book.parent != book
538 child_book.parent = book
539 child_book.parent_number = n
541 if new_child or cover_changed:
542 notify_cover_changed.append(child_book)
543 # Disown unfaithful children and let them cope on their own.
544 for child in obsolete_children:
546 child.parent_number = 0
549 notify_cover_changed.append(child)
551 cls.repopulate_ancestors()
552 tasks.update_counters.delay()
554 if remote_gallery_url:
555 book.download_pictures(remote_gallery_url)
557 # No saves beyond this point.
560 if 'cover' not in dont_build:
561 book.cover.build_delay()
562 book.cover_thumb.build_delay()
563 book.cover_api_thumb.build_delay()
564 book.simple_cover.build_delay()
565 book.cover_ebookpoint.build_delay()
567 # Build HTML and ebooks.
568 book.html_file.build_delay()
570 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
571 if format_ not in dont_build:
572 getattr(book, '%s_file' % format_).build_delay()
573 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
574 if format_ not in dont_build:
575 getattr(book, '%s_file' % format_).build_delay()
577 if not settings.NO_SEARCH_INDEX and search_index and findable:
578 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
580 for child in notify_cover_changed:
581 child.parent_cover_changed()
583 book.update_popularity()
584 cls.published.send(sender=cls, instance=book)
589 def repopulate_ancestors(cls):
590 """Fixes the ancestry cache."""
592 cursor = connection.cursor()
593 if connection.vendor == 'postgres':
594 cursor.execute("TRUNCATE catalogue_book_ancestor")
596 WITH RECURSIVE ancestry AS (
597 SELECT book.id, book.parent_id
598 FROM catalogue_book AS book
599 WHERE book.parent_id IS NOT NULL
601 SELECT ancestor.id, book.parent_id
602 FROM ancestry AS ancestor, catalogue_book AS book
603 WHERE ancestor.parent_id = book.id
604 AND book.parent_id IS NOT NULL
606 INSERT INTO catalogue_book_ancestor
607 (from_book_id, to_book_id)
613 cursor.execute("DELETE FROM catalogue_book_ancestor")
614 for b in cls.objects.exclude(parent=None):
616 while parent is not None:
617 b.ancestor.add(parent)
618 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of both mini-box variants of the book."""
    for cached_view in (self.mini_box, self.mini_box_nolink):
        clear_cached_renders(cached_view)
624 def cover_info(self, inherit=True):
625 """Returns a dictionary to serve as fallback for BookInfo.
627 For now, the only thing inherited is the cover image.
631 for field in ('cover_url', 'cover_by', 'cover_source'):
632 val = self.get_extra_info_json().get(field)
637 if inherit and need and self.parent is not None:
638 parent_info = self.parent.cover_info()
639 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, for the book's fragments.

    Fragments of this book and of books that have it as an ancestor are
    both taken into account.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    used_tags = Tag.objects.usage_for_queryset(fragments, counts=True)
    return used_tags.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Nothing happens when ``cover_info(inherit=False)`` is non-empty for
    this book. Otherwise the cover files and the ebook formats listed in
    ``constants.EBOOK_FORMATS_WITH_COVERS`` are queued for rebuilding,
    subject to ``app_settings.DONT_BUILD``, and the notification is passed
    on to the book's children.
    """
    if self.cover_info(inherit=False):
        # Presumably the book carries cover metadata of its own, so the
        # parent's cover does not reach it or anything below it.
        return

    if 'cover' not in app_settings.DONT_BUILD:
        self.cover.build_delay()
        self.cover_thumb.build_delay()
        self.cover_api_thumb.build_delay()
        self.simple_cover.build_delay()
        # Built together with the other covers on publication, so it has
        # to be refreshed here as well or it would keep the old image.
        self.cover_ebookpoint.build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ not in app_settings.DONT_BUILD:
            getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    These are the findable books sharing this book's ``common_slug``,
    the book itself excluded.
    """
    manager = type(self).objects
    same_work = manager.filter(common_slug=self.common_slug, findable=True)
    return same_work.exclude(pk=self.pk)
669 while parent is not None:
670 books.insert(0, parent)
671 parent = parent.parent
674 def pretty_title(self, html_links=False):
675 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
676 books = self.parents() + [self]
677 names.extend([(b.title, b.get_absolute_url()) for b in books])
680 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
682 names = [tag[0] for tag in names]
683 return ', '.join(names)
686 publisher = self.get_extra_info_json()['publisher']
687 if isinstance(publisher, str):
689 elif isinstance(publisher, list):
690 return ', '.join(publisher)
693 def tagged_top_level(cls, tags):
694 """ Returns top-level books tagged with `tags`.
696 It only returns those books which don't have ancestors which are
697 also tagged with those tags.
700 objects = cls.tagged.with_all(tags)
701 return objects.filter(findable=True).exclude(ancestor__in=objects)
704 def book_list(cls, book_filter=None):
705 """Generates a hierarchical listing of all books.
707 Books are optionally filtered with a test function.
712 books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
714 books = books.filter(book_filter).distinct()
716 book_ids = set(b['pk'] for b in books.values("pk").iterator())
717 for book in books.iterator():
718 parent = book.parent_id
719 if parent not in book_ids:
721 books_by_parent.setdefault(parent, []).append(book)
723 for book in books.iterator():
724 books_by_parent.setdefault(book.parent_id, []).append(book)
727 books_by_author = OrderedDict()
728 for tag in Tag.objects.filter(category='author').iterator():
729 books_by_author[tag] = []
731 for book in books_by_parent.get(None, ()):
732 authors = list(book.authors().only('pk'))
734 for author in authors:
735 books_by_author[author].append(book)
739 return books_by_author, orphans, books_by_parent
742 "SP": (1, "szkoła podstawowa"),
743 "SP1": (1, "szkoła podstawowa"),
744 "SP2": (1, "szkoła podstawowa"),
745 "SP3": (1, "szkoła podstawowa"),
746 "P": (1, "szkoła podstawowa"),
747 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return the labels for the book's audiences, deduplicated and ordered.

    Each code from the ``audiences`` entry of the extra info is mapped
    through ``_audiences_pl`` to a ``(rank, label)`` pair; a code with no
    entry keeps its own text as the label and gets rank 99. Labels are
    returned in ascending ``(rank, label)`` order.
    """
    codes = self.get_extra_info_json().get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [label for _rank, label in sorted(ranked)]
757 def stage_note(self):
758 stage = self.get_extra_info_json().get('stage')
759 if stage and stage < '0.4':
760 return (_('This work needs modernisation'),
761 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
765 def choose_fragment(self):
766 fragments = self.fragments.order_by()
767 fragments_count = fragments.count()
768 if not fragments_count and self.children.exists():
769 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
770 fragments_count = fragments.count()
772 return fragments[randint(0, fragments_count - 1)]
774 return self.parent.choose_fragment()
778 def fragment_data(self):
779 fragment = self.choose_fragment()
782 'title': fragment.book.pretty_title(),
783 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
788 def update_popularity(self):
789 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
791 pop = self.popularity
794 except BookPopularity.DoesNotExist:
795 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return the book's ridero.eu URL for the currently active language."""
    language = get_language()
    # The URL uses the slug with dashes turned into underscores.
    book_key = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, book_key)
def like(self, user):
    """Put the book into the set ``get_set(user, '')`` unless *user* likes it already."""
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Reset the book's sets for *user* to none, if *user* currently likes it."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, the sort key and the id with ``SORT_KEY_SEP``."""
    parts = [self.sort_key_author, self.sort_key, str(self.id)]
    return self.SORT_KEY_SEP.join(parts)
def cover_color(self):
    """Return the ``WLCover`` colour for the book's epoch, '#000000' when none is defined."""
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
817 @cached_render('catalogue/book_mini_box.html')
823 @cached_render('catalogue/book_mini_box.html')
824 def mini_box_nolink(self):
830 def add_file_fields():
831 for format_ in Book.formats:
832 field_name = "%s_file" % format_
833 # This weird globals() assignment makes Django migrations comfortable.
834 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
835 _upload_to.__name__ = '_%s_upload_to' % format_
836 globals()[_upload_to.__name__] = _upload_to
839 format_, _("%s file" % format_.upper()),
840 upload_to=_upload_to,
841 storage=bofh_storage,
845 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    """Per-book counter, reachable from a book as ``book.popularity``."""

    book = models.OneToOneField(
        Book,
        on_delete=models.CASCADE,
        related_name='popularity',
    )
    count = models.IntegerField(db_index=True, default=0)