1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
5 from datetime import date, timedelta
6 from random import randint
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 import django.dispatch
13 from django.contrib.contenttypes.fields import GenericRelation
14 from django.urls import reverse
15 from django.utils.translation import ugettext_lazy as _, get_language
16 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
21 from librarian.cover import WLCover
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue.fields import EbookField
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from catalogue import tasks
31 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
# Storage instance shared by the cover and ebook file fields of Book
# (passed to them as `storage=`).
bofh_storage = BofhFileSystemStorage()
37 class UploadToPath(object):
38 def __init__(self, path):
41 def __call__(self, instance, filename):
42 return self.path % instance.slug
# `upload_to` callables for the cover-related fields of Book.
# UploadToPath.__call__ fills the `%s` placeholder with the instance's slug.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Build the `upload_to` callable for an ebook file field.

    `upload_path` is the path pattern handed over to UploadToPath.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
55 class Book(models.Model):
56 """Represents a book imported from WL-XML."""
57 title = models.CharField(_('title'), max_length=32767)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
59 sort_key_author = models.CharField(
60 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
61 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
62 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
63 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
64 description = models.TextField(_('description'), blank=True)
65 abstract = models.TextField(_('abstract'), blank=True)
66 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
67 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
68 parent_number = models.IntegerField(_('parent number'), default=0)
69 extra_info = jsonfield.JSONField(_('extra information'), default={})
70 gazeta_link = models.CharField(blank=True, max_length=240)
71 wiki_link = models.CharField(blank=True, max_length=240)
72 print_on_demand = models.BooleanField(_('print on demand'), default=False)
73 recommended = models.BooleanField(_('recommended'), default=False)
74 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
75 preview = models.BooleanField(_('preview'), default=False)
76 preview_until = models.DateField(_('preview until'), blank=True, null=True)
77 preview_key = models.CharField(max_length=32, blank=True, null=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
101 ebook_formats = constants.EBOOK_FORMATS
102 formats = ebook_formats + ['html', 'xml']
104 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
105 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
107 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
108 has_audience = models.BooleanField(default=False)
110 objects = models.Manager()
111 tagged = managers.ModelTaggedItemManager(Tag)
112 tags = managers.TagDescriptor(Tag)
113 tag_relations = GenericRelation(Tag.intermediary_table_model)
115 html_built = django.dispatch.Signal()
116 published = django.dispatch.Signal()
120 class AlreadyExists(Exception):
124 ordering = ('sort_key_author', 'sort_key')
125 verbose_name = _('book')
126 verbose_name_plural = _('books')
127 app_label = 'catalogue'
132 def get_initial(self):
134 return re.search(r'\w', self.title, re.U).group(0)
135 except AttributeError:
139 return self.tags.filter(category='author')
142 return self.tags.filter(category='epoch')
145 return self.tags.filter(category='genre')
148 return self.tags.filter(category='kind')
150 def tag_unicode(self, category):
151 relations = prefetched_relations(self, category)
153 return ', '.join(rel.tag.name for rel in relations)
155 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return split_tags() of the book's tags, leaving out 'set' and 'theme' tags."""
    left_out = ('set', 'theme')
    remaining_tags = self.tags.exclude(category__in=left_out)
    return split_tags(remaining_tags)
def author_unicode(self):
    """Return the author string kept in the `cached_author` field."""
    author = self.cached_author
    return author
def kind_unicode(self):
    """Return the comma-separated names of the book's 'kind' tags."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return the comma-separated names of the book's 'epoch' tags."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return the comma-separated names of the book's 'genre' tags."""
    category = 'genre'
    return self.tag_unicode(category)
172 def translator(self):
173 translators = self.extra_info.get('translators')
176 if len(translators) > 3:
177 translators = translators[:2]
181 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book.

    The book's own ``extra_info['cover_source']`` is returned whenever the
    key is present (even if its value is empty).  Otherwise the parent
    book's cover source is used; a book without a parent yields ''.
    """
    # Consult the parent only when it is really needed: computing the
    # fallback up front would walk the whole parent chain on every call,
    # even for books that carry their own cover source.
    if 'cover_source' in self.extra_info:
        return self.extra_info['cover_source']
    if self.parent:
        return self.parent.cover_source()
    return ''
186 def save(self, force_insert=False, force_update=False, **kwargs):
187 from sortify import sortify
189 self.sort_key = sortify(self.title)[:120]
190 self.title = str(self.title) # ???
193 author = self.authors().first().sort_key
194 except AttributeError:
196 self.sort_key_author = author
198 self.cached_author = self.tag_unicode('author')
199 self.has_audience = 'audience' in self.extra_info
201 if self.preview and not self.preview_key:
202 self.preview_key = get_random_hash(self.slug)[:32]
204 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL of the 'book_detail' view for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return what the module-level gallery_path() helper gives for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return what the module-level gallery_url() helper gives for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate `self.language` through constants.LANGUAGES_3TO2.

    A code that is missing from the mapping is returned unchanged.
    """
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Look up the book's language code in settings.LANGUAGES; "" if it is not listed."""
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from settings.LANGUAGE_CODE."""
    own_code = self.language_code()
    return own_code != settings.LANGUAGE_CODE
230 def set_audio_length(self):
231 length = self.get_audio_length()
233 self.audio_length = self.format_audio_length(length)
237 def format_audio_length(seconds):
239 minutes = seconds // 60
240 seconds = seconds % 60
241 return '%d:%02d' % (minutes, seconds)
243 hours = seconds // 3600
244 minutes = seconds % 3600 // 60
245 seconds = seconds % 60
246 return '%d:%02d:%02d' % (hours, minutes, seconds)
248 def get_audio_length(self):
250 for media in self.get_mp3() or ():
251 total += app_settings.GET_MP3_LENGTH(media.file.path)
254 def has_media(self, type_):
255 if type_ in Book.formats:
256 return bool(getattr(self, "%s_file" % type_))
258 return self.media.filter(type=type_).exists()
261 return self.has_media('mp3')
263 def get_media(self, type_):
264 if self.has_media(type_):
265 if type_ in Book.formats:
266 return getattr(self, "%s_file" % type_)
268 return self.media.filter(type=type_)
273 return self.get_media("mp3")
276 return self.get_media("odt")
279 return self.get_media("ogg")
282 return self.get_media("daisy")
284 def media_url(self, format_):
285 media = self.get_media(format_)
288 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
295 return self.media_url('html')
298 return self.media_url('pdf')
301 return self.media_url('epub')
304 return self.media_url('mobi')
307 return self.media_url('txt')
310 return self.media_url('fb2')
313 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description."""
    # Truthiness covers the empty string and also treats a missing (None)
    # description as "no description" instead of raising from len().
    return bool(self.description)
# Presentation hints attached to the method (presumably consumed by the
# Django admin list display -- confirm against the admin configuration).
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether has_media() reports any "mp3" media for the book."""
    media_type = "mp3"
    return self.has_media(media_type)
# Column label and boolean-rendering hint attached to the method.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether has_media() reports any "ogg" media for the book."""
    media_type = "ogg"
    return self.has_media(media_type)
# Column label and boolean-rendering hint attached to the method.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether has_media() reports any "daisy" media for the book."""
    media_type = "daisy"
    return self.has_media(media_type)
# Column label and boolean-rendering hint attached to the method.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
335 def get_audiobooks(self):
337 for m in self.media.filter(type='ogg').order_by().iterator():
338 ogg_files[m.name] = m
342 for mp3 in self.media.filter(type='mp3').iterator():
343 # ogg files are always from the same project
344 meta = mp3.extra_info
345 project = meta.get('project')
348 project = u'CzytamySłuchając'
350 projects.add((project, meta.get('funded_by', '')))
354 ogg = ogg_files.get(mp3.name)
357 audiobooks.append(media)
359 projects = sorted(projects)
360 return audiobooks, projects
362 def wldocument(self, parse_dublincore=True, inherit=True):
363 from catalogue.import_utils import ORMDocProvider
364 from librarian.parser import WLDocument
366 if inherit and self.parent:
367 meta_fallbacks = self.parent.cover_info()
369 meta_fallbacks = None
371 return WLDocument.from_file(
373 provider=ORMDocProvider(self),
374 parse_dublincore=parse_dublincore,
375 meta_fallbacks=meta_fallbacks)
378 def zip_format(format_):
379 def pretty_file_name(book):
380 return "%s/%s.%s" % (
381 book.extra_info['author'],
385 field_name = "%s_file" % format_
386 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
387 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
388 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Zip the book's media files of one type.

    Selects the BookMedia rows with book=self and type=format_, pairs each
    file path with None, and hands the pairs to create_zip() under the
    name "<slug>_<format_>".  Returns whatever create_zip() returns.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    paths = ((None, media.file.path) for media in media_files)
    archive_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, archive_name)
395 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
397 from search.index import Index
400 index.index_book(self, book_info)
405 except Exception as e:
406 index.index.rollback()
# This will cause problems in conjunction with paid previews.
410 def download_pictures(self, remote_gallery_url):
411 gallery_path = self.gallery_path()
412 # delete previous files, so we don't include old files in ebooks
413 if os.path.isdir(gallery_path):
414 for filename in os.listdir(gallery_path):
415 file_path = os.path.join(gallery_path, filename)
417 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
419 makedirs(gallery_path)
420 for ilustr in ilustr_elements:
421 ilustr_src = ilustr.get('src')
422 ilustr_path = os.path.join(gallery_path, ilustr_src)
423 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
425 def load_abstract(self):
426 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
427 if abstract is not None:
428 self.abstract = transform_abstrakt(abstract)
433 def from_xml_file(cls, xml_file, **kwargs):
434 from django.core.files import File
435 from librarian import dcparser
437 # use librarian to parse meta-data
438 book_info = dcparser.parse(xml_file)
440 if not isinstance(xml_file, File):
441 xml_file = File(open(xml_file))
444 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
449 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
450 search_index_tags=True, remote_gallery_url=None, days=0):
451 if dont_build is None:
453 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
455 # check for parts before we do anything
457 if hasattr(book_info, 'parts'):
458 for part_url in book_info.parts:
460 children.append(Book.objects.get(slug=part_url.slug))
461 except Book.DoesNotExist:
462 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
465 book_slug = book_info.url.slug
466 if re.search(r'[^a-z0-9-]', book_slug):
467 raise ValueError('Invalid characters in slug')
468 book, created = Book.objects.get_or_create(slug=book_slug)
473 book.preview = bool(days)
475 book.preview_until = date.today() + timedelta(days)
478 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
479 # Save shelves for this book
480 book_shelves = list(book.tags.filter(category='set'))
481 old_cover = book.cover_info()
484 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
486 book.xml_file.set_readable(False)
488 book.language = book_info.language
489 book.title = book_info.title
490 if book_info.variant_of:
491 book.common_slug = book_info.variant_of.slug
493 book.common_slug = book.slug
494 book.extra_info = book_info.to_dict()
498 meta_tags = Tag.tags_from_info(book_info)
500 for tag in meta_tags:
501 if not tag.for_books:
505 book.tags = set(meta_tags + book_shelves)
507 cover_changed = old_cover != book.cover_info()
508 obsolete_children = set(b for b in book.children.all()
509 if b not in children)
510 notify_cover_changed = []
511 for n, child_book in enumerate(children):
512 new_child = child_book.parent != book
513 child_book.parent = book
514 child_book.parent_number = n
516 if new_child or cover_changed:
517 notify_cover_changed.append(child_book)
518 # Disown unfaithful children and let them cope on their own.
519 for child in obsolete_children:
521 child.parent_number = 0
524 notify_cover_changed.append(child)
526 cls.repopulate_ancestors()
527 tasks.update_counters.delay()
529 if remote_gallery_url:
530 book.download_pictures(remote_gallery_url)
532 # No saves beyond this point.
535 if 'cover' not in dont_build:
536 book.cover.build_delay()
537 book.cover_thumb.build_delay()
538 book.cover_api_thumb.build_delay()
539 book.simple_cover.build_delay()
541 # Build HTML and ebooks.
542 book.html_file.build_delay()
544 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
545 if format_ not in dont_build:
546 getattr(book, '%s_file' % format_).build_delay()
547 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
548 if format_ not in dont_build:
549 getattr(book, '%s_file' % format_).build_delay()
551 if not settings.NO_SEARCH_INDEX and search_index:
552 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
554 for child in notify_cover_changed:
555 child.parent_cover_changed()
557 book.save() # update sort_key_author
558 book.update_popularity()
559 cls.published.send(sender=cls, instance=book)
564 def repopulate_ancestors(cls):
565 """Fixes the ancestry cache."""
567 cursor = connection.cursor()
568 if connection.vendor == 'postgres':
569 cursor.execute("TRUNCATE catalogue_book_ancestor")
571 WITH RECURSIVE ancestry AS (
572 SELECT book.id, book.parent_id
573 FROM catalogue_book AS book
574 WHERE book.parent_id IS NOT NULL
576 SELECT ancestor.id, book.parent_id
577 FROM ancestry AS ancestor, catalogue_book AS book
578 WHERE ancestor.parent_id = book.id
579 AND book.parent_id IS NOT NULL
581 INSERT INTO catalogue_book_ancestor
582 (from_book_id, to_book_id)
588 cursor.execute("DELETE FROM catalogue_book_ancestor")
589 for b in cls.objects.exclude(parent=None):
591 while parent is not None:
592 b.ancestor.add(parent)
593 parent = parent.parent
595 def flush_includes(self, languages=True):
596 clear_cached_renders(self.mini_box)
597 clear_cached_renders(self.mini_box_nolink)
600 if languages is True:
601 languages = [lc for (lc, _ln) in settings.LANGUAGES]
603 template % (self.pk, lang)
605 '/katalog/b/%d/short.%s.html',
606 '/katalog/b/%d/wide.%s.html',
607 '/api/include/book/%d.%s.json',
608 '/api/include/book/%d.%s.xml',
610 for lang in languages
613 def cover_info(self, inherit=True):
614 """Returns a dictionary to serve as fallback for BookInfo.
616 For now, the only thing inherited is the cover image.
620 for field in ('cover_url', 'cover_by', 'cover_source'):
621 val = self.extra_info.get(field)
626 if inherit and need and self.parent is not None:
627 parent_info = self.parent.cover_info()
628 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, for this book's fragments.

    Fragments of books that have this book among their ancestors are
    counted as well.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """React to a change of the parent book's cover image.

    Nothing happens when the book has cover information of its own
    (cover_info(inherit=False) is non-empty).  Otherwise the cover files
    and the ebook formats listed in constants.EBOOK_FORMATS_WITH_COVERS
    are scheduled for rebuilding, honouring app_settings.DONT_BUILD, and
    the notification is passed on to every child book.
    """
    if self.cover_info(inherit=False):
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_field in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_field).build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's common_slug.

    These are the other versions of the book (i.e. in other languages);
    the book itself is excluded.
    """
    same_work = type(self).objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
658 while parent is not None:
659 books.insert(0, parent)
660 parent = parent.parent
663 def pretty_title(self, html_links=False):
664 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
665 books = self.parents() + [self]
666 names.extend([(b.title, b.get_absolute_url()) for b in books])
669 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
671 names = [tag[0] for tag in names]
672 return ', '.join(names)
675 publisher = self.extra_info['publisher']
676 if isinstance(publisher, str):
678 elif isinstance(publisher, list):
679 return ', '.join(publisher)
682 def tagged_top_level(cls, tags):
683 """ Returns top-level books tagged with `tags`.
685 It only returns those books which don't have ancestors which are
686 also tagged with those tags.
689 objects = cls.tagged.with_all(tags)
690 return objects.exclude(ancestor__in=objects)
693 def book_list(cls, book_filter=None):
694 """Generates a hierarchical listing of all books.
696 Books are optionally filtered with a test function.
701 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
703 books = books.filter(book_filter).distinct()
705 book_ids = set(b['pk'] for b in books.values("pk").iterator())
706 for book in books.iterator():
707 parent = book.parent_id
708 if parent not in book_ids:
710 books_by_parent.setdefault(parent, []).append(book)
712 for book in books.iterator():
713 books_by_parent.setdefault(book.parent_id, []).append(book)
716 books_by_author = OrderedDict()
717 for tag in Tag.objects.filter(category='author').iterator():
718 books_by_author[tag] = []
720 for book in books_by_parent.get(None, ()):
721 authors = list(book.authors().only('pk'))
723 for author in authors:
724 books_by_author[author].append(book)
728 return books_by_author, orphans, books_by_parent
731 "SP": (1, u"szkoła podstawowa"),
732 "SP1": (1, u"szkoła podstawowa"),
733 "SP2": (1, u"szkoła podstawowa"),
734 "SP3": (1, u"szkoła podstawowa"),
735 "P": (1, u"szkoła podstawowa"),
736 "G": (2, u"gimnazjum"),
738 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return the book's audience names, de-duplicated and ordered.

    Every code in extra_info['audiences'] is looked up in
    self._audiences_pl; a code that is not listed there falls back to
    the pair (99, code).  The pairs are de-duplicated and sorted, and
    only their second elements are returned.
    """
    codes = self.extra_info.get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [pair[1] for pair in sorted(ranked)]
746 def stage_note(self):
747 stage = self.extra_info.get('stage')
748 if stage and stage < '0.4':
749 return (_('This work needs modernisation'),
750 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
754 def choose_fragment(self):
755 fragments = self.fragments.order_by()
756 fragments_count = fragments.count()
757 if not fragments_count and self.children.exists():
758 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
759 fragments_count = fragments.count()
761 return fragments[randint(0, fragments_count - 1)]
763 return self.parent.choose_fragment()
767 def fragment_data(self):
768 fragment = self.choose_fragment()
771 'title': fragment.book.pretty_title(),
772 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
777 def update_popularity(self):
778 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
780 pop = self.popularity
783 except BookPopularity.DoesNotExist:
784 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL of this book for the currently active language.

    Dashes in the slug are replaced with underscores in the URL.
    """
    language = get_language()
    book_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, book_id)
def like(self, user):
    """Mark the book as liked by `user`, unless it already is.

    The book is put into the single set returned by get_set(user, '').
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Remove the book from all of `user`'s sets if the user likes it."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join author sort key, title sort key and id with SORT_KEY_SEP."""
    separator = self.SORT_KEY_SEP
    parts = [self.sort_key_author, self.sort_key, str(self.id)]
    return separator.join(parts)
def cover_color(self):
    """Return the WLCover.epoch_colors entry for the book's epoch.

    The epoch is read from extra_info['epoch']; '#000000' is returned
    when no colour is defined for it.
    """
    colors = WLCover.epoch_colors
    epoch = self.extra_info.get('epoch')
    return colors.get(epoch, '#000000')
806 @cached_render('catalogue/book_mini_box.html')
812 @cached_render('catalogue/book_mini_box.html')
813 def mini_box_nolink(self):
819 def add_file_fields():
820 for format_ in Book.formats:
821 field_name = "%s_file" % format_
822 # This weird globals() assignment makes Django migrations comfortable.
823 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
824 _upload_to.__name__ = '_%s_upload_to' % format_
825 globals()[_upload_to.__name__] = _upload_to
828 format_, _("%s file" % format_.upper()),
829 upload_to=_upload_to,
830 storage=bofh_storage,
834 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    """Popularity counter of a single book, reachable as `book.popularity`."""
    # One row per book; deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed counter; Book.update_popularity() derives it from the number
    # of distinct users among the book's 'set' tags.
    count = models.IntegerField(default=0, db_index=True)