1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
5 from datetime import date, timedelta
6 from random import randint
9 from urllib.request import urlretrieve
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 import django.dispatch
13 from django.contrib.contenttypes.fields import GenericRelation
14 from django.urls import reverse
15 from django.utils.translation import ugettext_lazy as _, get_language
16 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
# Upload-path callables used as `upload_to` for the cover fields of Book.
# Each wraps a '%s' path template; UploadToPath.__call__ fills the template
# with the slug of the instance being saved.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap a '%s' path template in an UploadToPath callable.

    The returned object is what the ebook file fields use as `upload_to`.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
54 class Book(models.Model):
55 """Represents a book imported from WL-XML."""
56 title = models.CharField(_('title'), max_length=32767)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
58 sort_key_author = models.CharField(
59 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
61 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
63 description = models.TextField(_('description'), blank=True)
64 abstract = models.TextField(_('abstract'), blank=True)
65 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
66 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
67 parent_number = models.IntegerField(_('parent number'), default=0)
68 extra_info = jsonfield.JSONField(_('extra information'), default={})
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
71 print_on_demand = models.BooleanField(_('print on demand'), default=False)
72 recommended = models.BooleanField(_('recommended'), default=False)
73 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
74 preview = models.BooleanField(_('preview'), default=False)
75 preview_until = models.DateField(_('preview until'), blank=True, null=True)
76 preview_key = models.CharField(max_length=32, blank=True, null=True)
78 # files generated during publication
81 null=True, blank=True,
82 upload_to=_cover_upload_to,
83 storage=bofh_storage, max_length=255)
84 # Cleaner version of cover for thumbs
85 cover_thumb = EbookField(
86 'cover_thumb', _('cover thumbnail'),
87 null=True, blank=True,
88 upload_to=_cover_thumb_upload_to,
90 cover_api_thumb = EbookField(
91 'cover_api_thumb', _('cover thumbnail for mobile app'),
92 null=True, blank=True,
93 upload_to=_cover_api_thumb_upload_to,
95 simple_cover = EbookField(
96 'simple_cover', _('cover for mobile app'),
97 null=True, blank=True,
98 upload_to=_simple_cover_upload_to,
100 ebook_formats = constants.EBOOK_FORMATS
101 formats = ebook_formats + ['html', 'xml']
103 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
104 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
106 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
107 has_audience = models.BooleanField(default=False)
109 objects = models.Manager()
110 tagged = managers.ModelTaggedItemManager(Tag)
111 tags = managers.TagDescriptor(Tag)
112 tag_relations = GenericRelation(Tag.intermediary_table_model)
114 html_built = django.dispatch.Signal()
115 published = django.dispatch.Signal()
119 class AlreadyExists(Exception):
123 ordering = ('sort_key_author', 'sort_key')
124 verbose_name = _('book')
125 verbose_name_plural = _('books')
126 app_label = 'catalogue'
131 def get_initial(self):
133 return re.search(r'\w', self.title, re.U).group(0)
134 except AttributeError:
138 return self.tags.filter(category='author')
141 return self.tags.filter(category='epoch')
144 return self.tags.filter(category='genre')
147 return self.tags.filter(category='kind')
149 def tag_unicode(self, category):
150 relations = prefetched_relations(self, category)
152 return ', '.join(rel.tag.name for rel in relations)
154 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, minus the 'set' and 'theme' categories, run through split_tags()."""
    visible_tags = self.tags.exclude(category__in=('set', 'theme'))
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string kept in the `cached_author` field."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return the text produced by tag_unicode() for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return the text produced by tag_unicode() for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return the text produced by tag_unicode() for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
171 def translator(self):
172 translators = self.extra_info.get('translators')
175 if len(translators) > 3:
176 translators = translators[:2]
180 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source for this book, falling back to its parent.

    The book's own `extra_info['cover_source']` is returned whenever the key
    is present (whatever its value).  Otherwise the parent's cover_source()
    is used, or an empty string when the book has no parent.
    """
    own_info = self.extra_info
    if 'cover_source' in own_info:
        return own_info['cover_source']
    # Walk up the parent chain only when the value is actually needed.
    # Passing the parent lookup as the `.get()` default evaluated it on
    # every call, even for books that carry their own cover source.
    return self.parent.cover_source() if self.parent else ''
185 def save(self, force_insert=False, force_update=False, **kwargs):
186 from sortify import sortify
188 self.sort_key = sortify(self.title)[:120]
189 self.title = str(self.title) # ???
192 author = self.authors().first().sort_key
193 except AttributeError:
195 self.sort_key_author = author
197 self.cached_author = self.tag_unicode('author')
198 self.has_audience = 'audience' in self.extra_info
200 if self.preview and not self.preview_key:
201 self.preview_key = get_random_hash(self.slug)[:32]
203 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL of the 'book_detail' view for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return what the module-level gallery_path() helper gives for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return what the module-level gallery_url() helper gives for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate the stored language code through constants.LANGUAGES_3TO2.

    A code without an entry in that mapping is returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the settings.LANGUAGES name for the book's language code, or "" if unlisted."""
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language code differs from settings.LANGUAGE_CODE."""
    book_code = self.language_code()
    site_code = settings.LANGUAGE_CODE
    return book_code != site_code
229 def set_audio_length(self):
230 length = self.get_audio_length()
232 self.audio_length = self.format_audio_length(length)
236 def format_audio_length(seconds):
238 minutes = seconds // 60
239 seconds = seconds % 60
240 return '%d:%02d' % (minutes, seconds)
242 hours = seconds // 3600
243 minutes = seconds % 3600 // 60
244 seconds = seconds % 60
245 return '%d:%02d:%02d' % (hours, minutes, seconds)
247 def get_audio_length(self):
249 for media in self.get_mp3() or ():
250 total += app_settings.GET_MP3_LENGTH(media.file.path)
253 def has_media(self, type_):
254 if type_ in Book.formats:
255 return bool(getattr(self, "%s_file" % type_))
257 return self.media.filter(type=type_).exists()
260 return self.has_media('mp3')
262 def get_media(self, type_):
263 if self.has_media(type_):
264 if type_ in Book.formats:
265 return getattr(self, "%s_file" % type_)
267 return self.media.filter(type=type_)
272 return self.get_media("mp3")
275 return self.get_media("odt")
278 return self.get_media("ogg")
281 return self.get_media("daisy")
283 def media_url(self, format_):
284 media = self.get_media(format_)
287 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
294 return self.media_url('html')
297 return self.media_url('pdf')
300 return self.media_url('epub')
303 return self.media_url('mobi')
306 return self.media_url('txt')
309 return self.media_url('fb2')
312 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description.

    Always returns a real bool.  Truthiness is used instead of a length
    comparison, so a missing (None) description counts as empty rather
    than raising TypeError.
    """
    return bool(self.description)
# Function attributes in the style of Django admin `list_display` callables:
# a column label and a flag marking the value as boolean.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether has_media() reports any 'mp3' media for this book."""
    media_type = "mp3"
    return self.has_media(media_type)
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether has_media() reports any 'ogg' media for this book."""
    media_type = "ogg"
    return self.has_media(media_type)
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether has_media() reports any 'daisy' media for this book."""
    media_type = "daisy"
    return self.has_media(media_type)
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
334 def get_audiobooks(self):
336 for m in self.media.filter(type='ogg').order_by().iterator():
337 ogg_files[m.name] = m
341 for mp3 in self.media.filter(type='mp3').iterator():
342 # ogg files are always from the same project
343 meta = mp3.extra_info
344 project = meta.get('project')
347 project = u'CzytamySłuchając'
349 projects.add((project, meta.get('funded_by', '')))
353 ogg = ogg_files.get(mp3.name)
356 audiobooks.append(media)
358 projects = sorted(projects)
359 return audiobooks, projects
361 def wldocument(self, parse_dublincore=True, inherit=True):
362 from catalogue.import_utils import ORMDocProvider
363 from librarian.parser import WLDocument
365 if inherit and self.parent:
366 meta_fallbacks = self.parent.cover_info()
368 meta_fallbacks = None
370 return WLDocument.from_file(
372 provider=ORMDocProvider(self),
373 parse_dublincore=parse_dublincore,
374 meta_fallbacks=meta_fallbacks)
377 def zip_format(format_):
378 def pretty_file_name(book):
379 return "%s/%s.%s" % (
380 book.extra_info['author'],
384 field_name = "%s_file" % format_
385 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
386 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
387 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Create a zip of this book's BookMedia files of the given type.

    Each entry is handed to create_zip() as a (None, file path) pair; the
    archive name is built from the book's slug and the format.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    entries = ((None, media.file.path) for media in media_files)
    archive_name = "%s_%s" % (self.slug, format_)
    return create_zip(entries, archive_name)
394 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
396 from search.index import Index
399 index.index_book(self, book_info)
404 except Exception as e:
405 index.index.rollback()
408 # will make problems in conjunction with paid previews
409 def download_pictures(self, remote_gallery_url):
410 gallery_path = self.gallery_path()
411 # delete previous files, so we don't include old files in ebooks
412 if os.path.isdir(gallery_path):
413 for filename in os.listdir(gallery_path):
414 file_path = os.path.join(gallery_path, filename)
416 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
418 makedirs(gallery_path)
419 for ilustr in ilustr_elements:
420 ilustr_src = ilustr.get('src')
421 ilustr_path = os.path.join(gallery_path, ilustr_src)
422 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
424 def load_abstract(self):
425 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
426 if abstract is not None:
427 self.abstract = transform_abstrakt(abstract)
432 def from_xml_file(cls, xml_file, **kwargs):
433 from django.core.files import File
434 from librarian import dcparser
436 # use librarian to parse meta-data
437 book_info = dcparser.parse(xml_file)
439 if not isinstance(xml_file, File):
440 xml_file = File(open(xml_file))
443 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
448 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
449 search_index_tags=True, remote_gallery_url=None, days=0):
450 if dont_build is None:
452 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
454 # check for parts before we do anything
456 if hasattr(book_info, 'parts'):
457 for part_url in book_info.parts:
459 children.append(Book.objects.get(slug=part_url.slug))
460 except Book.DoesNotExist:
461 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
464 book_slug = book_info.url.slug
465 if re.search(r'[^a-z0-9-]', book_slug):
466 raise ValueError('Invalid characters in slug')
467 book, created = Book.objects.get_or_create(slug=book_slug)
472 book.preview = bool(days)
474 book.preview_until = date.today() + timedelta(days)
477 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
478 # Save shelves for this book
479 book_shelves = list(book.tags.filter(category='set'))
480 old_cover = book.cover_info()
483 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
485 book.xml_file.set_readable(False)
487 book.language = book_info.language
488 book.title = book_info.title
489 if book_info.variant_of:
490 book.common_slug = book_info.variant_of.slug
492 book.common_slug = book.slug
493 book.extra_info = book_info.to_dict()
497 meta_tags = Tag.tags_from_info(book_info)
499 for tag in meta_tags:
500 if not tag.for_books:
504 book.tags = set(meta_tags + book_shelves)
505 book.save() # update sort_key_author
507 cover_changed = old_cover != book.cover_info()
508 obsolete_children = set(b for b in book.children.all()
509 if b not in children)
510 notify_cover_changed = []
511 for n, child_book in enumerate(children):
512 new_child = child_book.parent != book
513 child_book.parent = book
514 child_book.parent_number = n
516 if new_child or cover_changed:
517 notify_cover_changed.append(child_book)
518 # Disown unfaithful children and let them cope on their own.
519 for child in obsolete_children:
521 child.parent_number = 0
524 notify_cover_changed.append(child)
526 cls.repopulate_ancestors()
527 tasks.update_counters.delay()
529 if remote_gallery_url:
530 book.download_pictures(remote_gallery_url)
532 # No saves beyond this point.
535 if 'cover' not in dont_build:
536 book.cover.build_delay()
537 book.cover_thumb.build_delay()
538 book.cover_api_thumb.build_delay()
539 book.simple_cover.build_delay()
541 # Build HTML and ebooks.
542 book.html_file.build_delay()
544 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
545 if format_ not in dont_build:
546 getattr(book, '%s_file' % format_).build_delay()
547 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
548 if format_ not in dont_build:
549 getattr(book, '%s_file' % format_).build_delay()
551 if not settings.NO_SEARCH_INDEX and search_index:
552 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
554 for child in notify_cover_changed:
555 child.parent_cover_changed()
557 book.update_popularity()
558 cls.published.send(sender=cls, instance=book)
563 def repopulate_ancestors(cls):
564 """Fixes the ancestry cache."""
566 cursor = connection.cursor()
567 if connection.vendor == 'postgres':
568 cursor.execute("TRUNCATE catalogue_book_ancestor")
570 WITH RECURSIVE ancestry AS (
571 SELECT book.id, book.parent_id
572 FROM catalogue_book AS book
573 WHERE book.parent_id IS NOT NULL
575 SELECT ancestor.id, book.parent_id
576 FROM ancestry AS ancestor, catalogue_book AS book
577 WHERE ancestor.parent_id = book.id
578 AND book.parent_id IS NOT NULL
580 INSERT INTO catalogue_book_ancestor
581 (from_book_id, to_book_id)
587 cursor.execute("DELETE FROM catalogue_book_ancestor")
588 for b in cls.objects.exclude(parent=None):
590 while parent is not None:
591 b.ancestor.add(parent)
592 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of both mini box variants of this book."""
    for render_name in ('mini_box', 'mini_box_nolink'):
        clear_cached_renders(getattr(self, render_name))
598 def cover_info(self, inherit=True):
599 """Returns a dictionary to serve as fallback for BookInfo.
601 For now, the only thing inherited is the cover image.
605 for field in ('cover_url', 'cover_by', 'cover_source'):
606 val = self.extra_info.get(field)
611 if inherit and need and self.parent is not None:
612 parent_info = self.parent.cover_info()
613 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, over fragments of this book and of books it is an ancestor of."""
    in_this_book = models.Q(book=self)
    in_descendants = models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(in_this_book | in_descendants)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """React to a change of the parent book's cover image.

    Nothing happens for a book that has cover information of its own.
    Otherwise build_delay() is called on the four cover fields and on the
    files of every format in constants.EBOOK_FORMATS_WITH_COVERS, each
    unless excluded through app_settings.DONT_BUILD, and the notification
    is then passed on to all children.
    """
    if self.cover_info(inherit=False):
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_field in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_field).build_delay()

    for ebook_format in constants.EBOOK_FORMATS_WITH_COVERS:
        if ebook_format in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % ebook_format).build_delay()

    for child_book in self.children.all():
        child_book.parent_cover_changed()
def other_versions(self):
    """Return the books sharing this book's common_slug (e.g. other language versions), excluding itself."""
    same_work = type(self).objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
643 while parent is not None:
644 books.insert(0, parent)
645 parent = parent.parent
648 def pretty_title(self, html_links=False):
649 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
650 books = self.parents() + [self]
651 names.extend([(b.title, b.get_absolute_url()) for b in books])
654 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
656 names = [tag[0] for tag in names]
657 return ', '.join(names)
660 publisher = self.extra_info['publisher']
661 if isinstance(publisher, str):
663 elif isinstance(publisher, list):
664 return ', '.join(publisher)
667 def tagged_top_level(cls, tags):
668 """ Returns top-level books tagged with `tags`.
670 It only returns those books which don't have ancestors which are
671 also tagged with those tags.
674 objects = cls.tagged.with_all(tags)
675 return objects.exclude(ancestor__in=objects)
678 def book_list(cls, book_filter=None):
679 """Generates a hierarchical listing of all books.
681 Books are optionally filtered with a test function.
686 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
688 books = books.filter(book_filter).distinct()
690 book_ids = set(b['pk'] for b in books.values("pk").iterator())
691 for book in books.iterator():
692 parent = book.parent_id
693 if parent not in book_ids:
695 books_by_parent.setdefault(parent, []).append(book)
697 for book in books.iterator():
698 books_by_parent.setdefault(book.parent_id, []).append(book)
701 books_by_author = OrderedDict()
702 for tag in Tag.objects.filter(category='author').iterator():
703 books_by_author[tag] = []
705 for book in books_by_parent.get(None, ()):
706 authors = list(book.authors().only('pk'))
708 for author in authors:
709 books_by_author[author].append(book)
713 return books_by_author, orphans, books_by_parent
716 "SP": (1, u"szkoła podstawowa"),
717 "SP1": (1, u"szkoła podstawowa"),
718 "SP2": (1, u"szkoła podstawowa"),
719 "SP3": (1, u"szkoła podstawowa"),
720 "P": (1, u"szkoła podstawowa"),
721 "G": (2, u"gimnazjum"),
723 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return display names for the book's audiences, deduplicated and sorted.

    Codes from extra_info['audiences'] are looked up in `_audiences_pl`;
    a code without an entry gets rank 99 and is shown verbatim.
    """
    codes = self.extra_info.get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [entry[1] for entry in sorted(ranked)]
731 def stage_note(self):
732 stage = self.extra_info.get('stage')
733 if stage and stage < '0.4':
734 return (_('This work needs modernisation'),
735 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
739 def choose_fragment(self):
740 fragments = self.fragments.order_by()
741 fragments_count = fragments.count()
742 if not fragments_count and self.children.exists():
743 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
744 fragments_count = fragments.count()
746 return fragments[randint(0, fragments_count - 1)]
748 return self.parent.choose_fragment()
752 def fragment_data(self):
753 fragment = self.choose_fragment()
756 'title': fragment.book.pretty_title(),
757 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
762 def update_popularity(self):
763 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
765 pop = self.popularity
768 except BookPopularity.DoesNotExist:
769 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return the ridero.eu URL built from get_language() and the slug with '-' replaced by '_'."""
    language = get_language()
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_slug)
def like(self, user):
    """Put this book in the set returned by get_set(user, '') unless the user already likes it."""
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    liked_set = get_set(user, '')
    set_sets(user, self, [liked_set])
def unlike(self, user):
    """Reset the user's sets for this book to an empty list if the user currently likes it."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, the title sort key and the id with SORT_KEY_SEP."""
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    separator = self.SORT_KEY_SEP
    return separator.join(parts)
def cover_color(self):
    """Return the WLCover.epoch_colors entry for the book's epoch, or '#000000' if there is none."""
    epoch = self.extra_info.get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
791 @cached_render('catalogue/book_mini_box.html')
797 @cached_render('catalogue/book_mini_box.html')
798 def mini_box_nolink(self):
804 def add_file_fields():
805 for format_ in Book.formats:
806 field_name = "%s_file" % format_
807 # This weird globals() assignment makes Django migrations comfortable.
808 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
809 _upload_to.__name__ = '_%s_upload_to' % format_
810 globals()[_upload_to.__name__] = _upload_to
813 format_, _("%s file" % format_.upper()),
814 upload_to=_upload_to,
815 storage=bofh_storage,
819 ).contribute_to_class(Book, field_name)
825 class BookPopularity(models.Model):
826 book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
827 count = models.IntegerField(default=0, db_index=True)