1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
44 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
45 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
46 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
47 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap `upload_path` in an `UploadToPath` callable and return it."""
    path_builder = UploadToPath(upload_path)
    return path_builder
54 class Book(models.Model):
55 """Represents a book imported from WL-XML."""
56 title = models.CharField(_('title'), max_length=32767)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
58 sort_key_author = models.CharField(
59 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
61 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
63 description = models.TextField(_('description'), blank=True)
64 abstract = models.TextField(_('abstract'), blank=True)
65 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
66 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
67 parent_number = models.IntegerField(_('parent number'), default=0)
68 extra_info = models.TextField(_('extra information'), default='{}')
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
71 print_on_demand = models.BooleanField(_('print on demand'), default=False)
72 recommended = models.BooleanField(_('recommended'), default=False)
73 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
74 preview = models.BooleanField(_('preview'), default=False)
75 preview_until = models.DateField(_('preview until'), blank=True, null=True)
76 preview_key = models.CharField(max_length=32, blank=True, null=True)
78 # files generated during publication
81 null=True, blank=True,
82 upload_to=_cover_upload_to,
83 storage=bofh_storage, max_length=255)
84 # Cleaner version of cover for thumbs
85 cover_thumb = EbookField(
86 'cover_thumb', _('cover thumbnail'),
87 null=True, blank=True,
88 upload_to=_cover_thumb_upload_to,
90 cover_api_thumb = EbookField(
91 'cover_api_thumb', _('cover thumbnail for mobile app'),
92 null=True, blank=True,
93 upload_to=_cover_api_thumb_upload_to,
95 simple_cover = EbookField(
96 'simple_cover', _('cover for mobile app'),
97 null=True, blank=True,
98 upload_to=_simple_cover_upload_to,
100 ebook_formats = constants.EBOOK_FORMATS
101 formats = ebook_formats + ['html', 'xml']
103 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
104 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
106 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
107 has_audience = models.BooleanField(default=False)
109 objects = models.Manager()
110 tagged = managers.ModelTaggedItemManager(Tag)
111 tags = managers.TagDescriptor(Tag)
112 tag_relations = GenericRelation(Tag.intermediary_table_model)
114 html_built = django.dispatch.Signal()
115 published = django.dispatch.Signal()
119 class AlreadyExists(Exception):
123 ordering = ('sort_key_author', 'sort_key')
124 verbose_name = _('book')
125 verbose_name_plural = _('books')
126 app_label = 'catalogue'
131 def get_extra_info_json(self):
132 return json.loads(self.extra_info or '{}')
134 def get_initial(self):
136 return re.search(r'\w', self.title, re.U).group(0)
137 except AttributeError:
141 return self.tags.filter(category='author')
144 return self.tags.filter(category='epoch')
147 return self.tags.filter(category='genre')
150 return self.tags.filter(category='kind')
152 def tag_unicode(self, category):
153 relations = prefetched_relations(self, category)
155 return ', '.join(rel.tag.name for rel in relations)
157 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return split_tags() of the book's tags, leaving out 'set' and 'theme' ones."""
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in `cached_author`."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return tag_unicode() for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return tag_unicode() for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return tag_unicode() for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
174 def translator(self):
175 translators = self.get_extra_info_json().get('translators')
178 if len(translators) > 3:
179 translators = translators[:2]
183 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book.

    The book's own 'cover_source' entry in its extra info takes precedence.
    Only when that key is absent is the parent's cover_source() used, and
    a book with neither gets ''.
    """
    extra_info = self.get_extra_info_json()
    if 'cover_source' in extra_info:
        return extra_info['cover_source']
    # Consult the parent lazily: supplying it as a dict.get() default would
    # evaluate it on every call and walk the whole parent chain needlessly.
    parent = self.parent
    return parent.cover_source() if parent else ''
188 def save(self, force_insert=False, force_update=False, **kwargs):
189 from sortify import sortify
191 self.sort_key = sortify(self.title)[:120]
192 self.title = str(self.title) # ???
195 author = self.authors().first().sort_key
196 except AttributeError:
198 self.sort_key_author = author
200 self.cached_author = self.tag_unicode('author')
201 self.has_audience = 'audience' in self.get_extra_info_json()
203 if self.preview and not self.preview_key:
204 self.preview_key = get_random_hash(self.slug)[:32]
206 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the reversed 'book_detail' URL for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the result of the gallery_path() helper for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the result of the gallery_url() helper for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate the stored language code through constants.LANGUAGES_3TO2.

    A code that is not in the mapping is returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the settings.LANGUAGES name for language_code(), or "" if not listed."""
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether language_code() differs from settings.LANGUAGE_CODE."""
    own_code = self.language_code()
    return own_code != settings.LANGUAGE_CODE
232 def set_audio_length(self):
233 length = self.get_audio_length()
235 self.audio_length = self.format_audio_length(length)
239 def format_audio_length(seconds):
241 >>> Book.format_audio_length(1)
243 >>> Book.format_audio_length(3661)
247 minutes = seconds // 60
248 seconds = seconds % 60
249 return '%d:%02d' % (minutes, seconds)
251 hours = seconds // 3600
252 minutes = seconds % 3600 // 60
253 seconds = seconds % 60
254 return '%d:%02d:%02d' % (hours, minutes, seconds)
256 def get_audio_length(self):
258 for media in self.get_mp3() or ():
259 total += app_settings.GET_MP3_LENGTH(media.file.path)
262 def has_media(self, type_):
263 if type_ in Book.formats:
264 return bool(getattr(self, "%s_file" % type_))
266 return self.media.filter(type=type_).exists()
269 return self.has_media('mp3')
271 def get_media(self, type_):
272 if self.has_media(type_):
273 if type_ in Book.formats:
274 return getattr(self, "%s_file" % type_)
276 return self.media.filter(type=type_)
281 return self.get_media("mp3")
284 return self.get_media("odt")
287 return self.get_media("ogg")
290 return self.get_media("daisy")
292 def media_url(self, format_):
293 media = self.get_media(format_)
296 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
303 return self.media_url('html')
306 return self.media_url('pdf')
309 return self.media_url('epub')
312 return self.media_url('mobi')
315 return self.media_url('txt')
318 return self.media_url('fb2')
321 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description."""
    # Truthiness is the idiomatic emptiness test and, unlike len(), does not
    # raise if the value happens to be None.
    return bool(self.description)
# Display hints read by Django admin list views: column label and on/off icon.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Return has_media() for the "mp3" type."""
    media_type = "mp3"
    return self.has_media(media_type)
# Display hints read by Django admin list views: on/off icon and column label.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Return has_media() for the "ogg" type."""
    media_type = "ogg"
    return self.has_media(media_type)
# Display hints read by Django admin list views: on/off icon and column label.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Return has_media() for the "daisy" type."""
    media_type = "daisy"
    return self.has_media(media_type)
# Display hints read by Django admin list views: on/off icon and column label.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
343 def get_audiobooks(self):
345 for m in self.media.filter(type='ogg').order_by().iterator():
346 ogg_files[m.name] = m
350 for mp3 in self.media.filter(type='mp3').iterator():
351 # ogg files are always from the same project
352 meta = mp3.get_extra_info_json()
353 project = meta.get('project')
356 project = 'CzytamySłuchając'
358 projects.add((project, meta.get('funded_by', '')))
362 ogg = ogg_files.get(mp3.name)
365 audiobooks.append(media)
367 projects = sorted(projects)
368 return audiobooks, projects
370 def wldocument(self, parse_dublincore=True, inherit=True):
371 from catalogue.import_utils import ORMDocProvider
372 from librarian.parser import WLDocument
374 if inherit and self.parent:
375 meta_fallbacks = self.parent.cover_info()
377 meta_fallbacks = None
379 return WLDocument.from_file(
381 provider=ORMDocProvider(self),
382 parse_dublincore=parse_dublincore,
383 meta_fallbacks=meta_fallbacks)
386 def zip_format(format_):
387 def pretty_file_name(book):
388 return "%s/%s.%s" % (
389 book.get_extra_info_json()['author'],
393 field_name = "%s_file" % format_
394 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
395 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
396 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Zip this book's media files of type `format_`.

    One (None, file path) pair per matching BookMedia is passed to
    create_zip() along with the name "<slug>_<format_>"; whatever
    create_zip() returns is returned.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Kept lazy, as a one-shot iterator of pairs.
    paths = ((None, media.file.path) for media in media_files)
    zip_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, zip_name)
403 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
405 from search.index import Index
408 index.index_book(self, book_info)
413 except Exception as e:
414 index.index.rollback()
417 # will make problems in conjunction with paid previews
418 def download_pictures(self, remote_gallery_url):
419 gallery_path = self.gallery_path()
420 # delete previous files, so we don't include old files in ebooks
421 if os.path.isdir(gallery_path):
422 for filename in os.listdir(gallery_path):
423 file_path = os.path.join(gallery_path, filename)
425 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
427 makedirs(gallery_path)
428 for ilustr in ilustr_elements:
429 ilustr_src = ilustr.get('src')
430 ilustr_path = os.path.join(gallery_path, ilustr_src)
431 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
433 def load_abstract(self):
434 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
435 if abstract is not None:
436 self.abstract = transform_abstrakt(abstract)
441 def from_xml_file(cls, xml_file, **kwargs):
442 from django.core.files import File
443 from librarian import dcparser
445 # use librarian to parse meta-data
446 book_info = dcparser.parse(xml_file)
448 if not isinstance(xml_file, File):
449 xml_file = File(open(xml_file))
452 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
457 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
458 search_index_tags=True, remote_gallery_url=None, days=0):
459 if dont_build is None:
461 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
463 # check for parts before we do anything
465 if hasattr(book_info, 'parts'):
466 for part_url in book_info.parts:
468 children.append(Book.objects.get(slug=part_url.slug))
469 except Book.DoesNotExist:
470 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
473 book_slug = book_info.url.slug
474 if re.search(r'[^a-z0-9-]', book_slug):
475 raise ValueError('Invalid characters in slug')
476 book, created = Book.objects.get_or_create(slug=book_slug)
481 book.preview = bool(days)
483 book.preview_until = date.today() + timedelta(days)
486 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
487 # Save shelves for this book
488 book_shelves = list(book.tags.filter(category='set'))
489 old_cover = book.cover_info()
492 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
494 book.xml_file.set_readable(False)
496 book.language = book_info.language
497 book.title = book_info.title
498 if book_info.variant_of:
499 book.common_slug = book_info.variant_of.slug
501 book.common_slug = book.slug
502 book.extra_info = json.dumps(book_info.to_dict())
506 meta_tags = Tag.tags_from_info(book_info)
508 for tag in meta_tags:
509 if not tag.for_books:
513 book.tags = set(meta_tags + book_shelves)
514 book.save() # update sort_key_author
516 cover_changed = old_cover != book.cover_info()
517 obsolete_children = set(b for b in book.children.all()
518 if b not in children)
519 notify_cover_changed = []
520 for n, child_book in enumerate(children):
521 new_child = child_book.parent != book
522 child_book.parent = book
523 child_book.parent_number = n
525 if new_child or cover_changed:
526 notify_cover_changed.append(child_book)
527 # Disown unfaithful children and let them cope on their own.
528 for child in obsolete_children:
530 child.parent_number = 0
533 notify_cover_changed.append(child)
535 cls.repopulate_ancestors()
536 tasks.update_counters.delay()
538 if remote_gallery_url:
539 book.download_pictures(remote_gallery_url)
541 # No saves beyond this point.
544 if 'cover' not in dont_build:
545 book.cover.build_delay()
546 book.cover_thumb.build_delay()
547 book.cover_api_thumb.build_delay()
548 book.simple_cover.build_delay()
550 # Build HTML and ebooks.
551 book.html_file.build_delay()
553 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
554 if format_ not in dont_build:
555 getattr(book, '%s_file' % format_).build_delay()
556 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
557 if format_ not in dont_build:
558 getattr(book, '%s_file' % format_).build_delay()
560 if not settings.NO_SEARCH_INDEX and search_index:
561 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
563 for child in notify_cover_changed:
564 child.parent_cover_changed()
566 book.update_popularity()
567 cls.published.send(sender=cls, instance=book)
572 def repopulate_ancestors(cls):
573 """Fixes the ancestry cache."""
575 cursor = connection.cursor()
576 if connection.vendor == 'postgres':
577 cursor.execute("TRUNCATE catalogue_book_ancestor")
579 WITH RECURSIVE ancestry AS (
580 SELECT book.id, book.parent_id
581 FROM catalogue_book AS book
582 WHERE book.parent_id IS NOT NULL
584 SELECT ancestor.id, book.parent_id
585 FROM ancestry AS ancestor, catalogue_book AS book
586 WHERE ancestor.parent_id = book.id
587 AND book.parent_id IS NOT NULL
589 INSERT INTO catalogue_book_ancestor
590 (from_book_id, to_book_id)
596 cursor.execute("DELETE FROM catalogue_book_ancestor")
597 for b in cls.objects.exclude(parent=None):
599 while parent is not None:
600 b.ancestor.add(parent)
601 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of mini_box and mini_box_nolink."""
    for box_name in ('mini_box', 'mini_box_nolink'):
        clear_cached_renders(getattr(self, box_name))
607 def cover_info(self, inherit=True):
608 """Returns a dictionary to serve as fallback for BookInfo.
610 For now, the only thing inherited is the cover image.
614 for field in ('cover_url', 'cover_by', 'cover_source'):
615 val = self.get_extra_info_json().get(field)
620 if inherit and need and self.parent is not None:
621 parent_info = self.parent.cover_info()
622 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tag usage, with counts, for fragments of this book
    or of books having it as an ancestor."""
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """React to a change of the parent book's cover image.

    A book with cover info of its own is left alone.  Otherwise the cover
    files and every cover-bearing ebook format not listed in
    app_settings.DONT_BUILD are scheduled for rebuilding, and the
    notification is passed on to each child.
    """
    if self.cover_info(inherit=False):
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_field in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_field).build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's common_slug
    (i.e. versions in other languages)."""
    same_work = type(self).objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
652 while parent is not None:
653 books.insert(0, parent)
654 parent = parent.parent
657 def pretty_title(self, html_links=False):
658 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
659 books = self.parents() + [self]
660 names.extend([(b.title, b.get_absolute_url()) for b in books])
663 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
665 names = [tag[0] for tag in names]
666 return ', '.join(names)
669 publisher = self.get_extra_info_json()['publisher']
670 if isinstance(publisher, str):
672 elif isinstance(publisher, list):
673 return ', '.join(publisher)
676 def tagged_top_level(cls, tags):
677 """ Returns top-level books tagged with `tags`.
679 It only returns those books which don't have ancestors which are
680 also tagged with those tags.
683 objects = cls.tagged.with_all(tags)
684 return objects.exclude(ancestor__in=objects)
687 def book_list(cls, book_filter=None):
688 """Generates a hierarchical listing of all books.
690 Books are optionally filtered with a test function.
695 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
697 books = books.filter(book_filter).distinct()
699 book_ids = set(b['pk'] for b in books.values("pk").iterator())
700 for book in books.iterator():
701 parent = book.parent_id
702 if parent not in book_ids:
704 books_by_parent.setdefault(parent, []).append(book)
706 for book in books.iterator():
707 books_by_parent.setdefault(book.parent_id, []).append(book)
710 books_by_author = OrderedDict()
711 for tag in Tag.objects.filter(category='author').iterator():
712 books_by_author[tag] = []
714 for book in books_by_parent.get(None, ()):
715 authors = list(book.authors().only('pk'))
717 for author in authors:
718 books_by_author[author].append(book)
722 return books_by_author, orphans, books_by_parent
725 "SP": (1, "szkoła podstawowa"),
726 "SP1": (1, "szkoła podstawowa"),
727 "SP2": (1, "szkoła podstawowa"),
728 "SP3": (1, "szkoła podstawowa"),
729 "P": (1, "szkoła podstawowa"),
730 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return the labels for the book's 'audiences' extra-info entries.

    Each code is looked up in `_audiences_pl`; an unknown code becomes
    (99, code).  Duplicate pairs are dropped, the pairs are sorted, and
    only the second element of each is returned.
    """
    codes = self.get_extra_info_json().get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [pair[1] for pair in sorted(ranked)]
740 def stage_note(self):
741 stage = self.get_extra_info_json().get('stage')
742 if stage and stage < '0.4':
743 return (_('This work needs modernisation'),
744 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
748 def choose_fragment(self):
749 fragments = self.fragments.order_by()
750 fragments_count = fragments.count()
751 if not fragments_count and self.children.exists():
752 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
753 fragments_count = fragments.count()
755 return fragments[randint(0, fragments_count - 1)]
757 return self.parent.choose_fragment()
761 def fragment_data(self):
762 fragment = self.choose_fragment()
765 'title': fragment.book.pretty_title(),
766 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
771 def update_popularity(self):
772 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
774 pop = self.popularity
777 except BookPopularity.DoesNotExist:
778 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return the ridero.eu URL built from get_language() and the slug,
    with the slug's hyphens turned into underscores."""
    language = get_language()
    book_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, book_id)
def like(self, user):
    """Unless `user` already likes the book, assign it the single set
    returned by get_set(user, '')."""
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """If `user` likes the book, reset the user's sets for it to an empty list."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join sort_key_author, sort_key and the id with SORT_KEY_SEP."""
    separator = self.SORT_KEY_SEP
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    return separator.join(parts)
def cover_color(self):
    """Return the WLCover.epoch_colors entry for the book's 'epoch'
    extra-info value, or '#000000' when there is none."""
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
800 @cached_render('catalogue/book_mini_box.html')
806 @cached_render('catalogue/book_mini_box.html')
807 def mini_box_nolink(self):
813 def add_file_fields():
814 for format_ in Book.formats:
815 field_name = "%s_file" % format_
816 # This weird globals() assignment makes Django migrations comfortable.
817 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
818 _upload_to.__name__ = '_%s_upload_to' % format_
819 globals()[_upload_to.__name__] = _upload_to
822 format_, _("%s file" % format_.upper()),
823 upload_to=_upload_to,
824 storage=bofh_storage,
828 ).contribute_to_class(Book, field_name)
834 class BookPopularity(models.Model):
835 book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
836 count = models.IntegerField(default=0, db_index=True)