1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
# Upload-path builders for the cover image variants. Each template's %s is
# filled with the instance slug when the UploadToPath object is called.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
_cover_ebookpoint_upload_to = UploadToPath('book/cover_ebookpoint/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap the `upload_path` template in an UploadToPath callable."""
    path_builder = UploadToPath(upload_path)
    return path_builder
55 class Book(models.Model):
56 """Represents a book imported from WL-XML."""
57 title = models.CharField(_('title'), max_length=32767)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
59 sort_key_author = models.CharField(
60 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
61 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
62 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
63 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
64 description = models.TextField(_('description'), blank=True)
65 abstract = models.TextField(_('abstract'), blank=True)
66 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
67 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
68 parent_number = models.IntegerField(_('parent number'), default=0)
69 extra_info = models.TextField(_('extra information'), default='{}')
70 gazeta_link = models.CharField(blank=True, max_length=240)
71 wiki_link = models.CharField(blank=True, max_length=240)
72 print_on_demand = models.BooleanField(_('print on demand'), default=False)
73 recommended = models.BooleanField(_('recommended'), default=False)
74 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
75 preview = models.BooleanField(_('preview'), default=False)
76 preview_until = models.DateField(_('preview until'), blank=True, null=True)
77 preview_key = models.CharField(max_length=32, blank=True, null=True)
78 findable = models.BooleanField(_('findable'), default=True, db_index=True)
80 # files generated during publication
83 null=True, blank=True,
84 upload_to=_cover_upload_to,
85 storage=bofh_storage, max_length=255)
86 # Cleaner version of cover for thumbs
87 cover_thumb = EbookField(
88 'cover_thumb', _('cover thumbnail'),
89 null=True, blank=True,
90 upload_to=_cover_thumb_upload_to,
92 cover_api_thumb = EbookField(
93 'cover_api_thumb', _('cover thumbnail for mobile app'),
94 null=True, blank=True,
95 upload_to=_cover_api_thumb_upload_to,
97 simple_cover = EbookField(
98 'simple_cover', _('cover for mobile app'),
99 null=True, blank=True,
100 upload_to=_simple_cover_upload_to,
102 cover_ebookpoint = EbookField(
103 'cover_ebookpoint', _('cover for Ebookpoint'),
104 null=True, blank=True,
105 upload_to=_cover_ebookpoint_upload_to,
107 ebook_formats = constants.EBOOK_FORMATS
108 formats = ebook_formats + ['html', 'xml']
110 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
111 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
113 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
114 has_audience = models.BooleanField(default=False)
116 objects = models.Manager()
117 tagged = managers.ModelTaggedItemManager(Tag)
118 tags = managers.TagDescriptor(Tag)
119 tag_relations = GenericRelation(Tag.intermediary_table_model)
121 html_built = django.dispatch.Signal()
122 published = django.dispatch.Signal()
126 class AlreadyExists(Exception):
130 ordering = ('sort_key_author', 'sort_key')
131 verbose_name = _('book')
132 verbose_name_plural = _('books')
133 app_label = 'catalogue'
def get_extra_info_json(self):
    """Decode the JSON text stored in `extra_info`.

    A falsy value (empty string, None) is decoded as an empty JSON object.
    """
    raw = self.extra_info
    if not raw:
        raw = '{}'
    return json.loads(raw)
141 def get_initial(self):
143 return re.search(r'\w', self.title, re.U).group(0)
144 except AttributeError:
148 return self.tags.filter(category='author')
151 return self.tags.filter(category='epoch')
154 return self.tags.filter(category='genre')
157 return self.tags.filter(category='kind')
159 def tag_unicode(self, category):
160 relations = prefetched_relations(self, category)
162 return ', '.join(rel.tag.name for rel in relations)
164 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, minus 'set' and 'theme', run through split_tags."""
    skipped_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=skipped_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string cached on the model in `cached_author`."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return the text produced by `tag_unicode` for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return the text produced by `tag_unicode` for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return the text produced by `tag_unicode` for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
181 def translators(self):
182 translators = self.get_extra_info_json().get('translators') or []
184 '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
187 def translator(self):
188 translators = self.get_extra_info_json().get('translators')
191 if len(translators) > 3:
192 translators = translators[:2]
196 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book.

    When the book's extra info has no 'cover_source' key, the parent's
    cover source is returned instead; a book with no parent yields ''.
    """
    info = self.get_extra_info_json()
    if 'cover_source' in info:
        return info['cover_source']
    # Walk up the parent chain only when needed: a dict.get() default is
    # evaluated eagerly, even if the key is present.
    if self.parent:
        return self.parent.cover_source()
    return ''
203 return self.get_extra_info_json().get('isbn_pdf')
207 return self.get_extra_info_json().get('isbn_epub')
211 return self.get_extra_info_json().get('isbn_mobi')
214 def save(self, force_insert=False, force_update=False, **kwargs):
215 from sortify import sortify
217 self.sort_key = sortify(self.title)[:120]
218 self.title = str(self.title) # ???
221 author = self.authors().first().sort_key
222 except AttributeError:
224 self.sort_key_author = author
226 self.cached_author = self.tag_unicode('author')
227 self.has_audience = 'audience' in self.get_extra_info_json()
229 if self.preview and not self.preview_key:
230 self.preview_key = get_random_hash(self.slug)[:32]
232 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL reversed from the 'book_detail' route for this slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the gallery path for this book's slug.

    Delegates to the `gallery_path` helper imported from catalogue.utils.
    """
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the gallery URL for this book's slug.

    Delegates to the `gallery_url` helper imported from catalogue.utils.
    """
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate `language` through constants.LANGUAGES_3TO2.

    Codes missing from that mapping are returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Look up the book's language code in settings.LANGUAGES.

    Returns "" when the code is not listed there.
    """
    known_languages = dict(settings.LANGUAGES)
    return known_languages.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from settings.LANGUAGE_CODE."""
    book_language = self.language_code()
    return book_language != settings.LANGUAGE_CODE
258 def set_audio_length(self):
259 length = self.get_audio_length()
261 self.audio_length = self.format_audio_length(length)
265 def format_audio_length(seconds):
267 >>> Book.format_audio_length(1)
269 >>> Book.format_audio_length(3661)
273 minutes = seconds // 60
274 seconds = seconds % 60
275 return '%d:%02d' % (minutes, seconds)
277 hours = seconds // 3600
278 minutes = seconds % 3600 // 60
279 seconds = seconds % 60
280 return '%d:%02d:%02d' % (hours, minutes, seconds)
282 def get_audio_length(self):
284 for media in self.get_mp3() or ():
285 total += app_settings.GET_MP3_LENGTH(media.file.path)
288 def has_media(self, type_):
289 if type_ in Book.formats:
290 return bool(getattr(self, "%s_file" % type_))
292 return self.media.filter(type=type_).exists()
295 return self.has_media('mp3')
297 def get_media(self, type_):
298 if self.has_media(type_):
299 if type_ in Book.formats:
300 return getattr(self, "%s_file" % type_)
302 return self.media.filter(type=type_)
307 return self.get_media("mp3")
310 return self.get_media("odt")
313 return self.get_media("ogg")
316 return self.get_media("daisy")
318 def media_url(self, format_):
319 media = self.get_media(format_)
322 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
329 return self.media_url('html')
332 return self.media_url('pdf')
335 return self.media_url('epub')
338 return self.media_url('mobi')
341 return self.media_url('txt')
344 return self.media_url('fb2')
347 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description.

    Uses truthiness rather than len(), so a None value counts as "no
    description" instead of raising TypeError.
    """
    return bool(self.description)
# Display metadata attached to the method; presumably read by the Django
# admin change list -- confirm against the admin configuration.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Return what `has_media` reports for the "mp3" type."""
    mp3_present = self.has_media("mp3")
    return mp3_present
# Display metadata attached to the method.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Return what `has_media` reports for the "ogg" type."""
    ogg_present = self.has_media("ogg")
    return ogg_present
# Display metadata attached to the method.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Return what `has_media` reports for the "daisy" type."""
    daisy_present = self.has_media("daisy")
    return daisy_present
# Display metadata attached to the method.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
369 def get_audiobooks(self):
371 for m in self.media.filter(type='ogg').order_by().iterator():
372 ogg_files[m.name] = m
376 for mp3 in self.media.filter(type='mp3').iterator():
377 # ogg files are always from the same project
378 meta = mp3.get_extra_info_json()
379 project = meta.get('project')
382 project = 'CzytamySłuchając'
384 projects.add((project, meta.get('funded_by', '')))
388 ogg = ogg_files.get(mp3.name)
391 audiobooks.append(media)
393 projects = sorted(projects)
394 return audiobooks, projects
396 def wldocument(self, parse_dublincore=True, inherit=True):
397 from catalogue.import_utils import ORMDocProvider
398 from librarian.parser import WLDocument
400 if inherit and self.parent:
401 meta_fallbacks = self.parent.cover_info()
403 meta_fallbacks = None
405 return WLDocument.from_file(
407 provider=ORMDocProvider(self),
408 parse_dublincore=parse_dublincore,
409 meta_fallbacks=meta_fallbacks)
412 def zip_format(format_):
413 def pretty_file_name(book):
414 return "%s/%s.%s" % (
415 book.get_extra_info_json()['author'],
419 field_name = "%s_file" % format_
420 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
421 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
422 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pass this book's media files of type `format_` to create_zip.

    Each entry is a (None, file path) pair; the name handed to create_zip
    is '<slug>_<format_>'.
    """
    media_of_type = BookMedia.objects.filter(book=self, type=format_)
    entries = ((None, item.file.path) for item in media_of_type)
    archive_name = "%s_%s" % (self.slug, format_)
    return create_zip(entries, archive_name)
429 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
430 if not self.findable:
433 from search.index import Index
436 index.index_book(self, book_info)
441 except Exception as e:
442 index.index.rollback()
445 # will make problems in conjunction with paid previews
446 def download_pictures(self, remote_gallery_url):
447 gallery_path = self.gallery_path()
448 # delete previous files, so we don't include old files in ebooks
449 if os.path.isdir(gallery_path):
450 for filename in os.listdir(gallery_path):
451 file_path = os.path.join(gallery_path, filename)
453 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
455 makedirs(gallery_path)
456 for ilustr in ilustr_elements:
457 ilustr_src = ilustr.get('src')
458 ilustr_path = os.path.join(gallery_path, ilustr_src)
459 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
461 def load_abstract(self):
462 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
463 if abstract is not None:
464 self.abstract = transform_abstrakt(abstract)
469 def from_xml_file(cls, xml_file, **kwargs):
470 from django.core.files import File
471 from librarian import dcparser
473 # use librarian to parse meta-data
474 book_info = dcparser.parse(xml_file)
476 if not isinstance(xml_file, File):
477 xml_file = File(open(xml_file))
480 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
485 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
486 search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
487 if dont_build is None:
489 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
491 # check for parts before we do anything
493 if hasattr(book_info, 'parts'):
494 for part_url in book_info.parts:
496 children.append(Book.objects.get(slug=part_url.slug))
497 except Book.DoesNotExist:
498 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
501 book_slug = book_info.url.slug
502 if re.search(r'[^a-z0-9-]', book_slug):
503 raise ValueError('Invalid characters in slug')
504 book, created = Book.objects.get_or_create(slug=book_slug)
509 book.preview = bool(days)
511 book.preview_until = date.today() + timedelta(days)
514 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
515 # Save shelves for this book
516 book_shelves = list(book.tags.filter(category='set'))
517 old_cover = book.cover_info()
520 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
522 book.xml_file.set_readable(False)
524 book.findable = findable
525 book.language = book_info.language
526 book.title = book_info.title
527 if book_info.variant_of:
528 book.common_slug = book_info.variant_of.slug
530 book.common_slug = book.slug
531 book.extra_info = json.dumps(book_info.to_dict())
535 meta_tags = Tag.tags_from_info(book_info)
537 for tag in meta_tags:
538 if not tag.for_books:
542 book.tags = set(meta_tags + book_shelves)
543 book.save() # update sort_key_author
545 cover_changed = old_cover != book.cover_info()
546 obsolete_children = set(b for b in book.children.all()
547 if b not in children)
548 notify_cover_changed = []
549 for n, child_book in enumerate(children):
550 new_child = child_book.parent != book
551 child_book.parent = book
552 child_book.parent_number = n
554 if new_child or cover_changed:
555 notify_cover_changed.append(child_book)
556 # Disown unfaithful children and let them cope on their own.
557 for child in obsolete_children:
559 child.parent_number = 0
562 notify_cover_changed.append(child)
564 cls.repopulate_ancestors()
565 tasks.update_counters.delay()
567 if remote_gallery_url:
568 book.download_pictures(remote_gallery_url)
570 # No saves beyond this point.
573 if 'cover' not in dont_build:
574 book.cover.build_delay()
575 book.cover_thumb.build_delay()
576 book.cover_api_thumb.build_delay()
577 book.simple_cover.build_delay()
578 book.cover_ebookpoint.build_delay()
580 # Build HTML and ebooks.
581 book.html_file.build_delay()
583 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
584 if format_ not in dont_build:
585 getattr(book, '%s_file' % format_).build_delay()
586 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
587 if format_ not in dont_build:
588 getattr(book, '%s_file' % format_).build_delay()
590 if not settings.NO_SEARCH_INDEX and search_index and findable:
591 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
593 for child in notify_cover_changed:
594 child.parent_cover_changed()
596 book.update_popularity()
597 cls.published.send(sender=cls, instance=book)
602 def repopulate_ancestors(cls):
603 """Fixes the ancestry cache."""
605 cursor = connection.cursor()
606 if connection.vendor == 'postgres':
607 cursor.execute("TRUNCATE catalogue_book_ancestor")
609 WITH RECURSIVE ancestry AS (
610 SELECT book.id, book.parent_id
611 FROM catalogue_book AS book
612 WHERE book.parent_id IS NOT NULL
614 SELECT ancestor.id, book.parent_id
615 FROM ancestry AS ancestor, catalogue_book AS book
616 WHERE ancestor.parent_id = book.id
617 AND book.parent_id IS NOT NULL
619 INSERT INTO catalogue_book_ancestor
620 (from_book_id, to_book_id)
626 cursor.execute("DELETE FROM catalogue_book_ancestor")
627 for b in cls.objects.exclude(parent=None):
629 while parent is not None:
630 b.ancestor.add(parent)
631 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of `mini_box` and `mini_box_nolink`."""
    for cached_view in (self.mini_box, self.mini_box_nolink):
        clear_cached_renders(cached_view)
637 def cover_info(self, inherit=True):
638 """Returns a dictionary to serve as fallback for BookInfo.
640 For now, the only thing inherited is the cover image.
644 for field in ('cover_url', 'cover_by', 'cover_source'):
645 val = self.get_extra_info_json().get(field)
650 if inherit and need and self.parent is not None:
651 parent_info = self.parent.cover_info()
652 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with counts, for fragments tied to this book.

    Fragments are matched when their book is this one or has this one
    among its ancestors.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Does nothing when `cover_info(inherit=False)` is truthy, i.e. the book
    carries cover data of its own. Otherwise `build_delay()` is called on
    the cover fields and on every format in EBOOK_FORMATS_WITH_COVERS,
    skipping anything listed in app_settings.DONT_BUILD, and the call is
    passed on to each child.
    """
    if self.cover_info(inherit=False):
        # Own cover data present: the parent's cover is not inherited.
        return

    dont_build = app_settings.DONT_BUILD
    if 'cover' not in dont_build:
        self.cover.build_delay()
        self.cover_thumb.build_delay()
        self.cover_api_thumb.build_delay()
        self.simple_cover.build_delay()
        # Keep in step with the cover variants built in from_text_and_meta,
        # which also rebuilds the Ebookpoint cover.
        self.cover_ebookpoint.build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ not in dont_build:
            getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    These are findable books sharing this book's `common_slug`, with the
    book itself excluded.
    """
    model = type(self)
    same_work = model.objects.filter(common_slug=self.common_slug, findable=True)
    return same_work.exclude(pk=self.pk)
682 while parent is not None:
683 books.insert(0, parent)
684 parent = parent.parent
687 def pretty_title(self, html_links=False):
688 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
689 books = self.parents() + [self]
690 names.extend([(b.title, b.get_absolute_url()) for b in books])
693 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
695 names = [tag[0] for tag in names]
696 return ', '.join(names)
699 publisher = self.get_extra_info_json()['publisher']
700 if isinstance(publisher, str):
702 elif isinstance(publisher, list):
703 return ', '.join(publisher)
706 def tagged_top_level(cls, tags):
707 """ Returns top-level books tagged with `tags`.
709 It only returns those books which don't have ancestors which are
710 also tagged with those tags.
713 objects = cls.tagged.with_all(tags)
714 return objects.filter(findable=True).exclude(ancestor__in=objects)
717 def book_list(cls, book_filter=None):
718 """Generates a hierarchical listing of all books.
720 Books are optionally filtered with a test function.
725 books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
727 books = books.filter(book_filter).distinct()
729 book_ids = set(b['pk'] for b in books.values("pk").iterator())
730 for book in books.iterator():
731 parent = book.parent_id
732 if parent not in book_ids:
734 books_by_parent.setdefault(parent, []).append(book)
736 for book in books.iterator():
737 books_by_parent.setdefault(book.parent_id, []).append(book)
740 books_by_author = OrderedDict()
741 for tag in Tag.objects.filter(category='author').iterator():
742 books_by_author[tag] = []
744 for book in books_by_parent.get(None, ()):
745 authors = list(book.authors().only('pk'))
747 for author in authors:
748 books_by_author[author].append(book)
752 return books_by_author, orphans, books_by_parent
755 "SP": (1, "szkoła podstawowa"),
756 "SP1": (1, "szkoła podstawowa"),
757 "SP2": (1, "szkoła podstawowa"),
758 "SP3": (1, "szkoła podstawowa"),
759 "P": (1, "szkoła podstawowa"),
760 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return the audience labels for this book, de-duplicated and ordered.

    Codes from the 'audiences' extra-info entry are looked up in
    `_audiences_pl`, which maps a code to a (rank, label) pair. Codes not
    found there are kept verbatim with rank 99. The result holds the labels
    only, sorted by (rank, label). A missing or null entry gives [].
    """
    codes = self.get_extra_info_json().get('audiences') or []
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [label for _rank, label in sorted(ranked)]
770 def stage_note(self):
771 stage = self.get_extra_info_json().get('stage')
772 if stage and stage < '0.4':
773 return (_('This work needs modernisation'),
774 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
778 def choose_fragment(self):
779 fragments = self.fragments.order_by()
780 fragments_count = fragments.count()
781 if not fragments_count and self.children.exists():
782 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
783 fragments_count = fragments.count()
785 return fragments[randint(0, fragments_count - 1)]
787 return self.parent.choose_fragment()
791 def fragment_data(self):
792 fragment = self.choose_fragment()
795 'title': fragment.book.pretty_title(),
796 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
801 def update_popularity(self):
802 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
804 pop = self.popularity
807 except BookPopularity.DoesNotExist:
808 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu link for this book.

    The URL contains the value of get_language() and the slug with dashes
    replaced by underscores.
    """
    language = get_language()
    ridero_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_id)
def like(self, user):
    """Mark the book as liked by `user`, unless `likes` says it already is.

    The book is assigned to the set returned by get_set(user, '').
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Drop the book from all of `user`'s sets if `likes` says it is liked."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, title sort key and id with SORT_KEY_SEP."""
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    return self.SORT_KEY_SEP.join(parts)
def cover_color(self):
    """Return the WLCover.epoch_colors entry for the book's 'epoch' extra info.

    Falls back to '#000000' when the epoch has no entry.
    """
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
830 @cached_render('catalogue/book_mini_box.html')
836 @cached_render('catalogue/book_mini_box.html')
837 def mini_box_nolink(self):
843 def add_file_fields():
844 for format_ in Book.formats:
845 field_name = "%s_file" % format_
846 # This weird globals() assignment makes Django migrations comfortable.
847 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
848 _upload_to.__name__ = '_%s_upload_to' % format_
849 globals()[_upload_to.__name__] = _upload_to
852 format_, _("%s file" % format_.upper()),
853 upload_to=_upload_to,
854 storage=bofh_storage,
858 ).contribute_to_class(Book, field_name)
864 class BookPopularity(models.Model):
865 book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
866 count = models.IntegerField(default=0, db_index=True)