1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
44 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
45 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
46 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
47 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap ``upload_path`` in an ``UploadToPath`` callable for use as ``upload_to``."""
    upload_to = UploadToPath(upload_path)
    return upload_to
54 class Book(models.Model):
55 """Represents a book imported from WL-XML."""
56 title = models.CharField(_('title'), max_length=32767)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
58 sort_key_author = models.CharField(
59 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
61 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
63 description = models.TextField(_('description'), blank=True)
64 abstract = models.TextField(_('abstract'), blank=True)
65 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
66 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
67 parent_number = models.IntegerField(_('parent number'), default=0)
68 extra_info = models.TextField(_('extra information'), default='{}')
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
71 print_on_demand = models.BooleanField(_('print on demand'), default=False)
72 recommended = models.BooleanField(_('recommended'), default=False)
73 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
74 preview = models.BooleanField(_('preview'), default=False)
75 preview_until = models.DateField(_('preview until'), blank=True, null=True)
76 preview_key = models.CharField(max_length=32, blank=True, null=True)
77 findable = models.BooleanField(_('findable'), default=True, db_index=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
101 ebook_formats = constants.EBOOK_FORMATS
102 formats = ebook_formats + ['html', 'xml']
104 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
105 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
107 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
108 has_audience = models.BooleanField(default=False)
110 objects = models.Manager()
111 tagged = managers.ModelTaggedItemManager(Tag)
112 tags = managers.TagDescriptor(Tag)
113 tag_relations = GenericRelation(Tag.intermediary_table_model)
115 html_built = django.dispatch.Signal()
116 published = django.dispatch.Signal()
120 class AlreadyExists(Exception):
124 ordering = ('sort_key_author', 'sort_key')
125 verbose_name = _('book')
126 verbose_name_plural = _('books')
127 app_label = 'catalogue'
def get_extra_info_json(self):
    """Return ``extra_info`` decoded from JSON; an empty value decodes as ``{}``."""
    raw_info = self.extra_info
    if not raw_info:
        raw_info = '{}'
    return json.loads(raw_info)
135 def get_initial(self):
137 return re.search(r'\w', self.title, re.U).group(0)
138 except AttributeError:
142 return self.tags.filter(category='author')
145 return self.tags.filter(category='epoch')
148 return self.tags.filter(category='genre')
151 return self.tags.filter(category='kind')
153 def tag_unicode(self, category):
154 relations = prefetched_relations(self, category)
156 return ', '.join(rel.tag.name for rel in relations)
158 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return this book's tags, minus 'set' and 'theme' ones, passed through ``split_tags``."""
    excluded_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=excluded_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in ``cached_author``."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return the result of ``tag_unicode`` for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return the result of ``tag_unicode`` for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return the result of ``tag_unicode`` for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
175 def translators(self):
176 translators = self.get_extra_info_json().get('translators')
178 '\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators
181 def translator(self):
182 translators = self.get_extra_info_json().get('translators')
185 if len(translators) > 3:
186 translators = translators[:2]
190 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the 'cover_source' entry of this book's extra info.

    When the entry is absent, the parent's ``cover_source()`` is returned
    instead, or '' for a book without a parent.  The parent is consulted
    only in that case, so a book with its own entry never walks up the
    chain of parents.
    """
    info = self.get_extra_info_json()
    if 'cover_source' in info:
        return info['cover_source']
    return self.parent.cover_source() if self.parent else ''
195 def save(self, force_insert=False, force_update=False, **kwargs):
196 from sortify import sortify
198 self.sort_key = sortify(self.title)[:120]
199 self.title = str(self.title) # ???
202 author = self.authors().first().sort_key
203 except AttributeError:
205 self.sort_key_author = author
207 self.cached_author = self.tag_unicode('author')
208 self.has_audience = 'audience' in self.get_extra_info_json()
210 if self.preview and not self.preview_key:
211 self.preview_key = get_random_hash(self.slug)[:32]
213 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL reversed from the 'book_detail' route for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the ``gallery_path`` helper's result for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the ``gallery_url`` helper's result for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Map ``language`` through ``constants.LANGUAGES_3TO2``; unmapped codes are returned as-is."""
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Look up ``language_code()`` in ``settings.LANGUAGES``; return "" if absent."""
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether ``language_code()`` differs from ``settings.LANGUAGE_CODE``."""
    own_code = self.language_code()
    return own_code != settings.LANGUAGE_CODE
239 def set_audio_length(self):
240 length = self.get_audio_length()
242 self.audio_length = self.format_audio_length(length)
246 def format_audio_length(seconds):
248 >>> Book.format_audio_length(1)
250 >>> Book.format_audio_length(3661)
254 minutes = seconds // 60
255 seconds = seconds % 60
256 return '%d:%02d' % (minutes, seconds)
258 hours = seconds // 3600
259 minutes = seconds % 3600 // 60
260 seconds = seconds % 60
261 return '%d:%02d:%02d' % (hours, minutes, seconds)
263 def get_audio_length(self):
265 for media in self.get_mp3() or ():
266 total += app_settings.GET_MP3_LENGTH(media.file.path)
269 def has_media(self, type_):
270 if type_ in Book.formats:
271 return bool(getattr(self, "%s_file" % type_))
273 return self.media.filter(type=type_).exists()
276 return self.has_media('mp3')
278 def get_media(self, type_):
279 if self.has_media(type_):
280 if type_ in Book.formats:
281 return getattr(self, "%s_file" % type_)
283 return self.media.filter(type=type_)
288 return self.get_media("mp3")
291 return self.get_media("odt")
294 return self.get_media("ogg")
297 return self.get_media("daisy")
299 def media_url(self, format_):
300 media = self.get_media(format_)
303 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
310 return self.media_url('html')
313 return self.media_url('pdf')
316 return self.media_url('epub')
319 return self.media_url('mobi')
322 return self.media_url('txt')
325 return self.media_url('fb2')
328 return self.media_url('xml')
def has_description(self):
    """Tell whether ``description`` is non-empty."""
    description_length = len(self.description)
    return description_length > 0
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether ``has_media`` reports any "mp3" media."""
    media_type = "mp3"
    return self.has_media(media_type)
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether ``has_media`` reports any "ogg" media."""
    media_type = "ogg"
    return self.has_media(media_type)
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether ``has_media`` reports any "daisy" media."""
    media_type = "daisy"
    return self.has_media(media_type)
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
350 def get_audiobooks(self):
352 for m in self.media.filter(type='ogg').order_by().iterator():
353 ogg_files[m.name] = m
357 for mp3 in self.media.filter(type='mp3').iterator():
358 # ogg files are always from the same project
359 meta = mp3.get_extra_info_json()
360 project = meta.get('project')
363 project = 'CzytamySłuchając'
365 projects.add((project, meta.get('funded_by', '')))
369 ogg = ogg_files.get(mp3.name)
372 audiobooks.append(media)
374 projects = sorted(projects)
375 return audiobooks, projects
377 def wldocument(self, parse_dublincore=True, inherit=True):
378 from catalogue.import_utils import ORMDocProvider
379 from librarian.parser import WLDocument
381 if inherit and self.parent:
382 meta_fallbacks = self.parent.cover_info()
384 meta_fallbacks = None
386 return WLDocument.from_file(
388 provider=ORMDocProvider(self),
389 parse_dublincore=parse_dublincore,
390 meta_fallbacks=meta_fallbacks)
393 def zip_format(format_):
394 def pretty_file_name(book):
395 return "%s/%s.%s" % (
396 book.get_extra_info_json()['author'],
400 field_name = "%s_file" % format_
401 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
402 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
403 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Zip this book's media files of type ``format_`` with ``create_zip``.

    The second argument passed to ``create_zip`` is "<slug>_<format_>".
    Each file is listed as a ``(None, path)`` pair; ``None`` takes the
    place where ``zip_format`` supplies a pretty file name.
    """
    media = BookMedia.objects.filter(book=self, type=format_)
    # Build a real list, as zip_format does, instead of a one-shot ``map``
    # iterator whose lambda parameter shadowed the queryset name.
    paths = [(None, item.file.path) for item in media]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
410 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
411 if not self.findable:
414 from search.index import Index
417 index.index_book(self, book_info)
422 except Exception as e:
423 index.index.rollback()
426 # will make problems in conjunction with paid previews
427 def download_pictures(self, remote_gallery_url):
428 gallery_path = self.gallery_path()
429 # delete previous files, so we don't include old files in ebooks
430 if os.path.isdir(gallery_path):
431 for filename in os.listdir(gallery_path):
432 file_path = os.path.join(gallery_path, filename)
434 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
436 makedirs(gallery_path)
437 for ilustr in ilustr_elements:
438 ilustr_src = ilustr.get('src')
439 ilustr_path = os.path.join(gallery_path, ilustr_src)
440 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
442 def load_abstract(self):
443 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
444 if abstract is not None:
445 self.abstract = transform_abstrakt(abstract)
450 def from_xml_file(cls, xml_file, **kwargs):
451 from django.core.files import File
452 from librarian import dcparser
454 # use librarian to parse meta-data
455 book_info = dcparser.parse(xml_file)
457 if not isinstance(xml_file, File):
458 xml_file = File(open(xml_file))
461 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
466 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
467 search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
468 if dont_build is None:
470 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
472 # check for parts before we do anything
474 if hasattr(book_info, 'parts'):
475 for part_url in book_info.parts:
477 children.append(Book.objects.get(slug=part_url.slug))
478 except Book.DoesNotExist:
479 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
482 book_slug = book_info.url.slug
483 if re.search(r'[^a-z0-9-]', book_slug):
484 raise ValueError('Invalid characters in slug')
485 book, created = Book.objects.get_or_create(slug=book_slug)
490 book.preview = bool(days)
492 book.preview_until = date.today() + timedelta(days)
495 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
496 # Save shelves for this book
497 book_shelves = list(book.tags.filter(category='set'))
498 old_cover = book.cover_info()
501 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
503 book.xml_file.set_readable(False)
505 book.findable = findable
506 book.language = book_info.language
507 book.title = book_info.title
508 if book_info.variant_of:
509 book.common_slug = book_info.variant_of.slug
511 book.common_slug = book.slug
512 book.extra_info = json.dumps(book_info.to_dict())
516 meta_tags = Tag.tags_from_info(book_info)
518 for tag in meta_tags:
519 if not tag.for_books:
523 book.tags = set(meta_tags + book_shelves)
524 book.save() # update sort_key_author
526 cover_changed = old_cover != book.cover_info()
527 obsolete_children = set(b for b in book.children.all()
528 if b not in children)
529 notify_cover_changed = []
530 for n, child_book in enumerate(children):
531 new_child = child_book.parent != book
532 child_book.parent = book
533 child_book.parent_number = n
535 if new_child or cover_changed:
536 notify_cover_changed.append(child_book)
537 # Disown unfaithful children and let them cope on their own.
538 for child in obsolete_children:
540 child.parent_number = 0
543 notify_cover_changed.append(child)
545 cls.repopulate_ancestors()
546 tasks.update_counters.delay()
548 if remote_gallery_url:
549 book.download_pictures(remote_gallery_url)
551 # No saves beyond this point.
554 if 'cover' not in dont_build:
555 book.cover.build_delay()
556 book.cover_thumb.build_delay()
557 book.cover_api_thumb.build_delay()
558 book.simple_cover.build_delay()
560 # Build HTML and ebooks.
561 book.html_file.build_delay()
563 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
564 if format_ not in dont_build:
565 getattr(book, '%s_file' % format_).build_delay()
566 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
567 if format_ not in dont_build:
568 getattr(book, '%s_file' % format_).build_delay()
570 if not settings.NO_SEARCH_INDEX and search_index and findable:
571 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
573 for child in notify_cover_changed:
574 child.parent_cover_changed()
576 book.update_popularity()
577 cls.published.send(sender=cls, instance=book)
582 def repopulate_ancestors(cls):
583 """Fixes the ancestry cache."""
585 cursor = connection.cursor()
586 if connection.vendor == 'postgres':
587 cursor.execute("TRUNCATE catalogue_book_ancestor")
589 WITH RECURSIVE ancestry AS (
590 SELECT book.id, book.parent_id
591 FROM catalogue_book AS book
592 WHERE book.parent_id IS NOT NULL
594 SELECT ancestor.id, book.parent_id
595 FROM ancestry AS ancestor, catalogue_book AS book
596 WHERE ancestor.parent_id = book.id
597 AND book.parent_id IS NOT NULL
599 INSERT INTO catalogue_book_ancestor
600 (from_book_id, to_book_id)
606 cursor.execute("DELETE FROM catalogue_book_ancestor")
607 for b in cls.objects.exclude(parent=None):
609 while parent is not None:
610 b.ancestor.add(parent)
611 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of ``mini_box`` and ``mini_box_nolink``."""
    for box_name in ('mini_box', 'mini_box_nolink'):
        clear_cached_renders(getattr(self, box_name))
617 def cover_info(self, inherit=True):
618 """Returns a dictionary to serve as fallback for BookInfo.
620 For now, the only thing inherited is the cover image.
624 for field in ('cover_url', 'cover_by', 'cover_source'):
625 val = self.get_extra_info_json().get(field)
630 if inherit and need and self.parent is not None:
631 parent_info = self.parent.cover_info()
632 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with counts, used by fragments of this book or of books it is an ancestor of."""
    in_this_book = models.Q(book=self)
    in_descendants = models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(in_this_book | in_descendants)
    usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return usage.filter(category='theme')
641 def parent_cover_changed(self):
642 """Called when parent book's cover image is changed."""
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the steps below cannot be confirmed; presumably they all run only when
# this book has no cover info of its own -- confirm against the repository.
# cover_info(inherit=False) looks only at this book, never at its parent.
643 if not self.cover_info(inherit=False):
# Cover images are rebuilt unless 'cover' is listed in app_settings.DONT_BUILD.
644 if 'cover' not in app_settings.DONT_BUILD:
645 self.cover.build_delay()
646 self.cover_thumb.build_delay()
647 self.cover_api_thumb.build_delay()
648 self.simple_cover.build_delay()
# Same DONT_BUILD check, per format, for the formats in EBOOK_FORMATS_WITH_COVERS.
649 for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
650 if format_ not in app_settings.DONT_BUILD:
651 getattr(self, '%s_file' % format_).build_delay()
# Pass the notification on to each child book.
652 for child in self.children.all():
653 child.parent_cover_changed()
def other_versions(self):
    """Return findable books sharing this book's ``common_slug``, excluding the book itself.

    These are the other versions (i.e. in other languages) of the book.
    """
    same_work = type(self).objects.filter(common_slug=self.common_slug, findable=True)
    return same_work.exclude(pk=self.pk)
662 while parent is not None:
663 books.insert(0, parent)
664 parent = parent.parent
667 def pretty_title(self, html_links=False):
668 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
669 books = self.parents() + [self]
670 names.extend([(b.title, b.get_absolute_url()) for b in books])
673 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
675 names = [tag[0] for tag in names]
676 return ', '.join(names)
679 publisher = self.get_extra_info_json()['publisher']
680 if isinstance(publisher, str):
682 elif isinstance(publisher, list):
683 return ', '.join(publisher)
686 def tagged_top_level(cls, tags):
687 """ Returns top-level books tagged with `tags`.
689 It only returns those books which don't have ancestors which are
690 also tagged with those tags.
693 objects = cls.tagged.with_all(tags)
694 return objects.filter(findable=True).exclude(ancestor__in=objects)
697 def book_list(cls, book_filter=None):
698 """Generates a hierarchical listing of all books.
700 Books are optionally filtered with a test function.
705 books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
707 books = books.filter(book_filter).distinct()
709 book_ids = set(b['pk'] for b in books.values("pk").iterator())
710 for book in books.iterator():
711 parent = book.parent_id
712 if parent not in book_ids:
714 books_by_parent.setdefault(parent, []).append(book)
716 for book in books.iterator():
717 books_by_parent.setdefault(book.parent_id, []).append(book)
720 books_by_author = OrderedDict()
721 for tag in Tag.objects.filter(category='author').iterator():
722 books_by_author[tag] = []
724 for book in books_by_parent.get(None, ()):
725 authors = list(book.authors().only('pk'))
727 for author in authors:
728 books_by_author[author].append(book)
732 return books_by_author, orphans, books_by_parent
735 "SP": (1, "szkoła podstawowa"),
736 "SP1": (1, "szkoła podstawowa"),
737 "SP2": (1, "szkoła podstawowa"),
738 "SP3": (1, "szkoła podstawowa"),
739 "P": (1, "szkoła podstawowa"),
740 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return labels for the 'audiences' listed in extra info, deduplicated and sorted.

    Each audience code is looked up in ``_audiences_pl``; a code missing
    from that table is paired with 99 and used as its own label.  The
    resulting pairs are deduplicated and sorted before the labels are taken.
    """
    codes = self.get_extra_info_json().get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [pair[1] for pair in sorted(ranked)]
750 def stage_note(self):
751 stage = self.get_extra_info_json().get('stage')
752 if stage and stage < '0.4':
753 return (_('This work needs modernisation'),
754 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
758 def choose_fragment(self):
759 fragments = self.fragments.order_by()
760 fragments_count = fragments.count()
761 if not fragments_count and self.children.exists():
762 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
763 fragments_count = fragments.count()
765 return fragments[randint(0, fragments_count - 1)]
767 return self.parent.choose_fragment()
771 def fragment_data(self):
772 fragment = self.choose_fragment()
775 'title': fragment.book.pretty_title(),
776 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
781 def update_popularity(self):
782 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
784 pop = self.popularity
787 except BookPopularity.DoesNotExist:
788 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return this book's ridero.eu URL, built from ``get_language()`` and the slug with '-' turned into '_'."""
    language = get_language()
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_slug)
def like(self, user):
    """Assign this book to ``user``'s set named '' unless ``user`` already likes it."""
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Reset ``user``'s sets for this book to an empty list if ``user`` likes it."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, the title sort key and the id with ``SORT_KEY_SEP``."""
    key_parts = (self.sort_key_author, self.sort_key, str(self.id))
    return self.SORT_KEY_SEP.join(key_parts)
def cover_color(self):
    """Return the ``WLCover.epoch_colors`` entry for this book's 'epoch', or '#000000' if there is none."""
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
810 @cached_render('catalogue/book_mini_box.html')
816 @cached_render('catalogue/book_mini_box.html')
817 def mini_box_nolink(self):
823 def add_file_fields():
824 for format_ in Book.formats:
825 field_name = "%s_file" % format_
826 # This weird globals() assignment makes Django migrations comfortable.
827 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
828 _upload_to.__name__ = '_%s_upload_to' % format_
829 globals()[_upload_to.__name__] = _upload_to
832 format_, _("%s file" % format_.upper()),
833 upload_to=_upload_to,
834 storage=bofh_storage,
838 ).contribute_to_class(Book, field_name)
844 class BookPopularity(models.Model):
845 book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
846 count = models.IntegerField(default=0, db_index=True)