1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
44 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
45 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
46 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
47 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap an upload path pattern in an ``UploadToPath`` callable.

    ``upload_path`` is handed to ``UploadToPath`` unchanged.
    """
    upload_to = UploadToPath(upload_path)
    return upload_to
54 class Book(models.Model):
55 """Represents a book imported from WL-XML."""
56 title = models.CharField(_('title'), max_length=32767)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
58 sort_key_author = models.CharField(
59 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
61 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
63 description = models.TextField(_('description'), blank=True)
64 abstract = models.TextField(_('abstract'), blank=True)
65 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
66 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
67 parent_number = models.IntegerField(_('parent number'), default=0)
68 extra_info = models.TextField(_('extra information'), default='{}')
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
71 print_on_demand = models.BooleanField(_('print on demand'), default=False)
72 recommended = models.BooleanField(_('recommended'), default=False)
73 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
74 preview = models.BooleanField(_('preview'), default=False)
75 preview_until = models.DateField(_('preview until'), blank=True, null=True)
76 preview_key = models.CharField(max_length=32, blank=True, null=True)
78 # files generated during publication
81 null=True, blank=True,
82 upload_to=_cover_upload_to,
83 storage=bofh_storage, max_length=255)
84 # Cleaner version of cover for thumbs
85 cover_thumb = EbookField(
86 'cover_thumb', _('cover thumbnail'),
87 null=True, blank=True,
88 upload_to=_cover_thumb_upload_to,
90 cover_api_thumb = EbookField(
91 'cover_api_thumb', _('cover thumbnail for mobile app'),
92 null=True, blank=True,
93 upload_to=_cover_api_thumb_upload_to,
95 simple_cover = EbookField(
96 'simple_cover', _('cover for mobile app'),
97 null=True, blank=True,
98 upload_to=_simple_cover_upload_to,
100 ebook_formats = constants.EBOOK_FORMATS
101 formats = ebook_formats + ['html', 'xml']
103 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
104 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
106 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
107 has_audience = models.BooleanField(default=False)
109 objects = models.Manager()
110 tagged = managers.ModelTaggedItemManager(Tag)
111 tags = managers.TagDescriptor(Tag)
112 tag_relations = GenericRelation(Tag.intermediary_table_model)
114 html_built = django.dispatch.Signal()
115 published = django.dispatch.Signal()
119 class AlreadyExists(Exception):
123 ordering = ('sort_key_author', 'sort_key')
124 verbose_name = _('book')
125 verbose_name_plural = _('books')
126 app_label = 'catalogue'
def get_extra_info_json(self):
    """Return ``extra_info`` decoded from JSON.

    A falsy ``extra_info`` (empty string, ``None``) is decoded as an empty
    JSON object, so the result is ``{}`` in that case.
    """
    raw = self.extra_info
    if not raw:
        raw = '{}'
    return json.loads(raw)
134 def get_initial(self):
136 return re.search(r'\w', self.title, re.U).group(0)
137 except AttributeError:
141 return self.tags.filter(category='author')
144 return self.tags.filter(category='epoch')
147 return self.tags.filter(category='genre')
150 return self.tags.filter(category='kind')
152 def tag_unicode(self, category):
153 relations = prefetched_relations(self, category)
155 return ', '.join(rel.tag.name for rel in relations)
157 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags passed through ``split_tags``.

    Tags of the 'set' and 'theme' categories are excluded first.
    """
    skipped_categories = ('set', 'theme')
    remaining = self.tags.exclude(category__in=skipped_categories)
    return split_tags(remaining)
def author_unicode(self):
    """Return the author string stored in ``cached_author``."""
    author_text = self.cached_author
    return author_text
def kind_unicode(self):
    """Return ``tag_unicode`` for the 'kind' tag category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return ``tag_unicode`` for the 'epoch' tag category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return ``tag_unicode`` for the 'genre' tag category."""
    category = 'genre'
    return self.tag_unicode(category)
174 def translator(self):
175 translators = self.get_extra_info_json().get('translators')
178 if len(translators) > 3:
179 translators = translators[:2]
183 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source of the book, inheriting it from the parent.

    The value stored under 'cover_source' in the extra info is returned
    whenever that key is present (whatever its value).  Only when the key
    is missing is the parent asked for its cover source; a book without
    a parent yields an empty string.
    """
    extra_info = self.get_extra_info_json()
    if 'cover_source' in extra_info:
        # Own value wins; don't walk up the parent chain needlessly
        # (each step parses JSON and may fetch the parent book).
        return extra_info['cover_source']
    return self.parent.cover_source() if self.parent else ''
188 def save(self, force_insert=False, force_update=False, **kwargs):
189 from sortify import sortify
191 self.sort_key = sortify(self.title)[:120]
192 self.title = str(self.title) # ???
195 author = self.authors().first().sort_key
196 except AttributeError:
198 self.sort_key_author = author
200 self.cached_author = self.tag_unicode('author')
201 self.has_audience = 'audience' in self.get_extra_info_json()
203 if self.preview and not self.preview_key:
204 self.preview_key = get_random_hash(self.slug)[:32]
206 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the URL of the 'book_detail' view for this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the result of the module-level ``gallery_path`` helper for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the result of the module-level ``gallery_url`` helper for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Map the book's ``language`` through ``constants.LANGUAGES_3TO2``.

    When the mapping has no entry for it, the ``language`` value itself
    is returned unchanged.
    """
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Look up ``language_code()`` in ``settings.LANGUAGES``.

    Returns an empty string when the code is not listed there.
    """
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether ``language_code()`` differs from ``settings.LANGUAGE_CODE``."""
    book_language = self.language_code()
    return book_language != settings.LANGUAGE_CODE
232 def set_audio_length(self):
233 length = self.get_audio_length()
235 self.audio_length = self.format_audio_length(length)
239 def format_audio_length(seconds):
241 minutes = seconds // 60
242 seconds = seconds % 60
243 return '%d:%02d' % (minutes, seconds)
245 hours = seconds // 3600
246 minutes = seconds % 3600 // 60
247 seconds = seconds % 60
248 return '%d:%02d:%02d' % (hours, minutes, seconds)
250 def get_audio_length(self):
252 for media in self.get_mp3() or ():
253 total += app_settings.GET_MP3_LENGTH(media.file.path)
256 def has_media(self, type_):
257 if type_ in Book.formats:
258 return bool(getattr(self, "%s_file" % type_))
260 return self.media.filter(type=type_).exists()
263 return self.has_media('mp3')
265 def get_media(self, type_):
266 if self.has_media(type_):
267 if type_ in Book.formats:
268 return getattr(self, "%s_file" % type_)
270 return self.media.filter(type=type_)
275 return self.get_media("mp3")
278 return self.get_media("odt")
281 return self.get_media("ogg")
284 return self.get_media("daisy")
286 def media_url(self, format_):
287 media = self.get_media(format_)
290 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
297 return self.media_url('html')
300 return self.media_url('pdf')
303 return self.media_url('epub')
306 return self.media_url('mobi')
309 return self.media_url('txt')
312 return self.media_url('fb2')
315 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description.

    Uses plain truthiness, so a missing (``None``) description counts as
    "no description" instead of raising ``TypeError``.
    """
    return bool(self.description)
# Display attributes in the style the Django admin reads from callables
# used as list columns: column title and boolean-icon rendering.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether ``has_media`` reports media of type "mp3"."""
    media_type = "mp3"
    return self.has_media(media_type)
# Display attributes attached to the callable: a title and a boolean flag.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether ``has_media`` reports media of type "ogg"."""
    media_type = "ogg"
    return self.has_media(media_type)
# Display attributes attached to the callable: a title and a boolean flag.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether ``has_media`` reports media of type "daisy"."""
    media_type = "daisy"
    return self.has_media(media_type)
# Display attributes attached to the callable: a title and a boolean flag.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
337 def get_audiobooks(self):
339 for m in self.media.filter(type='ogg').order_by().iterator():
340 ogg_files[m.name] = m
344 for mp3 in self.media.filter(type='mp3').iterator():
345 # ogg files are always from the same project
346 meta = mp3.get_extra_info_json()
347 project = meta.get('project')
350 project = 'CzytamySłuchając'
352 projects.add((project, meta.get('funded_by', '')))
356 ogg = ogg_files.get(mp3.name)
359 audiobooks.append(media)
361 projects = sorted(projects)
362 return audiobooks, projects
364 def wldocument(self, parse_dublincore=True, inherit=True):
365 from catalogue.import_utils import ORMDocProvider
366 from librarian.parser import WLDocument
368 if inherit and self.parent:
369 meta_fallbacks = self.parent.cover_info()
371 meta_fallbacks = None
373 return WLDocument.from_file(
375 provider=ORMDocProvider(self),
376 parse_dublincore=parse_dublincore,
377 meta_fallbacks=meta_fallbacks)
380 def zip_format(format_):
381 def pretty_file_name(book):
382 return "%s/%s.%s" % (
383 book.get_extra_info_json()['author'],
387 field_name = "%s_file" % format_
388 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
389 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
390 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Create a zip of this book's media files of the given type.

    Collects the file paths of all ``BookMedia`` rows of type ``format_``
    belonging to this book and passes them to ``create_zip`` under the
    name ``"<slug>_<format_>"``.  Returns whatever ``create_zip`` returns.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Build a real list: a lazy ``map`` object can be consumed only once,
    # and the old lambda argument shadowed the queryset variable.
    # The first element of each pair is left as None (no explicit name).
    paths = [(None, media.file.path) for media in media_files]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
397 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
399 from search.index import Index
402 index.index_book(self, book_info)
407 except Exception as e:
408 index.index.rollback()
411 # will make problems in conjunction with paid previews
412 def download_pictures(self, remote_gallery_url):
413 gallery_path = self.gallery_path()
414 # delete previous files, so we don't include old files in ebooks
415 if os.path.isdir(gallery_path):
416 for filename in os.listdir(gallery_path):
417 file_path = os.path.join(gallery_path, filename)
419 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
421 makedirs(gallery_path)
422 for ilustr in ilustr_elements:
423 ilustr_src = ilustr.get('src')
424 ilustr_path = os.path.join(gallery_path, ilustr_src)
425 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
427 def load_abstract(self):
428 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
429 if abstract is not None:
430 self.abstract = transform_abstrakt(abstract)
435 def from_xml_file(cls, xml_file, **kwargs):
436 from django.core.files import File
437 from librarian import dcparser
439 # use librarian to parse meta-data
440 book_info = dcparser.parse(xml_file)
442 if not isinstance(xml_file, File):
443 xml_file = File(open(xml_file))
446 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
451 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
452 search_index_tags=True, remote_gallery_url=None, days=0):
453 if dont_build is None:
455 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
457 # check for parts before we do anything
459 if hasattr(book_info, 'parts'):
460 for part_url in book_info.parts:
462 children.append(Book.objects.get(slug=part_url.slug))
463 except Book.DoesNotExist:
464 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
467 book_slug = book_info.url.slug
468 if re.search(r'[^a-z0-9-]', book_slug):
469 raise ValueError('Invalid characters in slug')
470 book, created = Book.objects.get_or_create(slug=book_slug)
475 book.preview = bool(days)
477 book.preview_until = date.today() + timedelta(days)
480 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
481 # Save shelves for this book
482 book_shelves = list(book.tags.filter(category='set'))
483 old_cover = book.cover_info()
486 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
488 book.xml_file.set_readable(False)
490 book.language = book_info.language
491 book.title = book_info.title
492 if book_info.variant_of:
493 book.common_slug = book_info.variant_of.slug
495 book.common_slug = book.slug
496 book.extra_info = json.dumps(book_info.to_dict())
500 meta_tags = Tag.tags_from_info(book_info)
502 for tag in meta_tags:
503 if not tag.for_books:
507 book.tags = set(meta_tags + book_shelves)
508 book.save() # update sort_key_author
510 cover_changed = old_cover != book.cover_info()
511 obsolete_children = set(b for b in book.children.all()
512 if b not in children)
513 notify_cover_changed = []
514 for n, child_book in enumerate(children):
515 new_child = child_book.parent != book
516 child_book.parent = book
517 child_book.parent_number = n
519 if new_child or cover_changed:
520 notify_cover_changed.append(child_book)
521 # Disown unfaithful children and let them cope on their own.
522 for child in obsolete_children:
524 child.parent_number = 0
527 notify_cover_changed.append(child)
529 cls.repopulate_ancestors()
530 tasks.update_counters.delay()
532 if remote_gallery_url:
533 book.download_pictures(remote_gallery_url)
535 # No saves beyond this point.
538 if 'cover' not in dont_build:
539 book.cover.build_delay()
540 book.cover_thumb.build_delay()
541 book.cover_api_thumb.build_delay()
542 book.simple_cover.build_delay()
544 # Build HTML and ebooks.
545 book.html_file.build_delay()
547 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
548 if format_ not in dont_build:
549 getattr(book, '%s_file' % format_).build_delay()
550 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
551 if format_ not in dont_build:
552 getattr(book, '%s_file' % format_).build_delay()
554 if not settings.NO_SEARCH_INDEX and search_index:
555 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
557 for child in notify_cover_changed:
558 child.parent_cover_changed()
560 book.update_popularity()
561 cls.published.send(sender=cls, instance=book)
566 def repopulate_ancestors(cls):
567 """Fixes the ancestry cache."""
569 cursor = connection.cursor()
570 if connection.vendor == 'postgres':
571 cursor.execute("TRUNCATE catalogue_book_ancestor")
573 WITH RECURSIVE ancestry AS (
574 SELECT book.id, book.parent_id
575 FROM catalogue_book AS book
576 WHERE book.parent_id IS NOT NULL
578 SELECT ancestor.id, book.parent_id
579 FROM ancestry AS ancestor, catalogue_book AS book
580 WHERE ancestor.parent_id = book.id
581 AND book.parent_id IS NOT NULL
583 INSERT INTO catalogue_book_ancestor
584 (from_book_id, to_book_id)
590 cursor.execute("DELETE FROM catalogue_book_ancestor")
591 for b in cls.objects.exclude(parent=None):
593 while parent is not None:
594 b.ancestor.add(parent)
595 parent = parent.parent
def clear_cache(self):
    """Clear the cached renders of ``mini_box`` and ``mini_box_nolink``."""
    for rendered_box in (self.mini_box, self.mini_box_nolink):
        clear_cached_renders(rendered_box)
601 def cover_info(self, inherit=True):
602 """Returns a dictionary to serve as fallback for BookInfo.
604 For now, the only thing inherited is the cover image.
608 for field in ('cover_url', 'cover_by', 'cover_source'):
609 val = self.get_extra_info_json().get(field)
614 if inherit and need and self.parent is not None:
615 parent_info = self.parent.cover_info()
616 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with counts, used on this book's fragments.

    Fragments are taken both from this book and from books that have it
    among their ancestors.
    """
    own_fragments = models.Q(book=self)
    inherited_fragments = models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_fragments | inherited_fragments)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Does nothing when the book has cover information of its own
    (``cover_info(inherit=False)`` is non-empty).  Otherwise schedules
    rebuilding of the cover images and of the ebook formats listed in
    ``constants.EBOOK_FORMATS_WITH_COVERS``, skipping whatever is named
    in ``app_settings.DONT_BUILD``, and notifies the children in turn.
    """
    if not self.cover_info(inherit=False):
        # All four cover variants are switched off by the single 'cover' entry.
        if 'cover' not in app_settings.DONT_BUILD:
            self.cover.build_delay()
            self.cover_thumb.build_delay()
            self.cover_api_thumb.build_delay()
            self.simple_cover.build_delay()
        # Formats that embed the cover have to be regenerated as well.
        for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
            if format_ not in app_settings.DONT_BUILD:
                getattr(self, '%s_file' % format_).build_delay()
        # Pass the notification down the tree.
        for child in self.children.all():
            child.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    These are the books sharing this book's ``common_slug``, with the
    book itself left out.
    """
    same_common_slug = type(self).objects.filter(common_slug=self.common_slug)
    return same_common_slug.exclude(pk=self.pk)
646 while parent is not None:
647 books.insert(0, parent)
648 parent = parent.parent
651 def pretty_title(self, html_links=False):
652 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
653 books = self.parents() + [self]
654 names.extend([(b.title, b.get_absolute_url()) for b in books])
657 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
659 names = [tag[0] for tag in names]
660 return ', '.join(names)
663 publisher = self.get_extra_info_json()['publisher']
664 if isinstance(publisher, str):
666 elif isinstance(publisher, list):
667 return ', '.join(publisher)
670 def tagged_top_level(cls, tags):
671 """ Returns top-level books tagged with `tags`.
673 It only returns those books which don't have ancestors which are
674 also tagged with those tags.
677 objects = cls.tagged.with_all(tags)
678 return objects.exclude(ancestor__in=objects)
681 def book_list(cls, book_filter=None):
682 """Generates a hierarchical listing of all books.
684 Books are optionally filtered with a test function.
689 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
691 books = books.filter(book_filter).distinct()
693 book_ids = set(b['pk'] for b in books.values("pk").iterator())
694 for book in books.iterator():
695 parent = book.parent_id
696 if parent not in book_ids:
698 books_by_parent.setdefault(parent, []).append(book)
700 for book in books.iterator():
701 books_by_parent.setdefault(book.parent_id, []).append(book)
704 books_by_author = OrderedDict()
705 for tag in Tag.objects.filter(category='author').iterator():
706 books_by_author[tag] = []
708 for book in books_by_parent.get(None, ()):
709 authors = list(book.authors().only('pk'))
711 for author in authors:
712 books_by_author[author].append(book)
716 return books_by_author, orphans, books_by_parent
719 "SP": (1, "szkoła podstawowa"),
720 "SP1": (1, "szkoła podstawowa"),
721 "SP2": (1, "szkoła podstawowa"),
722 "SP3": (1, "szkoła podstawowa"),
723 "P": (1, "szkoła podstawowa"),
724 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return the names of the book's audiences, sorted and de-duplicated.

    Every code from the 'audiences' entry of the extra info is looked up
    in ``_audiences_pl``; unknown codes are kept as their own name with
    rank 99.  The resulting pairs are sorted and only the second element
    of each pair is returned.
    """
    codes = self.get_extra_info_json().get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [entry[1] for entry in sorted(ranked)]
734 def stage_note(self):
735 stage = self.get_extra_info_json().get('stage')
736 if stage and stage < '0.4':
737 return (_('This work needs modernisation'),
738 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
742 def choose_fragment(self):
743 fragments = self.fragments.order_by()
744 fragments_count = fragments.count()
745 if not fragments_count and self.children.exists():
746 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
747 fragments_count = fragments.count()
749 return fragments[randint(0, fragments_count - 1)]
751 return self.parent.choose_fragment()
755 def fragment_data(self):
756 fragment = self.choose_fragment()
759 'title': fragment.book.pretty_title(),
760 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
765 def update_popularity(self):
766 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
768 pop = self.popularity
771 except BookPopularity.DoesNotExist:
772 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL for this book.

    The URL contains the current language from ``get_language()`` and the
    slug with dashes replaced by underscores.
    """
    language = get_language()
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_slug)
def like(self, user):
    """Make ``user`` like this book, unless they already do.

    The book is assigned to the single set obtained from
    ``get_set(user, '')`` (presumably the user's unnamed default set;
    confirm in ``social.utils``).
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Make ``user`` stop liking this book, if they currently like it.

    This replaces the user's sets for the book with an empty list.
    """
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join author sort key, title sort key and id with ``SORT_KEY_SEP``."""
    key_parts = (self.sort_key_author, self.sort_key, str(self.id))
    return self.SORT_KEY_SEP.join(key_parts)
def cover_color(self):
    """Return the ``WLCover.epoch_colors`` entry for the book's epoch.

    The epoch is read from the 'epoch' entry of the extra info; '#000000'
    is returned when there is no color for it.
    """
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
794 @cached_render('catalogue/book_mini_box.html')
800 @cached_render('catalogue/book_mini_box.html')
801 def mini_box_nolink(self):
807 def add_file_fields():
808 for format_ in Book.formats:
809 field_name = "%s_file" % format_
810 # This weird globals() assignment makes Django migrations comfortable.
811 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
812 _upload_to.__name__ = '_%s_upload_to' % format_
813 globals()[_upload_to.__name__] = _upload_to
816 format_, _("%s file" % format_.upper()),
817 upload_to=_upload_to,
818 storage=bofh_storage,
822 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    """Popularity counter kept in a separate table, one row per book."""
    # The row is reachable from the book as ``book.popularity`` and is
    # deleted together with the book (CASCADE).
    book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
    # Indexed integer counter, 0 by default.
    count = models.IntegerField(default=0, db_index=True)