1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
10 from urllib.request import urlretrieve
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.urls import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 from fnpdjango.storage import BofhFileSystemStorage
20 from librarian.cover import WLCover
21 from librarian.html import transform_abstrakt
22 from newtagging import managers
23 from catalogue import constants
24 from catalogue.fields import EbookField
25 from catalogue.models import Tag, Fragment, BookMedia
26 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
27 from catalogue.models.tag import prefetched_relations
28 from catalogue import app_settings
29 from catalogue import tasks
30 from wolnelektury.utils import makedirs, cached_render, clear_cached_renders
32 bofh_storage = BofhFileSystemStorage()
36 class UploadToPath(object):
37 def __init__(self, path):
40 def __call__(self, instance, filename):
41 return self.path % instance.slug
44 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
45 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
46 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
47 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap ``upload_path`` in an ``UploadToPath`` instance and return it."""
    upload_to = UploadToPath(upload_path)
    return upload_to
54 class Book(models.Model):
55 """Represents a book imported from WL-XML."""
56 title = models.CharField(_('title'), max_length=32767)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
58 sort_key_author = models.CharField(
59 _('sort key by author'), max_length=120, db_index=True, editable=False, default='')
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
61 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
63 description = models.TextField(_('description'), blank=True)
64 abstract = models.TextField(_('abstract'), blank=True)
65 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
66 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
67 parent_number = models.IntegerField(_('parent number'), default=0)
68 extra_info = models.TextField(_('extra information'), default='{}')
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
71 print_on_demand = models.BooleanField(_('print on demand'), default=False)
72 recommended = models.BooleanField(_('recommended'), default=False)
73 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
74 preview = models.BooleanField(_('preview'), default=False)
75 preview_until = models.DateField(_('preview until'), blank=True, null=True)
76 preview_key = models.CharField(max_length=32, blank=True, null=True)
77 findable = models.BooleanField(_('findable'), default=True, db_index=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
101 ebook_formats = constants.EBOOK_FORMATS
102 formats = ebook_formats + ['html', 'xml']
104 parent = models.ForeignKey('self', models.CASCADE, blank=True, null=True, related_name='children')
105 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
107 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
108 has_audience = models.BooleanField(default=False)
110 objects = models.Manager()
111 tagged = managers.ModelTaggedItemManager(Tag)
112 tags = managers.TagDescriptor(Tag)
113 tag_relations = GenericRelation(Tag.intermediary_table_model)
115 html_built = django.dispatch.Signal()
116 published = django.dispatch.Signal()
120 class AlreadyExists(Exception):
124 ordering = ('sort_key_author', 'sort_key')
125 verbose_name = _('book')
126 verbose_name_plural = _('books')
127 app_label = 'catalogue'
def get_extra_info_json(self):
    """Decode the JSON text stored in ``extra_info`` and return the result.

    A falsy ``extra_info`` (empty string, None) is decoded as an empty
    JSON object.
    """
    raw = self.extra_info
    if not raw:
        raw = '{}'
    return json.loads(raw)
135 def get_initial(self):
137 return re.search(r'\w', self.title, re.U).group(0)
138 except AttributeError:
142 return self.tags.filter(category='author')
145 return self.tags.filter(category='epoch')
148 return self.tags.filter(category='genre')
151 return self.tags.filter(category='kind')
153 def tag_unicode(self, category):
154 relations = prefetched_relations(self, category)
156 return ', '.join(rel.tag.name for rel in relations)
158 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Pass the book's tags, minus the 'set' and 'theme' categories, to ``split_tags``."""
    visible_tags = self.tags.exclude(category__in=('set', 'theme'))
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author text kept in the ``cached_author`` field."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return what ``tag_unicode`` yields for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return what ``tag_unicode`` yields for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return what ``tag_unicode`` yields for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
175 def translator(self):
176 translators = self.get_extra_info_json().get('translators')
179 if len(translators) > 3:
180 translators = translators[:2]
184 return ', '.join('\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source of this book, falling back to its parent.

    The 'cover_source' entry of the book's extra info is returned whenever
    the key is present (even if its value is empty). Only when the key is
    missing is the parent asked, recursively; a book with neither the key
    nor a parent yields an empty string.
    """
    info = self.get_extra_info_json()
    # Resolve the fallback lazily: handing it to ``dict.get`` as the default
    # would evaluate the whole parent chain even when this book has its own
    # value.
    if 'cover_source' in info:
        return info['cover_source']
    if self.parent:
        return self.parent.cover_source()
    return ''
189 def save(self, force_insert=False, force_update=False, **kwargs):
190 from sortify import sortify
192 self.sort_key = sortify(self.title)[:120]
193 self.title = str(self.title) # ???
196 author = self.authors().first().sort_key
197 except AttributeError:
199 self.sort_key_author = author
201 self.cached_author = self.tag_unicode('author')
202 self.has_audience = 'audience' in self.get_extra_info_json()
204 if self.preview and not self.preview_key:
205 self.preview_key = get_random_hash(self.slug)[:32]
207 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Reverse the 'book_detail' URL pattern with this book's slug."""
    url_args = [self.slug]
    return reverse('book_detail', args=url_args)
def gallery_path(self):
    """Return the result of the imported ``gallery_path`` helper for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the result of the imported ``gallery_url`` helper for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Look the book's language up in ``constants.LANGUAGES_3TO2``.

    A language that is not a key of that mapping is returned unchanged.
    """
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Return the ``settings.LANGUAGES`` entry for this book's language code.

    An empty string is returned when the code is not listed there.
    """
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    code = self.language_code()
    return code != settings.LANGUAGE_CODE
233 def set_audio_length(self):
234 length = self.get_audio_length()
236 self.audio_length = self.format_audio_length(length)
240 def format_audio_length(seconds):
242 >>> Book.format_audio_length(1)
244 >>> Book.format_audio_length(3661)
248 minutes = seconds // 60
249 seconds = seconds % 60
250 return '%d:%02d' % (minutes, seconds)
252 hours = seconds // 3600
253 minutes = seconds % 3600 // 60
254 seconds = seconds % 60
255 return '%d:%02d:%02d' % (hours, minutes, seconds)
257 def get_audio_length(self):
259 for media in self.get_mp3() or ():
260 total += app_settings.GET_MP3_LENGTH(media.file.path)
263 def has_media(self, type_):
264 if type_ in Book.formats:
265 return bool(getattr(self, "%s_file" % type_))
267 return self.media.filter(type=type_).exists()
270 return self.has_media('mp3')
272 def get_media(self, type_):
273 if self.has_media(type_):
274 if type_ in Book.formats:
275 return getattr(self, "%s_file" % type_)
277 return self.media.filter(type=type_)
282 return self.get_media("mp3")
285 return self.get_media("odt")
288 return self.get_media("ogg")
291 return self.get_media("daisy")
293 def media_url(self, format_):
294 media = self.get_media(format_)
297 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
304 return self.media_url('html')
307 return self.media_url('pdf')
310 return self.media_url('epub')
313 return self.media_url('mobi')
316 return self.media_url('txt')
319 return self.media_url('fb2')
322 return self.media_url('xml')
def has_description(self):
    """Tell whether the book's description has a non-zero length."""
    return len(self.description) != 0
# Label and boolean flag attached to the method itself
# (presumably read by the Django admin list display -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Return what ``has_media`` reports for the "mp3" media type."""
    media_type = "mp3"
    return self.has_media(media_type)
# Label and boolean flag attached to the method itself
# (presumably read by the Django admin list display -- confirm).
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Return what ``has_media`` reports for the "ogg" media type."""
    media_type = "ogg"
    return self.has_media(media_type)
# Label and boolean flag attached to the method itself
# (presumably read by the Django admin list display -- confirm).
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Return what ``has_media`` reports for the "daisy" media type."""
    media_type = "daisy"
    return self.has_media(media_type)
# Label and boolean flag attached to the method itself
# (presumably read by the Django admin list display -- confirm).
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
344 def get_audiobooks(self):
346 for m in self.media.filter(type='ogg').order_by().iterator():
347 ogg_files[m.name] = m
351 for mp3 in self.media.filter(type='mp3').iterator():
352 # ogg files are always from the same project
353 meta = mp3.get_extra_info_json()
354 project = meta.get('project')
357 project = 'CzytamySłuchając'
359 projects.add((project, meta.get('funded_by', '')))
363 ogg = ogg_files.get(mp3.name)
366 audiobooks.append(media)
368 projects = sorted(projects)
369 return audiobooks, projects
371 def wldocument(self, parse_dublincore=True, inherit=True):
372 from catalogue.import_utils import ORMDocProvider
373 from librarian.parser import WLDocument
375 if inherit and self.parent:
376 meta_fallbacks = self.parent.cover_info()
378 meta_fallbacks = None
380 return WLDocument.from_file(
382 provider=ORMDocProvider(self),
383 parse_dublincore=parse_dublincore,
384 meta_fallbacks=meta_fallbacks)
387 def zip_format(format_):
388 def pretty_file_name(book):
389 return "%s/%s.%s" % (
390 book.get_extra_info_json()['author'],
394 field_name = "%s_file" % format_
395 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True).exclude(findable=False)
396 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
397 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Hand this book's media files of type ``format_`` to ``create_zip``.

    Each ``BookMedia`` file path is paired with ``None``; the second
    argument passed to ``create_zip`` is "<slug>_<format_>". The value
    returned by ``create_zip`` is returned unchanged.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Stay lazy, like the original map(): paths are produced on iteration.
    paths = ((None, media.file.path) for media in media_files)
    zip_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, zip_name)
404 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
405 if not self.findable:
408 from search.index import Index
411 index.index_book(self, book_info)
416 except Exception as e:
417 index.index.rollback()
420 # will make problems in conjunction with paid previews
421 def download_pictures(self, remote_gallery_url):
422 gallery_path = self.gallery_path()
423 # delete previous files, so we don't include old files in ebooks
424 if os.path.isdir(gallery_path):
425 for filename in os.listdir(gallery_path):
426 file_path = os.path.join(gallery_path, filename)
428 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
430 makedirs(gallery_path)
431 for ilustr in ilustr_elements:
432 ilustr_src = ilustr.get('src')
433 ilustr_path = os.path.join(gallery_path, ilustr_src)
434 urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
436 def load_abstract(self):
437 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
438 if abstract is not None:
439 self.abstract = transform_abstrakt(abstract)
444 def from_xml_file(cls, xml_file, **kwargs):
445 from django.core.files import File
446 from librarian import dcparser
448 # use librarian to parse meta-data
449 book_info = dcparser.parse(xml_file)
451 if not isinstance(xml_file, File):
452 xml_file = File(open(xml_file))
455 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
460 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
461 search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
462 if dont_build is None:
464 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
466 # check for parts before we do anything
468 if hasattr(book_info, 'parts'):
469 for part_url in book_info.parts:
471 children.append(Book.objects.get(slug=part_url.slug))
472 except Book.DoesNotExist:
473 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
476 book_slug = book_info.url.slug
477 if re.search(r'[^a-z0-9-]', book_slug):
478 raise ValueError('Invalid characters in slug')
479 book, created = Book.objects.get_or_create(slug=book_slug)
484 book.preview = bool(days)
486 book.preview_until = date.today() + timedelta(days)
489 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
490 # Save shelves for this book
491 book_shelves = list(book.tags.filter(category='set'))
492 old_cover = book.cover_info()
495 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
497 book.xml_file.set_readable(False)
499 book.findable = findable
500 book.language = book_info.language
501 book.title = book_info.title
502 if book_info.variant_of:
503 book.common_slug = book_info.variant_of.slug
505 book.common_slug = book.slug
506 book.extra_info = json.dumps(book_info.to_dict())
510 meta_tags = Tag.tags_from_info(book_info)
512 for tag in meta_tags:
513 if not tag.for_books:
517 book.tags = set(meta_tags + book_shelves)
518 book.save() # update sort_key_author
520 cover_changed = old_cover != book.cover_info()
521 obsolete_children = set(b for b in book.children.all()
522 if b not in children)
523 notify_cover_changed = []
524 for n, child_book in enumerate(children):
525 new_child = child_book.parent != book
526 child_book.parent = book
527 child_book.parent_number = n
529 if new_child or cover_changed:
530 notify_cover_changed.append(child_book)
531 # Disown unfaithful children and let them cope on their own.
532 for child in obsolete_children:
534 child.parent_number = 0
537 notify_cover_changed.append(child)
539 cls.repopulate_ancestors()
540 tasks.update_counters.delay()
542 if remote_gallery_url:
543 book.download_pictures(remote_gallery_url)
545 # No saves beyond this point.
548 if 'cover' not in dont_build:
549 book.cover.build_delay()
550 book.cover_thumb.build_delay()
551 book.cover_api_thumb.build_delay()
552 book.simple_cover.build_delay()
554 # Build HTML and ebooks.
555 book.html_file.build_delay()
557 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
558 if format_ not in dont_build:
559 getattr(book, '%s_file' % format_).build_delay()
560 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
561 if format_ not in dont_build:
562 getattr(book, '%s_file' % format_).build_delay()
564 if not settings.NO_SEARCH_INDEX and search_index and findable:
565 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
567 for child in notify_cover_changed:
568 child.parent_cover_changed()
570 book.update_popularity()
571 cls.published.send(sender=cls, instance=book)
576 def repopulate_ancestors(cls):
577 """Fixes the ancestry cache."""
579 cursor = connection.cursor()
580 if connection.vendor == 'postgres':
581 cursor.execute("TRUNCATE catalogue_book_ancestor")
583 WITH RECURSIVE ancestry AS (
584 SELECT book.id, book.parent_id
585 FROM catalogue_book AS book
586 WHERE book.parent_id IS NOT NULL
588 SELECT ancestor.id, book.parent_id
589 FROM ancestry AS ancestor, catalogue_book AS book
590 WHERE ancestor.parent_id = book.id
591 AND book.parent_id IS NOT NULL
593 INSERT INTO catalogue_book_ancestor
594 (from_book_id, to_book_id)
600 cursor.execute("DELETE FROM catalogue_book_ancestor")
601 for b in cls.objects.exclude(parent=None):
603 while parent is not None:
604 b.ancestor.add(parent)
605 parent = parent.parent
def clear_cache(self):
    """Call ``clear_cached_renders`` for ``mini_box`` and ``mini_box_nolink``."""
    for cached_view in (self.mini_box, self.mini_box_nolink):
        clear_cached_renders(cached_view)
611 def cover_info(self, inherit=True):
612 """Returns a dictionary to serve as fallback for BookInfo.
614 For now, the only thing inherited is the cover image.
618 for field in ('cover_url', 'cover_by', 'cover_source'):
619 val = self.get_extra_info_json().get(field)
624 if inherit and need and self.parent is not None:
625 parent_info = self.parent.cover_info()
626 parent_info.update(info)
def related_themes(self):
    """Return counted 'theme' tag usage for this book's fragments.

    Fragments are taken from this book and from books that have it as an
    ancestor.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return usage.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Nothing happens when the book has cover info of its own
    (``cover_info(inherit=False)`` is truthy). Otherwise ``build_delay()``
    is called on the cover fields and on the files of every format in
    ``constants.EBOOK_FORMATS_WITH_COVERS``, skipping whatever is listed
    in ``app_settings.DONT_BUILD``, and each child is notified in turn.
    """
    if self.cover_info(inherit=False):
        return

    skipped = app_settings.DONT_BUILD
    if 'cover' not in skipped:
        for cover_field in (self.cover, self.cover_thumb,
                            self.cover_api_thumb, self.simple_cover):
            cover_field.build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in skipped:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    These are the findable books sharing this book's ``common_slug``,
    with the book itself excluded.
    """
    same_work = type(self).objects.filter(common_slug=self.common_slug, findable=True)
    return same_work.exclude(pk=self.pk)
656 while parent is not None:
657 books.insert(0, parent)
658 parent = parent.parent
661 def pretty_title(self, html_links=False):
662 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
663 books = self.parents() + [self]
664 names.extend([(b.title, b.get_absolute_url()) for b in books])
667 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
669 names = [tag[0] for tag in names]
670 return ', '.join(names)
673 publisher = self.get_extra_info_json()['publisher']
674 if isinstance(publisher, str):
676 elif isinstance(publisher, list):
677 return ', '.join(publisher)
def tagged_top_level(cls, tags):
    """Return top-level books tagged with `tags`.

    Only findable books are returned, and books having an ancestor that
    is itself tagged with all of those tags are left out.
    """
    tagged_books = cls.tagged.with_all(tags)
    findable_books = tagged_books.filter(findable=True)
    return findable_books.exclude(ancestor__in=tagged_books)
691 def book_list(cls, book_filter=None):
692 """Generates a hierarchical listing of all books.
694 Books are optionally filtered with a test function.
699 books = cls.objects.filter(findable=True).order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
701 books = books.filter(book_filter).distinct()
703 book_ids = set(b['pk'] for b in books.values("pk").iterator())
704 for book in books.iterator():
705 parent = book.parent_id
706 if parent not in book_ids:
708 books_by_parent.setdefault(parent, []).append(book)
710 for book in books.iterator():
711 books_by_parent.setdefault(book.parent_id, []).append(book)
714 books_by_author = OrderedDict()
715 for tag in Tag.objects.filter(category='author').iterator():
716 books_by_author[tag] = []
718 for book in books_by_parent.get(None, ()):
719 authors = list(book.authors().only('pk'))
721 for author in authors:
722 books_by_author[author].append(book)
726 return books_by_author, orphans, books_by_parent
729 "SP": (1, "szkoła podstawowa"),
730 "SP1": (1, "szkoła podstawowa"),
731 "SP2": (1, "szkoła podstawowa"),
732 "SP3": (1, "szkoła podstawowa"),
733 "P": (1, "szkoła podstawowa"),
734 "G": (2, "gimnazjum"),
def audiences_pl(self):
    """Return the names of the book's audiences, de-duplicated and sorted.

    Every code under the 'audiences' key of the extra info is looked up
    in ``_audiences_pl``; a code missing from that mapping keeps its own
    text and is paired with rank 99. The (rank, name) pairs are sorted
    and only the names are returned.
    """
    codes = self.get_extra_info_json().get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [name for _rank, name in sorted(ranked)]
744 def stage_note(self):
745 stage = self.get_extra_info_json().get('stage')
746 if stage and stage < '0.4':
747 return (_('This work needs modernisation'),
748 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
752 def choose_fragment(self):
753 fragments = self.fragments.order_by()
754 fragments_count = fragments.count()
755 if not fragments_count and self.children.exists():
756 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
757 fragments_count = fragments.count()
759 return fragments[randint(0, fragments_count - 1)]
761 return self.parent.choose_fragment()
765 def fragment_data(self):
766 fragment = self.choose_fragment()
769 'title': fragment.book.pretty_title(),
770 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
775 def update_popularity(self):
776 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
778 pop = self.popularity
781 except BookPopularity.DoesNotExist:
782 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL from ``get_language()`` and the book's slug.

    Hyphens in the slug are replaced with underscores.
    """
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), ridero_slug)
def like(self, user):
    """Put this book into the set ``get_set(user, '')`` unless ``likes`` already holds.

    When ``likes(user, self)`` is true nothing is changed.
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    user_set = get_set(user, '')
    set_sets(user, self, [user_set])
def unlike(self, user):
    """Call ``set_sets`` with an empty list for this book when ``likes`` holds.

    When ``likes(user, self)`` is false nothing is changed.
    """
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, the sort key and the id using ``SORT_KEY_SEP``."""
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    return self.SORT_KEY_SEP.join(parts)
def cover_color(self):
    """Look the book's 'epoch' extra-info value up in ``WLCover.epoch_colors``.

    '#000000' is returned when the lookup finds nothing.
    """
    epoch = self.get_extra_info_json().get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
804 @cached_render('catalogue/book_mini_box.html')
810 @cached_render('catalogue/book_mini_box.html')
811 def mini_box_nolink(self):
817 def add_file_fields():
818 for format_ in Book.formats:
819 field_name = "%s_file" % format_
820 # This weird globals() assignment makes Django migrations comfortable.
821 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
822 _upload_to.__name__ = '_%s_upload_to' % format_
823 globals()[_upload_to.__name__] = _upload_to
826 format_, _("%s file" % format_.upper()),
827 upload_to=_upload_to,
828 storage=bofh_storage,
832 ).contribute_to_class(Book, field_name)
838 class BookPopularity(models.Model):
839 book = models.OneToOneField(Book, models.CASCADE, related_name='popularity')
840 count = models.IntegerField(default=0, db_index=True)