1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from newtagging import managers
26 from catalogue import constants
27 from catalogue.fields import EbookField
28 from catalogue.models import Tag, Fragment, BookMedia
29 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
30 from catalogue.models.tag import prefetched_relations
31 from catalogue import app_settings
32 from catalogue import tasks
33 from wolnelektury.utils import makedirs
35 bofh_storage = BofhFileSystemStorage()
39 class UploadToPath(object):
40 def __init__(self, path):
43 def __call__(self, instance, filename):
44 return self.path % instance.slug
# Upload-path builders for the generated cover images, one per cover
# variant. When called, an UploadToPath instance interpolates the model
# instance's slug into its '%s' pattern (presumably the pattern given here).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Build the upload-path callable for an ebook file field.

    `upload_path` is handed unchanged to UploadToPath and the resulting
    object is returned.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
57 class Book(models.Model):
58 """Represents a book imported from WL-XML."""
# Title of the book; save() derives sort_key from it.
title = models.CharField(_('title'), max_length=32767)
# Denormalised ordering keys, filled in by save() and not editable by hand.
sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
sort_key_author = models.CharField(
    _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
# Unique slug; used for the book's URL and for generated file paths.
slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
# Slug shared by variants of one work; other_versions() matches on it.
common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
# Language code as stored; language_code() maps it through
# constants.LANGUAGES_3TO2.
language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
description = models.TextField(_('description'), blank=True)
# Filled by load_abstract() from the document's 'abstrakt' element.
abstract = models.TextField(_('abstract'), blank=True)
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
# Position among the parent's children; assigned in from_text_and_meta().
parent_number = models.IntegerField(_('parent number'), default=0)
# Metadata dictionary; from_text_and_meta() stores book_info.to_dict() here.
extra_info = jsonfield.JSONField(_('extra information'), default={})
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)
print_on_demand = models.BooleanField(_('print on demand'), default=False)
recommended = models.BooleanField(_('recommended'), default=False)
# Text produced by format_audio_length(); written by set_audio_length().
audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
# Preview flag and its end date; both are set in from_text_and_meta()
# from the `days` argument.
preview = models.BooleanField(_('preview'), default=False)
preview_until = models.DateField(_('preview until'), blank=True, null=True)
80 # files generated during publication
83 null=True, blank=True,
84 upload_to=_cover_upload_to,
85 storage=bofh_storage, max_length=255)
86 # Cleaner version of cover for thumbs
87 cover_thumb = EbookField(
88 'cover_thumb', _('cover thumbnail'),
89 null=True, blank=True,
90 upload_to=_cover_thumb_upload_to,
92 cover_api_thumb = EbookField(
93 'cover_api_thumb', _('cover thumbnail for mobile app'),
94 null=True, blank=True,
95 upload_to=_cover_api_thumb_upload_to,
97 simple_cover = EbookField(
98 'simple_cover', _('cover for mobile app'),
99 null=True, blank=True,
100 upload_to=_simple_cover_upload_to,
# Formats stored in '<format>_file' fields on the model (see has_media()
# and add_file_fields()); any other media type is looked up in self.media.
ebook_formats = constants.EBOOK_FORMATS
formats = ebook_formats + ['html', 'xml']
# Direct parent in the book hierarchy; the reverse accessor is `children`.
parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
# Ancestry cache (all ancestors of a book), rebuilt by repopulate_ancestors().
ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
# Denormalised values recomputed in save().
cached_author = models.CharField(blank=True, max_length=240, db_index=True)
has_audience = models.BooleanField(default=False)
# Default manager plus the tagging helpers bound to the Tag model.
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)
tag_relations = GenericRelation(Tag.intermediary_table_model)
# Signals; `published` is sent at the end of from_text_and_meta().
html_built = django.dispatch.Signal()
published = django.dispatch.Signal()
121 class AlreadyExists(Exception):
125 ordering = ('sort_key_author', 'sort_key')
126 verbose_name = _('book')
127 verbose_name_plural = _('books')
128 app_label = 'catalogue'
133 def get_initial(self):
135 return re.search(r'\w', self.title, re.U).group(0)
136 except AttributeError:
140 return self.tags.filter(category='author')
143 return self.tags.filter(category='epoch')
146 return self.tags.filter(category='genre')
149 return self.tags.filter(category='kind')
151 def tag_unicode(self, category):
152 relations = prefetched_relations(self, category)
154 return ', '.join(rel.tag.name for rel in relations)
156 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, minus 'set' and 'theme' ones, run through split_tags()."""
    excluded_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=excluded_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in `cached_author`."""
    author_names = self.cached_author
    return author_names
def kind_unicode(self):
    """Return the names of the book's 'kind' tags, as formatted by tag_unicode()."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return the names of the book's 'epoch' tags, as formatted by tag_unicode()."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return the names of the book's 'genre' tags, as formatted by tag_unicode()."""
    category = 'genre'
    return self.tag_unicode(category)
173 def translator(self):
174 translators = self.extra_info.get('translators')
177 if len(translators) > 3:
178 translators = translators[:2]
182 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book.

    The book's own ``extra_info['cover_source']`` wins whenever the key is
    present. Otherwise the value is inherited from the parent, recursively,
    and '' is returned when there is no parent to ask.
    """
    info = self.extra_info
    if 'cover_source' in info:
        return info['cover_source']
    # Consult the parent only when the key is really missing: passing the
    # parent's value as a dict.get() default would evaluate it on every
    # call, walking the whole parent chain even when it is not needed.
    if self.parent:
        return self.parent.cover_source()
    return ''
187 def save(self, force_insert=False, force_update=False, **kwargs):
188 from sortify import sortify
190 self.sort_key = sortify(self.title)[:120]
191 self.title = str(self.title) # ???
194 author = self.authors().first().sort_key
195 except AttributeError:
197 self.sort_key_author = author
199 self.cached_author = self.tag_unicode('author')
200 self.has_audience = 'audience' in self.extra_info
202 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the ('book_detail', [slug]) pair identifying this book's page.

    Presumably consumed by the `permalink` decorator imported at module
    level, which would turn the pair into a URL.
    """
    url_name = 'book_detail'
    url_args = [self.slug]
    return url_name, url_args
def gallery_path(self):
    """Return the gallery path computed from this book's slug."""
    slug = self.slug
    # Inside the method body this name resolves to the module-level helper
    # imported from catalogue.utils, not to the method itself.
    return gallery_path(slug)
def gallery_url(self):
    """Return the gallery URL computed from this book's slug."""
    slug = self.slug
    # Inside the method body this name resolves to the module-level helper
    # imported from catalogue.utils, not to the method itself.
    return gallery_url(slug)
def language_code(self):
    """Map the stored language code through constants.LANGUAGES_3TO2.

    Codes without an entry in the mapping are returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the name listed in settings.LANGUAGES for the book's language code, or ""."""
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language code differs from settings.LANGUAGE_CODE."""
    book_language = self.language_code()
    return book_language != settings.LANGUAGE_CODE
229 def set_audio_length(self):
230 length = self.get_audio_length()
232 self.audio_length = self.format_audio_length(length)
236 def format_audio_length(seconds):
238 minutes = seconds // 60
239 seconds = seconds % 60
240 return '%d:%02d' % (minutes, seconds)
242 hours = seconds // 3600
243 minutes = seconds % 3600 // 60
244 seconds = seconds % 60
245 return '%d:%02d:%02d' % (hours, minutes, seconds)
247 def get_audio_length(self):
249 for media in self.get_mp3() or ():
250 total += app_settings.GET_MP3_LENGTH(media.file.path)
253 def has_media(self, type_):
254 if type_ in Book.formats:
255 return bool(getattr(self, "%s_file" % type_))
257 return self.media.filter(type=type_).exists()
260 return self.has_media('mp3')
262 def get_media(self, type_):
263 if self.has_media(type_):
264 if type_ in Book.formats:
265 return getattr(self, "%s_file" % type_)
267 return self.media.filter(type=type_)
272 return self.get_media("mp3")
275 return self.get_media("odt")
278 return self.get_media("ogg")
281 return self.get_media("daisy")
283 def media_url(self, format_):
284 media = self.get_media(format_)
287 return reverse('embargo_link', kwargs={'slug': self.slug, 'format_': format_})
294 return self.media_url('html')
297 return self.media_url('pdf')
300 return self.media_url('epub')
303 return self.media_url('mobi')
306 return self.media_url('txt')
309 return self.media_url('fb2')
312 return self.media_url('xml')
def has_description(self):
    """Tell whether the book's description is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes in the style of Django admin's list_display options:
# render as a boolean icon under a translated column title.
has_description.boolean = True
has_description.short_description = _('description')
def has_mp3_file(self):
    """Tell whether the book has any media of type "mp3"."""
    media_type = "mp3"
    return self.has_media(media_type)
# Function attributes in the style of Django admin's list_display options.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether the book has any media of type "ogg"."""
    media_type = "ogg"
    return self.has_media(media_type)
# Function attributes in the style of Django admin's list_display options.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether the book has any media of type "daisy"."""
    media_type = "daisy"
    return self.has_media(media_type)
# Function attributes in the style of Django admin's list_display options.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
334 def get_audiobooks(self):
336 for m in self.media.filter(type='ogg').order_by().iterator():
337 ogg_files[m.name] = m
341 for mp3 in self.media.filter(type='mp3').iterator():
342 # ogg files are always from the same project
343 meta = mp3.extra_info
344 project = meta.get('project')
347 project = u'CzytamySłuchając'
349 projects.add((project, meta.get('funded_by', '')))
353 ogg = ogg_files.get(mp3.name)
356 audiobooks.append(media)
358 projects = sorted(projects)
359 return audiobooks, projects
361 def wldocument(self, parse_dublincore=True, inherit=True):
362 from catalogue.import_utils import ORMDocProvider
363 from librarian.parser import WLDocument
365 if inherit and self.parent:
366 meta_fallbacks = self.parent.cover_info()
368 meta_fallbacks = None
370 return WLDocument.from_file(
372 provider=ORMDocProvider(self),
373 parse_dublincore=parse_dublincore,
374 meta_fallbacks=meta_fallbacks)
377 def zip_format(format_):
378 def pretty_file_name(book):
379 return "%s/%s.%s" % (
380 book.extra_info['author'],
384 field_name = "%s_file" % format_
385 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
386 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
387 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pack this book's media files of one type into a zip archive.

    `format_` is the BookMedia type to collect. The archive is named
    "<slug>_<format_>", and the result of create_zip() is returned as is.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Build a real list: under Python 3 map() yields a one-shot iterator,
    # while under Python 2 the original map() call produced a list. The
    # comprehension also avoids a lambda argument shadowing the queryset.
    # The first element of each pair is None, exactly as before
    # (presumably "no custom name inside the archive" -- see create_zip).
    paths = [(None, media.file.path) for media in media_files]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
394 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
396 from search.index import Index
399 index.index_book(self, book_info)
404 except Exception as e:
405 index.index.rollback()
408 # will make problems in conjunction with paid previews
409 def download_pictures(self, remote_gallery_url):
410 gallery_path = self.gallery_path()
411 # delete previous files, so we don't include old files in ebooks
412 if os.path.isdir(gallery_path):
413 for filename in os.listdir(gallery_path):
414 file_path = os.path.join(gallery_path, filename)
416 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
418 makedirs(gallery_path)
419 for ilustr in ilustr_elements:
420 ilustr_src = ilustr.get('src')
421 ilustr_path = os.path.join(gallery_path, ilustr_src)
422 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
424 def load_abstract(self):
425 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
426 if abstract is not None:
427 self.abstract = transform_abstrakt(abstract)
432 def from_xml_file(cls, xml_file, **kwargs):
433 from django.core.files import File
434 from librarian import dcparser
436 # use librarian to parse meta-data
437 book_info = dcparser.parse(xml_file)
439 if not isinstance(xml_file, File):
440 xml_file = File(open(xml_file))
443 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
448 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
449 search_index_tags=True, remote_gallery_url=None, days=0):
450 if dont_build is None:
452 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
454 # check for parts before we do anything
456 if hasattr(book_info, 'parts'):
457 for part_url in book_info.parts:
459 children.append(Book.objects.get(slug=part_url.slug))
460 except Book.DoesNotExist:
461 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
464 book_slug = book_info.url.slug
465 if re.search(r'[^a-z0-9-]', book_slug):
466 raise ValueError('Invalid characters in slug')
467 book, created = Book.objects.get_or_create(slug=book_slug)
472 book.preview = bool(days)
474 book.preview_until = date.today() + timedelta(days)
477 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
478 # Save shelves for this book
479 book_shelves = list(book.tags.filter(category='set'))
480 old_cover = book.cover_info()
483 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
485 book.xml_file.set_readable(False)
487 book.language = book_info.language
488 book.title = book_info.title
489 if book_info.variant_of:
490 book.common_slug = book_info.variant_of.slug
492 book.common_slug = book.slug
493 book.extra_info = book_info.to_dict()
497 meta_tags = Tag.tags_from_info(book_info)
499 for tag in meta_tags:
500 if not tag.for_books:
504 book.tags = set(meta_tags + book_shelves)
506 cover_changed = old_cover != book.cover_info()
507 obsolete_children = set(b for b in book.children.all()
508 if b not in children)
509 notify_cover_changed = []
510 for n, child_book in enumerate(children):
511 new_child = child_book.parent != book
512 child_book.parent = book
513 child_book.parent_number = n
515 if new_child or cover_changed:
516 notify_cover_changed.append(child_book)
517 # Disown unfaithful children and let them cope on their own.
518 for child in obsolete_children:
520 child.parent_number = 0
523 notify_cover_changed.append(child)
525 cls.repopulate_ancestors()
526 tasks.update_counters.delay()
528 if remote_gallery_url:
529 book.download_pictures(remote_gallery_url)
531 # No saves beyond this point.
534 if 'cover' not in dont_build:
535 book.cover.build_delay()
536 book.cover_thumb.build_delay()
537 book.cover_api_thumb.build_delay()
538 book.simple_cover.build_delay()
540 # Build HTML and ebooks.
541 book.html_file.build_delay()
543 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
544 if format_ not in dont_build:
545 getattr(book, '%s_file' % format_).build_delay()
546 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
547 if format_ not in dont_build:
548 getattr(book, '%s_file' % format_).build_delay()
550 if not settings.NO_SEARCH_INDEX and search_index:
551 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
553 for child in notify_cover_changed:
554 child.parent_cover_changed()
556 book.save() # update sort_key_author
557 book.update_popularity()
558 cls.published.send(sender=cls, instance=book)
563 def repopulate_ancestors(cls):
564 """Fixes the ancestry cache."""
566 cursor = connection.cursor()
567 if connection.vendor == 'postgres':
568 cursor.execute("TRUNCATE catalogue_book_ancestor")
570 WITH RECURSIVE ancestry AS (
571 SELECT book.id, book.parent_id
572 FROM catalogue_book AS book
573 WHERE book.parent_id IS NOT NULL
575 SELECT ancestor.id, book.parent_id
576 FROM ancestry AS ancestor, catalogue_book AS book
577 WHERE ancestor.parent_id = book.id
578 AND book.parent_id IS NOT NULL
580 INSERT INTO catalogue_book_ancestor
581 (from_book_id, to_book_id)
587 cursor.execute("DELETE FROM catalogue_book_ancestor")
588 for b in cls.objects.exclude(parent=None):
590 while parent is not None:
591 b.ancestor.add(parent)
592 parent = parent.parent
594 def flush_includes(self, languages=True):
597 if languages is True:
598 languages = [lc for (lc, _ln) in settings.LANGUAGES]
600 template % (self.pk, lang)
602 '/katalog/b/%d/mini.%s.html',
603 '/katalog/b/%d/mini_nolink.%s.html',
604 '/katalog/b/%d/short.%s.html',
605 '/katalog/b/%d/wide.%s.html',
606 '/api/include/book/%d.%s.json',
607 '/api/include/book/%d.%s.xml',
609 for lang in languages
612 def cover_info(self, inherit=True):
613 """Returns a dictionary to serve as fallback for BookInfo.
615 For now, the only thing inherited is the cover image.
619 for field in ('cover_url', 'cover_by', 'cover_source'):
620 val = self.extra_info.get(field)
625 if inherit and need and self.parent is not None:
626 parent_info = self.parent.cover_info()
627 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, for the book's fragments.

    Fragments of this book and of every book that has it as an ancestor
    are taken into account.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    used_tags = Tag.objects.usage_for_queryset(fragments, counts=True)
    return used_tags.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Triggers builds of this book's cover images and of the ebook formats
    listed in constants.EBOOK_FORMATS_WITH_COVERS, then passes the
    notification on to the book's children. Nothing happens when the book
    has cover data of its own.
    """
    # cover_info(inherit=False) looks only at this book's own extra_info;
    # a non-empty result means the cover does not come from the parent.
    if not self.cover_info(inherit=False):
        # Anything named in app_settings.DONT_BUILD is skipped.
        if 'cover' not in app_settings.DONT_BUILD:
            self.cover.build_delay()
            self.cover_thumb.build_delay()
            self.cover_api_thumb.build_delay()
            self.simple_cover.build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
            if format_ not in app_settings.DONT_BUILD:
                getattr(self, '%s_file' % format_).build_delay()
        # Children are notified in turn, recursively.
        for child in self.children.all():
            child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's common_slug.

    These are other versions of the same work (e.g. in other languages);
    the book itself is left out of the result.
    """
    model = type(self)
    same_work = model.objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
657 while parent is not None:
658 books.insert(0, parent)
659 parent = parent.parent
662 def pretty_title(self, html_links=False):
663 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
664 books = self.parents() + [self]
665 names.extend([(b.title, b.get_absolute_url()) for b in books])
668 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
670 names = [tag[0] for tag in names]
671 return ', '.join(names)
674 publisher = self.extra_info['publisher']
675 if isinstance(publisher, str):
677 elif isinstance(publisher, list):
678 return ', '.join(publisher)
681 def tagged_top_level(cls, tags):
682 """ Returns top-level books tagged with `tags`.
684 It only returns those books which don't have ancestors which are
685 also tagged with those tags.
688 objects = cls.tagged.with_all(tags)
689 return objects.exclude(ancestor__in=objects)
692 def book_list(cls, book_filter=None):
693 """Generates a hierarchical listing of all books.
695 Books are optionally filtered with a test function.
700 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
702 books = books.filter(book_filter).distinct()
704 book_ids = set(b['pk'] for b in books.values("pk").iterator())
705 for book in books.iterator():
706 parent = book.parent_id
707 if parent not in book_ids:
709 books_by_parent.setdefault(parent, []).append(book)
711 for book in books.iterator():
712 books_by_parent.setdefault(book.parent_id, []).append(book)
715 books_by_author = OrderedDict()
716 for tag in Tag.objects.filter(category='author').iterator():
717 books_by_author[tag] = []
719 for book in books_by_parent.get(None, ()):
720 authors = list(book.authors().only('pk'))
722 for author in authors:
723 books_by_author[author].append(book)
727 return books_by_author, orphans, books_by_parent
730 "SP": (1, u"szkoła podstawowa"),
731 "SP1": (1, u"szkoła podstawowa"),
732 "SP2": (1, u"szkoła podstawowa"),
733 "SP3": (1, u"szkoła podstawowa"),
734 "P": (1, u"szkoła podstawowa"),
735 "G": (2, u"gimnazjum"),
737 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return display names for the audience codes found in extra_info.

    Each code from ``extra_info['audiences']`` is looked up in
    `_audiences_pl`, which yields a (rank, name) pair; a code without an
    entry keeps its own text with rank 99. Duplicate pairs are collapsed
    and the names are returned ordered by rank, then by name.
    """
    ranked_names = set()
    for code in self.extra_info.get('audiences', []):
        ranked_names.add(self._audiences_pl.get(code, (99, code)))
    return [name for _rank, name in sorted(ranked_names)]
745 def stage_note(self):
746 stage = self.extra_info.get('stage')
747 if stage and stage < '0.4':
748 return (_('This work needs modernisation'),
749 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
753 def choose_fragment(self):
754 fragments = self.fragments.order_by()
755 fragments_count = fragments.count()
756 if not fragments_count and self.children.exists():
757 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
758 fragments_count = fragments.count()
760 return fragments[randint(0, fragments_count - 1)]
762 return self.parent.choose_fragment()
766 def fragment_data(self):
767 fragment = self.choose_fragment()
770 'title': fragment.book.pretty_title(),
771 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
776 def update_popularity(self):
777 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
779 pop = self.popularity
782 except BookPopularity.DoesNotExist:
783 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu link for this book.

    The link contains the language returned by get_language() and the
    book's slug with hyphens turned into underscores.
    """
    language = get_language()
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_slug)
def like(self, user):
    """Mark the book as liked by `user`, unless it already is.

    The book's sets for the user are replaced with the single set
    returned by get_set(user, '').
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Clear the book's sets for `user` when the user currently likes it."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join the author sort key, title sort key and id with SORT_KEY_SEP."""
    parts = [self.sort_key_author, self.sort_key, str(self.id)]
    return self.SORT_KEY_SEP.join(parts)
def cover_color(self):
    """Return the WLCover colour for the book's epoch.

    The epoch is read from ``extra_info['epoch']``; '#000000' is used when
    WLCover.epoch_colors has no entry for it.
    """
    epoch = self.extra_info.get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
806 def add_file_fields():
807 for format_ in Book.formats:
808 field_name = "%s_file" % format_
809 # This weird globals() assignment makes Django migrations comfortable.
810 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
811 _upload_to.__name__ = '_%s_upload_to' % format_
812 globals()[_upload_to.__name__] = _upload_to
815 format_, _("%s file" % format_.upper()),
816 upload_to=_upload_to,
817 storage=bofh_storage,
821 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    """Popularity counter kept for a single Book.

    Book.update_popularity() computes the count (distinct users among the
    book's 'set' tags) and creates the row when the book has none yet.
    """
    # One row per book, reachable from the book as `book.popularity`.
    # on_delete is spelled out: CASCADE is what Django applied implicitly
    # before 2.0, and the argument is mandatory from 2.0 on.
    book = models.OneToOneField(Book, related_name='popularity', on_delete=models.CASCADE)
    count = models.IntegerField(default=0, db_index=True)