1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from newtagging import managers
26 from catalogue import constants
27 from catalogue.fields import EbookField
28 from catalogue.models import Tag, Fragment, BookMedia
29 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
30 from catalogue.models.tag import prefetched_relations
31 from catalogue import app_settings
32 from catalogue import tasks
33 from wolnelektury.utils import makedirs
35 bofh_storage = BofhFileSystemStorage()
39 class UploadToPath(object):
40 def __init__(self, path):
43 def __call__(self, instance, filename):
44 return self.path % instance.slug
47 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
48 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
49 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
50 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap a path template in an ``UploadToPath`` callable for an ebook file field."""
    path_builder = UploadToPath(upload_path)
    return path_builder
57 class Book(models.Model):
58 """Represents a book imported from WL-XML."""
59 title = models.CharField(_('title'), max_length=32767)
60 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
61 sort_key_author = models.CharField(
62 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
63 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
64 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
65 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
66 description = models.TextField(_('description'), blank=True)
67 abstract = models.TextField(_('abstract'), blank=True)
68 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
69 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
70 parent_number = models.IntegerField(_('parent number'), default=0)
71 extra_info = jsonfield.JSONField(_('extra information'), default={})
72 gazeta_link = models.CharField(blank=True, max_length=240)
73 wiki_link = models.CharField(blank=True, max_length=240)
74 print_on_demand = models.BooleanField(_('print on demand'), default=False)
75 recommended = models.BooleanField(_('recommended'), default=False)
76 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
77 preview = models.BooleanField(_('preview'), default=False)
78 preview_until = models.DateField(_('preview until'), blank=True, null=True)
80 # files generated during publication
83 null=True, blank=True,
84 upload_to=_cover_upload_to,
85 storage=bofh_storage, max_length=255)
86 # Cleaner version of cover for thumbs
87 cover_thumb = EbookField(
88 'cover_thumb', _('cover thumbnail'),
89 null=True, blank=True,
90 upload_to=_cover_thumb_upload_to,
92 cover_api_thumb = EbookField(
93 'cover_api_thumb', _('cover thumbnail for mobile app'),
94 null=True, blank=True,
95 upload_to=_cover_api_thumb_upload_to,
97 simple_cover = EbookField(
98 'simple_cover', _('cover for mobile app'),
99 null=True, blank=True,
100 upload_to=_simple_cover_upload_to,
102 ebook_formats = constants.EBOOK_FORMATS
103 formats = ebook_formats + ['html', 'xml']
105 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
106 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
108 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
109 has_audience = models.BooleanField(default=False)
111 objects = models.Manager()
112 tagged = managers.ModelTaggedItemManager(Tag)
113 tags = managers.TagDescriptor(Tag)
114 tag_relations = GenericRelation(Tag.intermediary_table_model)
116 html_built = django.dispatch.Signal()
117 published = django.dispatch.Signal()
121 class AlreadyExists(Exception):
125 ordering = ('sort_key_author', 'sort_key')
126 verbose_name = _('book')
127 verbose_name_plural = _('books')
128 app_label = 'catalogue'
130 def __unicode__(self):
133 def get_initial(self):
135 return re.search(r'\w', self.title, re.U).group(0)
136 except AttributeError:
140 return self.tags.filter(category='author')
143 return self.tags.filter(category='epoch')
146 return self.tags.filter(category='genre')
149 return self.tags.filter(category='kind')
151 def tag_unicode(self, category):
152 relations = prefetched_relations(self, category)
154 return ', '.join(rel.tag.name for rel in relations)
156 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, minus the 'set' and 'theme' ones, run through ``split_tags``."""
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string kept in the ``cached_author`` field."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return what ``tag_unicode`` yields for the 'kind' tag category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return what ``tag_unicode`` yields for the 'epoch' tag category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return what ``tag_unicode`` yields for the 'genre' tag category."""
    category = 'genre'
    return self.tag_unicode(category)
173 def translator(self):
174 translators = self.extra_info.get('translators')
177 if len(translators) > 3:
178 translators = translators[:2]
182 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book.

    The value is read from ``extra_info['cover_source']``.  A book without
    such an entry asks its parent (if it has one); a book with neither
    yields an empty string.
    """
    info = self.extra_info
    if 'cover_source' in info:
        return info['cover_source']
    # Only climb to the parent when really needed: computing the fallback
    # up front would recurse through every ancestor on each call.
    if self.parent:
        return self.parent.cover_source()
    return ''
187 def save(self, force_insert=False, force_update=False, **kwargs):
188 from sortify import sortify
190 self.sort_key = sortify(self.title)[:120]
191 self.title = unicode(self.title) # ???
194 author = self.authors().first().sort_key
195 except AttributeError:
197 self.sort_key_author = author
199 self.cached_author = self.tag_unicode('author')
200 self.has_audience = 'audience' in self.extra_info
202 ret = super(Book, self).save(force_insert, force_update, **kwargs)
207 def get_absolute_url(self):
208 return 'book_detail', [self.slug]
212 def create_url(slug):
213 return 'book_detail', [slug]
def gallery_path(self):
    """Return what the module-level ``gallery_path`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return what the module-level ``gallery_url`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Return the code that ``constants.LANGUAGES_3TO2`` maps the book's language to.

    A language missing from that mapping is returned unchanged.
    """
    own_code = self.language
    return constants.LANGUAGES_3TO2.get(own_code, own_code)
def language_name(self):
    """Return the ``settings.LANGUAGES`` name for the book's language code, or "" if it is not listed."""
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    book_language = self.language_code()
    return book_language != settings.LANGUAGE_CODE
234 def set_audio_length(self):
235 length = self.get_audio_length()
237 self.audio_length = self.format_audio_length(length)
def format_audio_length(seconds):
    """Format a duration given in seconds as a clock-style string.

    Durations shorter than an hour come out as ``M:SS``; anything longer
    as ``H:MM:SS``.

    :param seconds: length of the recording, in seconds
    :return: the formatted duration
    """
    total_minutes, secs = divmod(seconds, 60)
    if seconds < 3600:
        return '%d:%02d' % (total_minutes, secs)
    # An hour or more: split the minute count into hours and minutes.
    hours, minutes = divmod(total_minutes, 60)
    return '%d:%02d:%02d' % (hours, minutes, secs)
252 def get_audio_length(self):
254 for media in self.get_mp3() or ():
255 total += app_settings.GET_MP3_LENGTH(media.file.path)
def has_media(self, type_):
    """Tell whether the book has media of the given type.

    Types listed in ``Book.formats`` are checked through the matching
    ``<type>_file`` attribute; any other type is looked up among the
    related ``media`` records.
    """
    if type_ not in Book.formats:
        return self.media.filter(type=type_).exists()
    file_field = getattr(self, "%s_file" % type_)
    return bool(file_field)
265 return self.has_media('mp3')
267 def get_media(self, type_):
268 if self.has_media(type_):
269 if type_ in Book.formats:
270 return getattr(self, "%s_file" % type_)
272 return self.media.filter(type=type_)
277 return self.get_media("mp3")
280 return self.get_media("odt")
283 return self.get_media("ogg")
286 return self.get_media("daisy")
288 def media_url(self, format_):
289 media = self.get_media(format_)
292 return reverse('embargo_link', kwargs={'slug': self.slug, 'format_': format_})
299 return self.media_url('html')
302 return self.media_url('pdf')
305 return self.media_url('epub')
308 return self.media_url('mobi')
311 return self.media_url('txt')
314 return self.media_url('fb2')
317 return self.media_url('xml')
def has_description(self):
    """Tell whether the book has a non-empty description."""
    # Truthiness covers the empty string and also tolerates a missing (None) value.
    return bool(self.description)
# Label and boolean-icon rendering used when the method is shown as a Django admin list column.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether ``has_media`` reports any "mp3" media for the book."""
    media_type = "mp3"
    return self.has_media(media_type)
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether ``has_media`` reports any "ogg" media for the book."""
    media_type = "ogg"
    return self.has_media(media_type)
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether ``has_media`` reports any "daisy" media for the book."""
    media_type = "daisy"
    return self.has_media(media_type)
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
339 def get_audiobooks(self):
341 for m in self.media.filter(type='ogg').order_by().iterator():
342 ogg_files[m.name] = m
346 for mp3 in self.media.filter(type='mp3').iterator():
347 # ogg files are always from the same project
348 meta = mp3.extra_info
349 project = meta.get('project')
352 project = u'CzytamySłuchając'
354 projects.add((project, meta.get('funded_by', '')))
358 ogg = ogg_files.get(mp3.name)
361 audiobooks.append(media)
363 projects = sorted(projects)
364 return audiobooks, projects
366 def wldocument(self, parse_dublincore=True, inherit=True):
367 from catalogue.import_utils import ORMDocProvider
368 from librarian.parser import WLDocument
370 if inherit and self.parent:
371 meta_fallbacks = self.parent.cover_info()
373 meta_fallbacks = None
375 return WLDocument.from_file(
377 provider=ORMDocProvider(self),
378 parse_dublincore=parse_dublincore,
379 meta_fallbacks=meta_fallbacks)
382 def zip_format(format_):
383 def pretty_file_name(book):
384 return "%s/%s.%s" % (
385 book.extra_info['author'],
389 field_name = "%s_file" % format_
390 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
391 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
392 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pack this book's media files of one type into a zip archive.

    :param format_: media type to include, matched against ``BookMedia.type``
    :return: the result of ``create_zip`` for the archive named ``<slug>_<format_>``
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # A real list (not a lazy ``map``), and no lambda argument shadowing the queryset.
    # The first tuple item is left as None; presumably the name inside the archive -- confirm in create_zip.
    paths = [(None, media.file.path) for media in media_files]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
399 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
401 from search.index import Index
404 index.index_book(self, book_info)
410 index.index.rollback()
413 # will make problems in conjunction with paid previews
414 def download_pictures(self, remote_gallery_url):
415 gallery_path = self.gallery_path()
416 # delete previous files, so we don't include old files in ebooks
417 if os.path.isdir(gallery_path):
418 for filename in os.listdir(gallery_path):
419 file_path = os.path.join(gallery_path, filename)
421 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
423 makedirs(gallery_path)
424 for ilustr in ilustr_elements:
425 ilustr_src = ilustr.get('src')
426 ilustr_path = os.path.join(gallery_path, ilustr_src)
427 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
429 def load_abstract(self):
430 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
431 if abstract is not None:
432 self.abstract = transform_abstrakt(abstract)
437 def from_xml_file(cls, xml_file, **kwargs):
438 from django.core.files import File
439 from librarian import dcparser
441 # use librarian to parse meta-data
442 book_info = dcparser.parse(xml_file)
444 if not isinstance(xml_file, File):
445 xml_file = File(open(xml_file))
448 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
453 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
454 search_index_tags=True, remote_gallery_url=None, days=0):
455 if dont_build is None:
457 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
459 # check for parts before we do anything
461 if hasattr(book_info, 'parts'):
462 for part_url in book_info.parts:
464 children.append(Book.objects.get(slug=part_url.slug))
465 except Book.DoesNotExist:
466 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
469 book_slug = book_info.url.slug
470 if re.search(r'[^a-z0-9-]', book_slug):
471 raise ValueError('Invalid characters in slug')
472 book, created = Book.objects.get_or_create(slug=book_slug)
477 book.preview = bool(days)
479 book.preview_until = date.today() + timedelta(days)
482 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
483 # Save shelves for this book
484 book_shelves = list(book.tags.filter(category='set'))
485 old_cover = book.cover_info()
488 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
490 book.xml_file.set_readable(False)
492 book.language = book_info.language
493 book.title = book_info.title
494 if book_info.variant_of:
495 book.common_slug = book_info.variant_of.slug
497 book.common_slug = book.slug
498 book.extra_info = book_info.to_dict()
502 meta_tags = Tag.tags_from_info(book_info)
504 for tag in meta_tags:
505 if not tag.for_books:
509 book.tags = set(meta_tags + book_shelves)
511 cover_changed = old_cover != book.cover_info()
512 obsolete_children = set(b for b in book.children.all()
513 if b not in children)
514 notify_cover_changed = []
515 for n, child_book in enumerate(children):
516 new_child = child_book.parent != book
517 child_book.parent = book
518 child_book.parent_number = n
520 if new_child or cover_changed:
521 notify_cover_changed.append(child_book)
522 # Disown unfaithful children and let them cope on their own.
523 for child in obsolete_children:
525 child.parent_number = 0
528 notify_cover_changed.append(child)
530 cls.repopulate_ancestors()
531 tasks.update_counters.delay()
533 if remote_gallery_url:
534 book.download_pictures(remote_gallery_url)
536 # No saves beyond this point.
539 if 'cover' not in dont_build:
540 book.cover.build_delay()
541 book.cover_thumb.build_delay()
542 book.cover_api_thumb.build_delay()
543 book.simple_cover.build_delay()
545 # Build HTML and ebooks.
546 book.html_file.build_delay()
548 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
549 if format_ not in dont_build:
550 getattr(book, '%s_file' % format_).build_delay()
551 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
552 if format_ not in dont_build:
553 getattr(book, '%s_file' % format_).build_delay()
555 if not settings.NO_SEARCH_INDEX and search_index:
556 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
558 for child in notify_cover_changed:
559 child.parent_cover_changed()
561 book.save() # update sort_key_author
562 book.update_popularity()
563 cls.published.send(sender=cls, instance=book)
568 def repopulate_ancestors(cls):
569 """Fixes the ancestry cache."""
571 cursor = connection.cursor()
572 if connection.vendor == 'postgres':
573 cursor.execute("TRUNCATE catalogue_book_ancestor")
575 WITH RECURSIVE ancestry AS (
576 SELECT book.id, book.parent_id
577 FROM catalogue_book AS book
578 WHERE book.parent_id IS NOT NULL
580 SELECT ancestor.id, book.parent_id
581 FROM ancestry AS ancestor, catalogue_book AS book
582 WHERE ancestor.parent_id = book.id
583 AND book.parent_id IS NOT NULL
585 INSERT INTO catalogue_book_ancestor
586 (from_book_id, to_book_id)
592 cursor.execute("DELETE FROM catalogue_book_ancestor")
593 for b in cls.objects.exclude(parent=None):
595 while parent is not None:
596 b.ancestor.add(parent)
597 parent = parent.parent
599 def flush_includes(self, languages=True):
602 if languages is True:
603 languages = [lc for (lc, _ln) in settings.LANGUAGES]
605 template % (self.pk, lang)
607 '/katalog/b/%d/mini.%s.html',
608 '/katalog/b/%d/mini_nolink.%s.html',
609 '/katalog/b/%d/short.%s.html',
610 '/katalog/b/%d/wide.%s.html',
611 '/api/include/book/%d.%s.json',
612 '/api/include/book/%d.%s.xml',
614 for lang in languages
617 def cover_info(self, inherit=True):
618 """Returns a dictionary to serve as fallback for BookInfo.
620 For now, the only thing inherited is the cover image.
624 for field in ('cover_url', 'cover_by', 'cover_source'):
625 val = self.extra_info.get(field)
630 if inherit and need and self.parent is not None:
631 parent_info = self.parent.cover_info()
632 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, over fragments of this book and of books it is an ancestor of."""
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    used_tags = Tag.objects.usage_for_queryset(fragments, counts=True)
    return used_tags.filter(category='theme')
def parent_cover_changed(self):
    """React to a change of the parent book's cover image.

    A book with cover data of its own (``cover_info(inherit=False)``) is
    left alone.  Otherwise ``build_delay()`` is called on the cover fields
    and on every ebook format from ``EBOOK_FORMATS_WITH_COVERS`` that is
    not listed in ``DONT_BUILD``, and the notification is passed down to
    the children.
    """
    if self.cover_info(inherit=False):
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_name in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_name).build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's ``common_slug`` (i.e. versions in other languages)."""
    same_work = type(self).objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
662 while parent is not None:
663 books.insert(0, parent)
664 parent = parent.parent
667 def pretty_title(self, html_links=False):
668 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
669 books = self.parents() + [self]
670 names.extend([(b.title, b.get_absolute_url()) for b in books])
673 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
675 names = [tag[0] for tag in names]
676 return ', '.join(names)
679 publisher = self.extra_info['publisher']
680 if isinstance(publisher, basestring):
682 elif isinstance(publisher, list):
683 return ', '.join(publisher)
686 def tagged_top_level(cls, tags):
687 """ Returns top-level books tagged with `tags`.
689 It only returns those books which don't have ancestors which are
690 also tagged with those tags.
693 objects = cls.tagged.with_all(tags)
694 return objects.exclude(ancestor__in=objects)
697 def book_list(cls, book_filter=None):
698 """Generates a hierarchical listing of all books.
700 Books are optionally filtered with a test function.
705 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
707 books = books.filter(book_filter).distinct()
709 book_ids = set(b['pk'] for b in books.values("pk").iterator())
710 for book in books.iterator():
711 parent = book.parent_id
712 if parent not in book_ids:
714 books_by_parent.setdefault(parent, []).append(book)
716 for book in books.iterator():
717 books_by_parent.setdefault(book.parent_id, []).append(book)
720 books_by_author = OrderedDict()
721 for tag in Tag.objects.filter(category='author').iterator():
722 books_by_author[tag] = []
724 for book in books_by_parent.get(None, ()):
725 authors = list(book.authors().only('pk'))
727 for author in authors:
728 books_by_author[author].append(book)
732 return books_by_author, orphans, books_by_parent
735 "SP": (1, u"szkoła podstawowa"),
736 "SP1": (1, u"szkoła podstawowa"),
737 "SP2": (1, u"szkoła podstawowa"),
738 "SP3": (1, u"szkoła podstawowa"),
739 "P": (1, u"szkoła podstawowa"),
740 "G": (2, u"gimnazjum"),
742 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return display names for the book's audiences, ordered and without duplicates.

    Codes from ``extra_info['audiences']`` are translated through
    ``_audiences_pl`` into ``(rank, name)`` pairs and sorted by them.  A code
    missing from that mapping keeps its own text as the name and gets rank 99.
    """
    codes = self.extra_info.get('audiences', [])
    # Several codes can map to one pair; the set comprehension drops the
    # repeats directly, without building a throwaway list first.
    ranked = sorted({self._audiences_pl.get(code, (99, code)) for code in codes})
    return [name for _rank, name in ranked]
750 def stage_note(self):
751 stage = self.extra_info.get('stage')
752 if stage and stage < '0.4':
753 return (_('This work needs modernisation'),
754 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
758 def choose_fragment(self):
759 fragments = self.fragments.order_by()
760 fragments_count = fragments.count()
761 if not fragments_count and self.children.exists():
762 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
763 fragments_count = fragments.count()
765 return fragments[randint(0, fragments_count - 1)]
767 return self.parent.choose_fragment()
771 def fragment_data(self):
772 fragment = self.choose_fragment()
775 'title': fragment.book.pretty_title(),
776 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
781 def update_popularity(self):
782 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
784 pop = self.popularity
787 except BookPopularity.DoesNotExist:
788 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return the book's ridero.eu address, built from ``get_language()`` and the slug with '-' turned into '_'."""
    language = get_language()
    book_code = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, book_code)
def like(self, user):
    """Put the book into the set given by ``get_set(user, '')``, unless ``likes`` already reports it as liked."""
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Reset the user's sets for this book to none, if ``likes`` reports the book as liked."""
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Return the author sort key, the title sort key and the id, glued together with ``SORT_KEY_SEP``."""
    separator = self.SORT_KEY_SEP
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    return separator.join(parts)
def cover_color(self):
    """Return the ``WLCover.epoch_colors`` entry for the book's epoch, or '#000000' when there is none."""
    epoch = self.extra_info.get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
811 def add_file_fields():
812 for format_ in Book.formats:
813 field_name = "%s_file" % format_
814 # This weird globals() assignment makes Django migrations comfortable.
815 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
816 _upload_to.__name__ = '_%s_upload_to' % format_
817 globals()[_upload_to.__name__] = _upload_to
820 format_, _("%s file" % format_.upper()),
821 upload_to=_upload_to,
822 storage=bofh_storage,
826 ).contribute_to_class(Book, field_name)
832 class BookPopularity(models.Model):
833 book = models.OneToOneField(Book, related_name='popularity')
834 count = models.IntegerField(default=0, db_index=True)