1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs
34 bofh_storage = BofhFileSystemStorage()
class UploadToPath(object):
    """Callable ``upload_to`` target that builds a file path from an instance slug.

    ``path`` is a %-format template with a single ``%s`` placeholder,
    e.g. ``'book/cover/%s.jpg'``.
    """

    def __init__(self, path):
        # Keep the template; __call__ reads it back as ``self.path``.
        self.path = path

    def __call__(self, instance, filename):
        """Return the template filled in with ``instance.slug``.

        ``filename`` is accepted but not used: the resulting path depends
        only on the slug.
        """
        return self.path % instance.slug
46 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
47 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
48 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
49 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap ``upload_path`` in an ``UploadToPath`` callable and return it."""
    uploader = UploadToPath(upload_path)
    return uploader
56 class Book(models.Model):
57 """Represents a book imported from WL-XML."""
58 title = models.CharField(_('title'), max_length=32767)
59 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
60 sort_key_author = models.CharField(
61 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
62 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
63 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
64 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
65 description = models.TextField(_('description'), blank=True)
66 abstract = models.TextField(_('abstract'), blank=True)
67 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
68 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
69 parent_number = models.IntegerField(_('parent number'), default=0)
70 extra_info = jsonfield.JSONField(_('extra information'), default={})
71 gazeta_link = models.CharField(blank=True, max_length=240)
72 wiki_link = models.CharField(blank=True, max_length=240)
73 print_on_demand = models.BooleanField(_('print on demand'), default=False)
74 recommended = models.BooleanField(_('recommended'), default=False)
75 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
76 preview = models.BooleanField(_('preview'), default=False)
77 preview_until = models.DateField(_('preview until'), blank=True, null=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
101 ebook_formats = constants.EBOOK_FORMATS
102 formats = ebook_formats + ['html', 'xml']
104 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
105 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
107 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
108 has_audience = models.BooleanField(default=False)
110 objects = models.Manager()
111 tagged = managers.ModelTaggedItemManager(Tag)
112 tags = managers.TagDescriptor(Tag)
113 tag_relations = GenericRelation(Tag.intermediary_table_model)
115 html_built = django.dispatch.Signal()
116 published = django.dispatch.Signal()
118 class AlreadyExists(Exception):
122 ordering = ('sort_key_author', 'sort_key')
123 verbose_name = _('book')
124 verbose_name_plural = _('books')
125 app_label = 'catalogue'
127 def __unicode__(self):
130 def get_initial(self):
132 return re.search(r'\w', self.title, re.U).group(0)
133 except AttributeError:
137 return self.tags.filter(category='author')
def tag_unicode(self, category):
    """Return the names of this book's tags in ``category``, joined with ', '.

    Relations returned by ``prefetched_relations`` are used when there are
    any; otherwise the names are fetched with a query on ``self.tags``.
    """
    relations = prefetched_relations(self, category)
    if relations:
        return ', '.join(rel.tag.name for rel in relations)
    # Nothing usable was prefetched: ask the database for the names.
    names = self.tags.filter(category=category).values_list('name', flat=True)
    return ', '.join(names)
def tags_by_category(self):
    """Return ``split_tags`` applied to this book's tags, minus 'set' and 'theme' tags."""
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in ``cached_author``."""
    author_text = self.cached_author
    return author_text
152 def translator(self):
153 translators = self.extra_info.get('translators')
156 if len(translators) > 3:
157 translators = translators[:2]
161 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the ``cover_source`` entry of ``extra_info``, falling back to the parent.

    The parent is consulted only when this book has no entry of its own.
    A book with neither an entry nor a parent yields ``''``.
    """
    try:
        return self.extra_info['cover_source']
    except KeyError:
        pass
    # Resolve the fallback lazily: passing it as a ``dict.get`` default would
    # evaluate the parent lookup even when the key is present.
    return self.parent.cover_source() if self.parent else ''
166 def save(self, force_insert=False, force_update=False, **kwargs):
167 from sortify import sortify
169 self.sort_key = sortify(self.title)[:120]
170 self.title = unicode(self.title) # ???
173 author = self.authors().first().sort_key
174 except AttributeError:
176 self.sort_key_author = author
178 self.cached_author = self.tag_unicode('author')
179 self.has_audience = 'audience' in self.extra_info
181 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the (view name, positional args) pair for the book detail view."""
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return view_name, view_args
def create_url(slug):
    """Return the (view name, positional args) pair for the detail view of ``slug``."""
    view_name = 'catalogue.views.book_detail'
    view_args = [slug]
    return view_name, view_args
def gallery_path(self):
    """Return what the imported ``gallery_path`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return what the imported ``gallery_url`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Map ``self.language`` through ``constants.LANGUAGES_3TO2``.

    Codes missing from the mapping are returned unchanged.
    """
    code = self.language
    return constants.LANGUAGES_3TO2.get(code, code)
def language_name(self):
    """Return the ``settings.LANGUAGES`` name for this book's language code, or ``""``."""
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    book_code = self.language_code()
    return book_code != settings.LANGUAGE_CODE
213 def set_audio_length(self):
214 length = self.get_audio_length()
216 self.audio_length = self.format_audio_length(length)
def format_audio_length(seconds):
    """Format a duration given in seconds as ``M:SS`` or ``H:MM:SS``.

    Durations below one hour use the short form; anything from 3600
    seconds up includes the hours.
    """
    minutes, secs = divmod(seconds, 60)
    if seconds < 3600:
        return '%d:%02d' % (minutes, secs)
    # An hour or more: split the minutes into hours and leftover minutes.
    hours, minutes = divmod(minutes, 60)
    return '%d:%02d:%02d' % (hours, minutes, secs)
def get_audio_length(self):
    """Return the summed length of this book's MP3 files, truncated to an int.

    Each file returned by ``get_mp3()`` is opened with mutagen and its
    ``info.length`` is added up. A book with no MP3 media yields 0.
    """
    from mutagen.mp3 import MP3

    # The accumulator must exist before the loop adds to it.
    total = 0
    for media in self.get_mp3() or ():
        total += MP3(media.file.path).info.length
    return int(total)
def has_media(self, type_):
    """Tell whether the book has media of the given type.

    Types listed in ``Book.formats`` are checked through the truthiness of
    the ``<type>_file`` attribute; any other type is looked up in
    ``self.media``.
    """
    if type_ not in Book.formats:
        return self.media.filter(type=type_).exists()
    file_field = getattr(self, "%s_file" % type_)
    return bool(file_field)
246 return self.has_media('mp3')
def get_media(self, type_):
    """Return the media of the given type, or ``None`` when the book has none.

    For types listed in ``Book.formats`` this is the ``<type>_file``
    attribute; for other types it is a filtered ``self.media`` queryset.
    """
    if not self.has_media(type_):
        return None
    if type_ in Book.formats:
        return getattr(self, "%s_file" % type_)
    return self.media.filter(type=type_)
258 return self.get_media("mp3")
261 return self.get_media("odt")
264 return self.get_media("ogg")
267 return self.get_media("daisy")
269 def media_url(self, format_):
270 media = self.get_media(format_)
273 return reverse('embargo_link', kwargs={'slug': self.slug, 'format_': format_})
280 return self.media_url('html')
283 return self.media_url('pdf')
286 return self.media_url('epub')
289 return self.media_url('mobi')
292 return self.media_url('txt')
295 return self.media_url('fb2')
298 return self.media_url('xml')
def has_description(self):
    """Tell whether ``description`` is non-empty."""
    return 0 < len(self.description)
# Function attributes; presumably consumed by the Django admin list display.
has_description.boolean = True
has_description.short_description = _('description')
def has_mp3_file(self):
    """Tell whether the book has media of type "mp3"."""
    media_type = "mp3"
    return self.has_media(media_type)
# Function attributes; presumably consumed by the Django admin list display.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether the book has media of type "ogg"."""
    media_type = "ogg"
    return self.has_media(media_type)
# Function attributes; presumably consumed by the Django admin list display.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether the book has media of type "daisy"."""
    media_type = "daisy"
    return self.has_media(media_type)
# Function attributes; presumably consumed by the Django admin list display.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
320 def get_audiobooks(self):
322 for m in self.media.filter(type='ogg').order_by().iterator():
323 ogg_files[m.name] = m
327 for mp3 in self.media.filter(type='mp3').iterator():
328 # ogg files are always from the same project
329 meta = mp3.extra_info
330 project = meta.get('project')
333 project = u'CzytamySłuchając'
335 projects.add((project, meta.get('funded_by', '')))
339 ogg = ogg_files.get(mp3.name)
342 audiobooks.append(media)
344 projects = sorted(projects)
345 return audiobooks, projects
347 def wldocument(self, parse_dublincore=True, inherit=True):
348 from catalogue.import_utils import ORMDocProvider
349 from librarian.parser import WLDocument
351 if inherit and self.parent:
352 meta_fallbacks = self.parent.cover_info()
354 meta_fallbacks = None
356 return WLDocument.from_file(
358 provider=ORMDocProvider(self),
359 parse_dublincore=parse_dublincore,
360 meta_fallbacks=meta_fallbacks)
363 def zip_format(format_):
364 def pretty_file_name(book):
365 return "%s/%s.%s" % (
366 book.extra_info['author'],
370 field_name = "%s_file" % format_
371 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
372 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
373 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pass the files of this book's ``format_`` media to ``create_zip``.

    Each entry is paired with ``None`` as its first element, and the
    second argument to ``create_zip`` is ``<slug>_<format_>``.
    """
    media_items = BookMedia.objects.filter(book=self, type=format_)
    paths = [(None, item.file.path) for item in media_items]
    zip_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, zip_name)
380 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
382 from search.index import Index
385 index.index_book(self, book_info)
391 index.index.rollback()
# NOTE: this will cause problems in conjunction with paid previews.
395 def download_pictures(self, remote_gallery_url):
396 gallery_path = self.gallery_path()
397 # delete previous files, so we don't include old files in ebooks
398 if os.path.isdir(gallery_path):
399 for filename in os.listdir(gallery_path):
400 file_path = os.path.join(gallery_path, filename)
402 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
404 makedirs(gallery_path)
405 for ilustr in ilustr_elements:
406 ilustr_src = ilustr.get('src')
407 ilustr_path = os.path.join(gallery_path, ilustr_src)
408 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
410 def load_abstract(self):
411 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
412 if abstract is not None:
413 self.abstract = transform_abstrakt(abstract)
418 def from_xml_file(cls, xml_file, **kwargs):
419 from django.core.files import File
420 from librarian import dcparser
422 # use librarian to parse meta-data
423 book_info = dcparser.parse(xml_file)
425 if not isinstance(xml_file, File):
426 xml_file = File(open(xml_file))
429 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
434 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
435 search_index_tags=True, remote_gallery_url=None, days=0):
436 if dont_build is None:
438 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
440 # check for parts before we do anything
442 if hasattr(book_info, 'parts'):
443 for part_url in book_info.parts:
445 children.append(Book.objects.get(slug=part_url.slug))
446 except Book.DoesNotExist:
447 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
450 book_slug = book_info.url.slug
451 if re.search(r'[^a-z0-9-]', book_slug):
452 raise ValueError('Invalid characters in slug')
453 book, created = Book.objects.get_or_create(slug=book_slug)
458 book.preview = bool(days)
460 book.preview_until = date.today() + timedelta(days)
463 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
464 # Save shelves for this book
465 book_shelves = list(book.tags.filter(category='set'))
466 old_cover = book.cover_info()
469 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
471 book.xml_file.set_readable(False)
473 book.language = book_info.language
474 book.title = book_info.title
475 if book_info.variant_of:
476 book.common_slug = book_info.variant_of.slug
478 book.common_slug = book.slug
479 book.extra_info = book_info.to_dict()
483 meta_tags = Tag.tags_from_info(book_info)
485 for tag in meta_tags:
486 if not tag.for_books:
490 book.tags = set(meta_tags + book_shelves)
492 cover_changed = old_cover != book.cover_info()
493 obsolete_children = set(b for b in book.children.all()
494 if b not in children)
495 notify_cover_changed = []
496 for n, child_book in enumerate(children):
497 new_child = child_book.parent != book
498 child_book.parent = book
499 child_book.parent_number = n
501 if new_child or cover_changed:
502 notify_cover_changed.append(child_book)
503 # Disown unfaithful children and let them cope on their own.
504 for child in obsolete_children:
506 child.parent_number = 0
509 notify_cover_changed.append(child)
511 cls.repopulate_ancestors()
512 tasks.update_counters.delay()
514 if remote_gallery_url:
515 book.download_pictures(remote_gallery_url)
517 # No saves beyond this point.
520 if 'cover' not in dont_build:
521 book.cover.build_delay()
522 book.cover_thumb.build_delay()
523 book.cover_api_thumb.build_delay()
524 book.simple_cover.build_delay()
526 # Build HTML and ebooks.
527 book.html_file.build_delay()
529 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
530 if format_ not in dont_build:
531 getattr(book, '%s_file' % format_).build_delay()
532 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
533 if format_ not in dont_build:
534 getattr(book, '%s_file' % format_).build_delay()
536 if not settings.NO_SEARCH_INDEX and search_index:
537 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
539 for child in notify_cover_changed:
540 child.parent_cover_changed()
542 book.save() # update sort_key_author
543 book.update_popularity()
544 cls.published.send(sender=cls, instance=book)
549 def repopulate_ancestors(cls):
550 """Fixes the ancestry cache."""
552 cursor = connection.cursor()
553 if connection.vendor == 'postgres':
554 cursor.execute("TRUNCATE catalogue_book_ancestor")
556 WITH RECURSIVE ancestry AS (
557 SELECT book.id, book.parent_id
558 FROM catalogue_book AS book
559 WHERE book.parent_id IS NOT NULL
561 SELECT ancestor.id, book.parent_id
562 FROM ancestry AS ancestor, catalogue_book AS book
563 WHERE ancestor.parent_id = book.id
564 AND book.parent_id IS NOT NULL
566 INSERT INTO catalogue_book_ancestor
567 (from_book_id, to_book_id)
573 cursor.execute("DELETE FROM catalogue_book_ancestor")
574 for b in cls.objects.exclude(parent=None):
576 while parent is not None:
577 b.ancestor.add(parent)
578 parent = parent.parent
580 def flush_includes(self, languages=True):
583 if languages is True:
584 languages = [lc for (lc, _ln) in settings.LANGUAGES]
586 template % (self.pk, lang)
588 '/katalog/b/%d/mini.%s.html',
589 '/katalog/b/%d/mini_nolink.%s.html',
590 '/katalog/b/%d/short.%s.html',
591 '/katalog/b/%d/wide.%s.html',
592 '/api/include/book/%d.%s.json',
593 '/api/include/book/%d.%s.xml',
595 for lang in languages
598 def cover_info(self, inherit=True):
599 """Returns a dictionary to serve as fallback for BookInfo.
601 For now, the only thing inherited is the cover image.
605 for field in ('cover_url', 'cover_by', 'cover_source'):
606 val = self.extra_info.get(field)
611 if inherit and need and self.parent is not None:
612 parent_info = self.parent.cover_info()
613 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with counts, used on fragments of this book or its descendants.

    A fragment qualifies when its book is this one or has this one among
    its ``ancestor`` entries.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    Only acts when this book has no cover info of its own. In that case it
    queues rebuilds of the cover images and of the ebook formats listed in
    ``constants.EBOOK_FORMATS_WITH_COVERS``, honouring
    ``app_settings.DONT_BUILD``, and then notifies every child.
    """
    if self.cover_info(inherit=False):
        # Own cover info present: the parent's change does not reach this book.
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_field in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_field).build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    These are the objects sharing this book's ``common_slug``, with the
    book itself excluded.
    """
    same_common_slug = type(self).objects.filter(common_slug=self.common_slug)
    return same_common_slug.exclude(pk=self.pk)
643 while parent is not None:
644 books.insert(0, parent)
645 parent = parent.parent
def pretty_title(self, html_links=False):
    """Return author names and book titles joined with ', '.

    Authors come first, followed by the titles of the books returned by
    ``parents()`` and finally this book's own title.

    With ``html_links`` set, each name is wrapped in an ``<a>`` element
    pointing at its ``get_absolute_url()``; otherwise plain names are used.
    """
    names = [(tag.name, tag.get_absolute_url())
             for tag in self.authors().only('name', 'category', 'slug')]
    books = self.parents() + [self]
    names.extend((book.title, book.get_absolute_url()) for book in books)

    # Honour the caller's choice between linked and plain output.
    if html_links:
        names = ['<a href="%s">%s</a>' % (url, name) for name, url in names]
    else:
        names = [name for name, _url in names]
    return ', '.join(names)
660 publisher = self.extra_info['publisher']
661 if isinstance(publisher, basestring):
663 elif isinstance(publisher, list):
664 return ', '.join(publisher)
def tagged_top_level(cls, tags):
    """Returns top-level books tagged with `tags`.

    It only returns those books which don't have ancestors which are
    also tagged with those tags.
    """
    objects = cls.tagged.with_all(tags)
    # Drop every book that has an ancestor inside the same tagged set.
    return objects.exclude(ancestor__in=objects)
678 def book_list(cls, book_filter=None):
679 """Generates a hierarchical listing of all books.
681 Books are optionally filtered with a test function.
686 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
688 books = books.filter(book_filter).distinct()
690 book_ids = set(b['pk'] for b in books.values("pk").iterator())
691 for book in books.iterator():
692 parent = book.parent_id
693 if parent not in book_ids:
695 books_by_parent.setdefault(parent, []).append(book)
697 for book in books.iterator():
698 books_by_parent.setdefault(book.parent_id, []).append(book)
701 books_by_author = OrderedDict()
702 for tag in Tag.objects.filter(category='author').iterator():
703 books_by_author[tag] = []
705 for book in books_by_parent.get(None, ()):
706 authors = list(book.authors().only('pk'))
708 for author in authors:
709 books_by_author[author].append(book)
713 return books_by_author, orphans, books_by_parent
716 "SP": (1, u"szkoła podstawowa"),
717 "SP1": (1, u"szkoła podstawowa"),
718 "SP2": (1, u"szkoła podstawowa"),
719 "SP3": (1, u"szkoła podstawowa"),
720 "P": (1, u"szkoła podstawowa"),
721 "G": (2, u"gimnazjum"),
723 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return the sorted, de-duplicated labels for the book's audiences.

    Every code in ``extra_info['audiences']`` is looked up in
    ``_audiences_pl``, which yields a (sort rank, label) pair. Unknown
    codes keep their own text as the label and get rank 99.
    """
    codes = self.extra_info.get('audiences', [])
    ranked = set()
    for code in codes:
        ranked.add(self._audiences_pl.get(code, (99, code)))
    return [label for _rank, label in sorted(ranked)]
731 def stage_note(self):
732 stage = self.extra_info.get('stage')
733 if stage and stage < '0.4':
734 return (_('This work needs modernisation'),
735 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
739 def choose_fragment(self):
740 fragments = self.fragments.order_by()
741 fragments_count = fragments.count()
742 if not fragments_count and self.children.exists():
743 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
744 fragments_count = fragments.count()
746 return fragments[randint(0, fragments_count - 1)]
748 return self.parent.choose_fragment()
752 def fragment_data(self):
753 fragment = self.choose_fragment()
756 'title': fragment.book.pretty_title(),
757 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
762 def update_popularity(self):
763 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
765 pop = self.popularity
768 except BookPopularity.DoesNotExist:
769 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu address of this book for the active language.

    Hyphens in the slug are replaced with underscores.
    """
    language = get_language()
    book_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, book_id)
def like(self, user):
    """Assign this book to the set that ``get_set(user, '')`` returns.

    Does nothing when ``likes(user, self)`` is already true.
    """
    from social.utils import likes, get_set, set_sets

    if likes(user, self):
        return
    target_set = get_set(user, '')
    set_sets(user, self, [target_set])
def unlike(self, user):
    """Replace the user's sets for this book with an empty list.

    Does nothing unless ``likes(user, self)`` is true.
    """
    from social.utils import likes, set_sets

    if not likes(user, self):
        return
    set_sets(user, self, [])
786 def add_file_fields():
787 for format_ in Book.formats:
788 field_name = "%s_file" % format_
789 # This weird globals() assignment makes Django migrations comfortable.
790 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
791 _upload_to.__name__ = '_%s_upload_to' % format_
792 globals()[_upload_to.__name__] = _upload_to
795 format_, _("%s file" % format_.upper()),
796 upload_to=_upload_to,
797 storage=bofh_storage,
801 ).contribute_to_class(Book, field_name)
807 class BookPopularity(models.Model):
808 book = models.OneToOneField(Book, related_name='popularity')
809 count = models.IntegerField(default=0, db_index=True)