1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs
34 bofh_storage = BofhFileSystemStorage()
38 class UploadToPath(object):
39 def __init__(self, path):
42 def __call__(self, instance, filename):
43 return self.path % instance.slug
# Upload-path callables passed as `upload_to` to the cover-related fields of
# Book.  Each wraps a '%s' pattern which UploadToPath.__call__ fills with the
# instance's slug.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Build the upload-path callable for an ebook file field.

    `upload_path` is handed unchanged to `UploadToPath`; the resulting
    object is returned.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
56 class Book(models.Model):
57 """Represents a book imported from WL-XML."""
58 title = models.CharField(_('title'), max_length=32767)
59 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
60 sort_key_author = models.CharField(
61 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
62 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
63 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
64 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
65 description = models.TextField(_('description'), blank=True)
66 abstract = models.TextField(_('abstract'), blank=True)
67 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
68 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
69 parent_number = models.IntegerField(_('parent number'), default=0)
70 extra_info = jsonfield.JSONField(_('extra information'), default={})
71 gazeta_link = models.CharField(blank=True, max_length=240)
72 wiki_link = models.CharField(blank=True, max_length=240)
73 print_on_demand = models.BooleanField(_('print on demand'), default=False)
74 recommended = models.BooleanField(_('recommended'), default=False)
75 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
76 preview = models.BooleanField(_('preview'), default=False)
77 preview_until = models.DateField(_('preview until'), blank=True, null=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
101 ebook_formats = constants.EBOOK_FORMATS
102 formats = ebook_formats + ['html', 'xml']
104 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
105 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
107 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
108 has_audience = models.BooleanField(default=False)
110 objects = models.Manager()
111 tagged = managers.ModelTaggedItemManager(Tag)
112 tags = managers.TagDescriptor(Tag)
113 tag_relations = GenericRelation(Tag.intermediary_table_model)
115 html_built = django.dispatch.Signal()
116 published = django.dispatch.Signal()
118 class AlreadyExists(Exception):
122 ordering = ('sort_key_author', 'sort_key')
123 verbose_name = _('book')
124 verbose_name_plural = _('books')
125 app_label = 'catalogue'
127 def __unicode__(self):
130 def get_initial(self):
132 return re.search(r'\w', self.title, re.U).group(0)
133 except AttributeError:
137 return self.tags.filter(category='author')
139 def tag_unicode(self, category):
140 relations = prefetched_relations(self, category)
142 return ', '.join(rel.tag.name for rel in relations)
144 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, except 'set' and 'theme' ones, run through `split_tags`."""
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in the `cached_author` field."""
    cached = self.cached_author
    return cached
152 def translator(self):
153 translators = self.extra_info.get('translators')
156 if len(translators) > 3:
157 translators = translators[:2]
161 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the 'cover_source' entry for this book, inherited if needed.

    The book's own `extra_info['cover_source']` wins whenever the key is
    present (even if its value is empty).  Otherwise the parent's
    `cover_source()` is returned, or '' when there is no parent.

    The parent is only consulted when the key is actually missing, so
    books with their own cover source do not walk the ancestor chain.
    """
    info = self.extra_info
    if 'cover_source' in info:
        return info['cover_source']
    if self.parent:
        return self.parent.cover_source()
    return ''
166 def save(self, force_insert=False, force_update=False, **kwargs):
167 from sortify import sortify
169 self.sort_key = sortify(self.title)[:120]
170 self.title = unicode(self.title) # ???
173 author = self.authors().first().sort_key
174 except AttributeError:
176 self.sort_key_author = author
178 self.cached_author = self.tag_unicode('author')
179 self.has_audience = 'audience' in self.extra_info
181 ret = super(Book, self).save(force_insert, force_update, **kwargs)
186 def get_absolute_url(self):
187 return 'catalogue.views.book_detail', [self.slug]
191 def create_url(slug):
192 return 'catalogue.views.book_detail', [slug]
def gallery_path(self):
    """Return the gallery path for this book.

    Delegates to the module-level `gallery_path` helper, keyed by slug.
    """
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return the gallery URL for this book.

    Delegates to the module-level `gallery_url` helper, keyed by slug.
    """
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate the book's `language` through `constants.LANGUAGES_3TO2`.

    Codes absent from the mapping are returned unchanged.
    """
    mapping = constants.LANGUAGES_3TO2
    return mapping.get(self.language, self.language)
def language_name(self):
    """Return the name listed in `settings.LANGUAGES` for the book's language code.

    Returns "" when the code is not listed there.
    """
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language code differs from `settings.LANGUAGE_CODE`."""
    book_language = self.language_code()
    return book_language != settings.LANGUAGE_CODE
213 def set_audio_length(self):
214 length = self.get_audio_length()
216 self.audio_length = self.format_audio_length(length)
220 def format_audio_length(seconds):
222 minutes = seconds // 60
223 seconds = seconds % 60
224 return '%d:%02d' % (minutes, seconds)
226 hours = seconds // 3600
227 minutes = seconds % 3600 // 60
228 seconds = seconds % 60
229 return '%d:%02d:%02d' % (hours, minutes, seconds)
231 def get_audio_length(self):
233 for media in self.get_mp3() or ():
234 total += app_settings.GET_MP3_LENGTH(media.file.path)
237 def has_media(self, type_):
238 if type_ in Book.formats:
239 return bool(getattr(self, "%s_file" % type_))
241 return self.media.filter(type=type_).exists()
244 return self.has_media('mp3')
246 def get_media(self, type_):
247 if self.has_media(type_):
248 if type_ in Book.formats:
249 return getattr(self, "%s_file" % type_)
251 return self.media.filter(type=type_)
256 return self.get_media("mp3")
259 return self.get_media("odt")
262 return self.get_media("ogg")
265 return self.get_media("daisy")
267 def media_url(self, format_):
268 media = self.get_media(format_)
271 return reverse('embargo_link', kwargs={'slug': self.slug, 'format_': format_})
278 return self.media_url('html')
281 return self.media_url('pdf')
284 return self.media_url('epub')
287 return self.media_url('mobi')
290 return self.media_url('txt')
293 return self.media_url('fb2')
296 return self.media_url('xml')
def has_description(self):
    """Tell whether the book's description is non-empty."""
    return len(self.description) != 0
# Display metadata attached to the callable -- presumably read by the
# Django admin change list; confirm against the admin configuration.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether the book has 'mp3' media, as reported by `has_media`."""
    media_type = "mp3"
    return self.has_media(media_type)
# Display metadata attached to the callable -- presumably for the admin list.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether the book has 'ogg' media, as reported by `has_media`."""
    media_type = "ogg"
    return self.has_media(media_type)
# Display metadata attached to the callable -- presumably for the admin list.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether the book has 'daisy' media, as reported by `has_media`."""
    media_type = "daisy"
    return self.has_media(media_type)
# Display metadata attached to the callable -- presumably for the admin list.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
318 def get_audiobooks(self):
320 for m in self.media.filter(type='ogg').order_by().iterator():
321 ogg_files[m.name] = m
325 for mp3 in self.media.filter(type='mp3').iterator():
326 # ogg files are always from the same project
327 meta = mp3.extra_info
328 project = meta.get('project')
331 project = u'CzytamySłuchając'
333 projects.add((project, meta.get('funded_by', '')))
337 ogg = ogg_files.get(mp3.name)
340 audiobooks.append(media)
342 projects = sorted(projects)
343 return audiobooks, projects
345 def wldocument(self, parse_dublincore=True, inherit=True):
346 from catalogue.import_utils import ORMDocProvider
347 from librarian.parser import WLDocument
349 if inherit and self.parent:
350 meta_fallbacks = self.parent.cover_info()
352 meta_fallbacks = None
354 return WLDocument.from_file(
356 provider=ORMDocProvider(self),
357 parse_dublincore=parse_dublincore,
358 meta_fallbacks=meta_fallbacks)
361 def zip_format(format_):
362 def pretty_file_name(book):
363 return "%s/%s.%s" % (
364 book.extra_info['author'],
368 field_name = "%s_file" % format_
369 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
370 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
371 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Zip this book's media files of type `format_` using `create_zip`.

    Each file is passed as a `(None, path)` pair and the archive is named
    "<slug>_<format_>".  Returns whatever `create_zip` returns.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    paths = [(None, media.file.path) for media in media_files]
    archive_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, archive_name)
378 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
380 from search.index import Index
383 index.index_book(self, book_info)
389 index.index.rollback()
# NOTE: this will cause problems when combined with paid previews.
393 def download_pictures(self, remote_gallery_url):
394 gallery_path = self.gallery_path()
395 # delete previous files, so we don't include old files in ebooks
396 if os.path.isdir(gallery_path):
397 for filename in os.listdir(gallery_path):
398 file_path = os.path.join(gallery_path, filename)
400 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
402 makedirs(gallery_path)
403 for ilustr in ilustr_elements:
404 ilustr_src = ilustr.get('src')
405 ilustr_path = os.path.join(gallery_path, ilustr_src)
406 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
408 def load_abstract(self):
409 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
410 if abstract is not None:
411 self.abstract = transform_abstrakt(abstract)
416 def from_xml_file(cls, xml_file, **kwargs):
417 from django.core.files import File
418 from librarian import dcparser
420 # use librarian to parse meta-data
421 book_info = dcparser.parse(xml_file)
423 if not isinstance(xml_file, File):
424 xml_file = File(open(xml_file))
427 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
432 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
433 search_index_tags=True, remote_gallery_url=None, days=0):
434 if dont_build is None:
436 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
438 # check for parts before we do anything
440 if hasattr(book_info, 'parts'):
441 for part_url in book_info.parts:
443 children.append(Book.objects.get(slug=part_url.slug))
444 except Book.DoesNotExist:
445 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
448 book_slug = book_info.url.slug
449 if re.search(r'[^a-z0-9-]', book_slug):
450 raise ValueError('Invalid characters in slug')
451 book, created = Book.objects.get_or_create(slug=book_slug)
456 book.preview = bool(days)
458 book.preview_until = date.today() + timedelta(days)
461 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
462 # Save shelves for this book
463 book_shelves = list(book.tags.filter(category='set'))
464 old_cover = book.cover_info()
467 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
469 book.xml_file.set_readable(False)
471 book.language = book_info.language
472 book.title = book_info.title
473 if book_info.variant_of:
474 book.common_slug = book_info.variant_of.slug
476 book.common_slug = book.slug
477 book.extra_info = book_info.to_dict()
481 meta_tags = Tag.tags_from_info(book_info)
483 for tag in meta_tags:
484 if not tag.for_books:
488 book.tags = set(meta_tags + book_shelves)
490 cover_changed = old_cover != book.cover_info()
491 obsolete_children = set(b for b in book.children.all()
492 if b not in children)
493 notify_cover_changed = []
494 for n, child_book in enumerate(children):
495 new_child = child_book.parent != book
496 child_book.parent = book
497 child_book.parent_number = n
499 if new_child or cover_changed:
500 notify_cover_changed.append(child_book)
501 # Disown unfaithful children and let them cope on their own.
502 for child in obsolete_children:
504 child.parent_number = 0
507 notify_cover_changed.append(child)
509 cls.repopulate_ancestors()
510 tasks.update_counters.delay()
512 if remote_gallery_url:
513 book.download_pictures(remote_gallery_url)
515 # No saves beyond this point.
518 if 'cover' not in dont_build:
519 book.cover.build_delay()
520 book.cover_thumb.build_delay()
521 book.cover_api_thumb.build_delay()
522 book.simple_cover.build_delay()
524 # Build HTML and ebooks.
525 book.html_file.build_delay()
527 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
528 if format_ not in dont_build:
529 getattr(book, '%s_file' % format_).build_delay()
530 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
531 if format_ not in dont_build:
532 getattr(book, '%s_file' % format_).build_delay()
534 if not settings.NO_SEARCH_INDEX and search_index:
535 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
537 for child in notify_cover_changed:
538 child.parent_cover_changed()
540 book.save() # update sort_key_author
541 book.update_popularity()
542 cls.published.send(sender=cls, instance=book)
547 def repopulate_ancestors(cls):
548 """Fixes the ancestry cache."""
550 cursor = connection.cursor()
551 if connection.vendor == 'postgres':
552 cursor.execute("TRUNCATE catalogue_book_ancestor")
554 WITH RECURSIVE ancestry AS (
555 SELECT book.id, book.parent_id
556 FROM catalogue_book AS book
557 WHERE book.parent_id IS NOT NULL
559 SELECT ancestor.id, book.parent_id
560 FROM ancestry AS ancestor, catalogue_book AS book
561 WHERE ancestor.parent_id = book.id
562 AND book.parent_id IS NOT NULL
564 INSERT INTO catalogue_book_ancestor
565 (from_book_id, to_book_id)
571 cursor.execute("DELETE FROM catalogue_book_ancestor")
572 for b in cls.objects.exclude(parent=None):
574 while parent is not None:
575 b.ancestor.add(parent)
576 parent = parent.parent
578 def flush_includes(self, languages=True):
581 if languages is True:
582 languages = [lc for (lc, _ln) in settings.LANGUAGES]
584 template % (self.pk, lang)
586 '/katalog/b/%d/mini.%s.html',
587 '/katalog/b/%d/mini_nolink.%s.html',
588 '/katalog/b/%d/short.%s.html',
589 '/katalog/b/%d/wide.%s.html',
590 '/api/include/book/%d.%s.json',
591 '/api/include/book/%d.%s.xml',
593 for lang in languages
596 def cover_info(self, inherit=True):
597 """Returns a dictionary to serve as fallback for BookInfo.
599 For now, the only thing inherited is the cover image.
603 for field in ('cover_url', 'cover_by', 'cover_source'):
604 val = self.extra_info.get(field)
609 if inherit and need and self.parent is not None:
610 parent_info = self.parent.cover_info()
611 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags used by fragments of this book or its descendants.

    Fragments are selected when their book is this one or has this one
    among its ancestors.  `counts=True` is forwarded to
    `usage_for_queryset` (presumably attaching a usage count per tag).
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return usage.filter(category='theme')
620 def parent_cover_changed(self):
621 """Called when parent book's cover image is changed."""
622 if not self.cover_info(inherit=False):
623 if 'cover' not in app_settings.DONT_BUILD:
624 self.cover.build_delay()
625 self.cover_thumb.build_delay()
626 self.cover_api_thumb.build_delay()
627 self.simple_cover.build_delay()
628 for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
629 if format_ not in app_settings.DONT_BUILD:
630 getattr(self, '%s_file' % format_).build_delay()
631 for child in self.children.all():
632 child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's `common_slug`.

    These are other versions of the same work (e.g. in other languages);
    the book itself is excluded from the result.
    """
    same_work = type(self).objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
641 while parent is not None:
642 books.insert(0, parent)
643 parent = parent.parent
646 def pretty_title(self, html_links=False):
647 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
648 books = self.parents() + [self]
649 names.extend([(b.title, b.get_absolute_url()) for b in books])
652 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
654 names = [tag[0] for tag in names]
655 return ', '.join(names)
658 publisher = self.extra_info['publisher']
659 if isinstance(publisher, basestring):
661 elif isinstance(publisher, list):
662 return ', '.join(publisher)
665 def tagged_top_level(cls, tags):
666 """ Returns top-level books tagged with `tags`.
668 It only returns those books which don't have ancestors which are
669 also tagged with those tags.
672 objects = cls.tagged.with_all(tags)
673 return objects.exclude(ancestor__in=objects)
676 def book_list(cls, book_filter=None):
677 """Generates a hierarchical listing of all books.
679 Books are optionally filtered with a test function.
684 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
686 books = books.filter(book_filter).distinct()
688 book_ids = set(b['pk'] for b in books.values("pk").iterator())
689 for book in books.iterator():
690 parent = book.parent_id
691 if parent not in book_ids:
693 books_by_parent.setdefault(parent, []).append(book)
695 for book in books.iterator():
696 books_by_parent.setdefault(book.parent_id, []).append(book)
699 books_by_author = OrderedDict()
700 for tag in Tag.objects.filter(category='author').iterator():
701 books_by_author[tag] = []
703 for book in books_by_parent.get(None, ()):
704 authors = list(book.authors().only('pk'))
706 for author in authors:
707 books_by_author[author].append(book)
711 return books_by_author, orphans, books_by_parent
714 "SP": (1, u"szkoła podstawowa"),
715 "SP1": (1, u"szkoła podstawowa"),
716 "SP2": (1, u"szkoła podstawowa"),
717 "SP3": (1, u"szkoła podstawowa"),
718 "P": (1, u"szkoła podstawowa"),
719 "G": (2, u"gimnazjum"),
721 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return sorted, de-duplicated audience labels for this book.

    Each code from `extra_info['audiences']` is looked up in
    `_audiences_pl`, which yields a (rank, label) pair; unknown codes get
    rank 99 and are used as their own label.  Labels are returned in the
    sort order of those pairs.
    """
    codes = self.extra_info.get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [entry[1] for entry in sorted(ranked)]
729 def stage_note(self):
730 stage = self.extra_info.get('stage')
731 if stage and stage < '0.4':
732 return (_('This work needs modernisation'),
733 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
737 def choose_fragment(self):
738 fragments = self.fragments.order_by()
739 fragments_count = fragments.count()
740 if not fragments_count and self.children.exists():
741 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
742 fragments_count = fragments.count()
744 return fragments[randint(0, fragments_count - 1)]
746 return self.parent.choose_fragment()
750 def fragment_data(self):
751 fragment = self.choose_fragment()
754 'title': fragment.book.pretty_title(),
755 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
760 def update_popularity(self):
761 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
763 pop = self.popularity
766 except BookPopularity.DoesNotExist:
767 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL for this book.

    Uses the active language from `get_language()` and the book's slug
    with dashes replaced by underscores.
    """
    language = get_language()
    ridero_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_id)
def like(self, user):
    """Mark this book as liked by `user`; a no-op when it is already liked.

    The book is assigned to the set returned by `get_set(user, '')`
    (presumably the user's unnamed default shelf -- confirm in social.utils).
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Remove `user`'s like from this book; a no-op when it is not liked.

    Done by assigning the book an empty list of sets for that user.
    """
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
784 def add_file_fields():
785 for format_ in Book.formats:
786 field_name = "%s_file" % format_
787 # This weird globals() assignment makes Django migrations comfortable.
788 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
789 _upload_to.__name__ = '_%s_upload_to' % format_
790 globals()[_upload_to.__name__] = _upload_to
793 format_, _("%s file" % format_.upper()),
794 upload_to=_upload_to,
795 storage=bofh_storage,
799 ).contribute_to_class(Book, field_name)
805 class BookPopularity(models.Model):
806 book = models.OneToOneField(Book, related_name='popularity')
807 count = models.IntegerField(default=0, db_index=True)