1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.html import transform_abstrakt
24 from newtagging import managers
25 from catalogue import constants
26 from catalogue.fields import EbookField
27 from catalogue.models import Tag, Fragment, BookMedia
28 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
29 from catalogue.models.tag import prefetched_relations
30 from catalogue import app_settings
31 from catalogue import tasks
32 from wolnelektury.utils import makedirs
# Single storage instance, passed as ``storage=`` to the cover field and to
# the per-format ebook file fields declared further down in this module.
bofh_storage = BofhFileSystemStorage()
38 class UploadToPath(object):
39 def __init__(self, path):
42 def __call__(self, instance, filename):
43 return self.path % instance.slug
# ``upload_to`` callables for the cover image fields of Book.
# Each template's ``%s`` is filled with the book's slug when the callable is
# invoked (see UploadToPath.__call__).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Build the ``upload_to`` callable for an ebook file field.

    ``upload_path`` is handed unchanged to ``UploadToPath``; the resulting
    object is what gets returned.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
56 class Book(models.Model):
57 """Represents a book imported from WL-XML."""
title = models.CharField(_('title'), max_length=32767)
# Derived from the title in save(): sortify(title), cut to 120 characters.
sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
# Filled in by save() from the sort key of the book's first author tag.
sort_key_author = models.CharField(
    _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
# Slug shared between variants of one work: from_text_and_meta() sets it to
# the ``variant_of`` slug when there is one, otherwise to the book's own slug;
# other_versions() matches books on this field.
common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
# Stored language code (up to 3 characters); language_code() maps it through
# constants.LANGUAGES_3TO2.
language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
description = models.TextField(_('description'), blank=True)
# Written by load_abstract() from the document's ``abstrakt`` element.
abstract = models.TextField(_('abstract'), blank=True)
# Timestamps maintained by Django (auto_now_add / auto_now).
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
# Position among the parent's children; from_text_and_meta() assigns it from
# the order of the parts and resets it to 0 for children that are dropped.
parent_number = models.IntegerField(_('parent number'), default=0)
# Replaced with ``book_info.to_dict()`` on every import (from_text_and_meta()).
# NOTE(review): ``jsonfield`` is not among the imports visible in this excerpt,
# and ``default={}`` is a shared mutable object unless the field copies it --
# confirm both.
extra_info = jsonfield.JSONField(_('extra information'), default={})
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)
print_on_demand = models.BooleanField(_('print on demand'), default=False)
recommended = models.BooleanField(_('recommended'), default=False)
# Text produced by format_audio_length(): 'M:SS' or 'H:MM:SS'.
audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
# from_text_and_meta() sets ``preview`` to bool(days) and ``preview_until``
# to today's date plus ``days``.
preview = models.BooleanField(_('preview'), default=False)
preview_until = models.DateField(_('preview until'), blank=True, null=True)
79 # files generated during publication
82 null=True, blank=True,
83 upload_to=_cover_upload_to,
84 storage=bofh_storage, max_length=255)
85 # Cleaner version of cover for thumbs
86 cover_thumb = EbookField(
87 'cover_thumb', _('cover thumbnail'),
88 null=True, blank=True,
89 upload_to=_cover_thumb_upload_to,
91 cover_api_thumb = EbookField(
92 'cover_api_thumb', _('cover thumbnail for mobile app'),
93 null=True, blank=True,
94 upload_to=_cover_api_thumb_upload_to,
96 simple_cover = EbookField(
97 'simple_cover', _('cover for mobile app'),
98 null=True, blank=True,
99 upload_to=_simple_cover_upload_to,
# Formats stored directly on the book: add_file_fields() attaches one
# ``<format>_file`` field per entry of ``formats``, and has_media() /
# get_media() look those attributes up for any type listed here.
ebook_formats = constants.EBOOK_FORMATS
formats = ebook_formats + ['html', 'xml']
# Direct parent in the book hierarchy; its children are reachable as ``children``.
parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
# Ancestry cache (not editable); rebuilt wholesale by repopulate_ancestors().
ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
# Both values below are recomputed in save(): the author string comes from
# tag_unicode('author'), the flag from ``'audience' in extra_info``.
cached_author = models.CharField(blank=True, max_length=240, db_index=True)
has_audience = models.BooleanField(default=False)
# Default manager plus the tagging helpers from ``newtagging``.
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)
tag_relations = GenericRelation(Tag.intermediary_table_model)
# Signals; ``published`` is sent at the end of from_text_and_meta() with
# ``instance=book``. No sender of ``html_built`` is visible in this excerpt.
html_built = django.dispatch.Signal()
published = django.dispatch.Signal()
short_html_url_name = 'catalogue_book_short'
120 class AlreadyExists(Exception):
124 ordering = ('sort_key_author', 'sort_key')
125 verbose_name = _('book')
126 verbose_name_plural = _('books')
127 app_label = 'catalogue'
129 def __unicode__(self):
132 def get_initial(self):
134 return re.search(r'\w', self.title, re.U).group(0)
135 except AttributeError:
139 return self.tags.filter(category='author')
141 def tag_unicode(self, category):
142 relations = prefetched_relations(self, category)
144 return ', '.join(rel.tag.name for rel in relations)
146 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the book's tags, without 'set' and 'theme' ones, run through ``split_tags``.

    The shape of the result is whatever ``split_tags`` produces.
    """
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in the ``cached_author`` field."""
    cached = self.cached_author
    return cached
154 def translator(self):
155 translators = self.extra_info.get('translators')
158 if len(translators) > 3:
159 translators = translators[:2]
163 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the cover source recorded for this book, inheriting from the parent.

    The book's own ``extra_info['cover_source']`` wins whenever the key is
    present, even when its value is empty. Otherwise the parent's
    ``cover_source()`` is returned, or ``''`` for a book without a parent.
    """
    missing = object()
    own_source = self.extra_info.get('cover_source', missing)
    if own_source is not missing:
        return own_source
    # Consult the parent only when it is actually needed: passing the fallback
    # as a ``.get()`` default would evaluate it eagerly, loading and recursing
    # through every ancestor even for books that carry their own value.
    parent = self.parent
    return parent.cover_source() if parent else ''
168 def save(self, force_insert=False, force_update=False, **kwargs):
169 from sortify import sortify
171 self.sort_key = sortify(self.title)[:120]
172 self.title = unicode(self.title) # ???
175 author = self.authors().first().sort_key
176 except AttributeError:
178 self.sort_key_author = author
180 self.cached_author = self.tag_unicode('author')
181 self.has_audience = 'audience' in self.extra_info
183 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the detail view name together with its positional arguments.

    NOTE(review): the (view name, args) tuple suggests this is wrapped by the
    ``permalink`` decorator imported at the top of the file; the decorator
    line is not visible in this excerpt -- confirm.
    """
    url_args = [self.slug]
    return 'catalogue.views.book_detail', url_args
def create_url(slug):
    """Return the detail view name and arguments for an arbitrary ``slug``.

    Same tuple as ``get_absolute_url`` builds, but usable without a Book
    instance at hand.
    """
    url_args = [slug]
    return 'catalogue.views.book_detail', url_args
def gallery_path(self):
    """Return the gallery path computed for this book's slug."""
    # Inside a method the bare name resolves to the module-level helper
    # imported from catalogue.utils, not to this method.
    book_slug = self.slug
    return gallery_path(book_slug)
def gallery_url(self):
    """Return the gallery URL computed for this book's slug."""
    # Inside a method the bare name resolves to the module-level helper
    # imported from catalogue.utils, not to this method.
    book_slug = self.slug
    return gallery_url(book_slug)
def language_code(self):
    """Return the book's language looked up in ``constants.LANGUAGES_3TO2``.

    Codes absent from that mapping are returned exactly as stored.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the name ``settings.LANGUAGES`` gives for the book's language code.

    An empty string is returned when the code is not listed there.
    """
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    own_code = self.language_code()
    site_code = settings.LANGUAGE_CODE
    return own_code != site_code
215 def set_audio_length(self):
216 length = self.get_audio_length()
218 self.audio_length = self.format_audio_length(length)
222 def format_audio_length(seconds):
224 minutes = seconds // 60
225 seconds = seconds % 60
226 return '%d:%02d' % (minutes, seconds)
228 hours = seconds // 3600
229 minutes = seconds % 3600 // 60
230 seconds = seconds % 60
231 return '%d:%02d:%02d' % (hours, minutes, seconds)
233 def get_audio_length(self):
234 from mutagen.mp3 import MP3
236 for media in self.get_mp3() or ():
237 audio = MP3(media.file.path)
238 total += audio.info.length
241 def has_media(self, type_):
242 if type_ in Book.formats:
243 return bool(getattr(self, "%s_file" % type_))
245 return self.media.filter(type=type_).exists()
248 return self.has_media('mp3')
250 def get_media(self, type_):
251 if self.has_media(type_):
252 if type_ in Book.formats:
253 return getattr(self, "%s_file" % type_)
255 return self.media.filter(type=type_)
260 return self.get_media("mp3")
263 return self.get_media("odt")
266 return self.get_media("ogg")
269 return self.get_media("daisy")
271 def media_url(self, format_):
272 media = self.get_media(format_)
275 return reverse('embargo_link', kwargs={'slug': self.slug, 'format_': format_})
282 return self.media_url('html')
285 return self.media_url('pdf')
288 return self.media_url('epub')
291 return self.media_url('mobi')
294 return self.media_url('txt')
297 return self.media_url('fb2')
300 return self.media_url('xml')
def has_description(self):
    """Tell whether the ``description`` field holds any text."""
    description_length = len(self.description)
    return description_length > 0


# Display metadata attached to the function object.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_description.boolean = True
has_description.short_description = _('description')
def has_mp3_file(self):
    """Tell whether the book has media of type "mp3" (delegates to ``has_media``)."""
    media_type = "mp3"
    return self.has_media(media_type)


# Display metadata attached to the function object.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether the book has media of type "ogg" (delegates to ``has_media``)."""
    media_type = "ogg"
    return self.has_media(media_type)


# Display metadata attached to the function object.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether the book has media of type "daisy" (delegates to ``has_media``)."""
    media_type = "daisy"
    return self.has_media(media_type)


# Display metadata attached to the function object.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
322 def get_audiobooks(self):
324 for m in self.media.filter(type='ogg').order_by().iterator():
325 ogg_files[m.name] = m
329 for mp3 in self.media.filter(type='mp3').iterator():
330 # ogg files are always from the same project
331 meta = mp3.extra_info
332 project = meta.get('project')
335 project = u'CzytamySłuchając'
337 projects.add((project, meta.get('funded_by', '')))
341 ogg = ogg_files.get(mp3.name)
344 audiobooks.append(media)
346 projects = sorted(projects)
347 return audiobooks, projects
349 def wldocument(self, parse_dublincore=True, inherit=True):
350 from catalogue.import_utils import ORMDocProvider
351 from librarian.parser import WLDocument
353 if inherit and self.parent:
354 meta_fallbacks = self.parent.cover_info()
356 meta_fallbacks = None
358 return WLDocument.from_file(
360 provider=ORMDocProvider(self),
361 parse_dublincore=parse_dublincore,
362 meta_fallbacks=meta_fallbacks)
365 def zip_format(format_):
366 def pretty_file_name(book):
367 return "%s/%s.%s" % (
368 book.extra_info['author'],
372 field_name = "%s_file" % format_
373 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
374 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
375 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Zip this book's media files of type ``format_``.

    Collects the file path of every ``BookMedia`` row of that type belonging
    to the book and passes them to ``create_zip`` under the archive name
    ``<slug>_<format_>``. Returns whatever ``create_zip`` returns.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # The first element of each pair is left as None.
    # NOTE(review): presumably the in-archive name, chosen by create_zip -- confirm.
    paths = [(None, media.file.path) for media in media_files]
    archive_name = "%s_%s" % (self.slug, format_)
    return create_zip(paths, archive_name)
382 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
384 from search.index import Index
387 index.index_book(self, book_info)
393 index.index.rollback()
396 # will make problems in conjunction with paid previews
397 def download_pictures(self, remote_gallery_url):
398 gallery_path = self.gallery_path()
399 # delete previous files, so we don't include old files in ebooks
400 if os.path.isdir(gallery_path):
401 for filename in os.listdir(gallery_path):
402 file_path = os.path.join(gallery_path, filename)
404 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
406 makedirs(gallery_path)
407 for ilustr in ilustr_elements:
408 ilustr_src = ilustr.get('src')
409 ilustr_path = os.path.join(gallery_path, ilustr_src)
410 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
412 def load_abstract(self):
413 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
414 if abstract is not None:
415 self.abstract = transform_abstrakt(abstract)
420 def from_xml_file(cls, xml_file, **kwargs):
421 from django.core.files import File
422 from librarian import dcparser
424 # use librarian to parse meta-data
425 book_info = dcparser.parse(xml_file)
427 if not isinstance(xml_file, File):
428 xml_file = File(open(xml_file))
431 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
436 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
437 search_index_tags=True, remote_gallery_url=None, days=0):
438 if dont_build is None:
440 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
442 # check for parts before we do anything
444 if hasattr(book_info, 'parts'):
445 for part_url in book_info.parts:
447 children.append(Book.objects.get(slug=part_url.slug))
448 except Book.DoesNotExist:
449 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
452 book_slug = book_info.url.slug
453 if re.search(r'[^a-z0-9-]', book_slug):
454 raise ValueError('Invalid characters in slug')
455 book, created = Book.objects.get_or_create(slug=book_slug)
460 book.preview = bool(days)
462 book.preview_until = date.today() + timedelta(days)
465 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
466 # Save shelves for this book
467 book_shelves = list(book.tags.filter(category='set'))
468 old_cover = book.cover_info()
471 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
473 book.xml_file.set_readable(False)
475 book.language = book_info.language
476 book.title = book_info.title
477 if book_info.variant_of:
478 book.common_slug = book_info.variant_of.slug
480 book.common_slug = book.slug
481 book.extra_info = book_info.to_dict()
485 meta_tags = Tag.tags_from_info(book_info)
487 for tag in meta_tags:
488 if not tag.for_books:
492 book.tags = set(meta_tags + book_shelves)
494 cover_changed = old_cover != book.cover_info()
495 obsolete_children = set(b for b in book.children.all()
496 if b not in children)
497 notify_cover_changed = []
498 for n, child_book in enumerate(children):
499 new_child = child_book.parent != book
500 child_book.parent = book
501 child_book.parent_number = n
503 if new_child or cover_changed:
504 notify_cover_changed.append(child_book)
505 # Disown unfaithful children and let them cope on their own.
506 for child in obsolete_children:
508 child.parent_number = 0
511 notify_cover_changed.append(child)
513 cls.repopulate_ancestors()
514 tasks.update_counters.delay()
516 if remote_gallery_url:
517 book.download_pictures(remote_gallery_url)
519 # No saves beyond this point.
522 if 'cover' not in dont_build:
523 book.cover.build_delay()
524 book.cover_thumb.build_delay()
525 book.cover_api_thumb.build_delay()
526 book.simple_cover.build_delay()
528 # Build HTML and ebooks.
529 book.html_file.build_delay()
531 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
532 if format_ not in dont_build:
533 getattr(book, '%s_file' % format_).build_delay()
534 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
535 if format_ not in dont_build:
536 getattr(book, '%s_file' % format_).build_delay()
538 if not settings.NO_SEARCH_INDEX and search_index:
539 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
541 for child in notify_cover_changed:
542 child.parent_cover_changed()
544 book.save() # update sort_key_author
545 book.update_popularity()
546 cls.published.send(sender=cls, instance=book)
551 def repopulate_ancestors(cls):
552 """Fixes the ancestry cache."""
554 cursor = connection.cursor()
555 if connection.vendor == 'postgres':
556 cursor.execute("TRUNCATE catalogue_book_ancestor")
558 WITH RECURSIVE ancestry AS (
559 SELECT book.id, book.parent_id
560 FROM catalogue_book AS book
561 WHERE book.parent_id IS NOT NULL
563 SELECT ancestor.id, book.parent_id
564 FROM ancestry AS ancestor, catalogue_book AS book
565 WHERE ancestor.parent_id = book.id
566 AND book.parent_id IS NOT NULL
568 INSERT INTO catalogue_book_ancestor
569 (from_book_id, to_book_id)
575 cursor.execute("DELETE FROM catalogue_book_ancestor")
576 for b in cls.objects.exclude(parent=None):
578 while parent is not None:
579 b.ancestor.add(parent)
580 parent = parent.parent
582 def flush_includes(self, languages=True):
585 if languages is True:
586 languages = [lc for (lc, _ln) in settings.LANGUAGES]
588 template % (self.pk, lang)
590 '/katalog/b/%d/mini.%s.html',
591 '/katalog/b/%d/mini_nolink.%s.html',
592 '/katalog/b/%d/short.%s.html',
593 '/katalog/b/%d/wide.%s.html',
594 '/api/include/book/%d.%s.json',
595 '/api/include/book/%d.%s.xml',
597 for lang in languages
600 def cover_info(self, inherit=True):
601 """Returns a dictionary to serve as fallback for BookInfo.
603 For now, the only thing inherited is the cover image.
607 for field in ('cover_url', 'cover_by', 'cover_source'):
608 val = self.extra_info.get(field)
613 if inherit and need and self.parent is not None:
614 parent_info = self.parent.cover_info()
615 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags, with usage counts, for fragments of this book and its descendants.

    Fragments are taken from the book itself and from every book that lists
    it in ``ancestor``.
    """
    in_this_book = models.Q(book=self)
    in_descendants = models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(in_this_book | in_descendants)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """React to a change of the parent book's cover image.

    Nothing happens when the book has cover data of its own
    (``cover_info(inherit=False)`` is non-empty). Otherwise the cover
    images and the ebook formats listed in
    ``constants.EBOOK_FORMATS_WITH_COVERS`` are queued for rebuilding,
    except for anything named in ``app_settings.DONT_BUILD``, and the
    notification is passed on to every child.
    """
    if self.cover_info(inherit=False):
        # Own cover data present: nothing is inherited from the parent.
        return

    if 'cover' not in app_settings.DONT_BUILD:
        self.cover.build_delay()
        self.cover_thumb.build_delay()
        self.cover_api_thumb.build_delay()
        self.simple_cover.build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        ebook_field = getattr(self, '%s_file' % format_)
        ebook_field.build_delay()

    # Children inherit through this book, so they must be told as well.
    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's ``common_slug``.

    The book itself is excluded from the result.
    """
    model = type(self)
    same_work = model.objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
645 while parent is not None:
646 books.insert(0, parent)
647 parent = parent.parent
650 def pretty_title(self, html_links=False):
651 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
652 books = self.parents() + [self]
653 names.extend([(b.title, b.get_absolute_url()) for b in books])
656 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
658 names = [tag[0] for tag in names]
659 return ', '.join(names)
662 publisher = self.extra_info['publisher']
663 if isinstance(publisher, basestring):
665 elif isinstance(publisher, list):
666 return ', '.join(publisher)
669 def tagged_top_level(cls, tags):
670 """ Returns top-level books tagged with `tags`.
672 It only returns those books which don't have ancestors which are
673 also tagged with those tags.
676 objects = cls.tagged.with_all(tags)
677 return objects.exclude(ancestor__in=objects)
680 def book_list(cls, book_filter=None):
681 """Generates a hierarchical listing of all books.
683 Books are optionally filtered with a test function.
688 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
690 books = books.filter(book_filter).distinct()
692 book_ids = set(b['pk'] for b in books.values("pk").iterator())
693 for book in books.iterator():
694 parent = book.parent_id
695 if parent not in book_ids:
697 books_by_parent.setdefault(parent, []).append(book)
699 for book in books.iterator():
700 books_by_parent.setdefault(book.parent_id, []).append(book)
703 books_by_author = OrderedDict()
704 for tag in Tag.objects.filter(category='author').iterator():
705 books_by_author[tag] = []
707 for book in books_by_parent.get(None, ()):
708 authors = list(book.authors().only('pk'))
710 for author in authors:
711 books_by_author[author].append(book)
715 return books_by_author, orphans, books_by_parent
718 "SP": (1, u"szkoła podstawowa"),
719 "SP1": (1, u"szkoła podstawowa"),
720 "SP2": (1, u"szkoła podstawowa"),
721 "SP3": (1, u"szkoła podstawowa"),
722 "P": (1, u"szkoła podstawowa"),
723 "G": (2, u"gimnazjum"),
725 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return the distinct audience labels of the book, in rank order.

    Each code from ``extra_info['audiences']`` is translated through
    ``_audiences_pl`` into a ``(rank, label)`` pair; duplicates collapse
    and the labels come back sorted by that pair.
    """
    codes = self.extra_info.get('audiences', [])
    # Codes missing from the table keep their raw text and get rank 99.
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [entry[1] for entry in sorted(ranked)]
733 def stage_note(self):
734 stage = self.extra_info.get('stage')
735 if stage and stage < '0.4':
736 return (_('This work needs modernisation'),
737 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
741 def choose_fragment(self):
742 fragments = self.fragments.order_by()
743 fragments_count = fragments.count()
744 if not fragments_count and self.children.exists():
745 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
746 fragments_count = fragments.count()
748 return fragments[randint(0, fragments_count - 1)]
750 return self.parent.choose_fragment()
754 def fragment_data(self):
755 fragment = self.choose_fragment()
757 return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
761 def update_popularity(self):
762 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
764 pop = self.popularity
767 except BookPopularity.DoesNotExist:
768 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Return the ridero.eu URL for this book in the active language.

    The URL uses the book's slug with dashes turned into underscores,
    prefixed with ``wl_``.
    """
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), ridero_slug)
774 def add_file_fields():
775 for format_ in Book.formats:
776 field_name = "%s_file" % format_
777 # This weird globals() assignment makes Django migrations comfortable.
778 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
779 _upload_to.__name__ = '_%s_upload_to' % format_
780 globals()[_upload_to.__name__] = _upload_to
783 format_, _("%s file" % format_.upper()),
784 upload_to=_upload_to,
785 storage=bofh_storage,
789 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    """Popularity counter kept in a one-to-one row per book.

    ``Book.update_popularity()`` computes ``count`` as the number of distinct
    users among the book's tags of category 'set', and creates the row when
    the book has none yet.
    """
    # Reachable from the book as ``book.popularity``.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0, db_index=True)