1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from random import randint
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
19 from fnpdjango.storage import BofhFileSystemStorage
20 from ssify import flush_ssi_includes
22 from librarian.html import transform_abstrakt
23 from newtagging import managers
24 from catalogue import constants
25 from catalogue.fields import EbookField
26 from catalogue.models import Tag, Fragment, BookMedia
27 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
28 from catalogue.models.tag import prefetched_relations
29 from catalogue import app_settings
30 from catalogue import tasks
31 from wolnelektury.utils import makedirs
33 bofh_storage = BofhFileSystemStorage()
37 class UploadToPath(object):
38 def __init__(self, path):
41 def __call__(self, instance, filename):
42 return self.path % instance.slug
# Upload-path builders for the cover image fields of Book.
# Each wraps a path template whose single ``%s`` placeholder is filled
# with the slug of the instance being saved (see UploadToPath.__call__).
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap an ebook path template in an ``UploadToPath`` callable.

    :param upload_path: path template handed unchanged to ``UploadToPath``
    :return: the newly created ``UploadToPath`` instance
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
55 class Book(models.Model):
56 """Represents a book imported from WL-XML."""
57 title = models.CharField(_('title'), max_length=32767)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
59 sort_key_author = models.CharField(
60 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
61 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
62 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
63 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
64 description = models.TextField(_('description'), blank=True)
65 abstract = models.TextField(_('abstract'), blank=True)
66 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
67 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
68 parent_number = models.IntegerField(_('parent number'), default=0)
69 extra_info = jsonfield.JSONField(_('extra information'), default={})
70 gazeta_link = models.CharField(blank=True, max_length=240)
71 wiki_link = models.CharField(blank=True, max_length=240)
72 print_on_demand = models.BooleanField(_('print on demand'), default=False)
73 recommended = models.BooleanField(_('recommended'), default=False)
74 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
76 # files generated during publication
79 null=True, blank=True,
80 upload_to=_cover_upload_to,
81 storage=bofh_storage, max_length=255)
82 # Cleaner version of cover for thumbs
83 cover_thumb = EbookField(
84 'cover_thumb', _('cover thumbnail'),
85 null=True, blank=True,
86 upload_to=_cover_thumb_upload_to,
88 cover_api_thumb = EbookField(
89 'cover_api_thumb', _('cover thumbnail for mobile app'),
90 null=True, blank=True,
91 upload_to=_cover_api_thumb_upload_to,
93 simple_cover = EbookField(
94 'simple_cover', _('cover for mobile app'),
95 null=True, blank=True,
96 upload_to=_simple_cover_upload_to,
98 ebook_formats = constants.EBOOK_FORMATS
99 formats = ebook_formats + ['html', 'xml']
101 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
102 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
104 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
105 has_audience = models.BooleanField(default=False)
107 objects = models.Manager()
108 tagged = managers.ModelTaggedItemManager(Tag)
109 tags = managers.TagDescriptor(Tag)
110 tag_relations = GenericRelation(Tag.intermediary_table_model)
112 html_built = django.dispatch.Signal()
113 published = django.dispatch.Signal()
115 short_html_url_name = 'catalogue_book_short'
117 class AlreadyExists(Exception):
121 ordering = ('sort_key_author', 'sort_key')
122 verbose_name = _('book')
123 verbose_name_plural = _('books')
124 app_label = 'catalogue'
126 def __unicode__(self):
129 def get_initial(self):
131 return re.search(r'\w', self.title, re.U).group(0)
132 except AttributeError:
136 return self.tags.filter(category='author')
138 def tag_unicode(self, category):
139 relations = prefetched_relations(self, category)
141 return ', '.join(rel.tag.name for rel in relations)
143 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return this book's tags as grouped by ``split_tags``.

    Tags belonging to the 'set' and 'theme' categories are left out
    before the remaining ones are handed to ``split_tags``.
    """
    hidden_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=hidden_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in the ``cached_author`` field."""
    cached = self.cached_author
    return cached
151 def translator(self):
152 translators = self.extra_info.get('translators')
155 if len(translators) > 3:
156 translators = translators[:2]
160 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the source of this book's cover image.

    The book's own ``extra_info['cover_source']`` is returned whenever that
    key is present.  Otherwise the value is inherited from the parent
    book, and an empty string is returned for a book without a parent.
    """
    own_info = self.extra_info
    # Check our own metadata first.  Passing the inherited value as an
    # eager ``dict.get`` default would walk the whole parent chain (and
    # load every parent) even when the result is then thrown away.
    if 'cover_source' in own_info:
        return own_info['cover_source']
    if self.parent:
        return self.parent.cover_source()
    return ''
165 def save(self, force_insert=False, force_update=False, **kwargs):
166 from sortify import sortify
168 self.sort_key = sortify(self.title)[:120]
169 self.title = unicode(self.title) # ???
172 author = self.authors().first().sort_key
173 except AttributeError:
175 self.sort_key_author = author
177 self.cached_author = self.tag_unicode('author')
178 self.has_audience = 'audience' in self.extra_info
180 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    """Return the ``(view name, args)`` pair of this book's detail page.

    The args list holds the book's slug as its only element.
    """
    # NOTE(review): the pair format looks like what Django's ``permalink``
    # decorator (imported by this module) consumes - confirm the decorator
    # is applied where this method is defined.
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return view_name, view_args
def create_url(slug):
    """Return the ``(view name, args)`` pair of a book detail page.

    Unlike ``get_absolute_url`` this needs only a slug, not a book object.

    :param slug: slug of the book to link to
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [slug]
    return view_name, view_args
def gallery_path(self):
    """Return the gallery path computed from this book's slug.

    Delegates to the ``gallery_path`` helper imported from
    ``catalogue.utils``.
    """
    book_slug = self.slug
    return gallery_path(book_slug)
def gallery_url(self):
    """Return the gallery URL computed from this book's slug.

    Delegates to the ``gallery_url`` helper imported from
    ``catalogue.utils``.
    """
    book_slug = self.slug
    return gallery_url(book_slug)
def language_code(self):
    """Return the book's language translated via ``LANGUAGES_3TO2``.

    When ``self.language`` has no entry in ``constants.LANGUAGES_3TO2``
    the stored value is returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the name listed in ``settings.LANGUAGES`` for this book.

    The lookup key is the value of ``language_code()``; an empty string
    is returned when that code is not listed.
    """
    names_by_code = dict(settings.LANGUAGES)
    code = self.language_code()
    return names_by_code.get(code, "")
def is_foreign(self):
    """Tell whether the book's language differs from the site's.

    Compares ``language_code()`` with ``settings.LANGUAGE_CODE``.
    """
    book_code = self.language_code()
    site_code = settings.LANGUAGE_CODE
    return book_code != site_code
212 def set_audio_length(self):
213 length = self.get_audio_length()
215 self.audio_length = self.format_audio_length(length)
219 def format_audio_length(seconds):
221 minutes = seconds // 60
222 seconds = seconds % 60
223 return '%d:%02d' % (minutes, seconds)
225 hours = seconds // 3600
226 minutes = seconds % 3600 // 60
227 seconds = seconds % 60
228 return '%d:%02d:%02d' % (hours, minutes, seconds)
230 def get_audio_length(self):
231 from mutagen.mp3 import MP3
233 for media in self.get_mp3():
234 audio = MP3(media.file.path)
235 total += audio.info.length
238 def has_media(self, type_):
239 if type_ in Book.formats:
240 return bool(getattr(self, "%s_file" % type_))
242 return self.media.filter(type=type_).exists()
245 return self.has_media('mp3')
247 def get_media(self, type_):
248 if self.has_media(type_):
249 if type_ in Book.formats:
250 return getattr(self, "%s_file" % type_)
252 return self.media.filter(type=type_)
257 return self.get_media("mp3")
260 return self.get_media("odt")
263 return self.get_media("ogg")
266 return self.get_media("daisy")
def has_description(self):
    """Tell whether the book's description is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Display metadata attached to the method: a translated label and a
# flag marking the result as a boolean.
has_description.short_description = _('description')
has_description.boolean = True
def has_mp3_file(self):
    """Tell whether the book has any media of type "mp3"."""
    media_type = "mp3"
    return self.has_media(media_type)
# Display metadata attached to the method: a label and a boolean flag.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether the book has any media of type "ogg"."""
    media_type = "ogg"
    return self.has_media(media_type)
# Display metadata attached to the method: a label and a boolean flag.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether the book has any media of type "daisy"."""
    media_type = "daisy"
    return self.has_media(media_type)
# Display metadata attached to the method: a label and a boolean flag.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
288 def get_audiobooks(self):
290 for m in self.media.filter(type='ogg').order_by().iterator():
291 ogg_files[m.name] = m
295 for mp3 in self.media.filter(type='mp3').iterator():
296 # ogg files are always from the same project
297 meta = mp3.extra_info
298 project = meta.get('project')
301 project = u'CzytamySłuchając'
303 projects.add((project, meta.get('funded_by', '')))
307 ogg = ogg_files.get(mp3.name)
310 audiobooks.append(media)
312 projects = sorted(projects)
313 return audiobooks, projects
315 def wldocument(self, parse_dublincore=True, inherit=True):
316 from catalogue.import_utils import ORMDocProvider
317 from librarian.parser import WLDocument
319 if inherit and self.parent:
320 meta_fallbacks = self.parent.cover_info()
322 meta_fallbacks = None
324 return WLDocument.from_file(
326 provider=ORMDocProvider(self),
327 parse_dublincore=parse_dublincore,
328 meta_fallbacks=meta_fallbacks)
331 def zip_format(format_):
332 def pretty_file_name(book):
333 return "%s/%s.%s" % (
334 book.extra_info['author'],
338 field_name = "%s_file" % format_
339 books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
340 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
341 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Create a zip archive with this book's media files of one type.

    :param format_: media type to pack, matched against ``BookMedia.type``
    :return: the result of ``create_zip`` for the archive named
        ``<slug>_<format_>``
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Build a real list instead of ``map(lambda bm: ...)``: the lambda
    # parameter used to shadow the queryset name, and on Python 3 ``map``
    # yields a lazy iterator that can be consumed only once.  Each entry
    # keeps ``None`` in the first slot, exactly as before.
    paths = [(None, media.file.path) for media in media_files]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
348 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
350 from search.index import Index
353 index.index_book(self, book_info)
359 index.index.rollback()
362 def download_pictures(self, remote_gallery_url):
363 gallery_path = self.gallery_path()
364 # delete previous files, so we don't include old files in ebooks
365 if os.path.isdir(gallery_path):
366 for filename in os.listdir(gallery_path):
367 file_path = os.path.join(gallery_path, filename)
369 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
371 makedirs(gallery_path)
372 for ilustr in ilustr_elements:
373 ilustr_src = ilustr.get('src')
374 ilustr_path = os.path.join(gallery_path, ilustr_src)
375 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
377 def load_abstract(self):
378 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
379 if abstract is not None:
380 self.abstract = transform_abstrakt(abstract)
385 def from_xml_file(cls, xml_file, **kwargs):
386 from django.core.files import File
387 from librarian import dcparser
389 # use librarian to parse meta-data
390 book_info = dcparser.parse(xml_file)
392 if not isinstance(xml_file, File):
393 xml_file = File(open(xml_file))
396 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
401 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
402 search_index_tags=True, remote_gallery_url=None):
403 if dont_build is None:
405 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
407 # check for parts before we do anything
409 if hasattr(book_info, 'parts'):
410 for part_url in book_info.parts:
412 children.append(Book.objects.get(slug=part_url.slug))
413 except Book.DoesNotExist:
414 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
417 book_slug = book_info.url.slug
418 if re.search(r'[^a-z0-9-]', book_slug):
419 raise ValueError('Invalid characters in slug')
420 book, created = Book.objects.get_or_create(slug=book_slug)
427 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
428 # Save shelves for this book
429 book_shelves = list(book.tags.filter(category='set'))
430 old_cover = book.cover_info()
433 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
435 book.language = book_info.language
436 book.title = book_info.title
437 if book_info.variant_of:
438 book.common_slug = book_info.variant_of.slug
440 book.common_slug = book.slug
441 book.extra_info = book_info.to_dict()
445 meta_tags = Tag.tags_from_info(book_info)
447 for tag in meta_tags:
448 if not tag.for_books:
452 book.tags = set(meta_tags + book_shelves)
454 cover_changed = old_cover != book.cover_info()
455 obsolete_children = set(b for b in book.children.all()
456 if b not in children)
457 notify_cover_changed = []
458 for n, child_book in enumerate(children):
459 new_child = child_book.parent != book
460 child_book.parent = book
461 child_book.parent_number = n
463 if new_child or cover_changed:
464 notify_cover_changed.append(child_book)
465 # Disown unfaithful children and let them cope on their own.
466 for child in obsolete_children:
468 child.parent_number = 0
471 notify_cover_changed.append(child)
473 cls.repopulate_ancestors()
474 tasks.update_counters.delay()
476 if remote_gallery_url:
477 book.download_pictures(remote_gallery_url)
479 # No saves beyond this point.
482 if 'cover' not in dont_build:
483 book.cover.build_delay()
484 book.cover_thumb.build_delay()
485 book.cover_api_thumb.build_delay()
486 book.simple_cover.build_delay()
488 # Build HTML and ebooks.
489 book.html_file.build_delay()
491 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
492 if format_ not in dont_build:
493 getattr(book, '%s_file' % format_).build_delay()
494 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
495 if format_ not in dont_build:
496 getattr(book, '%s_file' % format_).build_delay()
498 if not settings.NO_SEARCH_INDEX and search_index:
499 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
501 for child in notify_cover_changed:
502 child.parent_cover_changed()
504 book.save() # update sort_key_author
505 book.update_popularity()
506 cls.published.send(sender=cls, instance=book)
511 def repopulate_ancestors(cls):
512 """Fixes the ancestry cache."""
514 cursor = connection.cursor()
515 if connection.vendor == 'postgres':
516 cursor.execute("TRUNCATE catalogue_book_ancestor")
518 WITH RECURSIVE ancestry AS (
519 SELECT book.id, book.parent_id
520 FROM catalogue_book AS book
521 WHERE book.parent_id IS NOT NULL
523 SELECT ancestor.id, book.parent_id
524 FROM ancestry AS ancestor, catalogue_book AS book
525 WHERE ancestor.parent_id = book.id
526 AND book.parent_id IS NOT NULL
528 INSERT INTO catalogue_book_ancestor
529 (from_book_id, to_book_id)
535 cursor.execute("DELETE FROM catalogue_book_ancestor")
536 for b in cls.objects.exclude(parent=None):
538 while parent is not None:
539 b.ancestor.add(parent)
540 parent = parent.parent
542 def flush_includes(self, languages=True):
545 if languages is True:
546 languages = [lc for (lc, _ln) in settings.LANGUAGES]
548 template % (self.pk, lang)
550 '/katalog/b/%d/mini.%s.html',
551 '/katalog/b/%d/mini_nolink.%s.html',
552 '/katalog/b/%d/short.%s.html',
553 '/katalog/b/%d/wide.%s.html',
554 '/api/include/book/%d.%s.json',
555 '/api/include/book/%d.%s.xml',
557 for lang in languages
560 def cover_info(self, inherit=True):
561 """Returns a dictionary to serve as fallback for BookInfo.
563 For now, the only thing inherited is the cover image.
567 for field in ('cover_url', 'cover_by', 'cover_source'):
568 val = self.extra_info.get(field)
573 if inherit and need and self.parent is not None:
574 parent_info = self.parent.cover_info()
575 parent_info.update(info)
def related_themes(self):
    """Return theme tags used by fragments of this book or its descendants.

    Tag usage is computed with ``counts=True`` over the fragments whose
    book is this one or has this one among its ancestors, and is then
    narrowed to the 'theme' category.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """Called when parent book's cover image is changed.

    A book that has cover info of its own (``cover_info(inherit=False)``
    is non-empty) is left alone.  Otherwise rebuilds are scheduled for the
    cover images and for every format in
    ``constants.EBOOK_FORMATS_WITH_COVERS``, skipping whatever is listed in
    ``app_settings.DONT_BUILD``, and the notification is passed on to each
    child book.
    """
    if self.cover_info(inherit=False):
        return

    skipped = app_settings.DONT_BUILD
    if 'cover' not in skipped:
        cover_fields = ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover')
        for cover_field in cover_fields:
            getattr(self, cover_field).build_delay()

    for ebook_format in constants.EBOOK_FORMATS_WITH_COVERS:
        if ebook_format in skipped:
            continue
        getattr(self, '%s_file' % ebook_format).build_delay()

    for child_book in self.children.all():
        child_book.parent_cover_changed()
def other_versions(self):
    """Find other versions (i.e. in other languages) of the book.

    Returns the objects of this book's own model class that share its
    ``common_slug``, with the book itself excluded by primary key.
    """
    model = type(self)
    same_work = model.objects.filter(common_slug=self.common_slug)
    return same_work.exclude(pk=self.pk)
605 while parent is not None:
606 books.insert(0, parent)
607 parent = parent.parent
610 def pretty_title(self, html_links=False):
611 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
612 books = self.parents() + [self]
613 names.extend([(b.title, b.get_absolute_url()) for b in books])
616 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
618 names = [tag[0] for tag in names]
619 return ', '.join(names)
622 publisher = self.extra_info['publisher']
623 if isinstance(publisher, basestring):
625 elif isinstance(publisher, list):
626 return ', '.join(publisher)
629 def tagged_top_level(cls, tags):
630 """ Returns top-level books tagged with `tags`.
632 It only returns those books which don't have ancestors which are
633 also tagged with those tags.
636 objects = cls.tagged.with_all(tags)
637 return objects.exclude(ancestor__in=objects)
640 def book_list(cls, book_filter=None):
641 """Generates a hierarchical listing of all books.
643 Books are optionally filtered with a test function.
648 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
650 books = books.filter(book_filter).distinct()
652 book_ids = set(b['pk'] for b in books.values("pk").iterator())
653 for book in books.iterator():
654 parent = book.parent_id
655 if parent not in book_ids:
657 books_by_parent.setdefault(parent, []).append(book)
659 for book in books.iterator():
660 books_by_parent.setdefault(book.parent_id, []).append(book)
663 books_by_author = OrderedDict()
664 for tag in Tag.objects.filter(category='author').iterator():
665 books_by_author[tag] = []
667 for book in books_by_parent.get(None, ()):
668 authors = list(book.authors().only('pk'))
670 for author in authors:
671 books_by_author[author].append(book)
675 return books_by_author, orphans, books_by_parent
678 "SP": (1, u"szkoła podstawowa"),
679 "SP1": (1, u"szkoła podstawowa"),
680 "SP2": (1, u"szkoła podstawowa"),
681 "SP3": (1, u"szkoła podstawowa"),
682 "P": (1, u"szkoła podstawowa"),
683 "G": (2, u"gimnazjum"),
685 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return the names of this book's audiences, ordered by rank.

    Each code found under ``extra_info['audiences']`` is translated through
    ``self._audiences_pl`` into a ``(rank, name)`` pair; a code missing
    from that table keeps its raw value as the name, with rank 99.
    Duplicate pairs are collapsed and the names are returned sorted by
    rank, then by name.
    """
    codes = self.extra_info.get('audiences', [])
    ranked = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [name for _rank, name in sorted(ranked)]
693 def stage_note(self):
694 stage = self.extra_info.get('stage')
695 if stage and stage < '0.4':
696 return (_('This work needs modernisation'),
697 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
701 def choose_fragment(self):
702 fragments = self.fragments.order_by()
703 fragments_count = fragments.count()
704 if not fragments_count and self.children.exists():
705 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
706 fragments_count = fragments.count()
708 return fragments[randint(0, fragments_count - 1)]
710 return self.parent.choose_fragment()
714 def fragment_data(self):
715 fragment = self.choose_fragment()
717 return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
721 def update_popularity(self):
722 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
724 pop = self.popularity
727 except BookPopularity.DoesNotExist:
728 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL for this book.

    The value of ``get_language()`` becomes the first path segment; the
    book's slug, with hyphens replaced by underscores, follows the ``wl_``
    prefix.
    """
    language = get_language()
    ridero_id = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_id)
734 def add_file_fields():
735 for format_ in Book.formats:
736 field_name = "%s_file" % format_
737 # This weird globals() assignment makes Django migrations comfortable.
738 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
739 _upload_to.__name__ = '_%s_upload_to' % format_
740 globals()[_upload_to.__name__] = _upload_to
743 format_, _("%s file" % format_.upper()),
744 upload_to=_upload_to,
745 storage=bofh_storage,
749 ).contribute_to_class(Book, field_name)
754 class BookPopularity(models.Model):
755 book = models.OneToOneField(Book, related_name='popularity')
756 count = models.IntegerField(default=0, db_index=True)