1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from collections import OrderedDict
6 from datetime import date, timedelta
7 from random import randint
11 from django.conf import settings
12 from django.db import connection, models, transaction
13 from django.db.models import permalink
14 import django.dispatch
15 from django.contrib.contenttypes.fields import GenericRelation
16 from django.core.urlresolvers import reverse
17 from django.utils.translation import ugettext_lazy as _, get_language
18 from django.utils.deconstruct import deconstructible
20 from fnpdjango.storage import BofhFileSystemStorage
21 from ssify import flush_ssi_includes
23 from librarian.cover import WLCover
24 from librarian.html import transform_abstrakt
25 from newtagging import managers
26 from catalogue import constants
27 from catalogue.fields import EbookField
28 from catalogue.models import Tag, Fragment, BookMedia
29 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags, get_random_hash
30 from catalogue.models.tag import prefetched_relations
31 from catalogue import app_settings
32 from catalogue import tasks
33 from wolnelektury.utils import makedirs
# Single storage instance shared by this module; it is passed as `storage=`
# to the generated-file fields declared on Book.
bofh_storage = BofhFileSystemStorage()
39 class UploadToPath(object):
40 def __init__(self, path):
43 def __call__(self, instance, filename):
44 return self.path % instance.slug
# Upload-path builders for the cover image fields. Each template's %s is
# filled with the book's slug when the UploadToPath instance is called.
_cover_upload_to = UploadToPath('book/cover/%s.jpg')
_cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
_cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
_simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
def _ebook_upload_to(upload_path):
    """Wrap a ``%``-style path template in an ``UploadToPath`` callable.

    :param upload_path: template handed unchanged to ``UploadToPath``.
    :return: the new ``UploadToPath`` instance.
    """
    path_builder = UploadToPath(upload_path)
    return path_builder
57 class Book(models.Model):
58 """Represents a book imported from WL-XML."""
59 title = models.CharField(_('title'), max_length=32767)
60 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
61 sort_key_author = models.CharField(
62 _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
63 slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
64 common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
65 language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
66 description = models.TextField(_('description'), blank=True)
67 abstract = models.TextField(_('abstract'), blank=True)
68 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
69 changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
70 parent_number = models.IntegerField(_('parent number'), default=0)
71 extra_info = jsonfield.JSONField(_('extra information'), default={})
72 gazeta_link = models.CharField(blank=True, max_length=240)
73 wiki_link = models.CharField(blank=True, max_length=240)
74 print_on_demand = models.BooleanField(_('print on demand'), default=False)
75 recommended = models.BooleanField(_('recommended'), default=False)
76 audio_length = models.CharField(_('audio length'), blank=True, max_length=8)
77 preview = models.BooleanField(_('preview'), default=False)
78 preview_until = models.DateField(_('preview until'), blank=True, null=True)
79 preview_key = models.CharField(max_length=32, blank=True, null=True)
81 # files generated during publication
84 null=True, blank=True,
85 upload_to=_cover_upload_to,
86 storage=bofh_storage, max_length=255)
87 # Cleaner version of cover for thumbs
88 cover_thumb = EbookField(
89 'cover_thumb', _('cover thumbnail'),
90 null=True, blank=True,
91 upload_to=_cover_thumb_upload_to,
93 cover_api_thumb = EbookField(
94 'cover_api_thumb', _('cover thumbnail for mobile app'),
95 null=True, blank=True,
96 upload_to=_cover_api_thumb_upload_to,
98 simple_cover = EbookField(
99 'simple_cover', _('cover for mobile app'),
100 null=True, blank=True,
101 upload_to=_simple_cover_upload_to,
103 ebook_formats = constants.EBOOK_FORMATS
104 formats = ebook_formats + ['html', 'xml']
106 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
107 ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
109 cached_author = models.CharField(blank=True, max_length=240, db_index=True)
110 has_audience = models.BooleanField(default=False)
112 objects = models.Manager()
113 tagged = managers.ModelTaggedItemManager(Tag)
114 tags = managers.TagDescriptor(Tag)
115 tag_relations = GenericRelation(Tag.intermediary_table_model)
117 html_built = django.dispatch.Signal()
118 published = django.dispatch.Signal()
122 class AlreadyExists(Exception):
126 ordering = ('sort_key_author', 'sort_key')
127 verbose_name = _('book')
128 verbose_name_plural = _('books')
129 app_label = 'catalogue'
134 def get_initial(self):
136 return re.search(r'\w', self.title, re.U).group(0)
137 except AttributeError:
141 return self.tags.filter(category='author')
144 return self.tags.filter(category='epoch')
147 return self.tags.filter(category='genre')
150 return self.tags.filter(category='kind')
152 def tag_unicode(self, category):
153 relations = prefetched_relations(self, category)
155 return ', '.join(rel.tag.name for rel in relations)
157 return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
def tags_by_category(self):
    """Return the result of ``split_tags`` for this book's tags.

    Tags whose category is 'set' or 'theme' are excluded first.
    """
    excluded_categories = ('set', 'theme')
    visible_tags = self.tags.exclude(category__in=excluded_categories)
    return split_tags(visible_tags)
def author_unicode(self):
    """Return the author string stored in ``cached_author``."""
    cached = self.cached_author
    return cached
def kind_unicode(self):
    """Return what ``tag_unicode`` produces for the 'kind' category."""
    category = 'kind'
    return self.tag_unicode(category)
def epoch_unicode(self):
    """Return what ``tag_unicode`` produces for the 'epoch' category."""
    category = 'epoch'
    return self.tag_unicode(category)
def genre_unicode(self):
    """Return what ``tag_unicode`` produces for the 'genre' category."""
    category = 'genre'
    return self.tag_unicode(category)
174 def translator(self):
175 translators = self.extra_info.get('translators')
178 if len(translators) > 3:
179 translators = translators[:2]
183 return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
def cover_source(self):
    """Return the book's cover source, inheriting from the parent if unset.

    :return: ``extra_info['cover_source']`` when that key is present
        (whatever its value); otherwise the parent's ``cover_source()``;
        otherwise an empty string when there is no parent.
    """
    info = self.extra_info
    # Look the key up first: the parent chain is only walked when this book
    # really has no value of its own, instead of on every call.
    if 'cover_source' in info:
        return info['cover_source']
    parent = self.parent
    return parent.cover_source() if parent else ''
188 def save(self, force_insert=False, force_update=False, **kwargs):
189 from sortify import sortify
191 self.sort_key = sortify(self.title)[:120]
192 self.title = str(self.title) # ???
195 author = self.authors().first().sort_key
196 except AttributeError:
198 self.sort_key_author = author
200 self.cached_author = self.tag_unicode('author')
201 self.has_audience = 'audience' in self.extra_info
203 if self.preview and not self.preview_key:
204 self.preview_key = get_random_hash(self.slug)[:32]
206 ret = super(Book, self).save(force_insert, force_update, **kwargs)
def get_absolute_url(self):
    # Returns a (URL pattern name, positional args) pair, not a URL string.
    # NOTE(review): presumably wrapped by the `permalink` decorator imported
    # at the top of the file, which would turn this pair into a URL; the
    # decorator line is not visible here, so confirm.
    return 'book_detail', [self.slug]
def gallery_path(self):
    """Return what the module-level ``gallery_path`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_path(slug)
def gallery_url(self):
    """Return what the module-level ``gallery_url`` helper gives for this book's slug."""
    slug = self.slug
    return gallery_url(slug)
def language_code(self):
    """Translate ``self.language`` through ``constants.LANGUAGES_3TO2``.

    A code that has no entry in the mapping is returned unchanged.
    """
    stored_code = self.language
    return constants.LANGUAGES_3TO2.get(stored_code, stored_code)
def language_name(self):
    """Return the ``settings.LANGUAGES`` name for this book's language code.

    An empty string is returned when the code is not listed there.
    """
    names_by_code = dict(settings.LANGUAGES)
    return names_by_code.get(self.language_code(), "")
def is_foreign(self):
    """Tell whether the book's language code differs from ``settings.LANGUAGE_CODE``."""
    book_code = self.language_code()
    return book_code != settings.LANGUAGE_CODE
233 def set_audio_length(self):
234 length = self.get_audio_length()
236 self.audio_length = self.format_audio_length(length)
240 def format_audio_length(seconds):
242 minutes = seconds // 60
243 seconds = seconds % 60
244 return '%d:%02d' % (minutes, seconds)
246 hours = seconds // 3600
247 minutes = seconds % 3600 // 60
248 seconds = seconds % 60
249 return '%d:%02d:%02d' % (hours, minutes, seconds)
251 def get_audio_length(self):
253 for media in self.get_mp3() or ():
254 total += app_settings.GET_MP3_LENGTH(media.file.path)
257 def has_media(self, type_):
258 if type_ in Book.formats:
259 return bool(getattr(self, "%s_file" % type_))
261 return self.media.filter(type=type_).exists()
264 return self.has_media('mp3')
266 def get_media(self, type_):
267 if self.has_media(type_):
268 if type_ in Book.formats:
269 return getattr(self, "%s_file" % type_)
271 return self.media.filter(type=type_)
276 return self.get_media("mp3")
279 return self.get_media("odt")
282 return self.get_media("ogg")
285 return self.get_media("daisy")
287 def media_url(self, format_):
288 media = self.get_media(format_)
291 return reverse('embargo_link', kwargs={'key': self.preview_key, 'slug': self.slug, 'format_': format_})
298 return self.media_url('html')
301 return self.media_url('pdf')
304 return self.media_url('epub')
307 return self.media_url('mobi')
310 return self.media_url('txt')
313 return self.media_url('fb2')
316 return self.media_url('xml')
def has_description(self):
    """Tell whether ``description`` has at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Display metadata attached to the method: a translated label and a flag
# marking the result as boolean (the attribute names Django's admin reads).
has_description.boolean = True
has_description.short_description = _('description')
def has_mp3_file(self):
    """Tell whether the book has media of type 'mp3' (via ``has_media``)."""
    media_type = "mp3"
    return self.has_media(media_type)
# Display metadata attached to the method: column label and boolean flag.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether the book has media of type 'ogg' (via ``has_media``)."""
    media_type = "ogg"
    return self.has_media(media_type)
# Display metadata attached to the method: column label and boolean flag.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether the book has media of type 'daisy' (via ``has_media``)."""
    media_type = "daisy"
    return self.has_media(media_type)
# Display metadata attached to the method: column label and boolean flag.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
338 def get_audiobooks(self):
340 for m in self.media.filter(type='ogg').order_by().iterator():
341 ogg_files[m.name] = m
345 for mp3 in self.media.filter(type='mp3').iterator():
346 # ogg files are always from the same project
347 meta = mp3.extra_info
348 project = meta.get('project')
351 project = u'CzytamySłuchając'
353 projects.add((project, meta.get('funded_by', '')))
357 ogg = ogg_files.get(mp3.name)
360 audiobooks.append(media)
362 projects = sorted(projects)
363 return audiobooks, projects
365 def wldocument(self, parse_dublincore=True, inherit=True):
366 from catalogue.import_utils import ORMDocProvider
367 from librarian.parser import WLDocument
369 if inherit and self.parent:
370 meta_fallbacks = self.parent.cover_info()
372 meta_fallbacks = None
374 return WLDocument.from_file(
376 provider=ORMDocProvider(self),
377 parse_dublincore=parse_dublincore,
378 meta_fallbacks=meta_fallbacks)
381 def zip_format(format_):
382 def pretty_file_name(book):
383 return "%s/%s.%s" % (
384 book.extra_info['author'],
388 field_name = "%s_file" % format_
389 books = Book.objects.filter(parent=None).exclude(**{field_name: ""}).exclude(preview=True)
390 paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
391 return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
def zip_audiobooks(self, format_):
    """Pack this book's media files of one type into a ZIP via ``create_zip``.

    :param format_: media type, matched against ``BookMedia.type``.
    :return: whatever ``create_zip`` returns for the archive name
        ``"<slug>_<format_>"``.
    """
    media_files = BookMedia.objects.filter(book=self, type=format_)
    # Build a real list (not a lazy ``map`` object) so the entries can be
    # iterated more than once; the first tuple element stays None as before.
    paths = [(None, media.file.path) for media in media_files]
    return create_zip(paths, "%s_%s" % (self.slug, format_))
398 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
400 from search.index import Index
403 index.index_book(self, book_info)
408 except Exception as e:
409 index.index.rollback()
412 # will make problems in conjunction with paid previews
413 def download_pictures(self, remote_gallery_url):
414 gallery_path = self.gallery_path()
415 # delete previous files, so we don't include old files in ebooks
416 if os.path.isdir(gallery_path):
417 for filename in os.listdir(gallery_path):
418 file_path = os.path.join(gallery_path, filename)
420 ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
422 makedirs(gallery_path)
423 for ilustr in ilustr_elements:
424 ilustr_src = ilustr.get('src')
425 ilustr_path = os.path.join(gallery_path, ilustr_src)
426 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
428 def load_abstract(self):
429 abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
430 if abstract is not None:
431 self.abstract = transform_abstrakt(abstract)
436 def from_xml_file(cls, xml_file, **kwargs):
437 from django.core.files import File
438 from librarian import dcparser
440 # use librarian to parse meta-data
441 book_info = dcparser.parse(xml_file)
443 if not isinstance(xml_file, File):
444 xml_file = File(open(xml_file))
447 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
452 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
453 search_index_tags=True, remote_gallery_url=None, days=0):
454 if dont_build is None:
456 dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
458 # check for parts before we do anything
460 if hasattr(book_info, 'parts'):
461 for part_url in book_info.parts:
463 children.append(Book.objects.get(slug=part_url.slug))
464 except Book.DoesNotExist:
465 raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
468 book_slug = book_info.url.slug
469 if re.search(r'[^a-z0-9-]', book_slug):
470 raise ValueError('Invalid characters in slug')
471 book, created = Book.objects.get_or_create(slug=book_slug)
476 book.preview = bool(days)
478 book.preview_until = date.today() + timedelta(days)
481 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
482 # Save shelves for this book
483 book_shelves = list(book.tags.filter(category='set'))
484 old_cover = book.cover_info()
487 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
489 book.xml_file.set_readable(False)
491 book.language = book_info.language
492 book.title = book_info.title
493 if book_info.variant_of:
494 book.common_slug = book_info.variant_of.slug
496 book.common_slug = book.slug
497 book.extra_info = book_info.to_dict()
501 meta_tags = Tag.tags_from_info(book_info)
503 for tag in meta_tags:
504 if not tag.for_books:
508 book.tags = set(meta_tags + book_shelves)
510 cover_changed = old_cover != book.cover_info()
511 obsolete_children = set(b for b in book.children.all()
512 if b not in children)
513 notify_cover_changed = []
514 for n, child_book in enumerate(children):
515 new_child = child_book.parent != book
516 child_book.parent = book
517 child_book.parent_number = n
519 if new_child or cover_changed:
520 notify_cover_changed.append(child_book)
521 # Disown unfaithful children and let them cope on their own.
522 for child in obsolete_children:
524 child.parent_number = 0
527 notify_cover_changed.append(child)
529 cls.repopulate_ancestors()
530 tasks.update_counters.delay()
532 if remote_gallery_url:
533 book.download_pictures(remote_gallery_url)
535 # No saves beyond this point.
538 if 'cover' not in dont_build:
539 book.cover.build_delay()
540 book.cover_thumb.build_delay()
541 book.cover_api_thumb.build_delay()
542 book.simple_cover.build_delay()
544 # Build HTML and ebooks.
545 book.html_file.build_delay()
547 for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
548 if format_ not in dont_build:
549 getattr(book, '%s_file' % format_).build_delay()
550 for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
551 if format_ not in dont_build:
552 getattr(book, '%s_file' % format_).build_delay()
554 if not settings.NO_SEARCH_INDEX and search_index:
555 tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
557 for child in notify_cover_changed:
558 child.parent_cover_changed()
560 book.save() # update sort_key_author
561 book.update_popularity()
562 cls.published.send(sender=cls, instance=book)
567 def repopulate_ancestors(cls):
568 """Fixes the ancestry cache."""
570 cursor = connection.cursor()
571 if connection.vendor == 'postgres':
572 cursor.execute("TRUNCATE catalogue_book_ancestor")
574 WITH RECURSIVE ancestry AS (
575 SELECT book.id, book.parent_id
576 FROM catalogue_book AS book
577 WHERE book.parent_id IS NOT NULL
579 SELECT ancestor.id, book.parent_id
580 FROM ancestry AS ancestor, catalogue_book AS book
581 WHERE ancestor.parent_id = book.id
582 AND book.parent_id IS NOT NULL
584 INSERT INTO catalogue_book_ancestor
585 (from_book_id, to_book_id)
591 cursor.execute("DELETE FROM catalogue_book_ancestor")
592 for b in cls.objects.exclude(parent=None):
594 while parent is not None:
595 b.ancestor.add(parent)
596 parent = parent.parent
598 def flush_includes(self, languages=True):
601 if languages is True:
602 languages = [lc for (lc, _ln) in settings.LANGUAGES]
604 template % (self.pk, lang)
606 '/katalog/b/%d/mini.%s.html',
607 '/katalog/b/%d/mini_nolink.%s.html',
608 '/katalog/b/%d/short.%s.html',
609 '/katalog/b/%d/wide.%s.html',
610 '/api/include/book/%d.%s.json',
611 '/api/include/book/%d.%s.xml',
613 for lang in languages
616 def cover_info(self, inherit=True):
617 """Returns a dictionary to serve as fallback for BookInfo.
619 For now, the only thing inherited is the cover image.
623 for field in ('cover_url', 'cover_by', 'cover_source'):
624 val = self.extra_info.get(field)
629 if inherit and need and self.parent is not None:
630 parent_info = self.parent.cover_info()
631 parent_info.update(info)
def related_themes(self):
    """Return 'theme' tags used by fragments of this book and its descendants.

    Fragments are selected when their book is this one or has this one among
    its ancestors; tag usage is requested with ``counts=True``.
    """
    own_or_descendant = models.Q(book=self) | models.Q(book__ancestor=self)
    fragments = Fragment.objects.filter(own_or_descendant)
    tag_usage = Tag.objects.usage_for_queryset(fragments, counts=True)
    return tag_usage.filter(category='theme')
def parent_cover_changed(self):
    """Handle a change of the parent book's cover image.

    Nothing happens when ``cover_info(inherit=False)`` is non-empty.
    Otherwise ``build_delay()`` is called on the four cover fields (unless
    'cover' is in ``app_settings.DONT_BUILD``) and on the file field of
    every format in ``constants.EBOOK_FORMATS_WITH_COVERS`` that is not in
    ``DONT_BUILD``; then the same method is invoked on every child.
    """
    if self.cover_info(inherit=False):
        return

    if 'cover' not in app_settings.DONT_BUILD:
        for cover_field in ('cover', 'cover_thumb', 'cover_api_thumb', 'simple_cover'):
            getattr(self, cover_field).build_delay()

    for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
        if format_ in app_settings.DONT_BUILD:
            continue
        getattr(self, '%s_file' % format_).build_delay()

    for child in self.children.all():
        child.parent_cover_changed()
def other_versions(self):
    """Return the other books sharing this book's ``common_slug``.

    These are other versions of the book, e.g. in other languages; the book
    itself is excluded by primary key.
    """
    same_common_slug = type(self).objects.filter(common_slug=self.common_slug)
    return same_common_slug.exclude(pk=self.pk)
661 while parent is not None:
662 books.insert(0, parent)
663 parent = parent.parent
666 def pretty_title(self, html_links=False):
667 names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
668 books = self.parents() + [self]
669 names.extend([(b.title, b.get_absolute_url()) for b in books])
672 names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
674 names = [tag[0] for tag in names]
675 return ', '.join(names)
678 publisher = self.extra_info['publisher']
679 if isinstance(publisher, str):
681 elif isinstance(publisher, list):
682 return ', '.join(publisher)
685 def tagged_top_level(cls, tags):
686 """ Returns top-level books tagged with `tags`.
688 It only returns those books which don't have ancestors which are
689 also tagged with those tags.
692 objects = cls.tagged.with_all(tags)
693 return objects.exclude(ancestor__in=objects)
696 def book_list(cls, book_filter=None):
697 """Generates a hierarchical listing of all books.
699 Books are optionally filtered with a test function.
704 books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug', 'extra_info')
706 books = books.filter(book_filter).distinct()
708 book_ids = set(b['pk'] for b in books.values("pk").iterator())
709 for book in books.iterator():
710 parent = book.parent_id
711 if parent not in book_ids:
713 books_by_parent.setdefault(parent, []).append(book)
715 for book in books.iterator():
716 books_by_parent.setdefault(book.parent_id, []).append(book)
719 books_by_author = OrderedDict()
720 for tag in Tag.objects.filter(category='author').iterator():
721 books_by_author[tag] = []
723 for book in books_by_parent.get(None, ()):
724 authors = list(book.authors().only('pk'))
726 for author in authors:
727 books_by_author[author].append(book)
731 return books_by_author, orphans, books_by_parent
734 "SP": (1, u"szkoła podstawowa"),
735 "SP1": (1, u"szkoła podstawowa"),
736 "SP2": (1, u"szkoła podstawowa"),
737 "SP3": (1, u"szkoła podstawowa"),
738 "P": (1, u"szkoła podstawowa"),
739 "G": (2, u"gimnazjum"),
741 "LP": (3, u"liceum"),
def audiences_pl(self):
    """Return the de-duplicated, ordered audience labels for this book.

    Each code in ``extra_info['audiences']`` is mapped through
    ``_audiences_pl`` to a (rank, label) pair; unknown codes become
    ``(99, code)``. Labels are returned sorted by those pairs.
    """
    codes = self.extra_info.get('audiences', [])
    ranked_labels = {self._audiences_pl.get(code, (99, code)) for code in codes}
    return [label for _rank, label in sorted(ranked_labels)]
749 def stage_note(self):
750 stage = self.extra_info.get('stage')
751 if stage and stage < '0.4':
752 return (_('This work needs modernisation'),
753 reverse('infopage', args=['wymagajace-uwspolczesnienia']))
757 def choose_fragment(self):
758 fragments = self.fragments.order_by()
759 fragments_count = fragments.count()
760 if not fragments_count and self.children.exists():
761 fragments = Fragment.objects.filter(book__ancestor=self).order_by()
762 fragments_count = fragments.count()
764 return fragments[randint(0, fragments_count - 1)]
766 return self.parent.choose_fragment()
770 def fragment_data(self):
771 fragment = self.choose_fragment()
774 'title': fragment.book.pretty_title(),
775 'html': re.sub('</?blockquote[^>]*>', '', fragment.get_short_text()),
780 def update_popularity(self):
781 count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
783 pop = self.popularity
786 except BookPopularity.DoesNotExist:
787 BookPopularity.objects.create(book=self, count=count)
def ridero_link(self):
    """Build the ridero.eu URL for this book in the active language.

    The slug is used with '-' replaced by '_' and prefixed with ``wl_``.
    """
    language = get_language()
    ridero_slug = self.slug.replace('-', '_')
    return 'https://ridero.eu/%s/books/wl_%s/' % (language, ridero_slug)
def like(self, user):
    """Record that ``user`` likes this book, unless ``likes`` already says so.

    Fetches ``get_set(user, '')`` and passes it, as a one-element list, to
    ``set_sets`` for this user and book.
    """
    from social.utils import likes, get_set, set_sets
    if likes(user, self):
        return
    default_set = get_set(user, '')
    set_sets(user, self, [default_set])
def unlike(self, user):
    """Clear this book's sets for ``user`` when ``likes`` reports a like.

    Does nothing otherwise.
    """
    from social.utils import likes, set_sets
    if not likes(user, self):
        return
    set_sets(user, self, [])
def full_sort_key(self):
    """Join author sort key, title sort key and id with ``SORT_KEY_SEP``."""
    parts = (self.sort_key_author, self.sort_key, str(self.id))
    return self.SORT_KEY_SEP.join(parts)
def cover_color(self):
    """Return the ``WLCover.epoch_colors`` entry for the book's epoch.

    The epoch is read from ``extra_info['epoch']``; '#000000' is returned
    when there is no matching entry.
    """
    epoch = self.extra_info.get('epoch')
    return WLCover.epoch_colors.get(epoch, '#000000')
810 def add_file_fields():
811 for format_ in Book.formats:
812 field_name = "%s_file" % format_
813 # This weird globals() assignment makes Django migrations comfortable.
814 _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
815 _upload_to.__name__ = '_%s_upload_to' % format_
816 globals()[_upload_to.__name__] = _upload_to
819 format_, _("%s file" % format_.upper()),
820 upload_to=_upload_to,
821 storage=bofh_storage,
825 ).contribute_to_class(Book, field_name)
class BookPopularity(models.Model):
    # One row per book, reachable from a Book instance as `book.popularity`.
    book = models.OneToOneField(Book, related_name='popularity')
    # Integer popularity value, indexed in the database; defaults to 0.
    count = models.IntegerField(default=0, db_index=True)