1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
23 from mutagen import id3
24 from slughifi import slughifi
28 ('author', _('author')),
29 ('epoch', _('epoch')),
31 ('genre', _('genre')),
32 ('theme', _('theme')),
38 ('odt', _('ODT file')),
39 ('mp3', _('MP3 file')),
40 ('ogg', _('OGG file')),
41 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are limited to a single tag category."""

    def __init__(self, subcategory):
        """Store the ``category`` value every queryset will be filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the base manager's queryset filtered by ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
54 name = models.CharField(_('name'), max_length=50, db_index=True)
55 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
56 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
57 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
58 db_index=True, choices=TAG_CATEGORIES)
59 description = models.TextField(_('description'), blank=True)
60 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
62 user = models.ForeignKey(User, blank=True, null=True)
63 book_count = models.IntegerField(_('book count'), blank=True, null=True)
64 gazeta_link = models.CharField(blank=True, max_length=240)
65 wiki_link = models.CharField(blank=True, max_length=240)
75 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
78 ordering = ('sort_key',)
79 verbose_name = _('tag')
80 verbose_name_plural = _('tags')
81 unique_together = (("slug", "category"),)
83 def __unicode__(self):
87 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return a ``(view name, positional args)`` pair for this tag's listing.

    NOTE(review): presumably consumed by the ``permalink`` decorator
    imported at the top of the file -- the decorator line is not visible
    here, confirm.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Return True when ``self.description`` contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes: a translated column label and a boolean-display flag
# (presumably read by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
99 """ returns global book count for book tags, fragment count for themes """
101 if self.book_count is None:
102 if self.category == 'book':
104 objects = Book.objects.none()
105 elif self.category == 'theme':
106 objects = Fragment.tagged.with_all((self,))
108 objects = Book.tagged.with_all((self,)).order_by()
109 if self.category != 'set':
110 # eliminate descendants
111 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
112 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
114 objects = objects.exclude(pk__in=descendants_keys)
115 self.book_count = objects.count()
117 return self.book_count
120 def get_tag_list(tags):
121 if isinstance(tags, basestring):
125 tags_splitted = tags.split('/')
126 for index, name in enumerate(tags_splitted):
127 if name in Tag.categories_rev:
128 category = Tag.categories_rev[name]
131 real_tags.append(Tag.objects.get(slug=name, category=category))
135 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
136 except Tag.MultipleObjectsReturned, e:
137 ambiguous_slugs.append(name)
140 # something strange left off
141 raise Tag.DoesNotExist()
143 # some tags should be qualified
144 e = Tag.MultipleObjectsReturned()
146 e.ambiguous_slugs = ambiguous_slugs
151 return TagBase.get_tag_list(tags)
155 return '/'.join((Tag.categories_dict[self.category], self.slug))
158 # TODO: why is this hard-coded ?
159 def book_upload_path(ext=None):
160 def get_dynamic_path(media, filename, ext=ext):
161 # how to put related book's slug here?
165 name = slughifi(filename.split(".")[0])
167 name = slughifi(media.name)
168 return 'lektura/%s.%s' % (name, ext)
169 return get_dynamic_path
172 class BookMedia(models.Model):
# Media format identifier, restricted to the MEDIA_FORMATS choices.
# max_length must be an integer: a string value breaks length validation
# (and is rejected outright by newer Django versions).
type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
# Optional human-readable name of the media file.
name = models.CharField(_('name'), max_length=100, blank=True)
# The uploaded file itself; the storage path is computed by book_upload_path().
file = models.FileField(_('file'), upload_to=book_upload_path(), blank=True)
# Set once, when the row is first created.
uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
# JSON-encoded extra metadata; defaults to an empty JSON object.
extra_info = JSONField(_('extra information'), default='{}')
def __unicode__(self):
    """Return ``"<name> (<file basename>)"`` for this media object."""
    # Keep only the part of the stored path after the last "/".
    base_name = self.file.name.rpartition("/")[2]
    return "%s (%s)" % (self.name, base_name)
183 ordering = ('type', 'name')
184 verbose_name = _('book media')
185 verbose_name_plural = _('book media')
187 def save(self, force_insert=False, force_update=False, **kwargs):
188 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
189 if self.type == 'mp3':
191 extra_info = self.get_extra_info_value()
192 extra_info.update(self.get_mp3_info())
193 self.set_extra_info_value(extra_info)
194 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
197 def get_mp3_info(self):
198 """Retrieves artist and director names from audio ID3 tags."""
200 audio = id3.ID3(self.file.path)
201 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
202 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
204 artist_name = director_name = ''
205 return {'artist_name': artist_name, 'director_name': director_name}
208 class Book(models.Model):
209 title = models.CharField(_('title'), max_length=120)
210 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
211 description = models.TextField(_('description'), blank=True)
212 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
213 _short_html = models.TextField(_('short HTML'), editable=False)
214 parent_number = models.IntegerField(_('parent number'), default=0)
215 extra_info = JSONField(_('extra information'))
216 gazeta_link = models.CharField(blank=True, max_length=240)
217 wiki_link = models.CharField(blank=True, max_length=240)
218 # files generated during publication
219 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
220 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
221 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
222 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
223 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
225 medias = models.ManyToManyField(BookMedia, blank=True)
227 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
228 objects = models.Manager()
229 tagged = managers.ModelTaggedItemManager(Tag)
230 tags = managers.TagDescriptor(Tag)
232 _tag_counter = JSONField(null=True, editable=False)
233 _theme_counter = JSONField(null=True, editable=False)
235 class AlreadyExists(Exception):
239 ordering = ('title',)
240 verbose_name = _('book')
241 verbose_name_plural = _('books')
243 def __unicode__(self):
246 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
248 # Reset _short_html during save
250 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
252 self.__setattr__(key, '')
253 # Fragment.short_html relies on book's tags, so reset it here too
254 self.fragments.all().update(**update)
256 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return a ``(view name, positional args)`` pair for the book detail page.

    NOTE(review): presumably consumed by the ``permalink`` decorator
    imported at the top of the file -- the decorator line is not visible
    here, confirm.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: ``'l-'`` + book slug, cut to 120 chars."""
    prefixed_slug = 'l-' + self.slug
    # Truncate to 120 characters (the max_length used by the slug fields in this module).
    return prefixed_slug[:120]
270 slug = self.book_tag_slug()
271 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
273 book_tag.name = self.title[:50]
274 book_tag.sort_key = self.title.lower()
278 def has_media(self, type):
305 if self.medias.filter(book=self, type=type).count() > 0:
310 def get_media(self, type):
311 if self.has_media(type):
315 return self.html_file
317 return self.epub_file
323 return self.medias.filter(book=self, type=type)
328 return self.get_media("mp3")
330 return self.get_media("odt")
332 return self.get_media("ogg")
334 return self.get_media("daisy")
336 def short_html(self):
337 key = '_short_html_%s' % get_language()
338 short_html = getattr(self, key)
340 if short_html and len(short_html):
341 return mark_safe(short_html)
343 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
344 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
347 # files generated during publication
348 if self.has_media("html"):
349 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
350 if self.has_media("pdf"):
351 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
352 if self.root_ancestor.has_media("epub"):
353 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
354 if self.has_media("txt"):
355 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
357 for m in self.medias.order_by('type'):
358 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
360 formats = [mark_safe(format) for format in formats]
362 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
363 {'book': self, 'tags': tags, 'formats': formats})))
364 self.save(reset_short_html=False)
365 return mark_safe(getattr(self, key))
369 def root_ancestor(self):
370 """ returns the oldest ancestor """
372 if not hasattr(self, '_root_ancestor'):
376 self._root_ancestor = book
377 return self._root_ancestor
def has_description(self):
    """Return True when ``self.description`` contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes: a translated column label and a boolean-display flag
# (presumably read by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Return True when ``self.pdf_file`` is truthy, False otherwise."""
    if self.pdf_file:
        return True
    return False
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Return True when ``self.epub_file`` is truthy, False otherwise."""
    epub_present = True if self.epub_file else False
    return epub_present
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Return True when ``self.txt_file`` is truthy, False otherwise."""
    return bool(self.txt_file)
# The label must say 'TXT': it was previously 'HTML', copied from
# has_html_file, which gave two indistinguishable columns.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Return True when ``self.html_file`` is truthy, False otherwise."""
    if not self.html_file:
        return False
    return True
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Return the result of ``self.has_media("odt")`` coerced to a bool."""
    odt_available = self.has_media("odt")
    return bool(odt_available)
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Return the result of ``self.has_media("mp3")`` coerced to a bool."""
    if self.has_media("mp3"):
        return True
    return False
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Return the result of ``self.has_media("ogg")`` coerced to a bool."""
    ogg_available = self.has_media("ogg")
    return True if ogg_available else False
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Return the result of ``self.has_media("daisy")`` coerced to a bool."""
    if not self.has_media("daisy"):
        return False
    return True
# Column label and boolean-display flag (presumably for the admin list -- confirm).
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
426 def build_epub(self, remove_descendants=True):
427 """ (Re)builds the epub file.
428 If book has a parent, does nothing.
429 Unless remove_descendants is False, descendants' epubs are removed.
432 from StringIO import StringIO
433 from hashlib import sha1
434 from django.core.files.base import ContentFile
435 from librarian import DocProvider
437 class BookImportDocProvider(DocProvider):
438 """ used for joined EPUBs """
440 def __init__(self, book):
443 def by_slug(self, slug):
444 if slug == self.book.slug:
445 return self.book.xml_file
447 return Book.objects.get(slug=slug).xml_file
453 epub_file = StringIO()
455 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
456 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
457 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
461 book_descendants = list(self.children.all())
462 while len(book_descendants) > 0:
463 child_book = book_descendants.pop(0)
464 if remove_descendants and child_book.has_epub_file():
465 child_book.epub_file.delete()
466 # save anyway, to refresh short_html
468 book_descendants += list(child_book.children.all())
471 from StringIO import StringIO
472 from django.core.files.base import ContentFile
473 from librarian import text
476 text.transform(open(self.xml_file.path), out)
477 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
482 def from_xml_file(cls, xml_file, **kwargs):
483 # use librarian to parse meta-data
484 book_info = dcparser.parse(xml_file)
486 if not isinstance(xml_file, File):
487 xml_file = File(open(xml_file))
490 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
495 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
497 from tempfile import NamedTemporaryFile
498 from markupstring import MarkupString
499 from django.core.files.storage import default_storage
501 # check for parts before we do anything
503 if hasattr(book_info, 'parts'):
504 for part_url in book_info.parts:
505 base, slug = part_url.rsplit('/', 1)
507 children.append(Book.objects.get(slug=slug))
508 except Book.DoesNotExist, e:
509 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
513 book_base, book_slug = book_info.url.rsplit('/', 1)
514 if re.search(r'[^a-zA-Z0-9-]', book_slug):
515 raise ValueError('Invalid characters in slug')
516 book, created = Book.objects.get_or_create(slug=book_slug)
522 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
523 # Save shelves for this book
524 book_shelves = list(book.tags.filter(category='set'))
526 book.title = book_info.title
527 book.set_extra_info_value(book_info.to_dict())
528 book._short_html = ''
532 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
533 for field_name, category in categories:
535 tag_names = getattr(book_info, field_name)
537 tag_names = [getattr(book_info, category)]
538 for tag_name in tag_names:
539 tag_sort_key = tag_name
540 if category == 'author':
541 tag_sort_key = tag_name.last_name
542 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
543 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
546 tag.sort_key = tag_sort_key.lower()
548 book_tags.append(tag)
550 book.tags = book_tags + book_shelves
552 book_tag = book.book_tag()
554 for n, child_book in enumerate(children):
555 child_book.parent = book
556 child_book.parent_number = n
559 # Save XML and HTML files
560 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
562 # delete old fragments when overwriting
563 book.fragments.all().delete()
565 html_file = NamedTemporaryFile()
566 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
567 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
569 # get ancestor l-tags for adding to new fragments
573 ancestor_tags.append(p.book_tag())
577 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
578 for fragment in closed_fragments.values():
580 theme_names = [s.strip() for s in fragment.themes.split(',')]
581 except AttributeError:
584 for theme_name in theme_names:
587 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
589 tag.name = theme_name
590 tag.sort_key = theme_name.lower()
596 text = fragment.to_string()
598 if (len(MarkupString(text)) > 240):
599 short_text = unicode(MarkupString(text)[:160])
600 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
601 defaults={'text': text, 'short_text': short_text})
604 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
606 if not settings.NO_BUILD_TXT and build_txt:
609 if not settings.NO_BUILD_EPUB and build_epub:
610 book.root_ancestor.build_epub()
612 book_descendants = list(book.children.all())
613 # add l-tag to descendants and their fragments
614 # delete unnecessary EPUB files
615 while len(book_descendants) > 0:
616 child_book = book_descendants.pop(0)
617 child_book.tags = list(child_book.tags) + [book_tag]
619 for fragment in child_book.fragments.all():
620 fragment.tags = set(list(fragment.tags) + [book_tag])
621 book_descendants += list(child_book.children.all())
624 book.reset_tag_counter()
625 book.reset_theme_counter()
631 def refresh_tag_counter(self):
633 for child in self.children.all().order_by():
634 for tag_pk, value in child.tag_counter.iteritems():
635 tags[tag_pk] = tags.get(tag_pk, 0) + value
636 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
638 self.set__tag_counter_value(tags)
639 self.save(reset_short_html=False)
642 def reset_tag_counter(self):
643 self._tag_counter = None
644 self.save(reset_short_html=False)
646 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter as a dict with integer keys.

    When ``self._tag_counter`` is None the result of
    ``self.refresh_tag_counter()`` is returned as-is. Otherwise the value
    from ``self.get__tag_counter_value()`` is copied into a new dict with
    every key converted through ``int()`` (presumably because JSON
    storage turns the keys into strings -- confirm).
    """
    if self._tag_counter is not None:
        stored_counts = self.get__tag_counter_value()
        counts_by_pk = {}
        for raw_pk, amount in stored_counts.iteritems():
            counts_by_pk[int(raw_pk)] = amount
        return counts_by_pk
    return self.refresh_tag_counter()
654 def refresh_theme_counter(self):
656 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
657 for tag in fragment.tags.filter(category='theme').order_by():
658 tags[tag.pk] = tags.get(tag.pk, 0) + 1
659 self.set__theme_counter_value(tags)
660 self.save(reset_short_html=False)
663 def reset_theme_counter(self):
664 self._theme_counter = None
665 self.save(reset_short_html=False)
667 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter as a dict with integer keys.

    When ``self._theme_counter`` is None the result of
    ``self.refresh_theme_counter()`` is returned as-is. Otherwise the
    value from ``self.get__theme_counter_value()`` is copied into a new
    dict with every key converted through ``int()`` (presumably because
    JSON storage turns the keys into strings -- confirm).
    """
    if self._theme_counter is not None:
        stored_counts = self.get__theme_counter_value()
        counts_by_pk = {}
        for raw_pk, amount in stored_counts.iteritems():
            counts_by_pk[int(raw_pk)] = amount
        return counts_by_pk
    return self.refresh_theme_counter()
675 def pretty_title(self, html_links=False):
677 names = list(book.tags.filter(category='author'))
683 names.extend(reversed(books))
686 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
688 names = [tag.name for tag in names]
690 return ', '.join(names)
693 class Fragment(models.Model):
694 text = models.TextField()
695 short_text = models.TextField(editable=False)
696 _short_html = models.TextField(editable=False)
697 anchor = models.CharField(max_length=120)
698 book = models.ForeignKey(Book, related_name='fragments')
700 objects = models.Manager()
701 tagged = managers.ModelTaggedItemManager(Tag)
702 tags = managers.TagDescriptor(Tag)
705 ordering = ('book', 'anchor',)
706 verbose_name = _('fragment')
707 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book's ``book_text`` view with an ``#m<anchor>`` suffix."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
712 def short_html(self):
713 key = '_short_html_%s' % get_language()
714 short_html = getattr(self, key)
715 if short_html and len(short_html):
716 return mark_safe(short_html)
718 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
719 {'fragment': self})))
721 return mark_safe(getattr(self, key))
724 class FileRecord(models.Model):
725 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
726 type = models.CharField(_('type'), max_length=20, db_index=True)
727 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
728 time = models.DateTimeField(_('time'), auto_now_add=True)
731 ordering = ('-time','-slug', '-type')
732 verbose_name = _('file record')
733 verbose_name_plural = _('file records')
def __unicode__(self):
    """Return ``"<sha1> <slug>.<type>"`` for this file record."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after a ``tags_updated`` signal.

    Clears ``book_count`` on every affected tag. Then, when ``sender`` is
    a Book and any affected tag is outside the 'book', 'theme' and 'set'
    categories, resets that book's tag counter; otherwise, when
    ``sender`` is a Fragment and any affected tag is a theme, resets the
    theme counter of the fragment's book.
    """
    # reset tag global counter
    affected_pks = [tag.pk for tag in affected_tags]
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    def _affected_queryset():
        # Build a fresh queryset on demand, so it is only constructed for
        # the branch whose isinstance() check passes.
        return Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags))

    non_counted_categories = ('book', 'theme', 'set')
    if isinstance(sender, Book) and \
            _affected_queryset().exclude(category__in=non_counted_categories).count():
        # if book tags changed, reset book tag counter
        sender.reset_tag_counter()
    elif isinstance(sender, Fragment) and \
            _affected_queryset().filter(category='theme').count():
        # if fragment theme changed, reset book theme counter
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
762 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
763 """ refresh all the short_html stuff on BookMedia delete """
764 if sender == Book.medias.through and reverse and action == 'pre_clear':
765 for book in instance.book_set.all():
767 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """ explicitly clear m2m, so that Books can be refreshed """
    # Only BookMedia deletions are of interest; ignore every other sender.
    if not sender == BookMedia:
        return
    related_books = instance.book_set
    related_books.clear()
pre_delete.connect(_pre_delete_handler)
775 def _post_save_handler(sender, instance, **kwargs):
776 """ refresh all the short_html stuff on BookMedia update """
777 if sender == BookMedia:
778 for book in instance.book_set.all():
780 post_save.connect(_post_save_handler)