1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
23 from mutagen import id3
24 from slughifi import slughifi
28 ('author', _('author')),
29 ('epoch', _('epoch')),
31 ('genre', _('genre')),
32 ('theme', _('theme')),
38 ('odt', _('ODT file')),
39 ('mp3', _('MP3 file')),
40 ('ogg', _('OGG file')),
41 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are limited to a single tag category.

    The category is fixed when the manager is constructed and applied as a
    ``category=...`` filter on top of the parent manager's queryset.
    """

    def __init__(self, subcategory):
        """Remember the category value that every queryset is filtered on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
54 name = models.CharField(_('name'), max_length=50, db_index=True)
55 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
56 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
57 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
58 db_index=True, choices=TAG_CATEGORIES)
59 description = models.TextField(_('description'), blank=True)
60 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
62 user = models.ForeignKey(User, blank=True, null=True)
63 book_count = models.IntegerField(_('book count'), blank=True, null=True)
64 gazeta_link = models.CharField(blank=True, max_length=240)
65 wiki_link = models.CharField(blank=True, max_length=240)
75 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
78 ordering = ('sort_key',)
79 verbose_name = _('tag')
80 verbose_name_plural = _('tags')
81 unique_together = (("slug", "category"),)
83 def __unicode__(self):
87 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this tag's listing.

    The single positional argument is ``self.url_chunk``.
    NOTE(review): the pair is presumably resolved to a URL by a decorator
    that is not visible here -- confirm.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether ``self.description`` contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Metadata attached to the callable: render as a boolean, labelled with the
# translated word "description" (presumably read by the admin -- confirm).
has_description.boolean = True
has_description.short_description = _('description')
99 """ returns global book count for book tags, fragment count for themes """
101 if self.book_count is None:
102 if self.category == 'book':
104 objects = Book.objects.none()
105 elif self.category == 'theme':
106 objects = Fragment.tagged.with_all((self,))
108 objects = Book.tagged.with_all((self,)).order_by()
109 if self.category != 'set':
110 # eliminate descendants
111 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
112 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
114 objects = objects.exclude(pk__in=descendants_keys)
115 self.book_count = objects.count()
117 return self.book_count
120 def get_tag_list(tags):
121 if isinstance(tags, basestring):
125 tags_splitted = tags.split('/')
126 for index, name in enumerate(tags_splitted):
127 if name in Tag.categories_rev:
128 category = Tag.categories_rev[name]
131 real_tags.append(Tag.objects.get(slug=name, category=category))
135 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
136 except Tag.MultipleObjectsReturned, e:
137 ambiguous_slugs.append(name)
140 # something strange left off
141 raise Tag.DoesNotExist()
143 # some tags should be qualified
144 e = Tag.MultipleObjectsReturned()
146 e.ambiguous_slugs = ambiguous_slugs
151 return TagBase.get_tag_list(tags)
155 return '/'.join((Tag.categories_dict[self.category], self.slug))
158 # TODO: why is this hard-coded ?
159 def book_upload_path(ext=None, maxlen=100):
160 def get_dynamic_path(media, filename, ext=ext):
161 # how to put related book's slug here?
163 if media.type == 'daisy':
168 name = slughifi(filename.split(".")[0])
170 name = slughifi(media.name)
171 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
172 return get_dynamic_path
175 class BookMedia(models.Model):
176 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
177 name = models.CharField(_('name'), max_length="100")
178 file = models.FileField(_('file'), upload_to=book_upload_path())
179 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
180 extra_info = JSONField(_('extra information'), default='{}')
def book_count(self):
    """Return how many books are related to this media through ``book_set``."""
    related_books = self.book_set
    return related_books.count()
# Translated label attached to the callable.
book_count.short_description = _('book count')
187 return mark_safe('<br/>'.join("<a href='%s'>%s</a>" % (reverse('admin:catalogue_book_change', args=[b.id]), b.title) for b in self.book_set.all()))
188 books.short_description = _('books')
def __unicode__(self):
    """Render the media as ``"<name> (<last path component of file name>)"``."""
    # Everything up to and including the final "/" of the stored file name
    # is dropped, leaving only the base name.
    basename = self.file.name.rsplit("/", 1)[-1]
    return "%s (%s)" % (self.name, basename)
194 ordering = ('type', 'name')
195 verbose_name = _('book media')
196 verbose_name_plural = _('book media')
198 def save(self, force_insert=False, force_update=False, **kwargs):
199 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
200 if self.type == 'mp3':
202 extra_info = self.get_extra_info_value()
203 extra_info.update(self.get_mp3_info())
204 self.set_extra_info_value(extra_info)
205 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
208 def get_mp3_info(self):
209 """Retrieves artist and director names from audio ID3 tags."""
211 audio = id3.ID3(self.file.path)
212 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
213 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
215 artist_name = director_name = ''
216 return {'artist_name': artist_name, 'director_name': director_name}
219 class Book(models.Model):
220 title = models.CharField(_('title'), max_length=120)
221 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
222 description = models.TextField(_('description'), blank=True)
223 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
224 _short_html = models.TextField(_('short HTML'), editable=False)
225 parent_number = models.IntegerField(_('parent number'), default=0)
226 extra_info = JSONField(_('extra information'))
227 gazeta_link = models.CharField(blank=True, max_length=240)
228 wiki_link = models.CharField(blank=True, max_length=240)
229 # files generated during publication
230 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
231 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
232 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
233 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
234 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
236 medias = models.ManyToManyField(BookMedia, blank=True)
238 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
239 objects = models.Manager()
240 tagged = managers.ModelTaggedItemManager(Tag)
241 tags = managers.TagDescriptor(Tag)
243 _tag_counter = JSONField(null=True, editable=False)
244 _theme_counter = JSONField(null=True, editable=False)
246 class AlreadyExists(Exception):
250 ordering = ('title',)
251 verbose_name = _('book')
252 verbose_name_plural = _('books')
254 def __unicode__(self):
257 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
259 # Reset _short_html during save
261 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
263 self.__setattr__(key, '')
264 # Fragment.short_html relies on book's tags, so reset it here too
265 self.fragments.all().update(**update)
267 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for the book detail view.

    The single positional argument is the book's slug.
    NOTE(review): the pair is presumably resolved to a URL by a decorator
    that is not visible here -- confirm.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: ``'l-'`` + slug, cut to 120 chars."""
    prefixed = 'l-' + self.slug
    # Truncated so the result never exceeds 120 characters.
    return prefixed[:120]
281 slug = self.book_tag_slug()
282 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
284 book_tag.name = self.title[:50]
285 book_tag.sort_key = self.title.lower()
289 def has_media(self, type):
316 if self.medias.filter(book=self, type=type).count() > 0:
321 def get_media(self, type):
322 if self.has_media(type):
326 return self.html_file
328 return self.epub_file
334 return self.medias.filter(book=self, type=type)
339 return self.get_media("mp3")
341 return self.get_media("odt")
343 return self.get_media("ogg")
345 return self.get_media("daisy")
347 def short_html(self):
348 key = '_short_html_%s' % get_language()
349 short_html = getattr(self, key)
351 if short_html and len(short_html):
352 return mark_safe(short_html)
354 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
355 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
358 # files generated during publication
359 if self.has_media("html"):
360 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
361 if self.has_media("pdf"):
362 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
363 if self.root_ancestor.has_media("epub"):
364 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
365 if self.has_media("txt"):
366 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
368 for m in self.medias.order_by('type'):
369 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
371 formats = [mark_safe(format) for format in formats]
373 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
374 {'book': self, 'tags': tags, 'formats': formats})))
375 self.save(reset_short_html=False)
376 return mark_safe(getattr(self, key))
380 def root_ancestor(self):
381 """ returns the oldest ancestor """
383 if not hasattr(self, '_root_ancestor'):
387 self._root_ancestor = book
388 return self._root_ancestor
def has_description(self):
    """Tell whether the book's ``description`` contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Metadata attached to the callable: render as a boolean, labelled with the
# translated word "description" (presumably read by the admin -- confirm).
has_description.boolean = True
has_description.short_description = _('description')
def has_pdf_file(self):
    """Return True when ``self.pdf_file`` is truthy, False otherwise."""
    return True if self.pdf_file else False
# Callable metadata: boolean display with the label 'PDF'.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Return True when ``self.epub_file`` is truthy, False otherwise."""
    return True if self.epub_file else False
# Callable metadata: boolean display with the label 'EPUB'.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Return True when ``self.txt_file`` is truthy, False otherwise."""
    return bool(self.txt_file)
# Callable metadata: boolean display.  The label is 'TXT'; it previously read
# 'HTML', duplicating the label of has_html_file.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Return True when ``self.html_file`` is truthy, False otherwise."""
    return True if self.html_file else False
# Callable metadata: boolean display with the label 'HTML'.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Return the truth value of ``self.has_media("odt")`` as a real bool."""
    odt_present = self.has_media("odt")
    return bool(odt_present)
# Callable metadata: boolean display with the label 'ODT'.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Return the truth value of ``self.has_media("mp3")`` as a real bool."""
    mp3_present = self.has_media("mp3")
    return bool(mp3_present)
# Callable metadata: boolean display with the label 'MP3'.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Return the truth value of ``self.has_media("ogg")`` as a real bool."""
    ogg_present = self.has_media("ogg")
    return bool(ogg_present)
# Callable metadata: boolean display with the label 'OGG'.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Return the truth value of ``self.has_media("daisy")`` as a real bool."""
    daisy_present = self.has_media("daisy")
    return bool(daisy_present)
# Callable metadata: boolean display with the label 'DAISY'.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
437 def build_epub(self, remove_descendants=True):
438 """ (Re)builds the epub file.
439 If book has a parent, does nothing.
440 Unless remove_descendants is False, descendants' epubs are removed.
443 from StringIO import StringIO
444 from hashlib import sha1
445 from django.core.files.base import ContentFile
446 from librarian import DocProvider
448 class BookImportDocProvider(DocProvider):
449 """ used for joined EPUBs """
451 def __init__(self, book):
454 def by_slug(self, slug):
455 if slug == self.book.slug:
456 return self.book.xml_file
458 return Book.objects.get(slug=slug).xml_file
464 epub_file = StringIO()
466 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
467 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
468 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
472 book_descendants = list(self.children.all())
473 while len(book_descendants) > 0:
474 child_book = book_descendants.pop(0)
475 if remove_descendants and child_book.has_epub_file():
476 child_book.epub_file.delete()
477 # save anyway, to refresh short_html
479 book_descendants += list(child_book.children.all())
482 from StringIO import StringIO
483 from django.core.files.base import ContentFile
484 from librarian import text
487 text.transform(open(self.xml_file.path), out)
488 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
493 def from_xml_file(cls, xml_file, **kwargs):
494 # use librarian to parse meta-data
495 book_info = dcparser.parse(xml_file)
497 if not isinstance(xml_file, File):
498 xml_file = File(open(xml_file))
501 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
506 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
508 from tempfile import NamedTemporaryFile
509 from markupstring import MarkupString
510 from django.core.files.storage import default_storage
512 # check for parts before we do anything
514 if hasattr(book_info, 'parts'):
515 for part_url in book_info.parts:
516 base, slug = part_url.rsplit('/', 1)
518 children.append(Book.objects.get(slug=slug))
519 except Book.DoesNotExist, e:
520 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
524 book_base, book_slug = book_info.url.rsplit('/', 1)
525 if re.search(r'[^a-zA-Z0-9-]', book_slug):
526 raise ValueError('Invalid characters in slug')
527 book, created = Book.objects.get_or_create(slug=book_slug)
533 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
534 # Save shelves for this book
535 book_shelves = list(book.tags.filter(category='set'))
537 book.title = book_info.title
538 book.set_extra_info_value(book_info.to_dict())
539 book._short_html = ''
543 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
544 for field_name, category in categories:
546 tag_names = getattr(book_info, field_name)
548 tag_names = [getattr(book_info, category)]
549 for tag_name in tag_names:
550 tag_sort_key = tag_name
551 if category == 'author':
552 tag_sort_key = tag_name.last_name
553 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
554 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
557 tag.sort_key = tag_sort_key.lower()
559 book_tags.append(tag)
561 book.tags = set(book_tags + book_shelves)
563 book_tag = book.book_tag()
565 for n, child_book in enumerate(children):
566 child_book.parent = book
567 child_book.parent_number = n
570 # Save XML and HTML files
571 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
573 # delete old fragments when overwriting
574 book.fragments.all().delete()
576 html_file = NamedTemporaryFile()
577 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
578 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
580 # get ancestor l-tags for adding to new fragments
584 ancestor_tags.append(p.book_tag())
588 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
589 for fragment in closed_fragments.values():
591 theme_names = [s.strip() for s in fragment.themes.split(',')]
592 except AttributeError:
595 for theme_name in theme_names:
598 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
600 tag.name = theme_name
601 tag.sort_key = theme_name.lower()
607 text = fragment.to_string()
609 if (len(MarkupString(text)) > 240):
610 short_text = unicode(MarkupString(text)[:160])
611 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
612 defaults={'text': text, 'short_text': short_text})
615 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
617 if not settings.NO_BUILD_TXT and build_txt:
620 if not settings.NO_BUILD_EPUB and build_epub:
621 book.root_ancestor.build_epub()
623 book_descendants = list(book.children.all())
624 # add l-tag to descendants and their fragments
625 # delete unnecessary EPUB files
626 while len(book_descendants) > 0:
627 child_book = book_descendants.pop(0)
628 child_book.tags = list(child_book.tags) + [book_tag]
630 for fragment in child_book.fragments.all():
631 fragment.tags = set(list(fragment.tags) + [book_tag])
632 book_descendants += list(child_book.children.all())
635 book.reset_tag_counter()
636 book.reset_theme_counter()
642 def refresh_tag_counter(self):
644 for child in self.children.all().order_by():
645 for tag_pk, value in child.tag_counter.iteritems():
646 tags[tag_pk] = tags.get(tag_pk, 0) + value
647 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
649 self.set__tag_counter_value(tags)
650 self.save(reset_short_html=False)
653 def reset_tag_counter(self):
654 self._tag_counter = None
655 self.save(reset_short_html=False)
657 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the per-tag counter for this book as a dict with int keys.

    When nothing is cached (``_tag_counter`` is None) the result of
    ``refresh_tag_counter()`` is returned as-is.  Otherwise the stored
    mapping is read through ``get__tag_counter_value()`` and its keys are
    converted to ``int``.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_tag_counter()
665 def refresh_theme_counter(self):
667 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
668 for tag in fragment.tags.filter(category='theme').order_by():
669 tags[tag.pk] = tags.get(tag.pk, 0) + 1
670 self.set__theme_counter_value(tags)
671 self.save(reset_short_html=False)
674 def reset_theme_counter(self):
675 self._theme_counter = None
676 self.save(reset_short_html=False)
678 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the per-theme counter for this book as a dict with int keys.

    When nothing is cached (``_theme_counter`` is None) the result of
    ``refresh_theme_counter()`` is returned as-is.  Otherwise the stored
    mapping is read through ``get__theme_counter_value()`` and its keys are
    converted to ``int``.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_theme_counter()
686 def pretty_title(self, html_links=False):
688 names = list(book.tags.filter(category='author'))
694 names.extend(reversed(books))
697 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
699 names = [tag.name for tag in names]
701 return ', '.join(names)
704 class Fragment(models.Model):
705 text = models.TextField()
706 short_text = models.TextField(editable=False)
707 _short_html = models.TextField(editable=False)
708 anchor = models.CharField(max_length=120)
709 book = models.ForeignKey(Book, related_name='fragments')
711 objects = models.Manager()
712 tagged = managers.ModelTaggedItemManager(Tag)
713 tags = managers.TagDescriptor(Tag)
716 ordering = ('book', 'anchor',)
717 verbose_name = _('fragment')
718 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the 'book_text' URL of the fragment's book plus ``#m<anchor>``."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
723 def short_html(self):
724 key = '_short_html_%s' % get_language()
725 short_html = getattr(self, key)
726 if short_html and len(short_html):
727 return mark_safe(short_html)
729 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
730 {'fragment': self})))
732 return mark_safe(getattr(self, key))
735 class FileRecord(models.Model):
736 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
737 type = models.CharField(_('type'), max_length=20, db_index=True)
738 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
739 time = models.DateTimeField(_('time'), auto_now_add=True)
742 ordering = ('-time','-slug', '-type')
743 verbose_name = _('file record')
744 verbose_name_plural = _('file records')
def __unicode__(self):
    """Render the record as ``"<sha1> <slug>.<type>"``."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of an object have changed.

    ``book_count`` is cleared on every affected tag.  Then:

    * if ``sender`` is a Book and at least one affected tag is outside the
      'book', 'theme' and 'set' categories, the book's tag counter is reset;
    * otherwise, if ``sender`` is a Fragment and at least one affected tag
      is a theme, the theme counter of the fragment's book is reset.
    """
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    if isinstance(sender, Book) and (
            Tag.objects.filter(pk__in=affected_pks)
            .exclude(category__in=('book', 'theme', 'set'))
            .count()):
        # book tags changed: reset book tag counter
        sender.reset_tag_counter()
    elif isinstance(sender, Fragment) and (
            Tag.objects.filter(pk__in=affected_pks)
            .filter(category='theme')
            .count()):
        # fragment theme changed: reset book theme counter
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
773 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
774 """ refresh all the short_html stuff on BookMedia delete """
775 if sender == Book.medias.through and reverse and action == 'pre_clear':
776 for book in instance.book_set.all():
778 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """Clear a BookMedia's book relation right before the media is deleted.

    The m2m is cleared explicitly so that the related Books can be refreshed.
    Signals sent by any other model are ignored.
    """
    if sender != BookMedia:
        return
    instance.book_set.clear()
pre_delete.connect(_pre_delete_handler)
786 def _post_save_handler(sender, instance, **kwargs):
787 """ refresh all the short_html stuff on BookMedia update """
788 if sender == BookMedia:
789 for book in instance.book_set.all():
791 post_save.connect(_post_save_handler)