1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from datetime import datetime
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.utils.translation import ugettext_lazy as _
10 from django.contrib.auth.models import User
11 from django.core.files import File
12 from django.template.loader import render_to_string
13 from django.utils.safestring import mark_safe
14 from django.utils.translation import get_language
15 from django.core.urlresolvers import reverse
16 from django.db.models.signals import post_save, m2m_changed, pre_delete
18 from django.conf import settings
20 from newtagging.models import TagBase, tags_updated
21 from newtagging import managers
22 from catalogue.fields import JSONField
24 from librarian import dcparser, html, epub, NoDublinCore
25 from mutagen import id3
26 from slughifi import slughifi
27 from sortify import sortify
31 ('author', _('author')),
32 ('epoch', _('epoch')),
34 ('genre', _('genre')),
35 ('theme', _('theme')),
41 ('odt', _('ODT file')),
42 ('mp3', _('MP3 file')),
43 ('ogg', _('OGG file')),
44 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager returning only the tags that belong to one fixed category."""

    def __init__(self, subcategory):
        # Initialise the base manager first, then remember which value of
        # the `category` field this manager is restricted to.
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the base queryset narrowed to `self.subcategory`."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
# Basic tag data.
name = models.CharField(_('name'), max_length=50, db_index=True)
slug = models.SlugField(_('slug'), max_length=120, db_index=True)
sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
category = models.CharField(_('category'), max_length=50, blank=False, null=False,
    db_index=True, choices=TAG_CATEGORIES)
description = models.TextField(_('description'), blank=True)
main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

user = models.ForeignKey(User, blank=True, null=True)
# Nullable counter; code elsewhere in this file sets it to None to mark it stale.
book_count = models.IntegerField(_('book count'), blank=True, null=True)
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)

created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# This label used to repeat 'creation date' (copy-paste from created_at).
# The new msgid needs an entry in the translation catalogues.
changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
81 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
84 ordering = ('sort_key',)
85 verbose_name = _('tag')
86 verbose_name_plural = _('tags')
87 unique_together = (("slug", "category"),)
89 def __unicode__(self):
93 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the (view name, positional args) pair for this tag's object list."""
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this tag's description contains any text."""
    description_length = len(self.description)
    return description_length != 0
101 has_description.short_description = _('description')
102 has_description.boolean = True
105 """ returns global book count for book tags, fragment count for themes """
107 if self.book_count is None:
108 if self.category == 'book':
110 objects = Book.objects.none()
111 elif self.category == 'theme':
112 objects = Fragment.tagged.with_all((self,))
114 objects = Book.tagged.with_all((self,)).order_by()
115 if self.category != 'set':
116 # eliminate descendants
117 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
118 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
120 objects = objects.exclude(pk__in=descendants_keys)
121 self.book_count = objects.count()
123 return self.book_count
126 def get_tag_list(tags):
127 if isinstance(tags, basestring):
131 tags_splitted = tags.split('/')
132 for index, name in enumerate(tags_splitted):
133 if name in Tag.categories_rev:
134 category = Tag.categories_rev[name]
137 real_tags.append(Tag.objects.get(slug=name, category=category))
141 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
142 except Tag.MultipleObjectsReturned, e:
143 ambiguous_slugs.append(name)
146 # something strange left off
147 raise Tag.DoesNotExist()
149 # some tags should be qualified
150 e = Tag.MultipleObjectsReturned()
152 e.ambiguous_slugs = ambiguous_slugs
157 return TagBase.get_tag_list(tags)
161 return '/'.join((Tag.categories_dict[self.category], self.slug))
164 # TODO: why is this hard-coded ?
165 def book_upload_path(ext=None, maxlen=100):
166 def get_dynamic_path(media, filename, ext=ext):
167 # how to put related book's slug here?
169 if media.type == 'daisy':
174 name = slughifi(filename.split(".")[0])
176 name = slughifi(media.name)
177 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
178 return get_dynamic_path
181 class BookMedia(models.Model):
# max_length must be an integer. The previous string value ("100") cannot be
# compared meaningfully with a text length, so it is now the number 100.
type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
name = models.CharField(_('name'), max_length=100)
# The storage path is computed by the callable returned from book_upload_path().
file = models.FileField(_('file'), upload_to=book_upload_path())
uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
extra_info = JSONField(_('extra information'), default='{}')
def book_count(self):
    """Return the number of books this media file is attached to."""
    linked_books = self.book_set
    return linked_books.count()
190 book_count.short_description = _('book count')
193 return mark_safe('<br/>'.join("<a href='%s'>%s</a>" % (reverse('admin:catalogue_book_change', args=[b.id]), b.title) for b in self.book_set.all()))
194 books.short_description = _('books')
def __unicode__(self):
    """Describe the media as "<name> (<last path component of the file>)"."""
    path_parts = self.file.name.split("/")
    base_name = path_parts[-1]
    return "%s (%s)" % (self.name, base_name)
200 ordering = ('type', 'name')
201 verbose_name = _('book media')
202 verbose_name_plural = _('book media')
204 def save(self, force_insert=False, force_update=False, **kwargs):
205 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
206 if self.type == 'mp3':
208 extra_info = self.get_extra_info_value()
209 extra_info.update(self.get_mp3_info())
210 self.set_extra_info_value(extra_info)
211 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
214 def get_mp3_info(self):
215 """Retrieves artist and director names from audio ID3 tags."""
217 audio = id3.ID3(self.file.path)
218 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
219 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
221 artist_name = director_name = ''
222 return {'artist_name': artist_name, 'director_name': director_name}
225 class Book(models.Model):
# Descriptive data.
title = models.CharField(_('title'), max_length=120)
# Not editable; Book.save() recomputes it from the title.
sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
description = models.TextField(_('description'), blank=True)
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# This label used to repeat 'creation date' (copy-paste from created_at).
# The new msgid needs an entry in the translation catalogues.
changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
_short_html = models.TextField(_('short HTML'), editable=False)
parent_number = models.IntegerField(_('parent number'), default=0)
extra_info = JSONField(_('extra information'), default='{}')
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)
# files generated during publication
xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

# Additional media files stored as BookMedia rows.
medias = models.ManyToManyField(BookMedia, blank=True)

# Optional parent book; child books are reachable through `children`.
parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)

# Cached counters; None means the value must be recomputed
# (see tag_counter / theme_counter).
_tag_counter = JSONField(null=True, editable=False)
_theme_counter = JSONField(null=True, editable=False)
254 class AlreadyExists(Exception):
258 ordering = ('sort_key',)
259 verbose_name = _('book')
260 verbose_name_plural = _('books')
262 def __unicode__(self):
265 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
266 self.sort_key = sortify(self.title)
269 # Reset _short_html during save
271 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
273 self.__setattr__(key, '')
274 # Fragment.short_html relies on book's tags, so reset it here too
275 self.fragments.all().update(**update)
277 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the (view name, positional args) pair for this book's detail page."""
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: 'l-' + slug, cut to 120 characters."""
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
291 slug = self.book_tag_slug()
292 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
294 book_tag.name = self.title[:50]
295 book_tag.sort_key = self.title.lower()
299 def has_media(self, type):
326 if self.medias.filter(book=self, type=type).count() > 0:
331 def get_media(self, type):
332 if self.has_media(type):
336 return self.html_file
338 return self.epub_file
344 return self.medias.filter(book=self, type=type)
349 return self.get_media("mp3")
351 return self.get_media("odt")
353 return self.get_media("ogg")
355 return self.get_media("daisy")
357 def short_html(self):
358 key = '_short_html_%s' % get_language()
359 short_html = getattr(self, key)
361 if short_html and len(short_html):
362 return mark_safe(short_html)
364 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
365 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
368 # files generated during publication
369 if self.has_media("html"):
370 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
371 if self.has_media("pdf"):
372 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
373 if self.root_ancestor.has_media("epub"):
374 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
375 if self.has_media("txt"):
376 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
378 for m in self.medias.order_by('type'):
379 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
381 formats = [mark_safe(format) for format in formats]
383 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
384 {'book': self, 'tags': tags, 'formats': formats})))
385 self.save(reset_short_html=False)
386 return mark_safe(getattr(self, key))
390 def root_ancestor(self):
391 """ returns the oldest ancestor """
393 if not hasattr(self, '_root_ancestor'):
397 self._root_ancestor = book
398 return self._root_ancestor
def has_description(self):
    """Tell whether this book's description contains any text."""
    text_length = len(self.description)
    return text_length != 0
403 has_description.short_description = _('description')
404 has_description.boolean = True
def has_pdf_file(self):
    """Report whether the `pdf_file` field holds a file."""
    if self.pdf_file:
        return True
    return False
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Report whether the `epub_file` field holds a file."""
    return True if self.epub_file else False
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Report whether the `txt_file` field holds a file."""
    return bool(self.txt_file)
# The label used to read 'HTML' (copied from has_html_file), which gave two
# identically named columns; this method reports on the TXT file.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report whether the `html_file` field holds a file."""
    attached = self.html_file
    return bool(attached)
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Report whether has_media() finds any "odt" media for this book."""
    odt_present = self.has_media("odt")
    return bool(odt_present)
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Report whether has_media() finds any "mp3" media for this book."""
    return True if self.has_media("mp3") else False
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Report whether has_media() finds any "ogg" media for this book."""
    if self.has_media("ogg"):
        return True
    return False
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Report whether has_media() finds any "daisy" media for this book."""
    media_found = self.has_media("daisy")
    return bool(media_found)
# Display metadata attached to the method (presumably read by the Django
# admin list display -- confirm against the admin configuration).
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
447 def build_epub(self, remove_descendants=True):
448 """ (Re)builds the epub file.
449 If book has a parent, does nothing.
450 Unless remove_descendants is False, descendants' epubs are removed.
453 from StringIO import StringIO
454 from hashlib import sha1
455 from django.core.files.base import ContentFile
456 from librarian import DocProvider
458 class BookImportDocProvider(DocProvider):
459 """ used for joined EPUBs """
461 def __init__(self, book):
464 def by_slug(self, slug):
465 if slug == self.book.slug:
466 return self.book.xml_file
468 return Book.objects.get(slug=slug).xml_file
474 epub_file = StringIO()
476 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
477 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
478 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
482 book_descendants = list(self.children.all())
483 while len(book_descendants) > 0:
484 child_book = book_descendants.pop(0)
485 if remove_descendants and child_book.has_epub_file():
486 child_book.epub_file.delete()
487 # save anyway, to refresh short_html
489 book_descendants += list(child_book.children.all())
492 from StringIO import StringIO
493 from django.core.files.base import ContentFile
494 from librarian import text
497 text.transform(open(self.xml_file.path), out)
498 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
503 def from_xml_file(cls, xml_file, **kwargs):
504 # use librarian to parse meta-data
505 book_info = dcparser.parse(xml_file)
507 if not isinstance(xml_file, File):
508 xml_file = File(open(xml_file))
511 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
516 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
518 from tempfile import NamedTemporaryFile
519 from markupstring import MarkupString
520 from django.core.files.storage import default_storage
522 # check for parts before we do anything
524 if hasattr(book_info, 'parts'):
525 for part_url in book_info.parts:
526 base, slug = part_url.rsplit('/', 1)
528 children.append(Book.objects.get(slug=slug))
529 except Book.DoesNotExist, e:
530 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
534 book_base, book_slug = book_info.url.rsplit('/', 1)
535 if re.search(r'[^a-zA-Z0-9-]', book_slug):
536 raise ValueError('Invalid characters in slug')
537 book, created = Book.objects.get_or_create(slug=book_slug)
543 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
544 # Save shelves for this book
545 book_shelves = list(book.tags.filter(category='set'))
547 book.title = book_info.title
548 book.set_extra_info_value(book_info.to_dict())
549 book._short_html = ''
553 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
554 for field_name, category in categories:
556 tag_names = getattr(book_info, field_name)
558 tag_names = [getattr(book_info, category)]
559 for tag_name in tag_names:
560 tag_sort_key = tag_name
561 if category == 'author':
562 tag_sort_key = tag_name.last_name
563 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
564 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
567 tag.sort_key = sortify(tag_sort_key.lower())
569 book_tags.append(tag)
571 book.tags = set(book_tags + book_shelves)
573 book_tag = book.book_tag()
575 for n, child_book in enumerate(children):
576 child_book.parent = book
577 child_book.parent_number = n
580 # Save XML and HTML files
581 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
583 # delete old fragments when overwriting
584 book.fragments.all().delete()
586 html_file = NamedTemporaryFile()
587 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
588 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
590 # get ancestor l-tags for adding to new fragments
594 ancestor_tags.append(p.book_tag())
598 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
599 for fragment in closed_fragments.values():
601 theme_names = [s.strip() for s in fragment.themes.split(',')]
602 except AttributeError:
605 for theme_name in theme_names:
608 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
610 tag.name = theme_name
611 tag.sort_key = theme_name.lower()
617 text = fragment.to_string()
619 if (len(MarkupString(text)) > 240):
620 short_text = unicode(MarkupString(text)[:160])
621 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
622 defaults={'text': text, 'short_text': short_text})
625 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
627 if not settings.NO_BUILD_TXT and build_txt:
630 if not settings.NO_BUILD_EPUB and build_epub:
631 book.root_ancestor.build_epub()
633 book_descendants = list(book.children.all())
634 # add l-tag to descendants and their fragments
635 # delete unnecessary EPUB files
636 while len(book_descendants) > 0:
637 child_book = book_descendants.pop(0)
638 child_book.tags = list(child_book.tags) + [book_tag]
640 for fragment in child_book.fragments.all():
641 fragment.tags = set(list(fragment.tags) + [book_tag])
642 book_descendants += list(child_book.children.all())
645 book.reset_tag_counter()
646 book.reset_theme_counter()
652 def refresh_tag_counter(self):
654 for child in self.children.all().order_by():
655 for tag_pk, value in child.tag_counter.iteritems():
656 tags[tag_pk] = tags.get(tag_pk, 0) + value
657 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
659 self.set__tag_counter_value(tags)
660 self.save(reset_short_html=False)
663 def reset_tag_counter(self):
664 self._tag_counter = None
665 self.save(reset_short_html=False)
667 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter as a dict keyed by integer.

    When nothing is stored (`_tag_counter` is None) the result of
    refresh_tag_counter() is returned instead.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        # Keys are converted with int() before the dict is returned.
        counter = {}
        for raw_key, amount in stored.iteritems():
            counter[int(raw_key)] = amount
        return counter
    return self.refresh_tag_counter()
675 def refresh_theme_counter(self):
677 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
678 for tag in fragment.tags.filter(category='theme').order_by():
679 tags[tag.pk] = tags.get(tag.pk, 0) + 1
680 self.set__theme_counter_value(tags)
681 self.save(reset_short_html=False)
684 def reset_theme_counter(self):
685 self._theme_counter = None
686 self.save(reset_short_html=False)
688 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter as a dict keyed by integer.

    When nothing is stored (`_theme_counter` is None) the result of
    refresh_theme_counter() is returned instead.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        # Keys are converted with int() before the dict is returned.
        counter = {}
        for raw_key, amount in stored.iteritems():
            counter[int(raw_key)] = amount
        return counter
    return self.refresh_theme_counter()
696 def pretty_title(self, html_links=False):
698 names = list(book.tags.filter(category='author'))
704 names.extend(reversed(books))
707 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
709 names = [tag.name for tag in names]
711 return ', '.join(names)
714 def tagged_top_level(cls, tags):
715 """ Returns top-level books tagged with `tags'.
717 It only returns those books which don't have ancestors which are
718 also tagged with those tags.
721 # get relevant books and their tags
722 objects = cls.tagged.with_all(tags)
723 # eliminate descendants
724 l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
725 descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
727 objects = objects.exclude(pk__in=descendants_keys)
732 class Fragment(models.Model):
733 text = models.TextField()
734 short_text = models.TextField(editable=False)
735 _short_html = models.TextField(editable=False)
736 anchor = models.CharField(max_length=120)
737 book = models.ForeignKey(Book, related_name='fragments')
739 objects = models.Manager()
740 tagged = managers.ModelTaggedItemManager(Tag)
741 tags = managers.TagDescriptor(Tag)
744 ordering = ('book', 'anchor',)
745 verbose_name = _('fragment')
746 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book's text view with "#m<anchor>" appended."""
    text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (text_url, self.anchor)
751 def short_html(self):
752 key = '_short_html_%s' % get_language()
753 short_html = getattr(self, key)
754 if short_html and len(short_html):
755 return mark_safe(short_html)
757 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
758 {'fragment': self})))
760 return mark_safe(getattr(self, key))
763 class FileRecord(models.Model):
764 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
765 type = models.CharField(_('type'), max_length=20, db_index=True)
766 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
767 time = models.DateTimeField(_('time'), auto_now_add=True)
770 ordering = ('-time','-slug', '-type')
771 verbose_name = _('file record')
772 verbose_name_plural = _('file records')
def __unicode__(self):
    """Render the record as "<sha1> <slug>.<type>"."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Connected to the `tags_updated` signal. `sender` is the object whose
    tags changed and `affected_tags` the tags involved.

    - Every affected tag gets `book_count` reset to None and `changed_at`
      set to the current time.
    - If `sender` is a Book and any affected tag is outside the 'book',
      'theme' and 'set' categories, the book's tag counter is reset.
    - If `sender` is a Fragment and any affected tag is a 'theme' tag,
      the theme counter of the fragment's book is reset.
    """
    # Collect the primary keys once and build one base queryset, instead of
    # rebuilding the same filter for every check.
    affected_pks = [tag.pk for tag in affected_tags]
    touched_tags = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched_tags.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        # exists() is enough here: only presence matters, not the count.
        if touched_tags.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched_tags.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
802 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
803 """ refresh all the short_html stuff on BookMedia delete """
804 if sender == Book.medias.through and reverse and action == 'pre_clear':
805 for book in instance.book_set.all():
807 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """Clear a BookMedia's book relation before the media is deleted.

    The m2m link is cleared explicitly so that the related Books can be
    refreshed. Deletions of any other model are ignored.
    """
    if sender != BookMedia:
        return
    instance.book_set.clear()
pre_delete.connect(_pre_delete_handler)
815 def _post_save_handler(sender, instance, **kwargs):
816 """ refresh all the short_html stuff on BookMedia update """
817 if sender == BookMedia:
818 for book in instance.book_set.all():
820 post_save.connect(_post_save_handler)