1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from datetime import datetime
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.core.cache import cache
10 from django.utils.translation import ugettext_lazy as _
11 from django.contrib.auth.models import User
12 from django.core.files import File
13 from django.template.loader import render_to_string
14 from django.utils.safestring import mark_safe
15 from django.utils.translation import get_language
16 from django.core.urlresolvers import reverse
17 from django.db.models.signals import post_save, m2m_changed, pre_delete
19 from django.conf import settings
21 from newtagging.models import TagBase, tags_updated
22 from newtagging import managers
23 from catalogue.fields import JSONField, OverwritingFileField
24 from catalogue.utils import ExistingFile
26 from librarian import dcparser, html, epub, NoDublinCore
28 from mutagen import id3
29 from slughifi import slughifi
30 from sortify import sortify
34 ('author', _('author')),
35 ('epoch', _('epoch')),
37 ('genre', _('genre')),
38 ('theme', _('theme')),
44 ('odt', _('ODT file')),
45 ('mp3', _('MP3 file')),
46 ('ogg', _('OGG file')),
47 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain tags of a single category."""

    def __init__(self, subcategory):
        """Store the category value that every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
# Human-readable tag name.
name = models.CharField(_('name'), max_length=50, db_index=True)
# URL slug; uniqueness is enforced together with ``category``.
slug = models.SlugField(_('slug'), max_length=120, db_index=True)
# Key the tags are ordered by.
sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
# Category identifier, restricted to the values listed in TAG_CATEGORIES.
category = models.CharField(_('category'), max_length=50, blank=False, null=False,
    db_index=True, choices=TAG_CATEGORIES)
description = models.TextField(_('description'), blank=True)
main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

# Optional related user.
# NOTE(review): presumably the owner of a user-created tag -- confirm.
user = models.ForeignKey(User, blank=True, null=True)
# Cached object count; NULL means the count has to be recomputed.
book_count = models.IntegerField(_('book count'), blank=True, null=True)
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)

created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# auto_now=True refreshes this value on every save, so it is the time of the
# last change and is labelled accordingly.
changed_at = models.DateTimeField(_('modification date'), auto_now=True, db_index=True)
76 class UrlDeprecationWarning(DeprecationWarning):
87 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
90 ordering = ('sort_key',)
91 verbose_name = _('tag')
92 verbose_name_plural = _('tags')
93 unique_together = (("slug", "category"),)
95 def __unicode__(self):
99 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the view name and positional arguments for this tag's listing.

    NOTE(review): a bare (view, args) tuple is returned, so this presumably
    relies on a ``permalink`` decorator on a line not visible here -- confirm.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Return True when the tag's description is not empty."""
    description_length = len(self.description)
    return description_length > 0
# Attributes read by the Django admin when this method is used as a column.
has_description.short_description = _('description')
has_description.boolean = True
111 """ returns global book count for book tags, fragment count for themes """
113 if self.book_count is None:
114 if self.category == 'book':
116 objects = Book.objects.none()
117 elif self.category == 'theme':
118 objects = Fragment.tagged.with_all((self,))
120 objects = Book.tagged.with_all((self,)).order_by()
121 if self.category != 'set':
122 # eliminate descendants
123 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
124 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
126 objects = objects.exclude(pk__in=descendants_keys)
127 self.book_count = objects.count()
129 return self.book_count
132 def get_tag_list(tags):
133 if isinstance(tags, basestring):
138 tags_splitted = tags.split('/')
139 for name in tags_splitted:
141 real_tags.append(Tag.objects.get(slug=name, category=category))
143 elif name in Tag.categories_rev:
144 category = Tag.categories_rev[name]
147 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
149 except Tag.MultipleObjectsReturned, e:
150 ambiguous_slugs.append(name)
153 # something strange left off
154 raise Tag.DoesNotExist()
156 # some tags should be qualified
157 e = Tag.MultipleObjectsReturned()
159 e.ambiguous_slugs = ambiguous_slugs
162 e = Tag.UrlDeprecationWarning()
167 return TagBase.get_tag_list(tags)
171 return '/'.join((Tag.categories_dict[self.category], self.slug))
174 # TODO: why is this hard-coded ?
175 def book_upload_path(ext=None, maxlen=100):
176 def get_dynamic_path(media, filename, ext=ext):
177 # how to put related book's slug here?
179 if media.type == 'daisy':
184 name = slughifi(filename.split(".")[0])
186 name = slughifi(media.name)
187 return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
188 return get_dynamic_path
191 class BookMedia(models.Model):
# Media format identifier, restricted to the values listed in MEDIA_FORMATS.
# max_length must be an integer: with a string, Python 2 length comparisons
# against it never fail, so over-long values are not rejected.
type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
name = models.CharField(_('name'), max_length=100)
file = OverwritingFileField(_('file'), upload_to=book_upload_path())
uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
# JSON blob; save() merges the metadata read from the media file into it.
extra_info = JSONField(_('extra information'), default='{}', editable=False)
book = models.ForeignKey('Book', related_name='media')
# Filled in by save() from read_source_sha1(); may be NULL.
source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
def __unicode__(self):
    """Return the media name followed by the file's base name in parentheses."""
    stored_name = self.file.name
    # Keep only the part after the last slash of the stored path.
    base_name = stored_name.rsplit("/", 1)[-1]
    return "%s (%s)" % (self.name, base_name)
204 ordering = ('type', 'name')
205 verbose_name = _('book media')
206 verbose_name_plural = _('book media')
208 def save(self, *args, **kwargs):
210 old = BookMedia.objects.get(pk=self.pk)
211 except BookMedia.DoesNotExist, e:
214 # if name changed, change the file name, too
215 if slughifi(self.name) != slughifi(old.name):
216 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
218 super(BookMedia, self).save(*args, **kwargs)
219 extra_info = self.get_extra_info_value()
220 extra_info.update(self.read_meta())
221 self.set_extra_info_value(extra_info)
222 self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
223 return super(BookMedia, self).save(*args, **kwargs)
227 Reads some metadata from the audiobook.
230 artist_name = director_name = project = funded_by = ''
231 if self.type == 'mp3':
233 audio = id3.ID3(self.file.path)
234 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
235 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
236 project = ", ".join([t.data for t in audio.getall('PRIV')
237 if t.owner=='wolnelektury.pl?project'])
238 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
239 if t.owner=='wolnelektury.pl?funded_by'])
242 elif self.type == 'ogg':
244 audio = mutagen.File(self.file.path)
245 artist_name = ', '.join(audio.get('artist', []))
246 director_name = ', '.join(audio.get('conductor', []))
247 project = ", ".join(audio.get('project', []))
248 funded_by = ", ".join(audio.get('funded_by', []))
253 return {'artist_name': artist_name, 'director_name': director_name,
254 'project': project, 'funded_by': funded_by}
257 def read_source_sha1(filepath, filetype):
259 Reads source file SHA1 from audiobok metadata.
262 if filetype == 'mp3':
264 audio = id3.ID3(filepath)
265 return [t.data for t in audio.getall('PRIV')
266 if t.owner=='wolnelektury.pl?flac_sha1'][0]
269 elif filetype == 'ogg':
271 audio = mutagen.File(filepath)
272 return audio.get('flac_sha1', [None])[0]
279 class Book(models.Model):
title = models.CharField(_('title'), max_length=120)
# Recomputed from the title on save; books are ordered by it.
sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
description = models.TextField(_('description'), blank=True)
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# auto_now=True refreshes this value on every save, so it is the time of the
# last change and is labelled accordingly.
changed_at = models.DateTimeField(_('modification date'), auto_now=True, db_index=True)
# Position of this book among the children of its parent.
parent_number = models.IntegerField(_('parent number'), default=0)
# JSON blob holding the metadata dictionary parsed from the source document.
extra_info = JSONField(_('extra information'), default='{}')
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)
# files generated during publication
xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

# Optional parent book; the reverse accessor is ``children``.
parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)
302 class AlreadyExists(Exception):
306 ordering = ('sort_key',)
307 verbose_name = _('book')
308 verbose_name_plural = _('books')
310 def __unicode__(self):
313 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
314 self.sort_key = sortify(self.title)
317 self.reset_short_html()
319 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the view name and positional arguments for the book detail page.

    NOTE(review): a bare (view, args) tuple is returned, so this presumably
    relies on a ``permalink`` decorator on a line not visible here -- confirm.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: 'l-' + slug, cut to 120 chars."""
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
333 slug = self.book_tag_slug()
334 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
336 book_tag.name = self.title[:50]
337 book_tag.sort_key = self.title.lower()
341 def has_media(self, type):
368 if self.media.filter(type=type).exists():
373 def get_media(self, type):
374 if self.has_media(type):
378 return self.html_file
380 return self.epub_file
386 return self.media.filter(type=type)
391 return self.get_media("mp3")
393 return self.get_media("odt")
395 return self.get_media("ogg")
397 return self.get_media("daisy")
def reset_short_html(self):
    """Drop the cached short HTML of this book and of all its fragments.

    One cache entry per language listed in ``settings.LANGUAGES`` is deleted.
    """
    key_template = "Book.short_html/%d/%s"
    for language_code, _language_name in settings.LANGUAGES:
        cache.delete(key_template % (self.id, language_code))
    # Fragment.short_html relies on book's tags, so reset it here too
    for fragment in self.fragments.all():
        fragment.reset_short_html()
407 def short_html(self):
408 cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
409 short_html = cache.get(cache_key)
411 if short_html is not None:
412 return mark_safe(short_html)
414 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
415 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
418 # files generated during publication
419 if self.has_media("html"):
420 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
421 if self.has_media("pdf"):
422 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
423 if self.root_ancestor.has_media("epub"):
424 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
425 if self.has_media("txt"):
426 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
428 for m in self.media.order_by('type'):
429 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
431 formats = [mark_safe(format) for format in formats]
433 short_html = unicode(render_to_string('catalogue/book_short.html',
434 {'book': self, 'tags': tags, 'formats': formats}))
435 cache.set(cache_key, short_html)
436 return mark_safe(short_html)
439 def root_ancestor(self):
440 """ returns the oldest ancestor """
442 if not hasattr(self, '_root_ancestor'):
446 self._root_ancestor = book
447 return self._root_ancestor
def has_description(self):
    """Return True when the book's description is not empty."""
    description_length = len(self.description)
    return description_length > 0
# Attributes read by the Django admin when this method is used as a column.
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Return True when ``pdf_file`` is set."""
    if self.pdf_file:
        return True
    return False
# Attributes read by the Django admin when this method is used as a column.
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Return True when ``epub_file`` is set."""
    if self.epub_file:
        return True
    return False
# Attributes read by the Django admin when this method is used as a column.
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Return True when ``txt_file`` is set."""
    return bool(self.txt_file)
# Attributes read by the Django admin when this method is used as a column.
# The label must name the TXT format; 'HTML' belongs to has_html_file.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Return True when ``html_file`` is set."""
    if self.html_file:
        return True
    return False
# Attributes read by the Django admin when this method is used as a column.
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Return True when ``has_media("odt")`` reports a truthy result."""
    media_found = self.has_media("odt")
    return True if media_found else False
# Attributes read by the Django admin when this method is used as a column.
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Return True when ``has_media("mp3")`` reports a truthy result."""
    media_found = self.has_media("mp3")
    return True if media_found else False
# Attributes read by the Django admin when this method is used as a column.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Return True when ``has_media("ogg")`` reports a truthy result."""
    media_found = self.has_media("ogg")
    return True if media_found else False
# Attributes read by the Django admin when this method is used as a column.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Return True when ``has_media("daisy")`` reports a truthy result."""
    media_found = self.has_media("daisy")
    return True if media_found else False
# Attributes read by the Django admin when this method is used as a column.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
496 def build_epub(self, remove_descendants=True):
497 """ (Re)builds the epub file.
498 If book has a parent, does nothing.
499 Unless remove_descendants is False, descendants' epubs are removed.
502 from StringIO import StringIO
503 from hashlib import sha1
504 from django.core.files.base import ContentFile
505 from librarian import DocProvider
507 class BookImportDocProvider(DocProvider):
508 """ used for joined EPUBs """
510 def __init__(self, book):
513 def by_slug(self, slug):
514 if slug == self.book.slug:
515 return self.book.xml_file
517 return Book.objects.get(slug=slug).xml_file
523 epub_file = StringIO()
525 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
526 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
527 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
531 book_descendants = list(self.children.all())
532 while len(book_descendants) > 0:
533 child_book = book_descendants.pop(0)
534 if remove_descendants and child_book.has_epub_file():
535 child_book.epub_file.delete()
536 # save anyway, to refresh short_html
538 book_descendants += list(child_book.children.all())
541 from StringIO import StringIO
542 from django.core.files.base import ContentFile
543 from librarian import text
546 text.transform(open(self.xml_file.path), out)
547 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
550 def build_html(self):
551 from tempfile import NamedTemporaryFile
552 from markupstring import MarkupString
554 meta_tags = list(self.tags.filter(
555 category__in=('author', 'epoch', 'genre', 'kind')))
556 book_tag = self.book_tag()
558 html_file = NamedTemporaryFile()
559 if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
560 self.html_file.save('%s.html' % self.slug, File(html_file))
562 # get ancestor l-tags for adding to new fragments
566 ancestor_tags.append(p.book_tag())
569 # Delete old fragments and create them from scratch
570 self.fragments.all().delete()
572 closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
573 for fragment in closed_fragments.values():
575 theme_names = [s.strip() for s in fragment.themes.split(',')]
576 except AttributeError:
579 for theme_name in theme_names:
582 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
584 tag.name = theme_name
585 tag.sort_key = theme_name.lower()
591 text = fragment.to_string()
593 if (len(MarkupString(text)) > 240):
594 short_text = unicode(MarkupString(text)[:160])
595 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
596 text=text, short_text=short_text)
599 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
606 def from_xml_file(cls, xml_file, **kwargs):
607 # use librarian to parse meta-data
608 book_info = dcparser.parse(xml_file)
610 if not isinstance(xml_file, File):
611 xml_file = File(open(xml_file))
614 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
619 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
622 # check for parts before we do anything
624 if hasattr(book_info, 'parts'):
625 for part_url in book_info.parts:
626 base, slug = part_url.rsplit('/', 1)
628 children.append(Book.objects.get(slug=slug))
629 except Book.DoesNotExist, e:
630 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
634 book_base, book_slug = book_info.url.rsplit('/', 1)
635 if re.search(r'[^a-zA-Z0-9-]', book_slug):
636 raise ValueError('Invalid characters in slug')
637 book, created = Book.objects.get_or_create(slug=book_slug)
643 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
644 # Save shelves for this book
645 book_shelves = list(book.tags.filter(category='set'))
647 book.title = book_info.title
648 book.set_extra_info_value(book_info.to_dict())
652 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
653 for field_name, category in categories:
655 tag_names = getattr(book_info, field_name)
657 tag_names = [getattr(book_info, category)]
658 for tag_name in tag_names:
659 tag_sort_key = tag_name
660 if category == 'author':
661 tag_sort_key = tag_name.last_name
662 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
663 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
666 tag.sort_key = sortify(tag_sort_key.lower())
668 meta_tags.append(tag)
670 book.tags = set(meta_tags + book_shelves)
672 book_tag = book.book_tag()
674 for n, child_book in enumerate(children):
675 child_book.parent = book
676 child_book.parent_number = n
679 # Save XML and HTML files
680 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
682 # delete old fragments when overwriting
683 book.fragments.all().delete()
685 if book.build_html():
686 if not settings.NO_BUILD_TXT and build_txt:
689 if not settings.NO_BUILD_EPUB and build_epub:
690 book.root_ancestor.build_epub()
692 book_descendants = list(book.children.all())
693 # add l-tag to descendants and their fragments
694 # delete unnecessary EPUB files
695 while len(book_descendants) > 0:
696 child_book = book_descendants.pop(0)
697 child_book.tags = list(child_book.tags) + [book_tag]
699 for fragment in child_book.fragments.all():
700 fragment.tags = set(list(fragment.tags) + [book_tag])
701 book_descendants += list(child_book.children.all())
704 book.reset_tag_counter()
705 book.reset_theme_counter()
710 def reset_tag_counter(self):
711 cache_key = "Book.tag_counter/%d" % self.id
712 cache.delete(cache_key)
714 self.parent.reset_tag_counter()
717 def tag_counter(self):
718 cache_key = "Book.tag_counter/%d" % self.id
719 tags = cache.get(cache_key)
722 for child in self.children.all().order_by():
723 for tag_pk, value in child.tag_counter.iteritems():
724 tags[tag_pk] = tags.get(tag_pk, 0) + value
725 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
728 cache.set(cache_key, tags)
731 def reset_theme_counter(self):
732 cache_key = "Book.theme_counter/%d" % self.id
733 cache.delete(cache_key)
735 self.parent.reset_theme_counter()
738 def theme_counter(self):
739 cache_key = "Book.theme_counter/%d" % self.id
740 tags = cache.get(cache_key)
743 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
744 for tag in fragment.tags.filter(category='theme').order_by():
745 tags[tag.pk] = tags.get(tag.pk, 0) + 1
747 cache.set(cache_key, tags)
750 def pretty_title(self, html_links=False):
752 names = list(book.tags.filter(category='author'))
758 names.extend(reversed(books))
761 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
763 names = [tag.name for tag in names]
765 return ', '.join(names)
768 def tagged_top_level(cls, tags):
769 """ Returns top-level books tagged with `tags'.
771 It only returns those books which don't have ancestors which are
772 also tagged with those tags.
775 # get relevant books and their tags
776 objects = cls.tagged.with_all(tags)
777 # eliminate descendants
778 l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
779 descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
781 objects = objects.exclude(pk__in=descendants_keys)
786 class Fragment(models.Model):
# Full fragment text.
text = models.TextField()
# Shortened text; not editable through forms.
short_text = models.TextField(editable=False)
# Anchor name appended to the book text URL to point at this fragment.
anchor = models.CharField(max_length=120)
# Owning book; the reverse accessor is ``fragments``.
book = models.ForeignKey(Book, related_name='fragments')

# Plain manager plus the tagging helpers bound to the Tag model.
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)
797 ordering = ('book', 'anchor',)
798 verbose_name = _('fragment')
799 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the book text URL followed by '#m' and this fragment's anchor."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
def reset_short_html(self):
    """Delete this fragment's cached short HTML for every configured language."""
    key_template = "Fragment.short_html/%d/%s"
    for language_code, _language_name in settings.LANGUAGES:
        cache.delete(key_template % (self.id, language_code))
809 def short_html(self):
810 cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
811 short_html = cache.get(cache_key)
813 if short_html is not None:
814 return mark_safe(short_html)
816 short_html = unicode(render_to_string('catalogue/fragment_short.html',
818 cache.set(cache_key, short_html)
819 return mark_safe(short_html)
822 class FileRecord(models.Model):
# Slug of the book the recorded file belongs to.
slug = models.SlugField(_('slug'), max_length=120, db_index=True)
# File type identifier, e.g. 'epub'.
type = models.CharField(_('type'), max_length=20, db_index=True)
# Hex digest of the file contents (40 characters).
sha1 = models.CharField(_('sha-1 hash'), max_length=40)
# Set automatically when the record is created.
time = models.DateTimeField(_('time'), auto_now_add=True)
829 ordering = ('-time','-slug', '-type')
830 verbose_name = _('file record')
831 verbose_name_plural = _('file records')
def __unicode__(self):
    """Return '<sha1> <slug>.<type>' for this record."""
    file_name = "%s.%s" % (self.slug, self.type)
    return "%s %s" % (self.sha1, file_name)
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of an object have changed.

    Connected to the ``tags_updated`` signal.

    sender -- the object whose tags changed; only Book and Fragment
        instances trigger counter resets.
    affected_tags -- iterable of the Tag objects that were touched.
    """
    # Collect the keys once: the iterable may be one-shot, and every query
    # below needs the same set of tags.
    affected_pks = [tag.pk for tag in affected_tags]
    touched_tags = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched_tags.update(book_count=None, changed_at=datetime.now())

    # if book tags changed, reset book tag counter
    if isinstance(sender, Book):
        # exists() is enough here; the number of matching tags is irrelevant.
        if touched_tags.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    # if fragment theme changed, reset book theme counter
    elif isinstance(sender, Fragment):
        if touched_tags.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
861 def _pre_delete_handler(sender, instance, **kwargs):
862 """ refresh Book on BookMedia delete """
863 if sender == BookMedia:
865 pre_delete.connect(_pre_delete_handler)
867 def _post_save_handler(sender, instance, **kwargs):
868 """ refresh all the short_html stuff on BookMedia update """
869 if sender == BookMedia:
871 post_save.connect(_post_save_handler)