1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from datetime import datetime
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.core.cache import cache
10 from django.utils.translation import ugettext_lazy as _
11 from django.contrib.auth.models import User
12 from django.core.files import File
13 from django.template.loader import render_to_string
14 from django.utils.safestring import mark_safe
15 from django.utils.translation import get_language
16 from django.core.urlresolvers import reverse
17 from django.db.models.signals import post_save, m2m_changed, pre_delete
19 from django.conf import settings
21 from newtagging.models import TagBase, tags_updated
22 from newtagging import managers
23 from catalogue.fields import JSONField, OverwritingFileField
24 from catalogue.utils import ExistingFile
26 from librarian import dcparser, html, epub, NoDublinCore
28 from mutagen import id3
29 from slughifi import slughifi
30 from sortify import sortify
34 ('author', _('author')),
35 ('epoch', _('epoch')),
37 ('genre', _('genre')),
38 ('theme', _('theme')),
44 ('odt', _('ODT file')),
45 ('mp3', _('MP3 file')),
46 ('ogg', _('OGG file')),
47 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is limited to one tag category."""

    def __init__(self, subcategory):
        """Remember the category value used to filter every queryset."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
60 name = models.CharField(_('name'), max_length=50, db_index=True)
61 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
63 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
64 db_index=True, choices=TAG_CATEGORIES)
65 description = models.TextField(_('description'), blank=True)
66 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
68 user = models.ForeignKey(User, blank=True, null=True)
69 book_count = models.IntegerField(_('book count'), blank=True, null=True)
70 gazeta_link = models.CharField(blank=True, max_length=240)
71 wiki_link = models.CharField(blank=True, max_length=240)
73 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
74 changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
76 class UrlDeprecationWarning(DeprecationWarning):
87 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
90 ordering = ('sort_key',)
91 verbose_name = _('tag')
92 verbose_name_plural = _('tags')
93 unique_together = (("slug", "category"),)
95 def __unicode__(self):
99 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the (view name, positional args) pair for this tag's list page.

    NOTE(review): presumably wrapped by the ``permalink`` decorator, which
    would resolve the pair to a URL; the decorator line is not visible in
    this excerpt, so confirm.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this tag's description holds at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes following the Django admin list_display convention:
# a column label and a flag asking for boolean rendering.
has_description.short_description = _('description')
has_description.boolean = True
111 """ returns global book count for book tags, fragment count for themes """
113 if self.book_count is None:
114 if self.category == 'book':
116 objects = Book.objects.none()
117 elif self.category == 'theme':
118 objects = Fragment.tagged.with_all((self,))
120 objects = Book.tagged.with_all((self,)).order_by()
121 if self.category != 'set':
122 # eliminate descendants
123 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
124 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
126 objects = objects.exclude(pk__in=descendants_keys)
127 self.book_count = objects.count()
129 return self.book_count
132 def get_tag_list(tags):
133 if isinstance(tags, basestring):
138 tags_splitted = tags.split('/')
139 for name in tags_splitted:
141 real_tags.append(Tag.objects.get(slug=name, category=category))
143 elif name in Tag.categories_rev:
144 category = Tag.categories_rev[name]
147 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
149 except Tag.MultipleObjectsReturned, e:
150 ambiguous_slugs.append(name)
153 # something strange left off
154 raise Tag.DoesNotExist()
156 # some tags should be qualified
157 e = Tag.MultipleObjectsReturned()
159 e.ambiguous_slugs = ambiguous_slugs
162 e = Tag.UrlDeprecationWarning()
167 return TagBase.get_tag_list(tags)
171 return '/'.join((Tag.categories_dict[self.category], self.slug))
174 # TODO: why is this hard-coded ?
175 def book_upload_path(ext=None, maxlen=100):
176 def get_dynamic_path(media, filename, ext=ext):
177 # how to put related book's slug here?
179 if media.type == 'daisy':
184 name = slughifi(filename.split(".")[0])
186 name = slughifi(media.name)
187 return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
188 return get_dynamic_path
191 class BookMedia(models.Model):
192 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
193 name = models.CharField(_('name'), max_length="100")
194 file = OverwritingFileField(_('file'), upload_to=book_upload_path())
195 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
196 extra_info = JSONField(_('extra information'), default='{}', editable=False)
197 book = models.ForeignKey('Book', related_name='media')
198 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
def __unicode__(self):
    """Return "<name> (<file basename>)" for this media object."""
    # Only the last path component of the stored file name is shown.
    stored_basename = self.file.name.rsplit("/", 1)[-1]
    return "%s (%s)" % (self.name, stored_basename)
204 ordering = ('type', 'name')
205 verbose_name = _('book media')
206 verbose_name_plural = _('book media')
208 def save(self, *args, **kwargs):
210 old = BookMedia.objects.get(pk=self.pk)
211 except BookMedia.DoesNotExist, e:
214 # if name changed, change the file name, too
215 if slughifi(self.name) != slughifi(old.name):
216 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
218 super(BookMedia, self).save(*args, **kwargs)
219 extra_info = self.get_extra_info_value()
220 extra_info.update(self.read_meta())
221 self.set_extra_info_value(extra_info)
222 self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
223 return super(BookMedia, self).save(*args, **kwargs)
227 Reads some metadata from the audiobook.
230 artist_name = director_name = project = funded_by = ''
231 if self.type == 'mp3':
233 audio = id3.ID3(self.file.path)
234 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
235 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
236 project = ", ".join([t.data for t in audio.getall('PRIV')
237 if t.owner=='wolnelektury.pl?project'])
238 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
239 if t.owner=='wolnelektury.pl?funded_by'])
242 elif self.type == 'ogg':
244 audio = mutagen.File(self.file.path)
245 artist_name = ', '.join(audio.get('artist', []))
246 director_name = ', '.join(audio.get('conductor', []))
247 project = ", ".join(audio.get('project', []))
248 funded_by = ", ".join(audio.get('funded_by', []))
253 return {'artist_name': artist_name, 'director_name': director_name,
254 'project': project, 'funded_by': funded_by}
257 def read_source_sha1(filepath, filetype):
Reads source file SHA1 from audiobook metadata.
262 if filetype == 'mp3':
264 audio = id3.ID3(filepath)
265 return [t.data for t in audio.getall('PRIV')
266 if t.owner=='wolnelektury.pl?flac_sha1'][0]
269 elif filetype == 'ogg':
271 audio = mutagen.File(filepath)
272 return audio.get('flac_sha1', [None])[0]
279 class Book(models.Model):
280 title = models.CharField(_('title'), max_length=120)
281 sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
282 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
283 description = models.TextField(_('description'), blank=True)
284 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
285 changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
286 parent_number = models.IntegerField(_('parent number'), default=0)
287 extra_info = JSONField(_('extra information'), default='{}')
288 gazeta_link = models.CharField(blank=True, max_length=240)
289 wiki_link = models.CharField(blank=True, max_length=240)
290 # files generated during publication
291 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
292 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
293 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
294 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
295 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
297 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
298 objects = models.Manager()
299 tagged = managers.ModelTaggedItemManager(Tag)
300 tags = managers.TagDescriptor(Tag)
302 class AlreadyExists(Exception):
306 ordering = ('sort_key',)
307 verbose_name = _('book')
308 verbose_name_plural = _('books')
310 def __unicode__(self):
313 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
314 self.sort_key = sortify(self.title)
317 self.reset_short_html()
319 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the (view name, positional args) pair for the book detail page.

    NOTE(review): presumably wrapped by the ``permalink`` decorator, which
    would resolve the pair to a URL; the decorator line is not visible in
    this excerpt, so confirm.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: 'l-' + slug, cut to 120 chars."""
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
333 slug = self.book_tag_slug()
334 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
336 book_tag.name = self.title[:50]
337 book_tag.sort_key = self.title.lower()
341 def has_media(self, type):
368 if self.media.filter(type=type).exists():
373 def get_media(self, type):
374 if self.has_media(type):
378 return self.html_file
380 return self.epub_file
386 return self.media.filter(type=type)
391 return self.get_media("mp3")
393 return self.get_media("odt")
395 return self.get_media("ogg")
397 return self.get_media("daisy")
399 def reset_short_html(self):
403 cache_key = "Book.short_html/%d/%s"
404 for lang, langname in settings.LANGUAGES:
405 cache.delete(cache_key % (self.id, lang))
406 # Fragment.short_html relies on book's tags, so reset it here too
407 for fragm in self.fragments.all():
408 fragm.reset_short_html()
410 def short_html(self):
412 cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
413 short_html = cache.get(cache_key)
417 if short_html is not None:
418 return mark_safe(short_html)
420 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
421 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
424 # files generated during publication
425 if self.has_media("html"):
426 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
427 if self.has_media("pdf"):
428 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
429 if self.root_ancestor.has_media("epub"):
430 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
431 if self.has_media("txt"):
432 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
434 for m in self.media.order_by('type'):
435 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
437 formats = [mark_safe(format) for format in formats]
439 short_html = unicode(render_to_string('catalogue/book_short.html',
440 {'book': self, 'tags': tags, 'formats': formats}))
443 cache.set(cache_key, short_html)
444 return mark_safe(short_html)
447 def root_ancestor(self):
448 """ returns the oldest ancestor """
450 if not hasattr(self, '_root_ancestor'):
454 self._root_ancestor = book
455 return self._root_ancestor
def has_description(self):
    """Tell whether this book's description holds at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes following the Django admin list_display convention:
# a column label and a flag asking for boolean rendering.
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Report as a plain bool whether ``pdf_file`` is truthy."""
    return True if self.pdf_file else False
# Django admin list_display convention: column label and boolean rendering.
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Report as a plain bool whether ``epub_file`` is truthy."""
    return True if self.epub_file else False
# Django admin list_display convention: column label and boolean rendering.
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Report as a plain bool whether ``txt_file`` is truthy."""
    return bool(self.txt_file)
# Django admin list_display convention: column label and boolean rendering.
# The label used to read 'HTML' (copy-paste from has_html_file), which
# produced two identically named columns; it must name the TXT format.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report as a plain bool whether ``html_file`` is truthy."""
    return True if self.html_file else False
# Django admin list_display convention: column label and boolean rendering.
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Report as a plain bool what ``has_media("odt")`` answers."""
    odt_present = self.has_media("odt")
    return True if odt_present else False
# Django admin list_display convention: column label and boolean rendering.
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Report as a plain bool what ``has_media("mp3")`` answers."""
    mp3_present = self.has_media("mp3")
    return True if mp3_present else False
# Django admin list_display convention: column label and boolean rendering.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Report as a plain bool what ``has_media("ogg")`` answers."""
    ogg_present = self.has_media("ogg")
    return True if ogg_present else False
# Django admin list_display convention: column label and boolean rendering.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Report as a plain bool what ``has_media("daisy")`` answers."""
    daisy_present = self.has_media("daisy")
    return True if daisy_present else False
# Django admin list_display convention: column label and boolean rendering.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
504 def build_epub(self, remove_descendants=True):
505 """ (Re)builds the epub file.
506 If book has a parent, does nothing.
507 Unless remove_descendants is False, descendants' epubs are removed.
510 from StringIO import StringIO
511 from hashlib import sha1
512 from django.core.files.base import ContentFile
513 from librarian import DocProvider
515 class BookImportDocProvider(DocProvider):
516 """ used for joined EPUBs """
518 def __init__(self, book):
521 def by_slug(self, slug):
522 if slug == self.book.slug:
523 return self.book.xml_file
525 return Book.objects.get(slug=slug).xml_file
531 epub_file = StringIO()
533 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
534 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
535 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
539 book_descendants = list(self.children.all())
540 while len(book_descendants) > 0:
541 child_book = book_descendants.pop(0)
542 if remove_descendants and child_book.has_epub_file():
543 child_book.epub_file.delete()
544 # save anyway, to refresh short_html
546 book_descendants += list(child_book.children.all())
549 from StringIO import StringIO
550 from django.core.files.base import ContentFile
551 from librarian import text
554 text.transform(open(self.xml_file.path), out)
555 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
558 def build_html(self):
559 from tempfile import NamedTemporaryFile
560 from markupstring import MarkupString
562 meta_tags = list(self.tags.filter(
563 category__in=('author', 'epoch', 'genre', 'kind')))
564 book_tag = self.book_tag()
566 html_file = NamedTemporaryFile()
567 if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
568 self.html_file.save('%s.html' % self.slug, File(html_file))
570 # get ancestor l-tags for adding to new fragments
574 ancestor_tags.append(p.book_tag())
577 # Delete old fragments and create them from scratch
578 self.fragments.all().delete()
580 closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
581 for fragment in closed_fragments.values():
583 theme_names = [s.strip() for s in fragment.themes.split(',')]
584 except AttributeError:
587 for theme_name in theme_names:
590 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
592 tag.name = theme_name
593 tag.sort_key = theme_name.lower()
599 text = fragment.to_string()
601 if (len(MarkupString(text)) > 240):
602 short_text = unicode(MarkupString(text)[:160])
603 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
604 text=text, short_text=short_text)
607 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
614 def from_xml_file(cls, xml_file, **kwargs):
615 # use librarian to parse meta-data
616 book_info = dcparser.parse(xml_file)
618 if not isinstance(xml_file, File):
619 xml_file = File(open(xml_file))
622 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
627 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
630 # check for parts before we do anything
632 if hasattr(book_info, 'parts'):
633 for part_url in book_info.parts:
634 base, slug = part_url.rsplit('/', 1)
636 children.append(Book.objects.get(slug=slug))
637 except Book.DoesNotExist, e:
638 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
642 book_base, book_slug = book_info.url.rsplit('/', 1)
643 if re.search(r'[^a-zA-Z0-9-]', book_slug):
644 raise ValueError('Invalid characters in slug')
645 book, created = Book.objects.get_or_create(slug=book_slug)
651 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
652 # Save shelves for this book
653 book_shelves = list(book.tags.filter(category='set'))
655 book.title = book_info.title
656 book.set_extra_info_value(book_info.to_dict())
660 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
661 for field_name, category in categories:
663 tag_names = getattr(book_info, field_name)
665 tag_names = [getattr(book_info, category)]
666 for tag_name in tag_names:
667 tag_sort_key = tag_name
668 if category == 'author':
669 tag_sort_key = tag_name.last_name
670 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
671 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
674 tag.sort_key = sortify(tag_sort_key.lower())
676 meta_tags.append(tag)
678 book.tags = set(meta_tags + book_shelves)
680 book_tag = book.book_tag()
682 for n, child_book in enumerate(children):
683 child_book.parent = book
684 child_book.parent_number = n
687 # Save XML and HTML files
688 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
690 # delete old fragments when overwriting
691 book.fragments.all().delete()
693 if book.build_html():
694 if not settings.NO_BUILD_TXT and build_txt:
697 if not settings.NO_BUILD_EPUB and build_epub:
698 book.root_ancestor.build_epub()
700 book_descendants = list(book.children.all())
701 # add l-tag to descendants and their fragments
702 # delete unnecessary EPUB files
703 while len(book_descendants) > 0:
704 child_book = book_descendants.pop(0)
705 child_book.tags = list(child_book.tags) + [book_tag]
707 for fragment in child_book.fragments.all():
708 fragment.tags = set(list(fragment.tags) + [book_tag])
709 book_descendants += list(child_book.children.all())
712 book.reset_tag_counter()
713 book.reset_theme_counter()
718 def reset_tag_counter(self):
722 cache_key = "Book.tag_counter/%d" % self.id
723 cache.delete(cache_key)
725 self.parent.reset_tag_counter()
728 def tag_counter(self):
730 cache_key = "Book.tag_counter/%d" % self.id
731 tags = cache.get(cache_key)
737 for child in self.children.all().order_by():
738 for tag_pk, value in child.tag_counter.iteritems():
739 tags[tag_pk] = tags.get(tag_pk, 0) + value
740 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
744 cache.set(cache_key, tags)
747 def reset_theme_counter(self):
751 cache_key = "Book.theme_counter/%d" % self.id
752 cache.delete(cache_key)
754 self.parent.reset_theme_counter()
757 def theme_counter(self):
759 cache_key = "Book.theme_counter/%d" % self.id
760 tags = cache.get(cache_key)
766 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
767 for tag in fragment.tags.filter(category='theme').order_by():
768 tags[tag.pk] = tags.get(tag.pk, 0) + 1
771 cache.set(cache_key, tags)
774 def pretty_title(self, html_links=False):
776 names = list(book.tags.filter(category='author'))
782 names.extend(reversed(books))
785 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
787 names = [tag.name for tag in names]
789 return ', '.join(names)
792 def tagged_top_level(cls, tags):
793 """ Returns top-level books tagged with `tags'.
795 It only returns those books which don't have ancestors which are
796 also tagged with those tags.
799 # get relevant books and their tags
800 objects = cls.tagged.with_all(tags)
801 # eliminate descendants
802 l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
803 descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
805 objects = objects.exclude(pk__in=descendants_keys)
810 class Fragment(models.Model):
811 text = models.TextField()
812 short_text = models.TextField(editable=False)
813 anchor = models.CharField(max_length=120)
814 book = models.ForeignKey(Book, related_name='fragments')
816 objects = models.Manager()
817 tagged = managers.ModelTaggedItemManager(Tag)
818 tags = managers.TagDescriptor(Tag)
821 ordering = ('book', 'anchor',)
822 verbose_name = _('fragment')
823 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the 'book_text' URL of the parent book plus a '#m<anchor>' part."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
828 def reset_short_html(self):
832 cache_key = "Fragment.short_html/%d/%s"
833 for lang, langname in settings.LANGUAGES:
834 cache.delete(cache_key % (self.id, lang))
836 def short_html(self):
838 cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
839 short_html = cache.get(cache_key)
843 if short_html is not None:
844 return mark_safe(short_html)
846 short_html = unicode(render_to_string('catalogue/fragment_short.html',
849 cache.set(cache_key, short_html)
850 return mark_safe(short_html)
853 class FileRecord(models.Model):
854 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
855 type = models.CharField(_('type'), max_length=20, db_index=True)
856 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
857 time = models.DateTimeField(_('time'), auto_now_add=True)
860 ordering = ('-time','-slug', '-type')
861 verbose_name = _('file record')
862 verbose_name_plural = _('file records')
def __unicode__(self):
    """Return "<sha1> <slug>.<type>" for this file record."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """React to a ``tags_updated`` signal by invalidating derived counters.

    Every affected tag gets ``book_count`` reset to None and ``changed_at``
    set to the current time. Then, depending on the sender:

    * a Book whose affected tags include any category other than
      'book', 'theme' or 'set' has its tag counter reset;
    * a Fragment whose affected tags include a 'theme' tag has its
      book's theme counter reset.
    """
    def affected():
        # Fresh queryset over the affected tags, built on demand.
        return Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags))

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched = Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags])
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book) and \
            affected().exclude(category__in=('book', 'theme', 'set')).count():
        # book tags changed: the per-book tag counter is stale
        sender.reset_tag_counter()
    elif isinstance(sender, Fragment) and \
            affected().filter(category='theme').count():
        # fragment themes changed: the book's theme counter is stale
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
892 def _pre_delete_handler(sender, instance, **kwargs):
893 """ refresh Book on BookMedia delete """
894 if sender == BookMedia:
896 pre_delete.connect(_pre_delete_handler)
898 def _post_save_handler(sender, instance, **kwargs):
899 """ refresh all the short_html stuff on BookMedia update """
900 if sender == BookMedia:
902 post_save.connect(_post_save_handler)