1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField, OverwritingFileField
21 from catalogue.utils import ExistingFile
23 from librarian import dcparser, html, epub, NoDublinCore
25 from mutagen import id3
26 from slughifi import slughifi
30 ('author', _('author')),
31 ('epoch', _('epoch')),
33 ('genre', _('genre')),
34 ('theme', _('theme')),
40 ('odt', _('ODT file')),
41 ('mp3', _('MP3 file')),
42 ('ogg', _('OGG file')),
43 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager that only exposes tags belonging to a single category.

    The category name is fixed when the manager is created and is applied
    as a filter to every queryset the manager hands out.
    """

    def __init__(self, subcategory):
        """Remember the ``category`` value this manager is restricted to."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
56 name = models.CharField(_('name'), max_length=50, db_index=True)
57 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
59 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
60 db_index=True, choices=TAG_CATEGORIES)
61 description = models.TextField(_('description'), blank=True)
62 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
64 user = models.ForeignKey(User, blank=True, null=True)
65 book_count = models.IntegerField(_('book count'), blank=True, null=True)
66 gazeta_link = models.CharField(blank=True, max_length=240)
67 wiki_link = models.CharField(blank=True, max_length=240)
69 class UrlDeprecationWarning(DeprecationWarning):
80 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
83 ordering = ('sort_key',)
84 verbose_name = _('tag')
85 verbose_name_plural = _('tags')
86 unique_together = (("slug", "category"),)
88 def __unicode__(self):
92 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the view name and positional arguments that locate this tag.

    The result is a ``(view_name, args)`` tuple rather than a finished URL;
    presumably a permalink-style decorator outside this block resolves it.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this tag's description text is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Display options attached to the function object (presumably consumed by
# the Django admin change list): render as a yes/no icon under a
# translated column header.
has_description.boolean = True
has_description.short_description = _('description')
104 """ returns global book count for book tags, fragment count for themes """
106 if self.book_count is None:
107 if self.category == 'book':
109 objects = Book.objects.none()
110 elif self.category == 'theme':
111 objects = Fragment.tagged.with_all((self,))
113 objects = Book.tagged.with_all((self,)).order_by()
114 if self.category != 'set':
115 # eliminate descendants
116 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
117 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
119 objects = objects.exclude(pk__in=descendants_keys)
120 self.book_count = objects.count()
122 return self.book_count
125 def get_tag_list(tags):
126 if isinstance(tags, basestring):
131 tags_splitted = tags.split('/')
132 for name in tags_splitted:
134 real_tags.append(Tag.objects.get(slug=name, category=category))
136 elif name in Tag.categories_rev:
137 category = Tag.categories_rev[name]
140 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
142 except Tag.MultipleObjectsReturned, e:
143 ambiguous_slugs.append(name)
146 # something strange left off
147 raise Tag.DoesNotExist()
149 # some tags should be qualified
150 e = Tag.MultipleObjectsReturned()
152 e.ambiguous_slugs = ambiguous_slugs
155 e = Tag.UrlDeprecationWarning()
160 return TagBase.get_tag_list(tags)
164 return '/'.join((Tag.categories_dict[self.category], self.slug))
167 # TODO: why is this hard-coded ?
168 def book_upload_path(ext=None, maxlen=100):
169 def get_dynamic_path(media, filename, ext=ext):
170 # how to put related book's slug here?
172 if media.type == 'daisy':
177 name = slughifi(filename.split(".")[0])
179 name = slughifi(media.name)
180 return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
181 return get_dynamic_path
184 class BookMedia(models.Model):
185 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100", editable=False)
186 name = models.CharField(_('name'), max_length="100")
187 file = OverwritingFileField(_('file'), upload_to=book_upload_path())
188 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
189 extra_info = JSONField(_('extra information'), default='{}', editable=False)
190 book = models.ForeignKey('Book', related_name='media')
191 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
def __unicode__(self):
    """Return the media name followed by the stored file's base name in parentheses."""
    # Only the last path component of the stored file name is shown.
    path_components = self.file.name.split("/")
    return "%s (%s)" % (self.name, path_components[-1])
197 ordering = ('type', 'name')
198 verbose_name = _('book media')
199 verbose_name_plural = _('book media')
201 def save(self, *args, **kwargs):
203 old = BookMedia.objects.get(pk=self.pk)
204 except BookMedia.DoesNotExist, e:
207 # if name changed, change the file name, too
208 if self.name != old.name:
209 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
211 super(BookMedia, self).save(*args, **kwargs)
212 extra_info = self.get_extra_info_value()
213 extra_info.update(self.read_meta())
214 self.set_extra_info_value(extra_info)
215 self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
216 return super(BookMedia, self).save(*args, **kwargs)
220 Reads some metadata from the audiobook.
223 artist_name = director_name = project = funded_by = ''
224 if self.type == 'mp3':
226 audio = id3.ID3(self.file.path)
227 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
228 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
229 project = ", ".join([t.data for t in audio.getall('PRIV')
230 if t.owner=='wolnelektury.pl?project'])
231 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
232 if t.owner=='wolnelektury.pl?funded_by'])
235 elif self.type == 'ogg':
237 audio = mutagen.File(self.file.path)
238 artist_name = ', '.join(audio.get('artist', []))
239 director_name = ', '.join(audio.get('conductor', []))
240 project = ", ".join(audio.get('project', []))
241 funded_by = ", ".join(audio.get('funded_by', []))
246 return {'artist_name': artist_name, 'director_name': director_name,
247 'project': project, 'funded_by': funded_by}
250 def read_source_sha1(filepath, filetype):
252 Reads source file SHA1 from audiobook metadata.
255 if filetype == 'mp3':
257 audio = id3.ID3(filepath)
258 return [t.data for t in audio.getall('PRIV')
259 if t.owner=='wolnelektury.pl?flac_sha1'][0]
262 elif filetype == 'ogg':
264 audio = mutagen.File(filepath)
265 return audio.get('flac_sha1', [None])[0]
272 class Book(models.Model):
273 title = models.CharField(_('title'), max_length=120)
274 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
275 description = models.TextField(_('description'), blank=True)
276 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
277 _short_html = models.TextField(_('short HTML'), editable=False)
278 parent_number = models.IntegerField(_('parent number'), default=0)
279 extra_info = JSONField(_('extra information'))
280 gazeta_link = models.CharField(blank=True, max_length=240)
281 wiki_link = models.CharField(blank=True, max_length=240)
282 # files generated during publication
283 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
284 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
285 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
286 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
287 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
289 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
290 objects = models.Manager()
291 tagged = managers.ModelTaggedItemManager(Tag)
292 tags = managers.TagDescriptor(Tag)
294 _tag_counter = JSONField(null=True, editable=False)
295 _theme_counter = JSONField(null=True, editable=False)
297 class AlreadyExists(Exception):
301 ordering = ('title',)
302 verbose_name = _('book')
303 verbose_name_plural = _('books')
305 def __unicode__(self):
308 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
310 # Reset _short_html during save
312 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
314 self.__setattr__(key, '')
315 # Fragment.short_html relies on book's tags, so reset it here too
316 self.fragments.all().update(**update)
318 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the view name and positional arguments that locate this book.

    The result is a ``(view_name, args)`` tuple rather than a finished URL;
    presumably a permalink-style decorator outside this block resolves it.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return this book's ``l-``-prefixed tag slug, cut to at most 120 characters."""
    prefixed_slug = 'l-' + self.slug
    # 120 matches the max_length of the slug fields declared in this module.
    return prefixed_slug[:120]
332 slug = self.book_tag_slug()
333 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
335 book_tag.name = self.title[:50]
336 book_tag.sort_key = self.title.lower()
340 def has_media(self, type):
367 if self.media.filter(type=type).exists():
372 def get_media(self, type):
373 if self.has_media(type):
377 return self.html_file
379 return self.epub_file
385 return self.media.filter(type=type)
390 return self.get_media("mp3")
392 return self.get_media("odt")
394 return self.get_media("ogg")
396 return self.get_media("daisy")
398 def short_html(self):
399 key = '_short_html_%s' % get_language()
400 short_html = getattr(self, key)
402 if short_html and len(short_html):
403 return mark_safe(short_html)
405 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
406 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
409 # files generated during publication
410 if self.has_media("html"):
411 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
412 if self.has_media("pdf"):
413 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
414 if self.root_ancestor.has_media("epub"):
415 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
416 if self.has_media("txt"):
417 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
419 for m in self.media.order_by('type'):
420 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
422 formats = [mark_safe(format) for format in formats]
424 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
425 {'book': self, 'tags': tags, 'formats': formats})))
426 self.save(reset_short_html=False)
427 return mark_safe(getattr(self, key))
431 def root_ancestor(self):
432 """ returns the oldest ancestor """
434 if not hasattr(self, '_root_ancestor'):
438 self._root_ancestor = book
439 return self._root_ancestor
def has_description(self):
    """Tell whether this book's description text is non-empty."""
    text_length = len(self.description)
    return text_length > 0
# Display options attached to the function object (presumably consumed by
# the Django admin change list): yes/no icon under a translated header.
has_description.boolean = True
has_description.short_description = _('description')
def has_pdf_file(self):
    """Report whether the ``pdf_file`` field currently holds a file."""
    return True if self.pdf_file else False
# Display options: shown as a boolean column labelled 'PDF'.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Report whether the ``epub_file`` field currently holds a file."""
    return True if self.epub_file else False
# Display options: shown as a boolean column labelled 'EPUB'.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Report whether the ``txt_file`` field currently holds a file."""
    return bool(self.txt_file)
# Display options: shown as a boolean column. The label was previously
# 'HTML' (copied from has_html_file), which gave two identically named
# columns; it now names the format this method actually checks.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report whether the ``html_file`` field currently holds a file."""
    return True if self.html_file else False
# Display options: shown as a boolean column labelled 'HTML'.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Report whether ``has_media("odt")`` gives a truthy answer for this book."""
    return True if self.has_media("odt") else False
# Display options: shown as a boolean column labelled 'ODT'.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Report whether ``has_media("mp3")`` gives a truthy answer for this book."""
    return True if self.has_media("mp3") else False
# Display options: shown as a boolean column labelled 'MP3'.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Report whether ``has_media("ogg")`` gives a truthy answer for this book."""
    return True if self.has_media("ogg") else False
# Display options: shown as a boolean column labelled 'OGG'.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Report whether ``has_media("daisy")`` gives a truthy answer for this book."""
    return True if self.has_media("daisy") else False
# Display options: shown as a boolean column labelled 'DAISY'.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
488 def build_epub(self, remove_descendants=True):
489 """ (Re)builds the epub file.
490 If book has a parent, does nothing.
491 Unless remove_descendants is False, descendants' epubs are removed.
494 from StringIO import StringIO
495 from hashlib import sha1
496 from django.core.files.base import ContentFile
497 from librarian import DocProvider
499 class BookImportDocProvider(DocProvider):
500 """ used for joined EPUBs """
502 def __init__(self, book):
505 def by_slug(self, slug):
506 if slug == self.book.slug:
507 return self.book.xml_file
509 return Book.objects.get(slug=slug).xml_file
515 epub_file = StringIO()
517 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
518 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
519 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
523 book_descendants = list(self.children.all())
524 while len(book_descendants) > 0:
525 child_book = book_descendants.pop(0)
526 if remove_descendants and child_book.has_epub_file():
527 child_book.epub_file.delete()
528 # save anyway, to refresh short_html
530 book_descendants += list(child_book.children.all())
533 from StringIO import StringIO
534 from django.core.files.base import ContentFile
535 from librarian import text
538 text.transform(open(self.xml_file.path), out)
539 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
544 def from_xml_file(cls, xml_file, **kwargs):
545 # use librarian to parse meta-data
546 book_info = dcparser.parse(xml_file)
548 if not isinstance(xml_file, File):
549 xml_file = File(open(xml_file))
552 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
557 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
559 from tempfile import NamedTemporaryFile
560 from markupstring import MarkupString
561 from django.core.files.storage import default_storage
563 # check for parts before we do anything
565 if hasattr(book_info, 'parts'):
566 for part_url in book_info.parts:
567 base, slug = part_url.rsplit('/', 1)
569 children.append(Book.objects.get(slug=slug))
570 except Book.DoesNotExist, e:
571 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
575 book_base, book_slug = book_info.url.rsplit('/', 1)
576 if re.search(r'[^a-zA-Z0-9-]', book_slug):
577 raise ValueError('Invalid characters in slug')
578 book, created = Book.objects.get_or_create(slug=book_slug)
584 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
585 # Save shelves for this book
586 book_shelves = list(book.tags.filter(category='set'))
588 book.title = book_info.title
589 book.set_extra_info_value(book_info.to_dict())
590 book._short_html = ''
594 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
595 for field_name, category in categories:
597 tag_names = getattr(book_info, field_name)
599 tag_names = [getattr(book_info, category)]
600 for tag_name in tag_names:
601 tag_sort_key = tag_name
602 if category == 'author':
603 tag_sort_key = tag_name.last_name
604 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
605 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
608 tag.sort_key = tag_sort_key.lower()
610 book_tags.append(tag)
612 book.tags = set(book_tags + book_shelves)
614 book_tag = book.book_tag()
616 for n, child_book in enumerate(children):
617 child_book.parent = book
618 child_book.parent_number = n
621 # Save XML and HTML files
622 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
624 # delete old fragments when overwriting
625 book.fragments.all().delete()
627 html_file = NamedTemporaryFile()
628 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
629 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
631 # get ancestor l-tags for adding to new fragments
635 ancestor_tags.append(p.book_tag())
639 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
640 for fragment in closed_fragments.values():
642 theme_names = [s.strip() for s in fragment.themes.split(',')]
643 except AttributeError:
646 for theme_name in theme_names:
649 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
651 tag.name = theme_name
652 tag.sort_key = theme_name.lower()
658 text = fragment.to_string()
660 if (len(MarkupString(text)) > 240):
661 short_text = unicode(MarkupString(text)[:160])
662 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
663 defaults={'text': text, 'short_text': short_text})
666 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
668 if not settings.NO_BUILD_TXT and build_txt:
671 if not settings.NO_BUILD_EPUB and build_epub:
672 book.root_ancestor.build_epub()
674 book_descendants = list(book.children.all())
675 # add l-tag to descendants and their fragments
676 # delete unnecessary EPUB files
677 while len(book_descendants) > 0:
678 child_book = book_descendants.pop(0)
679 child_book.tags = list(child_book.tags) + [book_tag]
681 for fragment in child_book.fragments.all():
682 fragment.tags = set(list(fragment.tags) + [book_tag])
683 book_descendants += list(child_book.children.all())
686 book.reset_tag_counter()
687 book.reset_theme_counter()
693 def refresh_tag_counter(self):
695 for child in self.children.all().order_by():
696 for tag_pk, value in child.tag_counter.iteritems():
697 tags[tag_pk] = tags.get(tag_pk, 0) + value
698 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
700 self.set__tag_counter_value(tags)
701 self.save(reset_short_html=False)
704 def reset_tag_counter(self):
705 self._tag_counter = None
706 self.save(reset_short_html=False)
708 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the stored tag counts with every key converted to ``int``.

    When nothing is stored yet (``_tag_counter`` is None) the result of
    ``refresh_tag_counter()`` is returned instead.
    """
    if self._tag_counter is None:
        return self.refresh_tag_counter()
    counts_by_pk = {}
    # Stored keys are passed through int(); presumably they come back from
    # the JSON field as strings.
    for raw_key, count in self.get__tag_counter_value().iteritems():
        counts_by_pk[int(raw_key)] = count
    return counts_by_pk
716 def refresh_theme_counter(self):
718 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
719 for tag in fragment.tags.filter(category='theme').order_by():
720 tags[tag.pk] = tags.get(tag.pk, 0) + 1
721 self.set__theme_counter_value(tags)
722 self.save(reset_short_html=False)
725 def reset_theme_counter(self):
726 self._theme_counter = None
727 self.save(reset_short_html=False)
729 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the stored theme counts with every key converted to ``int``.

    When nothing is stored yet (``_theme_counter`` is None) the result of
    ``refresh_theme_counter()`` is returned instead.
    """
    if self._theme_counter is None:
        return self.refresh_theme_counter()
    counts_by_pk = {}
    # Stored keys are passed through int(); presumably they come back from
    # the JSON field as strings.
    for raw_key, count in self.get__theme_counter_value().iteritems():
        counts_by_pk[int(raw_key)] = count
    return counts_by_pk
737 def pretty_title(self, html_links=False):
739 names = list(book.tags.filter(category='author'))
745 names.extend(reversed(books))
748 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
750 names = [tag.name for tag in names]
752 return ', '.join(names)
755 class Fragment(models.Model):
756 text = models.TextField()
757 short_text = models.TextField(editable=False)
758 _short_html = models.TextField(editable=False)
759 anchor = models.CharField(max_length=120)
760 book = models.ForeignKey(Book, related_name='fragments')
762 objects = models.Manager()
763 tagged = managers.ModelTaggedItemManager(Tag)
764 tags = managers.TagDescriptor(Tag)
767 ordering = ('book', 'anchor',)
768 verbose_name = _('fragment')
769 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the ``book_text`` URL of the owning book plus an ``#m<anchor>`` suffix."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
774 def short_html(self):
775 key = '_short_html_%s' % get_language()
776 short_html = getattr(self, key)
777 if short_html and len(short_html):
778 return mark_safe(short_html)
780 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
781 {'fragment': self})))
783 return mark_safe(getattr(self, key))
786 class FileRecord(models.Model):
787 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
788 type = models.CharField(_('type'), max_length=20, db_index=True)
789 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
790 time = models.DateTimeField(_('time'), auto_now_add=True)
793 ordering = ('-time','-slug', '-type')
794 verbose_name = _('file record')
795 verbose_name_plural = _('file records')
def __unicode__(self):
    """Return the record rendered as ``<sha1> <slug>.<type>``."""
    fields = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % fields
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Every affected tag gets its global ``book_count`` cleared.  In addition:

    * if ``sender`` is a Book and any affected tag is outside the
      'book'/'theme'/'set' categories, the book's tag counter is reset;
    * if ``sender`` is a Fragment and any affected tag is a theme, the
      theme counter of the fragment's book is reset.

    ``affected_tags`` is read exactly once, so a one-shot iterable works too.
    """
    # Materialise the primary keys once; the same set drives all queries below.
    affected_pks = [tag.pk for tag in affected_tags]
    affected = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    affected.update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if affected.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected.filter(category='theme').exists():
            sender.book.reset_theme_counter()
821 tags_updated.connect(_tags_updated_handler)
824 def _pre_delete_handler(sender, instance, **kwargs):
825 """ refresh Book on BookMedia delete """
826 if sender == BookMedia:
828 pre_delete.connect(_pre_delete_handler)
830 def _post_save_handler(sender, instance, **kwargs):
831 """ refresh all the short_html stuff on BookMedia update """
832 if sender == BookMedia:
834 post_save.connect(_post_save_handler)