1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
24 from mutagen import id3
25 from slughifi import slughifi
29 ('author', _('author')),
30 ('epoch', _('epoch')),
32 ('genre', _('genre')),
33 ('theme', _('theme')),
39 ('odt', _('ODT file')),
40 ('mp3', _('MP3 file')),
41 ('ogg', _('OGG file')),
42 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain tags of one category."""

    def __init__(self, subcategory):
        """Store the category value that every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
55 name = models.CharField(_('name'), max_length=50, db_index=True)
56 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
58 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
59 db_index=True, choices=TAG_CATEGORIES)
60 description = models.TextField(_('description'), blank=True)
61 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
63 user = models.ForeignKey(User, blank=True, null=True)
64 book_count = models.IntegerField(_('book count'), blank=True, null=True)
65 gazeta_link = models.CharField(blank=True, max_length=240)
66 wiki_link = models.CharField(blank=True, max_length=240)
68 class UrlDeprecationWarning(DeprecationWarning):
79 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
82 ordering = ('sort_key',)
83 verbose_name = _('tag')
84 verbose_name_plural = _('tags')
85 unique_together = (("slug", "category"),)
87 def __unicode__(self):
91 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the view name and positional arguments locating this tag.

    The result is a ``(view_name, args)`` tuple rather than a URL string;
    presumably it is resolved by a ``permalink`` decorator that is not
    visible in this excerpt.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this tag has a non-empty description.

    The function attributes set below supply a translated label and mark
    the value as boolean (the convention used by Django admin list
    displays).
    """
    description_length = len(self.description)
    return description_length > 0
has_description.short_description = _('description')
has_description.boolean = True
103 """ returns global book count for book tags, fragment count for themes """
105 if self.book_count is None:
106 if self.category == 'book':
108 objects = Book.objects.none()
109 elif self.category == 'theme':
110 objects = Fragment.tagged.with_all((self,))
112 objects = Book.tagged.with_all((self,)).order_by()
113 if self.category != 'set':
114 # eliminate descendants
115 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
116 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
118 objects = objects.exclude(pk__in=descendants_keys)
119 self.book_count = objects.count()
121 return self.book_count
124 def get_tag_list(tags):
125 if isinstance(tags, basestring):
130 tags_splitted = tags.split('/')
131 for name in tags_splitted:
133 real_tags.append(Tag.objects.get(slug=name, category=category))
135 elif name in Tag.categories_rev:
136 category = Tag.categories_rev[name]
139 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
141 except Tag.MultipleObjectsReturned, e:
142 ambiguous_slugs.append(name)
145 # something strange left off
146 raise Tag.DoesNotExist()
148 # some tags should be qualified
149 e = Tag.MultipleObjectsReturned()
151 e.ambiguous_slugs = ambiguous_slugs
154 e = Tag.UrlDeprecationWarning()
159 return TagBase.get_tag_list(tags)
163 return '/'.join((Tag.categories_dict[self.category], self.slug))
166 # TODO: why is this hard-coded ?
167 def book_upload_path(ext=None, maxlen=100):
168 def get_dynamic_path(media, filename, ext=ext):
169 # how to put related book's slug here?
171 if media.type == 'daisy':
176 name = slughifi(filename.split(".")[0])
178 name = slughifi(media.name)
179 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
180 return get_dynamic_path
183 class BookMedia(models.Model):
184 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
185 name = models.CharField(_('name'), max_length="100")
186 file = models.FileField(_('file'), upload_to=book_upload_path())
187 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
188 extra_info = JSONField(_('extra information'), default='{}', editable=False)
189 book = models.ForeignKey('Book', related_name='media')
190 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
def __unicode__(self):
    """Return the media name followed by the stored file's base name."""
    stored_name = self.file.name
    # keep only the last path component of the stored file name
    basename = stored_name.split("/")[-1]
    return "%s (%s)" % (self.name, basename)
196 ordering = ('type', 'name')
197 verbose_name = _('book media')
198 verbose_name_plural = _('book media')
def save(self, *args, **kwargs):
    """Save the media record, then refresh metadata read from its file.

    The record is saved twice. After the first save the values returned
    by ``read_meta()`` are merged into ``extra_info`` and ``source_sha1``
    is re-read from ``self.file.path`` (presumably the file has to be
    stored before its path can be read), and the record is saved again.

    Accepts the same arguments as ``models.Model.save`` and returns the
    result of the final parent ``save`` call.
    """
    super(BookMedia, self).save(*args, **kwargs)

    extra_info = self.get_extra_info_value()
    extra_info.update(self.read_meta())
    self.set_extra_info_value(extra_info)
    self.source_sha1 = self.read_source_sha1(self.file.path, self.type)

    # The row already exists after the first save, so a forced INSERT
    # (e.g. from ``objects.create()``, which passes force_insert=True)
    # must not be repeated or it would collide with the existing row.
    if args:
        # force_insert is the first positional argument of Model.save
        args = (False,) + tuple(args[1:])
    kwargs.pop('force_insert', None)
    return super(BookMedia, self).save(*args, **kwargs)
210 Reads some metadata from the audiobook.
213 artist_name = director_name = ''
214 if self.type == 'mp3':
216 audio = id3.ID3(self.file.path)
217 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
218 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
221 elif self.type == 'ogg':
223 audio = mutagen.File(self.file.path)
224 artist_name = ', '.join(audio.get('artist', []))
225 director_name = ', '.join(audio.get('conductor', []))
230 return {'artist_name': artist_name, 'director_name': director_name}
233 def read_source_sha1(filepath, filetype):
235 Reads source file SHA1 from audiobook metadata.
238 if filetype == 'mp3':
240 audio = id3.ID3(filepath)
241 return [t.data for t in audio.getall('PRIV')
242 if t.owner=='http://wolnelektury.pl?flac_sha1'][0]
245 elif filetype == 'ogg':
247 audio = mutagen.File(filepath)
248 return audio.get('flac_sha1', [None])[0]
255 class Book(models.Model):
256 title = models.CharField(_('title'), max_length=120)
257 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
258 description = models.TextField(_('description'), blank=True)
259 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
260 _short_html = models.TextField(_('short HTML'), editable=False)
261 parent_number = models.IntegerField(_('parent number'), default=0)
262 extra_info = JSONField(_('extra information'))
263 gazeta_link = models.CharField(blank=True, max_length=240)
264 wiki_link = models.CharField(blank=True, max_length=240)
265 # files generated during publication
266 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
267 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
268 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
269 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
270 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
272 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
273 objects = models.Manager()
274 tagged = managers.ModelTaggedItemManager(Tag)
275 tags = managers.TagDescriptor(Tag)
277 _tag_counter = JSONField(null=True, editable=False)
278 _theme_counter = JSONField(null=True, editable=False)
280 class AlreadyExists(Exception):
284 ordering = ('title',)
285 verbose_name = _('book')
286 verbose_name_plural = _('books')
288 def __unicode__(self):
291 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
293 # Reset _short_html during save
295 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
297 self.__setattr__(key, '')
298 # Fragment.short_html relies on book's tags, so reset it here too
299 self.fragments.all().update(**update)
301 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the view name and positional arguments locating this book.

    The result is a ``(view_name, args)`` tuple rather than a URL string;
    presumably it is resolved by a ``permalink`` decorator that is not
    visible in this excerpt.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag.

    The slug is the book slug prefixed with ``'l-'`` and cut to at most
    120 characters.
    """
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
315 slug = self.book_tag_slug()
316 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
318 book_tag.name = self.title[:50]
319 book_tag.sort_key = self.title.lower()
323 def has_media(self, type):
350 if self.media.filter(type=type).exists():
355 def get_media(self, type):
356 if self.has_media(type):
360 return self.html_file
362 return self.epub_file
368 return self.media.filter(type=type)
373 return self.get_media("mp3")
375 return self.get_media("odt")
377 return self.get_media("ogg")
379 return self.get_media("daisy")
381 def short_html(self):
382 key = '_short_html_%s' % get_language()
383 short_html = getattr(self, key)
385 if short_html and len(short_html):
386 return mark_safe(short_html)
388 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
389 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
392 # files generated during publication
393 if self.has_media("html"):
394 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
395 if self.has_media("pdf"):
396 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
397 if self.root_ancestor.has_media("epub"):
398 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
399 if self.has_media("txt"):
400 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
402 for m in self.media.order_by('type'):
403 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
405 formats = [mark_safe(format) for format in formats]
407 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
408 {'book': self, 'tags': tags, 'formats': formats})))
409 self.save(reset_short_html=False)
410 return mark_safe(getattr(self, key))
414 def root_ancestor(self):
415 """ returns the oldest ancestor """
417 if not hasattr(self, '_root_ancestor'):
421 self._root_ancestor = book
422 return self._root_ancestor
def has_description(self):
    """Tell whether this book has a non-empty description.

    The function attributes set below supply a translated label and mark
    the value as boolean (the convention used by Django admin list
    displays).
    """
    description_length = len(self.description)
    return description_length > 0
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Tell whether ``pdf_file`` is set (truthy) on this book.

    The attributes set below label the value 'PDF' and mark it as boolean
    (the convention used by Django admin list displays).
    """
    return True if self.pdf_file else False
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Tell whether ``epub_file`` is set (truthy) on this book.

    The attributes set below label the value 'EPUB' and mark it as
    boolean (the convention used by Django admin list displays).
    """
    return True if self.epub_file else False
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Tell whether ``txt_file`` is set (truthy) on this book.

    The attributes set below label the value 'TXT' and mark it as boolean
    (the convention used by Django admin list displays).
    """
    return bool(self.txt_file)
# The label used to read 'HTML', duplicating the has_html_file column.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Tell whether ``html_file`` is set (truthy) on this book.

    The attributes set below label the value 'HTML' and mark it as
    boolean (the convention used by Django admin list displays).
    """
    return True if self.html_file else False
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Tell whether ``has_media("odt")`` reports ODT media for this book.

    The attributes set below label the value 'ODT' and mark it as boolean
    (the convention used by Django admin list displays).
    """
    odt_present = self.has_media("odt")
    return bool(odt_present)
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Tell whether ``has_media("mp3")`` reports MP3 media for this book.

    The attributes set below label the value 'MP3' and mark it as boolean
    (the convention used by Django admin list displays).
    """
    mp3_present = self.has_media("mp3")
    return bool(mp3_present)
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether ``has_media("ogg")`` reports OGG media for this book.

    The attributes set below label the value 'OGG' and mark it as boolean
    (the convention used by Django admin list displays).
    """
    ogg_present = self.has_media("ogg")
    return bool(ogg_present)
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether ``has_media("daisy")`` reports DAISY media for this book.

    The attributes set below label the value 'DAISY' and mark it as
    boolean (the convention used by Django admin list displays).
    """
    daisy_present = self.has_media("daisy")
    return bool(daisy_present)
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
471 def build_epub(self, remove_descendants=True):
472 """ (Re)builds the epub file.
473 If book has a parent, does nothing.
474 Unless remove_descendants is False, descendants' epubs are removed.
477 from StringIO import StringIO
478 from hashlib import sha1
479 from django.core.files.base import ContentFile
480 from librarian import DocProvider
482 class BookImportDocProvider(DocProvider):
483 """ used for joined EPUBs """
485 def __init__(self, book):
488 def by_slug(self, slug):
489 if slug == self.book.slug:
490 return self.book.xml_file
492 return Book.objects.get(slug=slug).xml_file
498 epub_file = StringIO()
500 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
501 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
502 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
506 book_descendants = list(self.children.all())
507 while len(book_descendants) > 0:
508 child_book = book_descendants.pop(0)
509 if remove_descendants and child_book.has_epub_file():
510 child_book.epub_file.delete()
511 # save anyway, to refresh short_html
513 book_descendants += list(child_book.children.all())
516 from StringIO import StringIO
517 from django.core.files.base import ContentFile
518 from librarian import text
521 text.transform(open(self.xml_file.path), out)
522 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
527 def from_xml_file(cls, xml_file, **kwargs):
528 # use librarian to parse meta-data
529 book_info = dcparser.parse(xml_file)
531 if not isinstance(xml_file, File):
532 xml_file = File(open(xml_file))
535 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
540 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
542 from tempfile import NamedTemporaryFile
543 from markupstring import MarkupString
544 from django.core.files.storage import default_storage
546 # check for parts before we do anything
548 if hasattr(book_info, 'parts'):
549 for part_url in book_info.parts:
550 base, slug = part_url.rsplit('/', 1)
552 children.append(Book.objects.get(slug=slug))
553 except Book.DoesNotExist, e:
554 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
558 book_base, book_slug = book_info.url.rsplit('/', 1)
559 if re.search(r'[^a-zA-Z0-9-]', book_slug):
560 raise ValueError('Invalid characters in slug')
561 book, created = Book.objects.get_or_create(slug=book_slug)
567 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
568 # Save shelves for this book
569 book_shelves = list(book.tags.filter(category='set'))
571 book.title = book_info.title
572 book.set_extra_info_value(book_info.to_dict())
573 book._short_html = ''
577 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
578 for field_name, category in categories:
580 tag_names = getattr(book_info, field_name)
582 tag_names = [getattr(book_info, category)]
583 for tag_name in tag_names:
584 tag_sort_key = tag_name
585 if category == 'author':
586 tag_sort_key = tag_name.last_name
587 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
588 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
591 tag.sort_key = tag_sort_key.lower()
593 book_tags.append(tag)
595 book.tags = set(book_tags + book_shelves)
597 book_tag = book.book_tag()
599 for n, child_book in enumerate(children):
600 child_book.parent = book
601 child_book.parent_number = n
604 # Save XML and HTML files
605 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
607 # delete old fragments when overwriting
608 book.fragments.all().delete()
610 html_file = NamedTemporaryFile()
611 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
612 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
614 # get ancestor l-tags for adding to new fragments
618 ancestor_tags.append(p.book_tag())
622 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
623 for fragment in closed_fragments.values():
625 theme_names = [s.strip() for s in fragment.themes.split(',')]
626 except AttributeError:
629 for theme_name in theme_names:
632 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
634 tag.name = theme_name
635 tag.sort_key = theme_name.lower()
641 text = fragment.to_string()
643 if (len(MarkupString(text)) > 240):
644 short_text = unicode(MarkupString(text)[:160])
645 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
646 defaults={'text': text, 'short_text': short_text})
649 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
651 if not settings.NO_BUILD_TXT and build_txt:
654 if not settings.NO_BUILD_EPUB and build_epub:
655 book.root_ancestor.build_epub()
657 book_descendants = list(book.children.all())
658 # add l-tag to descendants and their fragments
659 # delete unnecessary EPUB files
660 while len(book_descendants) > 0:
661 child_book = book_descendants.pop(0)
662 child_book.tags = list(child_book.tags) + [book_tag]
664 for fragment in child_book.fragments.all():
665 fragment.tags = set(list(fragment.tags) + [book_tag])
666 book_descendants += list(child_book.children.all())
669 book.reset_tag_counter()
670 book.reset_theme_counter()
676 def refresh_tag_counter(self):
678 for child in self.children.all().order_by():
679 for tag_pk, value in child.tag_counter.iteritems():
680 tags[tag_pk] = tags.get(tag_pk, 0) + value
681 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
683 self.set__tag_counter_value(tags)
684 self.save(reset_short_html=False)
687 def reset_tag_counter(self):
688 self._tag_counter = None
689 self.save(reset_short_html=False)
691 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the per-tag counter for this book.

    When ``_tag_counter`` is unset (``None``) the result of
    ``refresh_tag_counter()`` is returned. Otherwise the stored mapping
    is returned with its keys converted to ``int``.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        # stored keys are converted back to integers before returning
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_tag_counter()
699 def refresh_theme_counter(self):
701 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
702 for tag in fragment.tags.filter(category='theme').order_by():
703 tags[tag.pk] = tags.get(tag.pk, 0) + 1
704 self.set__theme_counter_value(tags)
705 self.save(reset_short_html=False)
708 def reset_theme_counter(self):
709 self._theme_counter = None
710 self.save(reset_short_html=False)
712 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the per-theme counter for this book.

    When ``_theme_counter`` is unset (``None``) the result of
    ``refresh_theme_counter()`` is returned. Otherwise the stored mapping
    is returned with its keys converted to ``int``.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        # stored keys are converted back to integers before returning
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_theme_counter()
720 def pretty_title(self, html_links=False):
722 names = list(book.tags.filter(category='author'))
728 names.extend(reversed(books))
731 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
733 names = [tag.name for tag in names]
735 return ', '.join(names)
738 class Fragment(models.Model):
739 text = models.TextField()
740 short_text = models.TextField(editable=False)
741 _short_html = models.TextField(editable=False)
742 anchor = models.CharField(max_length=120)
743 book = models.ForeignKey(Book, related_name='fragments')
745 objects = models.Manager()
746 tagged = managers.ModelTaggedItemManager(Tag)
747 tags = managers.TagDescriptor(Tag)
750 ordering = ('book', 'anchor',)
751 verbose_name = _('fragment')
752 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book text with this fragment's anchor.

    The URL is the reversed ``'book_text'`` URL for the fragment's book,
    followed by ``#m`` and the fragment anchor.
    """
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
757 def short_html(self):
758 key = '_short_html_%s' % get_language()
759 short_html = getattr(self, key)
760 if short_html and len(short_html):
761 return mark_safe(short_html)
763 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
764 {'fragment': self})))
766 return mark_safe(getattr(self, key))
769 class FileRecord(models.Model):
770 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
771 type = models.CharField(_('type'), max_length=20, db_index=True)
772 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
773 time = models.DateTimeField(_('time'), auto_now_add=True)
776 ordering = ('-time','-slug', '-type')
777 verbose_name = _('file record')
778 verbose_name_plural = _('file records')
def __unicode__(self):
    """Return the SHA-1 hash followed by ``<slug>.<type>``."""
    filename = "%s.%s" % (self.slug, self.type)
    return "%s %s" % (self.sha1, filename)
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters when the tags of an object change.

    Connected to the ``tags_updated`` signal.

    sender -- the object whose tags changed; only ``Book`` and
        ``Fragment`` instances trigger the per-book resets below.
    affected_tags -- iterable of the tags that were changed.
    """
    # Collect the primary keys once: they feed several queries, and
    # ``affected_tags`` may be an iterable that can only be consumed once.
    tag_pks = [tag.pk for tag in affected_tags]
    touched_tags = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    touched_tags.update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched_tags.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched_tags.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
807 def _pre_delete_handler(sender, instance, **kwargs):
808 """ refresh Book on BookMedia delete """
809 if sender == BookMedia:
811 pre_delete.connect(_pre_delete_handler)
813 def _post_save_handler(sender, instance, **kwargs):
814 """ refresh all the short_html stuff on BookMedia update """
815 if sender == BookMedia:
817 post_save.connect(_post_save_handler)