1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
24 from mutagen import id3
25 from slughifi import slughifi
29 ('author', _('author')),
30 ('epoch', _('epoch')),
32 ('genre', _('genre')),
33 ('theme', _('theme')),
39 ('odt', _('ODT file')),
40 ('mp3', _('MP3 file')),
41 ('ogg', _('OGG file')),
42 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets are limited to one tag category.

    The category is given at construction time and applied as a
    ``category=...`` filter to every query set the manager returns.
    """

    def __init__(self, subcategory):
        super(TagSubcategoryManager, self).__init__()
        # Category value used by get_query_set() to narrow the results.
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's query set filtered by the category."""
        base_query_set = super(TagSubcategoryManager, self).get_query_set()
        return base_query_set.filter(category=self.subcategory)
55 name = models.CharField(_('name'), max_length=50, db_index=True)
56 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
57 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
58 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
59 db_index=True, choices=TAG_CATEGORIES)
60 description = models.TextField(_('description'), blank=True)
61 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
63 user = models.ForeignKey(User, blank=True, null=True)
64 book_count = models.IntegerField(_('book count'), blank=True, null=True)
65 gazeta_link = models.CharField(blank=True, max_length=240)
66 wiki_link = models.CharField(blank=True, max_length=240)
68 class UrlDeprecationWarning(DeprecationWarning):
79 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
82 ordering = ('sort_key',)
83 verbose_name = _('tag')
84 verbose_name_plural = _('tags')
85 unique_together = (("slug", "category"),)
87 def __unicode__(self):
91 return "Tag(slug=%r)" % self.slug
94 def get_absolute_url(self):
95 return ('catalogue.views.tagged_object_list', [self.url_chunk])
def has_description(self):
    """Tell whether the tag's description is non-empty."""
    return len(self.description) != 0
# Function attributes: a translated column label and a boolean flag
# (presumably read by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
103 """ returns global book count for book tags, fragment count for themes """
105 if self.book_count is None:
106 if self.category == 'book':
108 objects = Book.objects.none()
109 elif self.category == 'theme':
110 objects = Fragment.tagged.with_all((self,))
112 objects = Book.tagged.with_all((self,)).order_by()
113 if self.category != 'set':
114 # eliminate descendants
115 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
116 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
118 objects = objects.exclude(pk__in=descendants_keys)
119 self.book_count = objects.count()
121 return self.book_count
124 def get_tag_list(tags):
125 if isinstance(tags, basestring):
130 tags_splitted = tags.split('/')
131 for name in tags_splitted:
133 real_tags.append(Tag.objects.get(slug=name, category=category))
135 elif name in Tag.categories_rev:
136 category = Tag.categories_rev[name]
139 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
141 except Tag.MultipleObjectsReturned, e:
142 ambiguous_slugs.append(name)
145 # something strange left off
146 raise Tag.DoesNotExist()
148 # some tags should be qualified
149 e = Tag.MultipleObjectsReturned()
151 e.ambiguous_slugs = ambiguous_slugs
154 e = Tag.UrlDeprecationWarning()
159 return TagBase.get_tag_list(tags)
163 return '/'.join((Tag.categories_dict[self.category], self.slug))
166 # TODO: why is this hard-coded ?
167 def book_upload_path(ext=None, maxlen=100):
168 def get_dynamic_path(media, filename, ext=ext):
169 # how to put related book's slug here?
171 if media.type == 'daisy':
176 name = slughifi(filename.split(".")[0])
178 name = slughifi(media.name)
179 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
180 return get_dynamic_path
183 class BookMedia(models.Model):
184 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
185 name = models.CharField(_('name'), max_length="100")
186 file = models.FileField(_('file'), upload_to=book_upload_path())
187 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
188 extra_info = JSONField(_('extra information'), default='{}', editable=False)
189 book = models.ForeignKey('Book', related_name='media')
190 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
192 def __unicode__(self):
193 return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
196 ordering = ('type', 'name')
197 verbose_name = _('book media')
198 verbose_name_plural = _('book media')
def save(self, *args, **kwargs):
    """Save the media record and refresh the metadata read from its file.

    The record is saved twice. The first save happens before
    ``self.file.path`` is read; afterwards the dict returned by
    ``read_meta()`` is merged into ``extra_info``, ``source_sha1`` is
    recomputed from the file, and the record is saved again so the
    refreshed values are persisted.

    Returns the result of the second parent ``save()`` call.
    """
    super(BookMedia, self).save(*args, **kwargs)

    extra_info = self.get_extra_info_value()
    extra_info.update(self.read_meta())
    self.set_extra_info_value(extra_info)
    self.source_sha1 = self.read_source_sha1(self.file.path, self.type)

    # The leftover debug ``print`` of extra_info/source_sha1 was removed:
    # it wrote to stdout on every save.
    # NOTE(review): the same args/kwargs are passed to both saves; a caller
    # passing force_insert=True would presumably trigger a second INSERT --
    # confirm whether any caller does that.
    return super(BookMedia, self).save(*args, **kwargs)
211 Reads some metadata from the audiobook.
214 artist_name = director_name = ''
215 if self.type == 'mp3':
217 audio = id3.ID3(self.file.path)
218 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
219 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
222 elif self.type == 'ogg':
224 audio = mutagen.File(self.file.path)
225 artist_name = ', '.join(audio.get('artist', []))
226 director_name = ', '.join(audio.get('conductor', []))
231 return {'artist_name': artist_name, 'director_name': director_name}
234 def read_source_sha1(filepath, filetype):
236 Reads source file SHA1 from audiobok metadata.
239 if filetype == 'mp3':
241 audio = id3.ID3(filepath)
242 return [t.data for t in audio.getall('PRIV')
243 if t.owner=='http://wolnelektury.pl?flac_sha1'][0]
246 elif filetype == 'ogg':
248 audio = mutagen.File(filepath)
249 return audio.get('flac_sha1', [None])[0]
256 class Book(models.Model):
257 title = models.CharField(_('title'), max_length=120)
258 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
259 description = models.TextField(_('description'), blank=True)
260 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
261 _short_html = models.TextField(_('short HTML'), editable=False)
262 parent_number = models.IntegerField(_('parent number'), default=0)
263 extra_info = JSONField(_('extra information'))
264 gazeta_link = models.CharField(blank=True, max_length=240)
265 wiki_link = models.CharField(blank=True, max_length=240)
266 # files generated during publication
267 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
268 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
269 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
270 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
271 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
273 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
274 objects = models.Manager()
275 tagged = managers.ModelTaggedItemManager(Tag)
276 tags = managers.TagDescriptor(Tag)
278 _tag_counter = JSONField(null=True, editable=False)
279 _theme_counter = JSONField(null=True, editable=False)
281 class AlreadyExists(Exception):
285 ordering = ('title',)
286 verbose_name = _('book')
287 verbose_name_plural = _('books')
289 def __unicode__(self):
292 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
294 # Reset _short_html during save
296 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
298 self.__setattr__(key, '')
299 # Fragment.short_html relies on book's tags, so reset it here too
300 self.fragments.all().update(**update)
302 return super(Book, self).save(force_insert, force_update)
305 def get_absolute_url(self):
306 return ('catalogue.views.book_detail', [self.slug])
def book_tag_slug(self):
    """Return the slug of the book's own tag.

    The slug is the book slug prefixed with ``'l-'`` and cut to at most
    120 characters.
    """
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
316 slug = self.book_tag_slug()
317 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
319 book_tag.name = self.title[:50]
320 book_tag.sort_key = self.title.lower()
324 def has_media(self, type):
351 if self.media.filter(type=type).exists():
356 def get_media(self, type):
357 if self.has_media(type):
361 return self.html_file
363 return self.epub_file
369 return self.media.filter(type=type)
374 return self.get_media("mp3")
376 return self.get_media("odt")
378 return self.get_media("ogg")
380 return self.get_media("daisy")
382 def short_html(self):
383 key = '_short_html_%s' % get_language()
384 short_html = getattr(self, key)
386 if short_html and len(short_html):
387 return mark_safe(short_html)
389 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
390 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
393 # files generated during publication
394 if self.has_media("html"):
395 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
396 if self.has_media("pdf"):
397 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
398 if self.root_ancestor.has_media("epub"):
399 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
400 if self.has_media("txt"):
401 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
403 for m in self.media.order_by('type'):
404 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
406 formats = [mark_safe(format) for format in formats]
408 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
409 {'book': self, 'tags': tags, 'formats': formats})))
410 self.save(reset_short_html=False)
411 return mark_safe(getattr(self, key))
415 def root_ancestor(self):
416 """ returns the oldest ancestor """
418 if not hasattr(self, '_root_ancestor'):
422 self._root_ancestor = book
423 return self._root_ancestor
def has_description(self):
    """Tell whether the book's description is non-empty."""
    return len(self.description) != 0
# Function attributes: a translated column label and a boolean flag
# (presumably read by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Tell whether the book's ``pdf_file`` field is set (truthy)."""
    if self.pdf_file:
        return True
    return False
# Column label and boolean flag attached to the function object.
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Tell whether the book's ``epub_file`` field is set (truthy)."""
    return True if self.epub_file else False
# Column label and boolean flag attached to the function object.
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Tell whether the book's ``txt_file`` field is set (truthy)."""
    return bool(self.txt_file)
# The label used to read 'HTML' (copied from has_html_file), which gave the
# TXT indicator the same label as the HTML one.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Tell whether the book's ``html_file`` field is set (truthy)."""
    if not self.html_file:
        return False
    return True
# Column label and boolean flag attached to the function object.
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Tell whether ``has_media("odt")`` reports ODT media for the book."""
    odt_available = self.has_media("odt")
    return True if odt_available else False
# Column label and boolean flag attached to the function object.
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Tell whether ``has_media("mp3")`` reports MP3 media for the book."""
    mp3_available = self.has_media("mp3")
    return True if mp3_available else False
# Column label and boolean flag attached to the function object.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Tell whether ``has_media("ogg")`` reports OGG media for the book."""
    ogg_available = self.has_media("ogg")
    return True if ogg_available else False
# Column label and boolean flag attached to the function object.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Tell whether ``has_media("daisy")`` reports DAISY media for the book."""
    daisy_available = self.has_media("daisy")
    return True if daisy_available else False
# Column label and boolean flag attached to the function object.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
472 def build_epub(self, remove_descendants=True):
473 """ (Re)builds the epub file.
474 If book has a parent, does nothing.
475 Unless remove_descendants is False, descendants' epubs are removed.
478 from StringIO import StringIO
479 from hashlib import sha1
480 from django.core.files.base import ContentFile
481 from librarian import DocProvider
483 class BookImportDocProvider(DocProvider):
484 """ used for joined EPUBs """
486 def __init__(self, book):
489 def by_slug(self, slug):
490 if slug == self.book.slug:
491 return self.book.xml_file
493 return Book.objects.get(slug=slug).xml_file
499 epub_file = StringIO()
501 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
502 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
503 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
507 book_descendants = list(self.children.all())
508 while len(book_descendants) > 0:
509 child_book = book_descendants.pop(0)
510 if remove_descendants and child_book.has_epub_file():
511 child_book.epub_file.delete()
512 # save anyway, to refresh short_html
514 book_descendants += list(child_book.children.all())
517 from StringIO import StringIO
518 from django.core.files.base import ContentFile
519 from librarian import text
522 text.transform(open(self.xml_file.path), out)
523 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
528 def from_xml_file(cls, xml_file, **kwargs):
529 # use librarian to parse meta-data
530 book_info = dcparser.parse(xml_file)
532 if not isinstance(xml_file, File):
533 xml_file = File(open(xml_file))
536 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
541 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
543 from tempfile import NamedTemporaryFile
544 from markupstring import MarkupString
545 from django.core.files.storage import default_storage
547 # check for parts before we do anything
549 if hasattr(book_info, 'parts'):
550 for part_url in book_info.parts:
551 base, slug = part_url.rsplit('/', 1)
553 children.append(Book.objects.get(slug=slug))
554 except Book.DoesNotExist, e:
555 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
559 book_base, book_slug = book_info.url.rsplit('/', 1)
560 if re.search(r'[^a-zA-Z0-9-]', book_slug):
561 raise ValueError('Invalid characters in slug')
562 book, created = Book.objects.get_or_create(slug=book_slug)
568 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
569 # Save shelves for this book
570 book_shelves = list(book.tags.filter(category='set'))
572 book.title = book_info.title
573 book.set_extra_info_value(book_info.to_dict())
574 book._short_html = ''
578 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
579 for field_name, category in categories:
581 tag_names = getattr(book_info, field_name)
583 tag_names = [getattr(book_info, category)]
584 for tag_name in tag_names:
585 tag_sort_key = tag_name
586 if category == 'author':
587 tag_sort_key = tag_name.last_name
588 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
589 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
592 tag.sort_key = tag_sort_key.lower()
594 book_tags.append(tag)
596 book.tags = set(book_tags + book_shelves)
598 book_tag = book.book_tag()
600 for n, child_book in enumerate(children):
601 child_book.parent = book
602 child_book.parent_number = n
605 # Save XML and HTML files
606 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
608 # delete old fragments when overwriting
609 book.fragments.all().delete()
611 html_file = NamedTemporaryFile()
612 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
613 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
615 # get ancestor l-tags for adding to new fragments
619 ancestor_tags.append(p.book_tag())
623 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
624 for fragment in closed_fragments.values():
626 theme_names = [s.strip() for s in fragment.themes.split(',')]
627 except AttributeError:
630 for theme_name in theme_names:
633 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
635 tag.name = theme_name
636 tag.sort_key = theme_name.lower()
642 text = fragment.to_string()
644 if (len(MarkupString(text)) > 240):
645 short_text = unicode(MarkupString(text)[:160])
646 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
647 defaults={'text': text, 'short_text': short_text})
650 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
652 if not settings.NO_BUILD_TXT and build_txt:
655 if not settings.NO_BUILD_EPUB and build_epub:
656 book.root_ancestor.build_epub()
658 book_descendants = list(book.children.all())
659 # add l-tag to descendants and their fragments
660 # delete unnecessary EPUB files
661 while len(book_descendants) > 0:
662 child_book = book_descendants.pop(0)
663 child_book.tags = list(child_book.tags) + [book_tag]
665 for fragment in child_book.fragments.all():
666 fragment.tags = set(list(fragment.tags) + [book_tag])
667 book_descendants += list(child_book.children.all())
670 book.reset_tag_counter()
671 book.reset_theme_counter()
677 def refresh_tag_counter(self):
679 for child in self.children.all().order_by():
680 for tag_pk, value in child.tag_counter.iteritems():
681 tags[tag_pk] = tags.get(tag_pk, 0) + value
682 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
684 self.set__tag_counter_value(tags)
685 self.save(reset_short_html=False)
688 def reset_tag_counter(self):
689 self._tag_counter = None
690 self.save(reset_short_html=False)
692 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the book's tag counter as a dict with integer keys.

    When no counter is stored (``_tag_counter`` is None) the result of
    ``refresh_tag_counter()`` is returned instead; otherwise the stored
    mapping is read and its keys are converted with ``int``.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_tag_counter()
700 def refresh_theme_counter(self):
702 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
703 for tag in fragment.tags.filter(category='theme').order_by():
704 tags[tag.pk] = tags.get(tag.pk, 0) + 1
705 self.set__theme_counter_value(tags)
706 self.save(reset_short_html=False)
709 def reset_theme_counter(self):
710 self._theme_counter = None
711 self.save(reset_short_html=False)
713 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the book's theme counter as a dict with integer keys.

    When no counter is stored (``_theme_counter`` is None) the result of
    ``refresh_theme_counter()`` is returned instead; otherwise the stored
    mapping is read and its keys are converted with ``int``.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_theme_counter()
721 def pretty_title(self, html_links=False):
723 names = list(book.tags.filter(category='author'))
729 names.extend(reversed(books))
732 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
734 names = [tag.name for tag in names]
736 return ', '.join(names)
739 class Fragment(models.Model):
740 text = models.TextField()
741 short_text = models.TextField(editable=False)
742 _short_html = models.TextField(editable=False)
743 anchor = models.CharField(max_length=120)
744 book = models.ForeignKey(Book, related_name='fragments')
746 objects = models.Manager()
747 tagged = managers.ModelTaggedItemManager(Tag)
748 tags = managers.TagDescriptor(Tag)
751 ordering = ('book', 'anchor',)
752 verbose_name = _('fragment')
753 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book's text view followed by ``#m<anchor>``."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
758 def short_html(self):
759 key = '_short_html_%s' % get_language()
760 short_html = getattr(self, key)
761 if short_html and len(short_html):
762 return mark_safe(short_html)
764 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
765 {'fragment': self})))
767 return mark_safe(getattr(self, key))
770 class FileRecord(models.Model):
771 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
772 type = models.CharField(_('type'), max_length=20, db_index=True)
773 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
774 time = models.DateTimeField(_('time'), auto_now_add=True)
777 ordering = ('-time','-slug', '-type')
778 verbose_name = _('file record')
779 verbose_name_plural = _('file records')
def __unicode__(self):
    """Render the record as ``<sha1> <slug>.<type>``."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    * ``book_count`` is reset to None on every affected tag.
    * If ``sender`` is a Book and any affected tag is outside the
      'book', 'theme' and 'set' categories, the book's tag counter is reset.
    * If ``sender`` is a Fragment and any affected tag is a 'theme' tag,
      the theme counter of the fragment's book is reset.

    Arguments:
        sender: the object whose tags changed.
        affected_tags: iterable of tags; only ``.pk`` of each is read.
        **kwargs: remaining signal arguments, ignored.
    """
    # Collect the primary keys once, so a one-shot iterable is not
    # exhausted by the first query and the list is shared by all of them.
    tag_pks = [tag.pk for tag in affected_tags]
    changed_tags = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    changed_tags.update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if changed_tags.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if changed_tags.filter(category='theme').exists():
            sender.book.reset_theme_counter()
805 tags_updated.connect(_tags_updated_handler)
808 def _pre_delete_handler(sender, instance, **kwargs):
809 """ refresh Book on BookMedia delete """
810 if sender == BookMedia:
812 pre_delete.connect(_pre_delete_handler)
814 def _post_save_handler(sender, instance, **kwargs):
815 """ refresh all the short_html stuff on BookMedia update """
816 if sender == BookMedia:
818 post_save.connect(_post_save_handler)