1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
21 from catalogue.utils import ExistingFile
23 from librarian import dcparser, html, epub, NoDublinCore
25 from mutagen import id3
26 from slughifi import slughifi
30 ('author', _('author')),
31 ('epoch', _('epoch')),
33 ('genre', _('genre')),
34 ('theme', _('theme')),
40 ('odt', _('ODT file')),
41 ('mp3', _('MP3 file')),
42 ('ogg', _('OGG file')),
43 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager limited to objects of one category.

    The category value passed to the constructor is applied as a
    ``category=...`` filter to every query set this manager returns.
    """

    def __init__(self, subcategory):
        """Store the ``category`` value this manager is restricted to."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's query set narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
56 name = models.CharField(_('name'), max_length=50, db_index=True)
57 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
58 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
59 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
60 db_index=True, choices=TAG_CATEGORIES)
61 description = models.TextField(_('description'), blank=True)
62 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
64 user = models.ForeignKey(User, blank=True, null=True)
65 book_count = models.IntegerField(_('book count'), blank=True, null=True)
66 gazeta_link = models.CharField(blank=True, max_length=240)
67 wiki_link = models.CharField(blank=True, max_length=240)
69 class UrlDeprecationWarning(DeprecationWarning):
80 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
83 ordering = ('sort_key',)
84 verbose_name = _('tag')
85 verbose_name_plural = _('tags')
86 unique_together = (("slug", "category"),)
88 def __unicode__(self):
92 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this tag's listing.

    NOTE(review): a tuple is returned rather than a URL string, so this is
    presumably wrapped by a decorator (e.g. ``permalink``) on a line not
    visible here -- confirm.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this object carries a non-empty description.

    Returns:
        bool: ``True`` when ``self.description`` is non-empty, ``False``
        for an empty string and also for ``None``.
    """
    # Truthiness instead of ``len(...) > 0``: a missing (None) description
    # now reads as "no description" rather than raising TypeError.
    return bool(self.description)
# Display metadata attached to the function itself
# (presumably consumed by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
104 """ returns global book count for book tags, fragment count for themes """
106 if self.book_count is None:
107 if self.category == 'book':
109 objects = Book.objects.none()
110 elif self.category == 'theme':
111 objects = Fragment.tagged.with_all((self,))
113 objects = Book.tagged.with_all((self,)).order_by()
114 if self.category != 'set':
115 # eliminate descendants
116 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
117 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
119 objects = objects.exclude(pk__in=descendants_keys)
120 self.book_count = objects.count()
122 return self.book_count
125 def get_tag_list(tags):
126 if isinstance(tags, basestring):
131 tags_splitted = tags.split('/')
132 for name in tags_splitted:
134 real_tags.append(Tag.objects.get(slug=name, category=category))
136 elif name in Tag.categories_rev:
137 category = Tag.categories_rev[name]
140 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
142 except Tag.MultipleObjectsReturned, e:
143 ambiguous_slugs.append(name)
146 # something strange left off
147 raise Tag.DoesNotExist()
149 # some tags should be qualified
150 e = Tag.MultipleObjectsReturned()
152 e.ambiguous_slugs = ambiguous_slugs
155 e = Tag.UrlDeprecationWarning()
160 return TagBase.get_tag_list(tags)
164 return '/'.join((Tag.categories_dict[self.category], self.slug))
167 # TODO: why is this hard-coded ?
168 def book_upload_path(ext=None, maxlen=100):
169 def get_dynamic_path(media, filename, ext=ext):
170 # how to put related book's slug here?
172 if media.type == 'daisy':
177 name = slughifi(filename.split(".")[0])
179 name = slughifi(media.name)
180 return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
181 return get_dynamic_path
184 class BookMedia(models.Model):
185 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
186 name = models.CharField(_('name'), max_length="100")
187 file = models.FileField(_('file'), upload_to=book_upload_path())
188 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
189 extra_info = JSONField(_('extra information'), default='{}', editable=False)
190 book = models.ForeignKey('Book', related_name='media')
191 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
def __unicode__(self):
    """Render as ``"<name> (<last path component of the file name>)"``."""
    # Only the part after the final "/" of the stored file name is shown.
    stored_basename = self.file.name.rsplit("/", 1)[-1]
    return "%s (%s)" % (self.name, stored_basename)
197 ordering = ('type', 'name')
198 verbose_name = _('book media')
199 verbose_name_plural = _('book media')
201 def save(self, *args, **kwargs):
203 b = BookMedia.objects.get(pk=self.pk)
204 except BookMedia.DoesNotExist, e:
207 # if file is replaced, delete the old one
208 if self.file.path != b.file.path:
209 b.file.delete(save=False)
210 # if name changed, change the file name, too
211 elif self.name != b.name:
212 self.file.save(None, ExistingFile(self.file.path))
214 super(BookMedia, self).save(*args, **kwargs)
215 extra_info = self.get_extra_info_value()
216 extra_info.update(self.read_meta())
217 self.set_extra_info_value(extra_info)
218 self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
219 return super(BookMedia, self).save(*args, **kwargs)
223 Reads some metadata from the audiobook.
226 artist_name = director_name = project = funded_by = ''
227 if self.type == 'mp3':
229 audio = id3.ID3(self.file.path)
230 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
231 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
232 project = ", ".join([t.data for t in audio.getall('PRIV')
233 if t.owner=='wolnelektury.pl?project'])
234 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
235 if t.owner=='wolnelektury.pl?funded_by'])
238 elif self.type == 'ogg':
240 audio = mutagen.File(self.file.path)
241 artist_name = ', '.join(audio.get('artist', []))
242 director_name = ', '.join(audio.get('conductor', []))
243 project = ", ".join(audio.get('project', []))
244 funded_by = ", ".join(audio.get('funded_by', []))
249 return {'artist_name': artist_name, 'director_name': director_name,
250 'project': project, 'funded_by': funded_by}
253 def read_source_sha1(filepath, filetype):
255 Reads source file SHA1 from audiobook metadata.
258 if filetype == 'mp3':
260 audio = id3.ID3(filepath)
261 return [t.data for t in audio.getall('PRIV')
262 if t.owner=='wolnelektury.pl?flac_sha1'][0]
265 elif filetype == 'ogg':
267 audio = mutagen.File(filepath)
268 return audio.get('flac_sha1', [None])[0]
275 class Book(models.Model):
276 title = models.CharField(_('title'), max_length=120)
277 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
278 description = models.TextField(_('description'), blank=True)
279 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
280 _short_html = models.TextField(_('short HTML'), editable=False)
281 parent_number = models.IntegerField(_('parent number'), default=0)
282 extra_info = JSONField(_('extra information'))
283 gazeta_link = models.CharField(blank=True, max_length=240)
284 wiki_link = models.CharField(blank=True, max_length=240)
285 # files generated during publication
286 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
287 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
288 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
289 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
290 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
292 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
293 objects = models.Manager()
294 tagged = managers.ModelTaggedItemManager(Tag)
295 tags = managers.TagDescriptor(Tag)
297 _tag_counter = JSONField(null=True, editable=False)
298 _theme_counter = JSONField(null=True, editable=False)
300 class AlreadyExists(Exception):
304 ordering = ('title',)
305 verbose_name = _('book')
306 verbose_name_plural = _('books')
308 def __unicode__(self):
311 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
313 # Reset _short_html during save
315 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
317 self.__setattr__(key, '')
318 # Fragment.short_html relies on book's tags, so reset it here too
319 self.fragments.all().update(**update)
321 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this book's detail page.

    NOTE(review): a tuple is returned rather than a URL string, so this is
    presumably wrapped by a decorator (e.g. ``permalink``) on a line not
    visible here -- confirm.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: ``'l-'`` + the book slug, cut to 120 characters."""
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
335 slug = self.book_tag_slug()
336 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
338 book_tag.name = self.title[:50]
339 book_tag.sort_key = self.title.lower()
343 def has_media(self, type):
370 if self.media.filter(type=type).exists():
375 def get_media(self, type):
376 if self.has_media(type):
380 return self.html_file
382 return self.epub_file
388 return self.media.filter(type=type)
393 return self.get_media("mp3")
395 return self.get_media("odt")
397 return self.get_media("ogg")
399 return self.get_media("daisy")
401 def short_html(self):
402 key = '_short_html_%s' % get_language()
403 short_html = getattr(self, key)
405 if short_html and len(short_html):
406 return mark_safe(short_html)
408 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
409 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
412 # files generated during publication
413 if self.has_media("html"):
414 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
415 if self.has_media("pdf"):
416 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
417 if self.root_ancestor.has_media("epub"):
418 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
419 if self.has_media("txt"):
420 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
422 for m in self.media.order_by('type'):
423 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
425 formats = [mark_safe(format) for format in formats]
427 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
428 {'book': self, 'tags': tags, 'formats': formats})))
429 self.save(reset_short_html=False)
430 return mark_safe(getattr(self, key))
434 def root_ancestor(self):
435 """ returns the oldest ancestor """
437 if not hasattr(self, '_root_ancestor'):
441 self._root_ancestor = book
442 return self._root_ancestor
def has_description(self):
    """Tell whether this book carries a non-empty description.

    Returns:
        bool: ``True`` when ``self.description`` is non-empty, ``False``
        for an empty string and also for ``None``.
    """
    # Truthiness instead of ``len(...) > 0``: a missing (None) description
    # now reads as "no description" rather than raising TypeError.
    return bool(self.description)
# Display metadata attached to the function itself
# (presumably consumed by the Django admin change list -- confirm).
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Report whether ``self.pdf_file`` is set (truthy)."""
    return True if self.pdf_file else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Report whether ``self.epub_file`` is set (truthy)."""
    return True if self.epub_file else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Report whether ``self.txt_file`` is set (truthy).

    Returns:
        bool: ``True`` when a TXT file is attached, ``False`` otherwise.
    """
    return bool(self.txt_file)
# Display metadata attached to the function (presumably read by the Django
# admin -- confirm).  The label was previously 'HTML', a copy-paste slip
# that made this column indistinguishable from has_html_file's.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report whether ``self.html_file`` is set (truthy)."""
    return True if self.html_file else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Report, as a strict bool, whether ``self.has_media("odt")`` is truthy."""
    return True if self.has_media("odt") else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Report, as a strict bool, whether ``self.has_media("mp3")`` is truthy."""
    return True if self.has_media("mp3") else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Report, as a strict bool, whether ``self.has_media("ogg")`` is truthy."""
    return True if self.has_media("ogg") else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Report, as a strict bool, whether ``self.has_media("daisy")`` is truthy."""
    return True if self.has_media("daisy") else False
# Display metadata attached to the function (presumably read by the Django admin -- confirm).
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
491 def build_epub(self, remove_descendants=True):
492 """ (Re)builds the epub file.
493 If book has a parent, does nothing.
494 Unless remove_descendants is False, descendants' epubs are removed.
497 from StringIO import StringIO
498 from hashlib import sha1
499 from django.core.files.base import ContentFile
500 from librarian import DocProvider
502 class BookImportDocProvider(DocProvider):
503 """ used for joined EPUBs """
505 def __init__(self, book):
508 def by_slug(self, slug):
509 if slug == self.book.slug:
510 return self.book.xml_file
512 return Book.objects.get(slug=slug).xml_file
518 epub_file = StringIO()
520 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
521 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
522 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
526 book_descendants = list(self.children.all())
527 while len(book_descendants) > 0:
528 child_book = book_descendants.pop(0)
529 if remove_descendants and child_book.has_epub_file():
530 child_book.epub_file.delete()
531 # save anyway, to refresh short_html
533 book_descendants += list(child_book.children.all())
536 from StringIO import StringIO
537 from django.core.files.base import ContentFile
538 from librarian import text
541 text.transform(open(self.xml_file.path), out)
542 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
547 def from_xml_file(cls, xml_file, **kwargs):
548 # use librarian to parse meta-data
549 book_info = dcparser.parse(xml_file)
551 if not isinstance(xml_file, File):
552 xml_file = File(open(xml_file))
555 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
560 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
562 from tempfile import NamedTemporaryFile
563 from markupstring import MarkupString
564 from django.core.files.storage import default_storage
566 # check for parts before we do anything
568 if hasattr(book_info, 'parts'):
569 for part_url in book_info.parts:
570 base, slug = part_url.rsplit('/', 1)
572 children.append(Book.objects.get(slug=slug))
573 except Book.DoesNotExist, e:
574 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
578 book_base, book_slug = book_info.url.rsplit('/', 1)
579 if re.search(r'[^a-zA-Z0-9-]', book_slug):
580 raise ValueError('Invalid characters in slug')
581 book, created = Book.objects.get_or_create(slug=book_slug)
587 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
588 # Save shelves for this book
589 book_shelves = list(book.tags.filter(category='set'))
591 book.title = book_info.title
592 book.set_extra_info_value(book_info.to_dict())
593 book._short_html = ''
597 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
598 for field_name, category in categories:
600 tag_names = getattr(book_info, field_name)
602 tag_names = [getattr(book_info, category)]
603 for tag_name in tag_names:
604 tag_sort_key = tag_name
605 if category == 'author':
606 tag_sort_key = tag_name.last_name
607 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
608 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
611 tag.sort_key = tag_sort_key.lower()
613 book_tags.append(tag)
615 book.tags = set(book_tags + book_shelves)
617 book_tag = book.book_tag()
619 for n, child_book in enumerate(children):
620 child_book.parent = book
621 child_book.parent_number = n
624 # Save XML and HTML files
625 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
627 # delete old fragments when overwriting
628 book.fragments.all().delete()
630 html_file = NamedTemporaryFile()
631 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
632 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
634 # get ancestor l-tags for adding to new fragments
638 ancestor_tags.append(p.book_tag())
642 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
643 for fragment in closed_fragments.values():
645 theme_names = [s.strip() for s in fragment.themes.split(',')]
646 except AttributeError:
649 for theme_name in theme_names:
652 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
654 tag.name = theme_name
655 tag.sort_key = theme_name.lower()
661 text = fragment.to_string()
663 if (len(MarkupString(text)) > 240):
664 short_text = unicode(MarkupString(text)[:160])
665 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
666 defaults={'text': text, 'short_text': short_text})
669 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
671 if not settings.NO_BUILD_TXT and build_txt:
674 if not settings.NO_BUILD_EPUB and build_epub:
675 book.root_ancestor.build_epub()
677 book_descendants = list(book.children.all())
678 # add l-tag to descendants and their fragments
679 # delete unnecessary EPUB files
680 while len(book_descendants) > 0:
681 child_book = book_descendants.pop(0)
682 child_book.tags = list(child_book.tags) + [book_tag]
684 for fragment in child_book.fragments.all():
685 fragment.tags = set(list(fragment.tags) + [book_tag])
686 book_descendants += list(child_book.children.all())
689 book.reset_tag_counter()
690 book.reset_theme_counter()
696 def refresh_tag_counter(self):
698 for child in self.children.all().order_by():
699 for tag_pk, value in child.tag_counter.iteritems():
700 tags[tag_pk] = tags.get(tag_pk, 0) + value
701 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
703 self.set__tag_counter_value(tags)
704 self.save(reset_short_html=False)
707 def reset_tag_counter(self):
708 self._tag_counter = None
709 self.save(reset_short_html=False)
711 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter as a dict keyed by integers.

    When nothing is cached (``self._tag_counter`` is ``None``) the result
    of ``self.refresh_tag_counter()`` is returned unchanged.  Otherwise the
    stored value is read back through ``self.get__tag_counter_value()`` and
    every key is converted with ``int`` (presumably because the JSON
    round-trip turns the keys into strings -- confirm).
    """
    if self._tag_counter is not None:
        counts = {}
        for raw_key, amount in self.get__tag_counter_value().iteritems():
            counts[int(raw_key)] = amount
        return counts
    return self.refresh_tag_counter()
719 def refresh_theme_counter(self):
721 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
722 for tag in fragment.tags.filter(category='theme').order_by():
723 tags[tag.pk] = tags.get(tag.pk, 0) + 1
724 self.set__theme_counter_value(tags)
725 self.save(reset_short_html=False)
728 def reset_theme_counter(self):
729 self._theme_counter = None
730 self.save(reset_short_html=False)
732 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter as a dict keyed by integers.

    When nothing is cached (``self._theme_counter`` is ``None``) the result
    of ``self.refresh_theme_counter()`` is returned unchanged.  Otherwise
    the stored value is read back through
    ``self.get__theme_counter_value()`` and every key is converted with
    ``int`` (presumably because the JSON round-trip turns the keys into
    strings -- confirm).
    """
    if self._theme_counter is not None:
        counts = {}
        for raw_key, amount in self.get__theme_counter_value().iteritems():
            counts[int(raw_key)] = amount
        return counts
    return self.refresh_theme_counter()
740 def pretty_title(self, html_links=False):
742 names = list(book.tags.filter(category='author'))
748 names.extend(reversed(books))
751 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
753 names = [tag.name for tag in names]
755 return ', '.join(names)
758 class Fragment(models.Model):
759 text = models.TextField()
760 short_text = models.TextField(editable=False)
761 _short_html = models.TextField(editable=False)
762 anchor = models.CharField(max_length=120)
763 book = models.ForeignKey(Book, related_name='fragments')
765 objects = models.Manager()
766 tagged = managers.ModelTaggedItemManager(Tag)
767 tags = managers.TagDescriptor(Tag)
770 ordering = ('book', 'anchor',)
771 verbose_name = _('fragment')
772 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the ``book_text`` view for this fragment's book, with ``#m<anchor>`` appended."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
777 def short_html(self):
778 key = '_short_html_%s' % get_language()
779 short_html = getattr(self, key)
780 if short_html and len(short_html):
781 return mark_safe(short_html)
783 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
784 {'fragment': self})))
786 return mark_safe(getattr(self, key))
789 class FileRecord(models.Model):
790 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
791 type = models.CharField(_('type'), max_length=20, db_index=True)
792 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
793 time = models.DateTimeField(_('time'), auto_now_add=True)
796 ordering = ('-time','-slug', '-type')
797 verbose_name = _('file record')
798 verbose_name_plural = _('file records')
def __unicode__(self):
    """Render as ``"<sha1> <slug>.<type>"``."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters when an object's tags change.

    Connected to the ``tags_updated`` signal below.

    Args:
        sender: the object whose tags changed; only ``Book`` and
            ``Fragment`` instances trigger the per-book resets.
        affected_tags: iterable of tags touched by the change (each must
            expose ``pk``).
        **kwargs: remaining signal arguments, ignored.
    """
    # Collect the primary keys once.  They used to be rebuilt for every
    # query, which is wasteful and silently yields nothing on the second
    # pass if ``affected_tags`` is a one-shot iterator.
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    affected = Tag.objects.filter(pk__in=affected_pks)
    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        # (.exists() is enough: only the presence of a match matters)
        if affected.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
827 def _pre_delete_handler(sender, instance, **kwargs):
828 """ refresh Book on BookMedia delete """
829 if sender == BookMedia:
831 pre_delete.connect(_pre_delete_handler)
833 def _post_save_handler(sender, instance, **kwargs):
834 """ refresh all the short_html stuff on BookMedia update """
835 if sender == BookMedia:
837 post_save.connect(_post_save_handler)