1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
23 from mutagen import id3
24 from slughifi import slughifi
28 ('author', _('author')),
29 ('epoch', _('epoch')),
31 ('genre', _('genre')),
32 ('theme', _('theme')),
38 ('odt', _('ODT file')),
39 ('mp3', _('MP3 file')),
40 ('ogg', _('OGG file')),
41 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are restricted to a single tag category."""

    def __init__(self, subcategory):
        """Store the category value that every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
# Display name, URL slug and ordering key of the tag; all three are indexed.
name = models.CharField(_('name'), max_length=50, db_index=True)
slug = models.SlugField(_('slug'), max_length=120, db_index=True)
sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
# The category is mandatory and limited to the values listed in TAG_CATEGORIES.
category = models.CharField(_('category'), max_length=50, blank=False, null=False,
    db_index=True, choices=TAG_CATEGORIES)
description = models.TextField(_('description'), blank=True)
main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

# Optional link to a user account.
user = models.ForeignKey(User, blank=True, null=True)
# Cached count; None means "not computed yet" (the tags_updated handler in
# this module resets it to None).
book_count = models.IntegerField(_('book count'), blank=True, null=True)
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)

created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# NOTE(review): labelled 'creation date' although auto_now=True refreshes the
# value on every save -- looks like a copy-paste of the line above; confirm
# before changing the translatable string.
changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
78 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
81 ordering = ('sort_key',)
82 verbose_name = _('tag')
83 verbose_name_plural = _('tags')
84 unique_together = (("slug", "category"),)
86 def __unicode__(self):
90 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the view name and positional arguments for this tag's page.

    NOTE(review): the tuple shape is what ``permalink`` (imported at the top
    of the file) resolves into a URL -- confirm the decorator is applied.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether the description text contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes in the style read by Django admin's ``list_display``:
# render the value as an on/off icon under a translated column label.
has_description.boolean = True
has_description.short_description = _('description')
102 """ returns global book count for book tags, fragment count for themes """
104 if self.book_count is None:
105 if self.category == 'book':
107 objects = Book.objects.none()
108 elif self.category == 'theme':
109 objects = Fragment.tagged.with_all((self,))
111 objects = Book.tagged.with_all((self,)).order_by()
112 if self.category != 'set':
113 # eliminate descendants
114 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
115 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
117 objects = objects.exclude(pk__in=descendants_keys)
118 self.book_count = objects.count()
120 return self.book_count
123 def get_tag_list(tags):
124 if isinstance(tags, basestring):
128 tags_splitted = tags.split('/')
129 for index, name in enumerate(tags_splitted):
130 if name in Tag.categories_rev:
131 category = Tag.categories_rev[name]
134 real_tags.append(Tag.objects.get(slug=name, category=category))
138 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
139 except Tag.MultipleObjectsReturned, e:
140 ambiguous_slugs.append(name)
143 # something strange left off
144 raise Tag.DoesNotExist()
146 # some tags should be qualified
147 e = Tag.MultipleObjectsReturned()
149 e.ambiguous_slugs = ambiguous_slugs
154 return TagBase.get_tag_list(tags)
158 return '/'.join((Tag.categories_dict[self.category], self.slug))
161 # TODO: why is this hard-coded ?
162 def book_upload_path(ext=None, maxlen=100):
163 def get_dynamic_path(media, filename, ext=ext):
164 # how to put related book's slug here?
166 if media.type == 'daisy':
171 name = slughifi(filename.split(".")[0])
173 name = slughifi(media.name)
174 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
175 return get_dynamic_path
178 class BookMedia(models.Model):
# NOTE: ``type`` and ``file`` shadow builtins, but they are the attribute
# names the rest of this module reads (``media.type``, ``self.file.path``),
# so they are kept as they are.
# max_length must be an integer: the original string "100" cannot be compared
# numerically against a value's length.
type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
name = models.CharField(_('name'), max_length=100)
file = models.FileField(_('file'), upload_to=book_upload_path())
uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
# JSON-encoded extra data; BookMedia.save() merges MP3 tag info into it.
extra_info = JSONField(_('extra information'), default='{}')
def book_count(self):
    """Return how many books are linked to this media object."""
    linked_books = self.book_set
    return linked_books.count()
# Translated column label, in the style read by Django admin's ``list_display``.
book_count.short_description = _('book count')
190 return mark_safe('<br/>'.join("<a href='%s'>%s</a>" % (reverse('admin:catalogue_book_change', args=[b.id]), b.title) for b in self.book_set.all()))
191 books.short_description = _('books')
def __unicode__(self):
    """Describe the media as ``<name> (<file base name>)``."""
    # Keep only the part of the stored file name after the last slash.
    base_name = self.file.name.rsplit("/", 1)[-1]
    return "%s (%s)" % (self.name, base_name)
197 ordering = ('type', 'name')
198 verbose_name = _('book media')
199 verbose_name_plural = _('book media')
201 def save(self, force_insert=False, force_update=False, **kwargs):
202 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
203 if self.type == 'mp3':
205 extra_info = self.get_extra_info_value()
206 extra_info.update(self.get_mp3_info())
207 self.set_extra_info_value(extra_info)
208 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
211 def get_mp3_info(self):
212 """Retrieves artist and director names from audio ID3 tags."""
214 audio = id3.ID3(self.file.path)
215 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
216 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
218 artist_name = director_name = ''
219 return {'artist_name': artist_name, 'director_name': director_name}
222 class Book(models.Model):
title = models.CharField(_('title'), max_length=120)
slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
description = models.TextField(_('description'), blank=True)
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
# NOTE(review): labelled 'creation date' although auto_now=True refreshes the
# value on every save -- looks like a copy-paste of the line above; confirm
# before changing the translatable string.
changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
# Rendered-HTML cache; the import code in this module resets it to ''.
_short_html = models.TextField(_('short HTML'), editable=False)
# Position of this book among its parent's parts (assigned from enumerate()
# during import).
parent_number = models.IntegerField(_('parent number'), default=0)
extra_info = JSONField(_('extra information'))
gazeta_link = models.CharField(blank=True, max_length=240)
wiki_link = models.CharField(blank=True, max_length=240)
# files generated during publication
xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

# Additional uploaded media, stored as BookMedia rows.
medias = models.ManyToManyField(BookMedia, blank=True)

# Optional parent book; a parent reaches its parts through ``children``.
parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
objects = models.Manager()
tagged = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)

# JSON caches of per-tag counts; None means the cache must be rebuilt.
_tag_counter = JSONField(null=True, editable=False)
_theme_counter = JSONField(null=True, editable=False)
250 class AlreadyExists(Exception):
254 ordering = ('title',)
255 verbose_name = _('book')
256 verbose_name_plural = _('books')
258 def __unicode__(self):
261 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
263 # Reset _short_html during save
265 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
267 self.__setattr__(key, '')
268 # Fragment.short_html relies on book's tags, so reset it here too
269 self.fragments.all().update(**update)
271 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the view name and positional arguments for this book's page.

    NOTE(review): the tuple shape is what ``permalink`` (imported at the top
    of the file) resolves into a URL -- confirm the decorator is applied.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: ``'l-' + slug``, cut to 120 chars."""
    prefixed_slug = 'l-' + self.slug
    # 120 matches the max_length of the slug fields declared in this module.
    return prefixed_slug[:120]
285 slug = self.book_tag_slug()
286 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
288 book_tag.name = self.title[:50]
289 book_tag.sort_key = self.title.lower()
293 def has_media(self, type):
320 if self.medias.filter(book=self, type=type).count() > 0:
325 def get_media(self, type):
326 if self.has_media(type):
330 return self.html_file
332 return self.epub_file
338 return self.medias.filter(book=self, type=type)
343 return self.get_media("mp3")
345 return self.get_media("odt")
347 return self.get_media("ogg")
349 return self.get_media("daisy")
351 def short_html(self):
352 key = '_short_html_%s' % get_language()
353 short_html = getattr(self, key)
355 if short_html and len(short_html):
356 return mark_safe(short_html)
358 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
359 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
362 # files generated during publication
363 if self.has_media("html"):
364 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
365 if self.has_media("pdf"):
366 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
367 if self.root_ancestor.has_media("epub"):
368 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
369 if self.has_media("txt"):
370 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
372 for m in self.medias.order_by('type'):
373 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
375 formats = [mark_safe(format) for format in formats]
377 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
378 {'book': self, 'tags': tags, 'formats': formats})))
379 self.save(reset_short_html=False)
380 return mark_safe(getattr(self, key))
384 def root_ancestor(self):
385 """ returns the oldest ancestor """
387 if not hasattr(self, '_root_ancestor'):
391 self._root_ancestor = book
392 return self._root_ancestor
def has_description(self):
    """Tell whether the description text contains at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes in the style read by Django admin's ``list_display``:
# render the value as an on/off icon under a translated column label.
has_description.boolean = True
has_description.short_description = _('description')
def has_pdf_file(self):
    """Tell whether a PDF file is attached to this book."""
    if self.pdf_file:
        return True
    return False
# Admin-style metadata: on/off icon under the column label 'PDF'.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Tell whether an EPUB file is attached to this book."""
    if self.epub_file:
        return True
    return False
# Admin-style metadata: on/off icon under the column label 'EPUB'.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Tell whether a plain-text (TXT) file is attached to this book.

    Returns True when ``self.txt_file`` is truthy, False otherwise.
    """
    return bool(self.txt_file)
# Admin-style metadata. The label used to read 'HTML' (copied from
# has_html_file), which gave two indistinguishable columns; it is 'TXT' now.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Tell whether an HTML file is attached to this book."""
    if self.html_file:
        return True
    return False
# Admin-style metadata: on/off icon under the column label 'HTML'.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Tell whether ``has_media`` reports ODT media for this book."""
    odt_present = self.has_media("odt")
    return bool(odt_present)
# Admin-style metadata: on/off icon under the column label 'ODT'.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Tell whether ``has_media`` reports MP3 media for this book."""
    mp3_present = self.has_media("mp3")
    return bool(mp3_present)
# Admin-style metadata: on/off icon under the column label 'MP3'.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Tell whether ``has_media`` reports OGG media for this book."""
    ogg_present = self.has_media("ogg")
    return bool(ogg_present)
# Admin-style metadata: on/off icon under the column label 'OGG'.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Tell whether ``has_media`` reports DAISY media for this book."""
    daisy_present = self.has_media("daisy")
    return bool(daisy_present)
# Admin-style metadata: on/off icon under the column label 'DAISY'.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
441 def build_epub(self, remove_descendants=True):
442 """ (Re)builds the epub file.
443 If book has a parent, does nothing.
444 Unless remove_descendants is False, descendants' epubs are removed.
447 from StringIO import StringIO
448 from hashlib import sha1
449 from django.core.files.base import ContentFile
450 from librarian import DocProvider
452 class BookImportDocProvider(DocProvider):
453 """ used for joined EPUBs """
455 def __init__(self, book):
458 def by_slug(self, slug):
459 if slug == self.book.slug:
460 return self.book.xml_file
462 return Book.objects.get(slug=slug).xml_file
468 epub_file = StringIO()
470 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
471 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
472 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
476 book_descendants = list(self.children.all())
477 while len(book_descendants) > 0:
478 child_book = book_descendants.pop(0)
479 if remove_descendants and child_book.has_epub_file():
480 child_book.epub_file.delete()
481 # save anyway, to refresh short_html
483 book_descendants += list(child_book.children.all())
486 from StringIO import StringIO
487 from django.core.files.base import ContentFile
488 from librarian import text
491 text.transform(open(self.xml_file.path), out)
492 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
497 def from_xml_file(cls, xml_file, **kwargs):
498 # use librarian to parse meta-data
499 book_info = dcparser.parse(xml_file)
501 if not isinstance(xml_file, File):
502 xml_file = File(open(xml_file))
505 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
510 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
512 from tempfile import NamedTemporaryFile
513 from markupstring import MarkupString
514 from django.core.files.storage import default_storage
516 # check for parts before we do anything
518 if hasattr(book_info, 'parts'):
519 for part_url in book_info.parts:
520 base, slug = part_url.rsplit('/', 1)
522 children.append(Book.objects.get(slug=slug))
523 except Book.DoesNotExist, e:
524 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
528 book_base, book_slug = book_info.url.rsplit('/', 1)
529 if re.search(r'[^a-zA-Z0-9-]', book_slug):
530 raise ValueError('Invalid characters in slug')
531 book, created = Book.objects.get_or_create(slug=book_slug)
537 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
538 # Save shelves for this book
539 book_shelves = list(book.tags.filter(category='set'))
541 book.title = book_info.title
542 book.set_extra_info_value(book_info.to_dict())
543 book._short_html = ''
547 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
548 for field_name, category in categories:
550 tag_names = getattr(book_info, field_name)
552 tag_names = [getattr(book_info, category)]
553 for tag_name in tag_names:
554 tag_sort_key = tag_name
555 if category == 'author':
556 tag_sort_key = tag_name.last_name
557 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
558 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
561 tag.sort_key = tag_sort_key.lower()
563 book_tags.append(tag)
565 book.tags = set(book_tags + book_shelves)
567 book_tag = book.book_tag()
569 for n, child_book in enumerate(children):
570 child_book.parent = book
571 child_book.parent_number = n
574 # Save XML and HTML files
575 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
577 # delete old fragments when overwriting
578 book.fragments.all().delete()
580 html_file = NamedTemporaryFile()
581 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
582 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
584 # get ancestor l-tags for adding to new fragments
588 ancestor_tags.append(p.book_tag())
592 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
593 for fragment in closed_fragments.values():
595 theme_names = [s.strip() for s in fragment.themes.split(',')]
596 except AttributeError:
599 for theme_name in theme_names:
602 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
604 tag.name = theme_name
605 tag.sort_key = theme_name.lower()
611 text = fragment.to_string()
613 if (len(MarkupString(text)) > 240):
614 short_text = unicode(MarkupString(text)[:160])
615 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
616 defaults={'text': text, 'short_text': short_text})
619 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
621 if not settings.NO_BUILD_TXT and build_txt:
624 if not settings.NO_BUILD_EPUB and build_epub:
625 book.root_ancestor.build_epub()
627 book_descendants = list(book.children.all())
628 # add l-tag to descendants and their fragments
629 # delete unnecessary EPUB files
630 while len(book_descendants) > 0:
631 child_book = book_descendants.pop(0)
632 child_book.tags = list(child_book.tags) + [book_tag]
634 for fragment in child_book.fragments.all():
635 fragment.tags = set(list(fragment.tags) + [book_tag])
636 book_descendants += list(child_book.children.all())
639 book.reset_tag_counter()
640 book.reset_theme_counter()
646 def refresh_tag_counter(self):
648 for child in self.children.all().order_by():
649 for tag_pk, value in child.tag_counter.iteritems():
650 tags[tag_pk] = tags.get(tag_pk, 0) + value
651 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
653 self.set__tag_counter_value(tags)
654 self.save(reset_short_html=False)
657 def reset_tag_counter(self):
658 self._tag_counter = None
659 self.save(reset_short_html=False)
661 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the cached per-tag counts as a dict keyed by integer tag pk.

    When the cached field is None the result of ``refresh_tag_counter()`` is
    returned instead. Keys of the stored value are converted with ``int``.
    NOTE(review): other code reads this without calling it, so it is
    presumably wrapped in ``property`` -- the decorator is not visible here.
    """
    if self._tag_counter is not None:
        counts = {}
        for raw_key, amount in self.get__tag_counter_value().iteritems():
            counts[int(raw_key)] = amount
        return counts
    return self.refresh_tag_counter()
669 def refresh_theme_counter(self):
671 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
672 for tag in fragment.tags.filter(category='theme').order_by():
673 tags[tag.pk] = tags.get(tag.pk, 0) + 1
674 self.set__theme_counter_value(tags)
675 self.save(reset_short_html=False)
678 def reset_theme_counter(self):
679 self._theme_counter = None
680 self.save(reset_short_html=False)
682 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the cached per-theme counts as a dict keyed by integer tag pk.

    When the cached field is None the result of ``refresh_theme_counter()``
    is returned instead. Keys of the stored value are converted with ``int``.
    NOTE(review): presumably wrapped in ``property`` like ``tag_counter`` --
    the decorator is not visible here.
    """
    if self._theme_counter is not None:
        counts = {}
        for raw_key, amount in self.get__theme_counter_value().iteritems():
            counts[int(raw_key)] = amount
        return counts
    return self.refresh_theme_counter()
690 def pretty_title(self, html_links=False):
692 names = list(book.tags.filter(category='author'))
698 names.extend(reversed(books))
701 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
703 names = [tag.name for tag in names]
705 return ', '.join(names)
708 class Fragment(models.Model):
709 text = models.TextField()
710 short_text = models.TextField(editable=False)
711 _short_html = models.TextField(editable=False)
712 anchor = models.CharField(max_length=120)
713 book = models.ForeignKey(Book, related_name='fragments')
715 objects = models.Manager()
716 tagged = managers.ModelTaggedItemManager(Tag)
717 tags = managers.TagDescriptor(Tag)
720 ordering = ('book', 'anchor',)
721 verbose_name = _('fragment')
722 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book's text view, anchored at this fragment.

    The result is the reversed ``book_text`` URL followed by ``#m`` and the
    fragment's anchor.
    """
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
727 def short_html(self):
728 key = '_short_html_%s' % get_language()
729 short_html = getattr(self, key)
730 if short_html and len(short_html):
731 return mark_safe(short_html)
733 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
734 {'fragment': self})))
736 return mark_safe(getattr(self, key))
739 class FileRecord(models.Model):
740 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
741 type = models.CharField(_('type'), max_length=20, db_index=True)
742 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
743 time = models.DateTimeField(_('time'), auto_now_add=True)
746 ordering = ('-time','-slug', '-type')
747 verbose_name = _('file record')
748 verbose_name_plural = _('file records')
def __unicode__(self):
    """Describe the record as ``<sha1> <slug>.<type>``."""
    file_name = "%s.%s" % (self.slug, self.type)
    return "%s %s" % (self.sha1, file_name)
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    sender        -- the object whose tags changed; only Book and Fragment
                     instances trigger the per-book resets below
    affected_tags -- iterable of the tags that were touched

    Every affected tag gets its global ``book_count`` cleared. A Book sender
    additionally resets its tag counter when any affected tag is outside the
    'book'/'theme'/'set' categories. A Fragment sender resets its book's theme
    counter when any affected tag is a theme.
    """
    # Collect the primary keys once: the original walked ``affected_tags``
    # again for every query, which also breaks on one-shot iterables.
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        countable = Tag.objects.filter(pk__in=affected_pks).exclude(
            category__in=('book', 'theme', 'set'))
        # Only existence matters, so avoid a full COUNT(*).
        if countable.exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        themes = Tag.objects.filter(pk__in=affected_pks).filter(category='theme')
        if themes.exists():
            sender.book.reset_theme_counter()
774 tags_updated.connect(_tags_updated_handler)
777 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
778 """ refresh all the short_html stuff on BookMedia delete """
779 if sender == Book.medias.through and reverse and action == 'pre_clear':
780 for book in instance.book_set.all():
782 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """Detach a BookMedia from all its books just before it is deleted.

    Clearing the relation explicitly lets the m2m_changed handler in this
    module see a 'pre_clear' action and refresh the affected books.
    """
    is_book_media = sender == BookMedia
    if is_book_media:
        related_books = instance.book_set
        related_books.clear()
788 pre_delete.connect(_pre_delete_handler)
790 def _post_save_handler(sender, instance, **kwargs):
791 """ refresh all the short_html stuff on BookMedia update """
792 if sender == BookMedia:
793 for book in instance.book_set.all():
795 post_save.connect(_post_save_handler)