1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
23 from mutagen import id3
24 from slughifi import slughifi
# (value, translated label) pairs for tag categories.
# NOTE(review): the statement that opens this sequence is not visible here; presumably
# these entries belong to TAG_CATEGORIES, which the tag model's `category` field uses
# as `choices` -- confirm.
28 ('author', _('author')),
29 ('epoch', _('epoch')),
31 ('genre', _('genre')),
32 ('theme', _('theme')),
# (value, translated label) pairs for media formats.
# NOTE(review): presumably part of MEDIA_FORMATS, which BookMedia.type uses as
# `choices` -- confirm.
38 ('odt', _('ODT file')),
39 ('mp3', _('MP3 file')),
40 ('ogg', _('OGG file')),
41 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets contain only tags of one category."""

    def __init__(self, subcategory):
        """Remember the category value every queryset is narrowed to."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by ``category``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        wanted_category = self.subcategory
        return unfiltered.filter(category=wanted_category)
# Field declarations and model options of the tag model.
# NOTE(review): the `class` statement these lines belong to, and the line that opens
# the model-options section, are not visible here.
54 name = models.CharField(_('name'), max_length=50, db_index=True)
55 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
56 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
57 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
58 db_index=True, choices=TAG_CATEGORIES)
59 description = models.TextField(_('description'), blank=True)
60 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
# Optional owner, and a cached count that is nullable: it is reset to None by the
# tags_updated handler and recomputed when found to be None.
62 user = models.ForeignKey(User, blank=True, null=True)
63 book_count = models.IntegerField(_('book count'), blank=True, null=True)
64 gazeta_link = models.CharField(blank=True, max_length=240)
65 wiki_link = models.CharField(blank=True, max_length=240)
# Inverse of categories_rev: every (key, value) pair is swapped.
# NOTE(review): categories_rev itself is defined on lines not visible here.
75 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
# Model options: default ordering by sort_key; each (slug, category) pair is unique.
78 ordering = ('sort_key',)
79 verbose_name = _('tag')
80 verbose_name_plural = _('tags')
81 unique_together = (("slug", "category"),)
# Text representation of a tag.
# NOTE(review): lines between the `def` and the return are not visible here, so the
# return below may belong to a different method (e.g. a repr) -- confirm.
83 def __unicode__(self):
87 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the (view name, positional args) pair locating this tag's listing."""
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether the description holds at least one character."""
    description_length = len(self.description)
    return description_length > 0
# Attributes read by the admin change list: render as an icon, labelled 'description'.
has_description.boolean = True
has_description.short_description = _('description')
# Computes the count lazily and caches it in self.book_count; the cached value is
# returned as long as it is not None.
# NOTE(review): the `def` line and a few branch lines (e.g. an `else:`) of this method
# are not visible here.
99 """ returns global book count for book tags, fragment count for themes """
101 if self.book_count is None:
102 if self.category == 'book':
# Tags of category 'book' start from an empty queryset.
104 objects = Book.objects.none()
105 elif self.category == 'theme':
# Theme tags count the fragments carrying this tag.
106 objects = Fragment.tagged.with_all((self,))
# Books carrying this tag; order_by() with no arguments clears the default ordering.
108 objects = Book.tagged.with_all((self,)).order_by()
109 if self.category != 'set':
110 # eliminate descendants
# Look up the per-book tags of the matched books, then collect the pks of every
# book tagged with any of them; those books are excluded below.
111 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
112 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
114 objects = objects.exclude(pk__in=descendants_keys)
115 self.book_count = objects.count()
117 return self.book_count
# Resolves a tag specification into Tag objects.
# A string is split on '/' and walked chunk by chunk: a chunk found in
# Tag.categories_rev selects a category, and slugs are looked up with Tag.objects.get.
# Any other input is delegated to TagBase.get_tag_list.
# NOTE(review): several lines of this function are not visible here (e.g. where
# real_tags and ambiguous_slugs are initialised, and the try/else scaffolding), so
# the exact control flow should be confirmed against the full file.
120 def get_tag_list(tags):
121 if isinstance(tags, basestring):
125 tags_splitted = tags.split('/')
126 for index, name in enumerate(tags_splitted):
127 if name in Tag.categories_rev:
128 category = Tag.categories_rev[name]
# Slug qualified by a category.
131 real_tags.append(Tag.objects.get(slug=name, category=category))
# Unqualified slug: tags of category 'book' are excluded from the lookup.
135 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
136 except Tag.MultipleObjectsReturned, e:
# More than one tag matched this slug; remember it for the error raised below.
137 ambiguous_slugs.append(name)
140 # something strange left off
141 raise Tag.DoesNotExist()
143 # some tags should be qualified
144 e = Tag.MultipleObjectsReturned()
# The collected slugs travel with the exception object.
146 e.ambiguous_slugs = ambiguous_slugs
151 return TagBase.get_tag_list(tags)
# Builds "<name mapped from the category via Tag.categories_dict>/<slug>".
# NOTE(review): the header of the enclosing definition is not visible here;
# get_absolute_url reads `self.url_chunk`, so presumably this is that attribute's
# body -- confirm.
155 return '/'.join((Tag.categories_dict[self.category], self.slug))
158 # TODO: why is this hard-coded ?
# Factory for FileField `upload_to` callables. The returned function yields
# 'lektura/<slugified name>.<ext>', trimming the name so the whole path fits in
# `maxlen` characters.
# NOTE(review): the conditions choosing between the two `name` assignments, and any
# fallback for a missing `ext`, are on lines not visible here.
159 def book_upload_path(ext=None, maxlen=100):
160 def get_dynamic_path(media, filename, ext=ext):
161 # how to put related book's slug here?
# Name derived from the uploaded filename: the part before the first '.'.
165 name = slughifi(filename.split(".")[0])
# Name derived from the media object's own `name`.
167 name = slughifi(media.name)
# Room left for the name = maxlen minus the length of the fixed 'lektura/.<ext>' part.
168 return 'lektura/%s.%s' % (name[:(maxlen-len('lektura/.%s' % ext))], ext)
169 return get_dynamic_path
class BookMedia(models.Model):
    """An uploaded media file (see MEDIA_FORMATS) that books can reference."""
    # max_length must be an integer: a quoted "100" ends up being compared against
    # len(value) as a string, which defeats the length validation.
    type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name = models.CharField(_('name'), max_length=100)
    # Stored under the path computed by the callable from book_upload_path().
    file = models.FileField(_('file'), upload_to=book_upload_path())
    # Filled in automatically on creation; not editable.
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    # JSON blob; for MP3 media save() merges ID3-derived names into it.
    extra_info = JSONField(_('extra information'), default='{}')
def book_count(self):
    """Return how many books reference this media file."""
    related_books = self.book_set
    return related_books.count()
# Column label used when this method is shown in the admin.
book_count.short_description = _('book count')
# Renders the related books as links to their admin change pages, joined with <br/>,
# and marks the result as safe markup.
# NOTE(review): the `def` line of this callable is not visible here.
# NOTE(review): b.title is interpolated into the markup without escaping before
# mark_safe() -- confirm that titles are trusted or escape them.
184 return mark_safe('<br/>'.join("<a href='%s'>%s</a>" % (reverse('admin:catalogue_book_change', args=[b.id]), b.title) for b in self.book_set.all()))
185 books.short_description = _('books')
def __unicode__(self):
    """Describe the media as '<name> (<last path component of the file name>)'."""
    media_name = self.name
    base_name = self.file.name.split("/")[-1]
    return "%s (%s)" % (media_name, base_name)
# Model options: default ordering by type, then name; singular and plural
# verbose names are the same translated string.
# NOTE(review): the line opening this options section is not visible here.
191 ordering = ('type', 'name')
192 verbose_name = _('book media')
193 verbose_name_plural = _('book media')
# Saves the media. For type 'mp3' the ID3-derived names are merged into extra_info
# and the object is saved a second time.
# NOTE(review): presumably the first save is needed so the file exists at
# self.file.path before get_mp3_info() reads it -- confirm. Some lines of this
# method (including its return) are not visible here.
195 def save(self, force_insert=False, force_update=False, **kwargs):
196 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
197 if self.type == 'mp3':
199 extra_info = self.get_extra_info_value()
200 extra_info.update(self.get_mp3_info())
201 self.set_extra_info_value(extra_info)
202 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
# Returns a dict with 'artist_name' and 'director_name'.
205 def get_mp3_info(self):
206 """Retrieves artist and director names from audio ID3 tags."""
# TPE1 frames feed the artist name and TPE3 frames the director name; the text
# values of each frame, and then all frames, are joined with ', '.
208 audio = id3.ID3(self.file.path)
209 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
210 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
# Fallback: both names empty.
# NOTE(review): the statements guarding this fallback are not visible here;
# presumably an exception handler around the ID3 read -- confirm which exceptions.
212 artist_name = director_name = ''
213 return {'artist_name': artist_name, 'director_name': director_name}
# A book in the catalogue: field declarations, managers, nested exception and
# model options. Methods follow below.
216 class Book(models.Model):
217 title = models.CharField(_('title'), max_length=120)
218 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
219 description = models.TextField(_('description'), blank=True)
220 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
# Cached rendered summary. short_html() reads per-language attributes named
# '_short_html_<language code>'.
221 _short_html = models.TextField(_('short HTML'), editable=False)
# Position among the parent's children (assigned from enumerate() in from_text_and_meta).
222 parent_number = models.IntegerField(_('parent number'), default=0)
223 extra_info = JSONField(_('extra information'))
224 gazeta_link = models.CharField(blank=True, max_length=240)
225 wiki_link = models.CharField(blank=True, max_length=240)
226 # files generated during publication
227 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
228 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
229 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
230 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
231 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
# Additional uploaded media attached to the book.
233 medias = models.ManyToManyField(BookMedia, blank=True)
# Optional parent book; a parent's parts are reachable through `children`.
235 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
# Default manager plus the tagging manager and descriptor from newtagging.
236 objects = models.Manager()
237 tagged = managers.ModelTaggedItemManager(Tag)
238 tags = managers.TagDescriptor(Tag)
# Cached counters stored as JSON; None means "not computed yet" (see tag_counter
# and theme_counter).
240 _tag_counter = JSONField(null=True, editable=False)
241 _theme_counter = JSONField(null=True, editable=False)
# Raised by from_text_and_meta with the message 'Book %s already exists'.
243 class AlreadyExists(Exception):
# Model options: books are ordered by title.
# NOTE(review): the line opening this options section is not visible here.
247 ordering = ('title',)
248 verbose_name = _('book')
249 verbose_name_plural = _('books')
# Text representation of the book.
# NOTE(review): the body of this method is not visible here.
251 def __unicode__(self):
# Saves the book, blanking the cached short-HTML attributes first.
# NOTE(review): the condition on `reset_short_html` and the construction of the
# `update` dict are on lines not visible here.
# NOTE(review): **kwargs is accepted but not forwarded to the parent save() in the
# final call -- confirm this is intended.
254 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
256 # Reset _short_html during save
# Every instance attribute whose name starts with '_short_html' is set to ''.
258 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
260 self.__setattr__(key, '')
261 # Fragment.short_html relies on book's tags, so reset it here too
262 self.fragments.all().update(**update)
264 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the (view name, positional args) pair locating this book's detail page."""
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of the book's own tag: 'l-' + slug, cut to 120 characters."""
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
# Fetches or creates the tag of category 'book' whose slug is book_tag_slug(), then
# sets its name (title cut to 50 characters) and sort key (lower-cased title).
# NOTE(review): the `def` line, any condition around the assignments, and the
# save/return statements are not visible here.
278 slug = self.book_tag_slug()
279 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
281 book_tag.name = self.title[:50]
282 book_tag.sort_key = self.title.lower()
# Reports whether the book has media of the given type.
# NOTE(review): most of this method is not visible here; the only visible check
# counts attached BookMedia rows of that type.
286 def has_media(self, type):
313 if self.medias.filter(book=self, type=type).count() > 0:
# Returns the media of the given type once has_media(type) is true: a file field
# such as html_file / epub_file, or a queryset of attached BookMedia rows.
# NOTE(review): the conditions selecting between these returns are on lines not
# visible here.
318 def get_media(self, type):
319 if self.has_media(type):
323 return self.html_file
325 return self.epub_file
331 return self.medias.filter(book=self, type=type)
# Bodies of per-format convenience accessors, each delegating to get_media().
# NOTE(review): the `def` lines of these four accessors are not visible here.
336 return self.get_media("mp3")
338 return self.get_media("odt")
340 return self.get_media("ogg")
342 return self.get_media("daisy")
# Returns the book's short HTML summary for the active language, rendering and
# caching it on first use.
# NOTE(review): a few lines (e.g. the initialisation of `formats` and the
# else/guard scaffolding) are not visible here.
344 def short_html(self):
# Cache attribute is per language: '_short_html_<language code>'.
345 key = '_short_html_%s' % get_language()
346 short_html = getattr(self, key)
# A non-empty cached value is returned as-is.
348 if short_html and len(short_html):
349 return mark_safe(short_html)
# Tags shown in the summary exclude the 'set', 'theme' and 'book' categories.
351 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
352 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
355 # files generated during publication
356 if self.has_media("html"):
357 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
358 if self.has_media("pdf"):
359 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
# The EPUB link is taken from the root ancestor rather than from this book.
360 if self.root_ancestor.has_media("epub"):
361 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
362 if self.has_media("txt"):
363 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
# One link per attached BookMedia, labelled with its upper-cased type.
365 for m in self.medias.order_by('type'):
366 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
368 formats = [mark_safe(format) for format in formats]
# Render the template, store the result in the cache attribute and persist it
# without resetting the cache again.
370 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
371 {'book': self, 'tags': tags, 'formats': formats})))
372 self.save(reset_short_html=False)
373 return mark_safe(getattr(self, key))
# The result is memoised on the instance as `_root_ancestor`.
# NOTE(review): the lines that walk up to the ancestor (assigning `book`) are not
# visible here; short_html reads this as an attribute, so presumably it is a
# property -- confirm.
377 def root_ancestor(self):
378 """ returns the oldest ancestor """
380 if not hasattr(self, '_root_ancestor'):
384 self._root_ancestor = book
385 return self._root_ancestor
def has_description(self):
    """Tell whether the book's description holds at least one character."""
    return 0 < len(self.description)
# Attributes read by the admin change list: render as an icon, labelled 'description'.
has_description.boolean = True
has_description.short_description = _('description')
def has_pdf_file(self):
    """Report whether a PDF file is attached to the book."""
    return True if self.pdf_file else False
# Attributes read by the admin change list: render as an icon, labelled 'PDF'.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Report whether an EPUB file is attached to the book."""
    attached = self.epub_file
    return bool(attached)
# Attributes read by the admin change list: render as an icon, labelled 'EPUB'.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Report whether a plain-text (TXT) file is attached to the book."""
    return bool(self.txt_file)
# Admin column label. It previously read 'HTML', duplicating the label of
# has_html_file, so the two columns could not be told apart.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report whether an HTML file is attached to the book."""
    if self.html_file:
        return True
    return False
# Attributes read by the admin change list: render as an icon, labelled 'HTML'.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Report whether has_media() finds ODT media for the book."""
    odt_present = self.has_media("odt")
    return bool(odt_present)
# Attributes read by the admin change list: render as an icon, labelled 'ODT'.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Report whether has_media() finds MP3 media for the book."""
    mp3_present = self.has_media("mp3")
    return bool(mp3_present)
# Attributes read by the admin change list: render as an icon, labelled 'MP3'.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Report whether has_media() finds OGG media for the book."""
    ogg_present = self.has_media("ogg")
    return bool(ogg_present)
# Attributes read by the admin change list: render as an icon, labelled 'OGG'.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Report whether has_media() finds DAISY media for the book."""
    daisy_present = self.has_media("daisy")
    return bool(daisy_present)
# Attributes read by the admin change list: render as an icon, labelled 'DAISY'.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
# Builds the EPUB into an in-memory buffer, stores it as '<slug>.epub', records its
# SHA-1 in a FileRecord of type 'epub', then walks all descendants breadth-first,
# deleting their EPUB files when remove_descendants is true.
# The nested BookImportDocProvider resolves a slug to an XML file: this book's own
# file for its own slug, otherwise the file of the Book looked up by slug.
# NOTE(review): several lines (the end of the docstring, the parent check, the
# provider's __init__ body, try/except scaffolding and the child save) are not
# visible here.
434 def build_epub(self, remove_descendants=True):
435 """ (Re)builds the epub file.
436 If book has a parent, does nothing.
437 Unless remove_descendants is False, descendants' epubs are removed.
440 from StringIO import StringIO
441 from hashlib import sha1
442 from django.core.files.base import ContentFile
443 from librarian import DocProvider
445 class BookImportDocProvider(DocProvider):
446 """ used for joined EPUBs """
448 def __init__(self, book):
451 def by_slug(self, slug):
452 if slug == self.book.slug:
453 return self.book.xml_file
455 return Book.objects.get(slug=slug).xml_file
461 epub_file = StringIO()
463 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
464 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
465 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
469 book_descendants = list(self.children.all())
470 while len(book_descendants) > 0:
471 child_book = book_descendants.pop(0)
472 if remove_descendants and child_book.has_epub_file():
473 child_book.epub_file.delete()
474 # save anyway, to refresh short_html
476 book_descendants += list(child_book.children.all())
# Transforms the book's XML file into plain text and stores it as '<slug>.txt'.
# NOTE(review): the `def` line and the creation of `out` are not visible here;
# getvalue() suggests an in-memory buffer -- confirm.
# NOTE(review): the file opened for the transform has no visible close -- confirm.
479 from StringIO import StringIO
480 from django.core.files.base import ContentFile
481 from librarian import text
484 text.transform(open(self.xml_file.path), out)
485 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
# Builds a book from an XML file: the metadata is parsed with dcparser, the input is
# wrapped in a Django File when it is not one already, and the rest is delegated to
# from_text_and_meta with the remaining keyword arguments.
# NOTE(review): the first parameter is `cls`, so this is presumably a classmethod;
# the decorator line is not visible here. Lines around the final return (possibly
# closing the opened file) are not visible either -- confirm.
490 def from_xml_file(cls, xml_file, **kwargs):
491 # use librarian to parse meta-data
492 book_info = dcparser.parse(xml_file)
494 if not isinstance(xml_file, File):
495 xml_file = File(open(xml_file))
498 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
# Creates or updates a Book from a raw XML file plus parsed metadata, in this order:
# resolve child books, get or create the book by slug, create its tags, attach
# children, store XML and HTML files, rebuild fragments with their theme tags,
# optionally build TXT and EPUB, propagate the book's own tag to all descendants,
# and reset the cached counters.
# NOTE(review): many short lines (initialisation of `children`, `book_tags`,
# `themes`, `ancestor_tags`; try/else scaffolding; saves; the final return; and
# presumably an `import re`) are not visible here.
503 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
505 from tempfile import NamedTemporaryFile
506 from markupstring import MarkupString
507 from django.core.files.storage import default_storage
509 # check for parts before we do anything
511 if hasattr(book_info, 'parts'):
512 for part_url in book_info.parts:
# The child's slug is the last path component of the part URL.
513 base, slug = part_url.rsplit('/', 1)
515 children.append(Book.objects.get(slug=slug))
516 except Book.DoesNotExist, e:
517 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
# The book's own slug is the last path component of its URL; only ASCII letters,
# digits and '-' are accepted.
521 book_base, book_slug = book_info.url.rsplit('/', 1)
522 if re.search(r'[^a-zA-Z0-9-]', book_slug):
523 raise ValueError('Invalid characters in slug')
524 book, created = Book.objects.get_or_create(slug=book_slug)
530 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
531 # Save shelves for this book
532 book_shelves = list(book.tags.filter(category='set'))
534 book.title = book_info.title
535 book.set_extra_info_value(book_info.to_dict())
536 book._short_html = ''
# (metadata attribute holding several values, tag category) pairs; the singular
# category name is used as the attribute for a single value.
540 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
541 for field_name, category in categories:
543 tag_names = getattr(book_info, field_name)
545 tag_names = [getattr(book_info, category)]
546 for tag_name in tag_names:
547 tag_sort_key = tag_name
# Authors sort by last name and display as "<first names> <last name>".
548 if category == 'author':
549 tag_sort_key = tag_name.last_name
550 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
551 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
554 tag.sort_key = tag_sort_key.lower()
556 book_tags.append(tag)
# Shelf ('set') tags collected earlier are kept alongside the metadata tags.
558 book.tags = book_tags + book_shelves
560 book_tag = book.book_tag()
562 for n, child_book in enumerate(children):
563 child_book.parent = book
564 child_book.parent_number = n
567 # Save XML and HTML files
568 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
570 # delete old fragments when overwriting
571 book.fragments.all().delete()
573 html_file = NamedTemporaryFile()
574 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
575 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
577 # get ancestor l-tags for adding to new fragments
581 ancestor_tags.append(p.book_tag())
# Only closed fragments are turned into Fragment rows.
585 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
586 for fragment in closed_fragments.values():
588 theme_names = [s.strip() for s in fragment.themes.split(',')]
589 except AttributeError:
592 for theme_name in theme_names:
595 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
597 tag.name = theme_name
598 tag.sort_key = theme_name.lower()
604 text = fragment.to_string()
# Fragment text longer than 240 (measured through MarkupString) gets a short form
# cut at 160.
606 if (len(MarkupString(text)) > 240):
607 short_text = unicode(MarkupString(text)[:160])
608 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
609 defaults={'text': text, 'short_text': short_text})
# Each fragment carries the book's tags, its themes, the book's own tag and the
# ancestors' own tags.
612 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
# Builds can be disabled globally through settings or per call through the arguments.
614 if not settings.NO_BUILD_TXT and build_txt:
617 if not settings.NO_BUILD_EPUB and build_epub:
618 book.root_ancestor.build_epub()
620 book_descendants = list(book.children.all())
621 # add l-tag to descendants and their fragments
622 # delete unnecessary EPUB files
623 while len(book_descendants) > 0:
624 child_book = book_descendants.pop(0)
625 child_book.tags = list(child_book.tags) + [book_tag]
627 for fragment in child_book.fragments.all():
628 fragment.tags = set(list(fragment.tags) + [book_tag])
629 book_descendants += list(child_book.children.all())
632 book.reset_tag_counter()
633 book.reset_theme_counter()
# Recomputes the per-tag counter: the children's counters are summed key by key,
# the book's own tags (excluding 'book', 'theme' and 'set') are visited, and the
# result is stored in _tag_counter and saved without resetting the short-HTML cache.
# NOTE(review): the initialisation of `tags`, the body of the second loop and the
# return statement are not visible here; tag_counter returns this method's result.
639 def refresh_tag_counter(self):
641 for child in self.children.all().order_by():
642 for tag_pk, value in child.tag_counter.iteritems():
643 tags[tag_pk] = tags.get(tag_pk, 0) + value
644 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
646 self.set__tag_counter_value(tags)
647 self.save(reset_short_html=False)
# Invalidates the cached tag counter (None means "recompute on next read"), saves,
# and repeats the reset on the parent.
# NOTE(review): the guard before the parent call is not visible here; presumably it
# checks that a parent exists -- confirm.
650 def reset_tag_counter(self):
651 self._tag_counter = None
652 self.save(reset_short_html=False)
654 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter with integer keys, recomputing it when not cached."""
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        # Keys are converted back to int before being returned.
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_tag_counter()
# Recomputes the per-theme counter: for every fragment tagged with this book's own
# tag, each of its 'theme' tags is counted once. The result is stored in
# _theme_counter and saved without resetting the short-HTML cache.
# NOTE(review): the initialisation of `tags` and the return statement are not
# visible here; theme_counter returns this method's result.
662 def refresh_theme_counter(self):
664 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
665 for tag in fragment.tags.filter(category='theme').order_by():
666 tags[tag.pk] = tags.get(tag.pk, 0) + 1
667 self.set__theme_counter_value(tags)
668 self.save(reset_short_html=False)
# Invalidates the cached theme counter (None means "recompute on next read"), saves,
# and repeats the reset on the parent.
# NOTE(review): the guard before the parent call is not visible here; presumably it
# checks that a parent exists -- confirm.
671 def reset_theme_counter(self):
672 self._theme_counter = None
673 self.save(reset_short_html=False)
675 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter with integer keys, recomputing it when not cached."""
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        # Keys are converted back to int before being returned.
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_theme_counter()
# Builds a comma-separated title line: author tag names followed by a reversed list
# of books, rendered either as links (html_links) or as plain names.
# NOTE(review): the lines that pick `book` and collect `books` are not visible here.
# NOTE(review): in the link form tag.name is interpolated into markup without
# escaping -- confirm that names are trusted or escape them.
683 def pretty_title(self, html_links=False):
685 names = list(book.tags.filter(category='author'))
691 names.extend(reversed(books))
694 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
696 names = [tag.name for tag in names]
698 return ', '.join(names)
# A fragment of a book's text, addressed by an anchor in the book's HTML.
# NOTE(review): the line opening the model-options section and a few lines inside
# short_html are not visible here.
701 class Fragment(models.Model):
702 text = models.TextField()
703 short_text = models.TextField(editable=False)
# Cached rendered summary; short_html() reads per-language '_short_html_<lang>' attributes.
704 _short_html = models.TextField(editable=False)
705 anchor = models.CharField(max_length=120)
706 book = models.ForeignKey(Book, related_name='fragments')
# Default manager plus the tagging manager and descriptor from newtagging.
708 objects = models.Manager()
709 tagged = managers.ModelTaggedItemManager(Tag)
710 tags = managers.TagDescriptor(Tag)
# Model options: ordered by book, then anchor.
713 ordering = ('book', 'anchor',)
714 verbose_name = _('fragment')
715 verbose_name_plural = _('fragments')
# URL of the book's text view with a '#m<anchor>' fragment identifier.
717 def get_absolute_url(self):
718 return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
# Returns the cached short HTML for the active language, rendering it from the
# 'catalogue/fragment_short.html' template when the cache is empty.
720 def short_html(self):
721 key = '_short_html_%s' % get_language()
722 short_html = getattr(self, key)
723 if short_html and len(short_html):
724 return mark_safe(short_html)
726 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
727 {'fragment': self})))
729 return mark_safe(getattr(self, key))
# A record of a generated file: slug, type, SHA-1 hex digest and creation time.
# build_epub creates one for every EPUB it builds.
# NOTE(review): the line opening the model-options section is not visible here.
732 class FileRecord(models.Model):
733 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
734 type = models.CharField(_('type'), max_length=20, db_index=True)
# 40 characters matches the length of a SHA-1 hex digest.
735 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
736 time = models.DateTimeField(_('time'), auto_now_add=True)
# Model options: newest first, then by slug and type, both descending.
739 ordering = ('-time','-slug', '-type')
740 verbose_name = _('file record')
741 verbose_name_plural = _('file records')
# Formats the record as "<sha1> <slug>.<type>".
743 def __unicode__(self):
744 return "%s %s.%s" % (self.sha1, self.slug, self.type)
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of a book or fragment change."""
    # The global per-tag count is dropped for every affected tag.
    Tag.objects.filter(pk__in=[t.pk for t in affected_tags]).update(book_count=None)

    # A book whose ordinary tags (not 'book', 'theme' or 'set') changed loses its
    # tag counter.
    if isinstance(sender, Book) and (
            Tag.objects.filter(pk__in=(t.pk for t in affected_tags))
            .exclude(category__in=('book', 'theme', 'set'))
            .count()):
        sender.reset_tag_counter()
    # A fragment whose theme tags changed invalidates its book's theme counter.
    elif isinstance(sender, Fragment) and (
            Tag.objects.filter(pk__in=(t.pk for t in affected_tags))
            .filter(category='theme')
            .count()):
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
# Reacts only to the Book.medias relation, seen from the BookMedia side (reverse),
# just before it is cleared ('pre_clear'), and visits every book of that media.
# NOTE(review): the loop body is not visible here; the docstring suggests each book
# is refreshed (presumably saved) -- confirm.
770 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
771 """ refresh all the short_html stuff on BookMedia delete """
772 if sender == Book.medias.through and reverse and action == 'pre_clear':
773 for book in instance.book_set.all():
775 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """ explicitly clear m2m, so that Books can be refreshed """
    # Signals from any other sender are ignored.
    if not (sender == BookMedia):
        return
    related_books = instance.book_set
    related_books.clear()
pre_delete.connect(_pre_delete_handler)
# Reacts only to saves of BookMedia and visits every book that references the
# saved media.
# NOTE(review): the loop body is not visible here; the docstring suggests each book
# is refreshed (presumably saved) -- confirm.
783 def _post_save_handler(sender, instance, **kwargs):
784 """ refresh all the short_html stuff on BookMedia update """
785 if sender == BookMedia:
786 for book in instance.book_set.all():
788 post_save.connect(_post_save_handler)