1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from django.db.models.signals import post_save, m2m_changed, pre_delete
16 from django.conf import settings
18 from newtagging.models import TagBase, tags_updated
19 from newtagging import managers
20 from catalogue.fields import JSONField
22 from librarian import dcparser, html, epub, NoDublinCore
23 from mutagen import id3
24 from slughifi import slughifi
28 ('author', _('author')),
29 ('epoch', _('epoch')),
31 ('genre', _('genre')),
32 ('theme', _('theme')),
38 ('odt', _('ODT file')),
39 ('mp3', _('MP3 file')),
40 ('ogg', _('OGG file')),
41 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain rows of one tag category."""

    def __init__(self, subcategory):
        """Store the category value that every queryset is filtered on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent queryset filtered by ``category=self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
54 name = models.CharField(_('name'), max_length=50, db_index=True)
55 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
56 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
57 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
58 db_index=True, choices=TAG_CATEGORIES)
59 description = models.TextField(_('description'), blank=True)
60 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
62 user = models.ForeignKey(User, blank=True, null=True)
63 book_count = models.IntegerField(_('book count'), blank=True, null=True)
64 gazeta_link = models.CharField(blank=True, max_length=240)
65 wiki_link = models.CharField(blank=True, max_length=240)
67 class UrlDeprecationWarning(DeprecationWarning):
78 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
81 ordering = ('sort_key',)
82 verbose_name = _('tag')
83 verbose_name_plural = _('tags')
84 unique_together = (("slug", "category"),)
86 def __unicode__(self):
90 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the view name and positional arguments for this tag's page.

    The result is a ``(view_name, args)`` tuple built from ``url_chunk``.
    NOTE(review): a tuple like this is presumably resolved to a URL by a
    permalink-style decorator; the decorator line is not visible here.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Tell whether ``description`` holds any text.

    Returns True for a non-empty description and False for an empty
    string. A missing (None) description also yields False instead of
    raising TypeError.
    """
    return bool(self.description)
# Display metadata attached to the method: a translated label and a
# flag marking the result as a boolean.
has_description.short_description = _('description')
has_description.boolean = True
102 """ returns global book count for book tags, fragment count for themes """
104 if self.book_count is None:
105 if self.category == 'book':
107 objects = Book.objects.none()
108 elif self.category == 'theme':
109 objects = Fragment.tagged.with_all((self,))
111 objects = Book.tagged.with_all((self,)).order_by()
112 if self.category != 'set':
113 # eliminate descendants
114 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
115 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
117 objects = objects.exclude(pk__in=descendants_keys)
118 self.book_count = objects.count()
120 return self.book_count
123 def get_tag_list(tags):
124 if isinstance(tags, basestring):
129 tags_splitted = tags.split('/')
130 for name in tags_splitted:
132 real_tags.append(Tag.objects.get(slug=name, category=category))
134 elif name in Tag.categories_rev:
135 category = Tag.categories_rev[name]
138 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
140 except Tag.MultipleObjectsReturned, e:
141 ambiguous_slugs.append(name)
144 # something strange left off
145 raise Tag.DoesNotExist()
147 # some tags should be qualified
148 e = Tag.MultipleObjectsReturned()
150 e.ambiguous_slugs = ambiguous_slugs
153 e = Tag.UrlDeprecationWarning()
158 return TagBase.get_tag_list(tags)
162 return '/'.join((Tag.categories_dict[self.category], self.slug))
165 # TODO: why is this hard-coded ?
166 def book_upload_path(ext=None, maxlen=100):
167 def get_dynamic_path(media, filename, ext=ext):
168 # how to put related book's slug here?
170 if media.type == 'daisy':
175 name = slughifi(filename.split(".")[0])
177 name = slughifi(media.name)
178 return 'lektura/%s.%s' % (name[:maxlen-len('lektura/.%s' % ext)-4], ext)
179 return get_dynamic_path
182 class BookMedia(models.Model):
183 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
184 name = models.CharField(_('name'), max_length="100")
185 file = models.FileField(_('file'), upload_to=book_upload_path())
186 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
187 extra_info = JSONField(_('extra information'), default='{}')
def book_count(self):
    """Return the number of objects in this media's ``book_set``."""
    linked_books = self.book_set
    return linked_books.count()
# Translated label attached to the method for display purposes.
book_count.short_description = _('book count')
194 return mark_safe('<br/>'.join("<a href='%s'>%s</a>" % (reverse('admin:catalogue_book_change', args=[b.id]), b.title) for b in self.book_set.all()))
195 books.short_description = _('books')
def __unicode__(self):
    """Render the media as ``<name> (<last path segment of the file name>)``."""
    stored_path = self.file.name
    last_segment = stored_path.split("/")[-1]
    return "%s (%s)" % (self.name, last_segment)
201 ordering = ('type', 'name')
202 verbose_name = _('book media')
203 verbose_name_plural = _('book media')
205 def save(self, force_insert=False, force_update=False, **kwargs):
206 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
207 if self.type == 'mp3':
209 extra_info = self.get_extra_info_value()
210 extra_info.update(self.get_mp3_info())
211 self.set_extra_info_value(extra_info)
212 media = super(BookMedia, self).save(force_insert, force_update, **kwargs)
215 def get_mp3_info(self):
216 """Retrieves artist and director names from audio ID3 tags."""
218 audio = id3.ID3(self.file.path)
219 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
220 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
222 artist_name = director_name = ''
223 return {'artist_name': artist_name, 'director_name': director_name}
226 class Book(models.Model):
227 title = models.CharField(_('title'), max_length=120)
228 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
229 description = models.TextField(_('description'), blank=True)
230 created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
231 _short_html = models.TextField(_('short HTML'), editable=False)
232 parent_number = models.IntegerField(_('parent number'), default=0)
233 extra_info = JSONField(_('extra information'))
234 gazeta_link = models.CharField(blank=True, max_length=240)
235 wiki_link = models.CharField(blank=True, max_length=240)
236 # files generated during publication
237 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
238 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
239 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
240 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
241 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
243 medias = models.ManyToManyField(BookMedia, blank=True)
245 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
246 objects = models.Manager()
247 tagged = managers.ModelTaggedItemManager(Tag)
248 tags = managers.TagDescriptor(Tag)
250 _tag_counter = JSONField(null=True, editable=False)
251 _theme_counter = JSONField(null=True, editable=False)
253 class AlreadyExists(Exception):
257 ordering = ('title',)
258 verbose_name = _('book')
259 verbose_name_plural = _('books')
261 def __unicode__(self):
264 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
266 # Reset _short_html during save
268 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
270 self.__setattr__(key, '')
271 # Fragment.short_html relies on book's tags, so reset it here too
272 self.fragments.all().update(**update)
274 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the view name and positional arguments for this book's page.

    The result is a ``(view_name, args)`` tuple keyed by the book slug.
    NOTE(review): a tuple like this is presumably resolved to a URL by a
    permalink-style decorator; the decorator line is not visible here.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return ``'l-'`` followed by the book slug, cut to at most 120 characters."""
    prefixed = 'l-' + self.slug
    return prefixed[:120]
288 slug = self.book_tag_slug()
289 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
291 book_tag.name = self.title[:50]
292 book_tag.sort_key = self.title.lower()
296 def has_media(self, type):
323 if self.medias.filter(book=self, type=type).count() > 0:
328 def get_media(self, type):
329 if self.has_media(type):
333 return self.html_file
335 return self.epub_file
341 return self.medias.filter(book=self, type=type)
346 return self.get_media("mp3")
348 return self.get_media("odt")
350 return self.get_media("ogg")
352 return self.get_media("daisy")
354 def short_html(self):
355 key = '_short_html_%s' % get_language()
356 short_html = getattr(self, key)
358 if short_html and len(short_html):
359 return mark_safe(short_html)
361 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
362 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
365 # files generated during publication
366 if self.has_media("html"):
367 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
368 if self.has_media("pdf"):
369 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
370 if self.root_ancestor.has_media("epub"):
371 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
372 if self.has_media("txt"):
373 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
375 for m in self.medias.order_by('type'):
376 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
378 formats = [mark_safe(format) for format in formats]
380 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
381 {'book': self, 'tags': tags, 'formats': formats})))
382 self.save(reset_short_html=False)
383 return mark_safe(getattr(self, key))
387 def root_ancestor(self):
388 """ returns the oldest ancestor """
390 if not hasattr(self, '_root_ancestor'):
394 self._root_ancestor = book
395 return self._root_ancestor
def has_description(self):
    """Tell whether ``description`` holds any text.

    Returns True for a non-empty description and False for an empty
    string. A missing (None) description also yields False instead of
    raising TypeError.
    """
    return bool(self.description)
# Display metadata attached to the method: a translated label and a
# flag marking the result as a boolean.
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Report whether ``pdf_file`` is truthy."""
    if self.pdf_file:
        return True
    return False
# Display label and boolean marker attached to the method.
has_pdf_file.short_description = 'PDF'
has_pdf_file.boolean = True
def has_epub_file(self):
    """Report whether ``epub_file`` is truthy."""
    return True if self.epub_file else False
# Display label and boolean marker attached to the method.
has_epub_file.short_description = 'EPUB'
has_epub_file.boolean = True
def has_txt_file(self):
    """Report whether ``txt_file`` is truthy."""
    return bool(self.txt_file)
# Display label and boolean marker attached to the method. The label
# names the TXT format this method actually checks.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Report whether ``html_file`` is truthy."""
    html_present = bool(self.html_file)
    return html_present
# Display label and boolean marker attached to the method.
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
def has_odt_file(self):
    """Report, as a bool, what ``has_media("odt")`` answers."""
    available = self.has_media("odt")
    return bool(available)
# Display label and boolean marker attached to the method.
has_odt_file.short_description = 'ODT'
has_odt_file.boolean = True
def has_mp3_file(self):
    """Report, as a bool, what ``has_media("mp3")`` answers."""
    available = self.has_media("mp3")
    return bool(available)
# Display label and boolean marker attached to the method.
has_mp3_file.short_description = 'MP3'
has_mp3_file.boolean = True
def has_ogg_file(self):
    """Report, as a bool, what ``has_media("ogg")`` answers."""
    available = self.has_media("ogg")
    return bool(available)
# Display label and boolean marker attached to the method.
has_ogg_file.short_description = 'OGG'
has_ogg_file.boolean = True
def has_daisy_file(self):
    """Report, as a bool, what ``has_media("daisy")`` answers."""
    available = self.has_media("daisy")
    return bool(available)
# Display label and boolean marker attached to the method.
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
444 def build_epub(self, remove_descendants=True):
445 """ (Re)builds the epub file.
446 If book has a parent, does nothing.
447 Unless remove_descendants is False, descendants' epubs are removed.
450 from StringIO import StringIO
451 from hashlib import sha1
452 from django.core.files.base import ContentFile
453 from librarian import DocProvider
455 class BookImportDocProvider(DocProvider):
456 """ used for joined EPUBs """
458 def __init__(self, book):
461 def by_slug(self, slug):
462 if slug == self.book.slug:
463 return self.book.xml_file
465 return Book.objects.get(slug=slug).xml_file
471 epub_file = StringIO()
473 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
474 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
475 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
479 book_descendants = list(self.children.all())
480 while len(book_descendants) > 0:
481 child_book = book_descendants.pop(0)
482 if remove_descendants and child_book.has_epub_file():
483 child_book.epub_file.delete()
484 # save anyway, to refresh short_html
486 book_descendants += list(child_book.children.all())
489 from StringIO import StringIO
490 from django.core.files.base import ContentFile
491 from librarian import text
494 text.transform(open(self.xml_file.path), out)
495 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
500 def from_xml_file(cls, xml_file, **kwargs):
501 # use librarian to parse meta-data
502 book_info = dcparser.parse(xml_file)
504 if not isinstance(xml_file, File):
505 xml_file = File(open(xml_file))
508 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
513 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
515 from tempfile import NamedTemporaryFile
516 from markupstring import MarkupString
517 from django.core.files.storage import default_storage
519 # check for parts before we do anything
521 if hasattr(book_info, 'parts'):
522 for part_url in book_info.parts:
523 base, slug = part_url.rsplit('/', 1)
525 children.append(Book.objects.get(slug=slug))
526 except Book.DoesNotExist, e:
527 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
531 book_base, book_slug = book_info.url.rsplit('/', 1)
532 if re.search(r'[^a-zA-Z0-9-]', book_slug):
533 raise ValueError('Invalid characters in slug')
534 book, created = Book.objects.get_or_create(slug=book_slug)
540 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
541 # Save shelves for this book
542 book_shelves = list(book.tags.filter(category='set'))
544 book.title = book_info.title
545 book.set_extra_info_value(book_info.to_dict())
546 book._short_html = ''
550 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
551 for field_name, category in categories:
553 tag_names = getattr(book_info, field_name)
555 tag_names = [getattr(book_info, category)]
556 for tag_name in tag_names:
557 tag_sort_key = tag_name
558 if category == 'author':
559 tag_sort_key = tag_name.last_name
560 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
561 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
564 tag.sort_key = tag_sort_key.lower()
566 book_tags.append(tag)
568 book.tags = set(book_tags + book_shelves)
570 book_tag = book.book_tag()
572 for n, child_book in enumerate(children):
573 child_book.parent = book
574 child_book.parent_number = n
577 # Save XML and HTML files
578 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
580 # delete old fragments when overwriting
581 book.fragments.all().delete()
583 html_file = NamedTemporaryFile()
584 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
585 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
587 # get ancestor l-tags for adding to new fragments
591 ancestor_tags.append(p.book_tag())
595 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
596 for fragment in closed_fragments.values():
598 theme_names = [s.strip() for s in fragment.themes.split(',')]
599 except AttributeError:
602 for theme_name in theme_names:
605 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
607 tag.name = theme_name
608 tag.sort_key = theme_name.lower()
614 text = fragment.to_string()
616 if (len(MarkupString(text)) > 240):
617 short_text = unicode(MarkupString(text)[:160])
618 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
619 defaults={'text': text, 'short_text': short_text})
622 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
624 if not settings.NO_BUILD_TXT and build_txt:
627 if not settings.NO_BUILD_EPUB and build_epub:
628 book.root_ancestor.build_epub()
630 book_descendants = list(book.children.all())
631 # add l-tag to descendants and their fragments
632 # delete unnecessary EPUB files
633 while len(book_descendants) > 0:
634 child_book = book_descendants.pop(0)
635 child_book.tags = list(child_book.tags) + [book_tag]
637 for fragment in child_book.fragments.all():
638 fragment.tags = set(list(fragment.tags) + [book_tag])
639 book_descendants += list(child_book.children.all())
642 book.reset_tag_counter()
643 book.reset_theme_counter()
649 def refresh_tag_counter(self):
651 for child in self.children.all().order_by():
652 for tag_pk, value in child.tag_counter.iteritems():
653 tags[tag_pk] = tags.get(tag_pk, 0) + value
654 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
656 self.set__tag_counter_value(tags)
657 self.save(reset_short_html=False)
660 def reset_tag_counter(self):
661 self._tag_counter = None
662 self.save(reset_short_html=False)
664 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter as a dict with integer keys.

    When ``_tag_counter`` is None the result of ``refresh_tag_counter()``
    is returned as-is; otherwise the stored value is read back and its
    keys are converted to ``int``.
    NOTE(review): other code reads this as an attribute
    (``child.tag_counter``), so it is presumably wrapped in a property;
    the decorator line is not visible here.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        return dict((int(pk), count) for pk, count in stored.iteritems())
    return self.refresh_tag_counter()
672 def refresh_theme_counter(self):
674 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
675 for tag in fragment.tags.filter(category='theme').order_by():
676 tags[tag.pk] = tags.get(tag.pk, 0) + 1
677 self.set__theme_counter_value(tags)
678 self.save(reset_short_html=False)
681 def reset_theme_counter(self):
682 self._theme_counter = None
683 self.save(reset_short_html=False)
685 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter as a dict with integer keys.

    When ``_theme_counter`` is None the result of
    ``refresh_theme_counter()`` is returned as-is; otherwise the stored
    value is read back and its keys are converted to ``int``.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        return dict((int(pk), count) for pk, count in stored.iteritems())
    return self.refresh_theme_counter()
693 def pretty_title(self, html_links=False):
695 names = list(book.tags.filter(category='author'))
701 names.extend(reversed(books))
704 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
706 names = [tag.name for tag in names]
708 return ', '.join(names)
711 class Fragment(models.Model):
712 text = models.TextField()
713 short_text = models.TextField(editable=False)
714 _short_html = models.TextField(editable=False)
715 anchor = models.CharField(max_length=120)
716 book = models.ForeignKey(Book, related_name='fragments')
718 objects = models.Manager()
719 tagged = managers.ModelTaggedItemManager(Tag)
720 tags = managers.TagDescriptor(Tag)
723 ordering = ('book', 'anchor',)
724 verbose_name = _('fragment')
725 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the fragment's URL.

    It is the URL reversed from the ``book_text`` route for the
    fragment's book, followed by ``#m`` and the fragment anchor.
    """
    text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (text_url, self.anchor)
730 def short_html(self):
731 key = '_short_html_%s' % get_language()
732 short_html = getattr(self, key)
733 if short_html and len(short_html):
734 return mark_safe(short_html)
736 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
737 {'fragment': self})))
739 return mark_safe(getattr(self, key))
742 class FileRecord(models.Model):
743 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
744 type = models.CharField(_('type'), max_length=20, db_index=True)
745 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
746 time = models.DateTimeField(_('time'), auto_now_add=True)
749 ordering = ('-time','-slug', '-type')
750 verbose_name = _('file record')
751 verbose_name_plural = _('file records')
def __unicode__(self):
    """Render the record as ``<sha1> <slug>.<type>``."""
    parts = (self.sha1, self.slug, self.type)
    return "%s %s.%s" % parts
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Clears ``book_count`` on every affected tag. If the sender is a Book
    and any affected tag is outside the 'book', 'theme' and 'set'
    categories, the book's tag counter is reset. If the sender is a
    Fragment and any affected tag is a theme, the theme counter of the
    fragment's book is reset.
    """
    # Materialise the primary keys once so the handler also works when
    # ``affected_tags`` is a one-shot iterable, and so the same selection
    # backs every query below.
    tag_pks = [tag.pk for tag in affected_tags]
    changed_tags = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    changed_tags.update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if changed_tags.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if changed_tags.filter(category='theme').exists():
            sender.book.reset_theme_counter()
777 tags_updated.connect(_tags_updated_handler)
780 def _m2m_changed_handler(sender, instance, action, reverse, pk_set, **kwargs):
781 """ refresh all the short_html stuff on BookMedia delete """
782 if sender == Book.medias.through and reverse and action == 'pre_clear':
783 for book in instance.book_set.all():
785 m2m_changed.connect(_m2m_changed_handler)
def _pre_delete_handler(sender, instance, **kwargs):
    """Clear a BookMedia's ``book_set`` relation before it is deleted.

    Signals sent by any other model are ignored. Clearing explicitly
    is done so that the related Books can be refreshed.
    """
    if sender != BookMedia:
        return
    instance.book_set.clear()
791 pre_delete.connect(_pre_delete_handler)
793 def _post_save_handler(sender, instance, **kwargs):
794 """ refresh all the short_html stuff on BookMedia update """
795 if sender == BookMedia:
796 for book in instance.book_set.all():
798 post_save.connect(_post_save_handler)