1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from datetime import datetime
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.utils.translation import ugettext_lazy as _
10 from django.contrib.auth.models import User
11 from django.core.files import File
12 from django.template.loader import render_to_string
13 from django.utils.safestring import mark_safe
14 from django.utils.translation import get_language
15 from django.core.urlresolvers import reverse
16 from django.db.models.signals import post_save, m2m_changed, pre_delete
18 from django.conf import settings
20 from newtagging.models import TagBase, tags_updated
21 from newtagging import managers
22 from catalogue.fields import JSONField, OverwritingFileField
23 from catalogue.utils import ExistingFile
25 from librarian import dcparser, html, epub, NoDublinCore
27 from mutagen import id3
28 from slughifi import slughifi
29 from sortify import sortify
33 ('author', _('author')),
34 ('epoch', _('epoch')),
36 ('genre', _('genre')),
37 ('theme', _('theme')),
43 ('odt', _('ODT file')),
44 ('mp3', _('MP3 file')),
45 ('ogg', _('OGG file')),
46 ('daisy', _('DAISY file')),
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are narrowed to a single ``category`` value."""

    def __init__(self, subcategory):
        """Remember the category used to narrow every queryset.

        subcategory -- value later matched against the ``category`` field.
        """
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by the stored category."""
        base_qs = super(TagSubcategoryManager, self).get_query_set()
        return base_qs.filter(category=self.subcategory)
59 name = models.CharField(_('name'), max_length=50, db_index=True)
60 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
61 sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
62 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
63 db_index=True, choices=TAG_CATEGORIES)
64 description = models.TextField(_('description'), blank=True)
65 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
67 user = models.ForeignKey(User, blank=True, null=True)
68 book_count = models.IntegerField(_('book count'), blank=True, null=True)
69 gazeta_link = models.CharField(blank=True, max_length=240)
70 wiki_link = models.CharField(blank=True, max_length=240)
72 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
73 changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
75 class UrlDeprecationWarning(DeprecationWarning):
86 categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
89 ordering = ('sort_key',)
90 verbose_name = _('tag')
91 verbose_name_plural = _('tags')
92 unique_together = (("slug", "category"),)
94 def __unicode__(self):
98 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return a ``(view name, positional args)`` pair for this tag's listing.

    NOTE(review): presumably wrapped by the ``permalink`` decorator, which
    would turn the pair into a URL -- the decorator line is not visible here.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.url_chunk]
    return (view_name, view_args)
def has_description(self):
    """Return True when the tag's description is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes: a translated label and a boolean-rendering flag.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_description.boolean = True
has_description.short_description = _('description')
110 """ returns global book count for book tags, fragment count for themes """
112 if self.book_count is None:
113 if self.category == 'book':
115 objects = Book.objects.none()
116 elif self.category == 'theme':
117 objects = Fragment.tagged.with_all((self,))
119 objects = Book.tagged.with_all((self,)).order_by()
120 if self.category != 'set':
121 # eliminate descendants
122 l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
123 descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
125 objects = objects.exclude(pk__in=descendants_keys)
126 self.book_count = objects.count()
128 return self.book_count
131 def get_tag_list(tags):
132 if isinstance(tags, basestring):
137 tags_splitted = tags.split('/')
138 for name in tags_splitted:
140 real_tags.append(Tag.objects.get(slug=name, category=category))
142 elif name in Tag.categories_rev:
143 category = Tag.categories_rev[name]
146 real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
148 except Tag.MultipleObjectsReturned, e:
149 ambiguous_slugs.append(name)
152 # something strange left off
153 raise Tag.DoesNotExist()
155 # some tags should be qualified
156 e = Tag.MultipleObjectsReturned()
158 e.ambiguous_slugs = ambiguous_slugs
161 e = Tag.UrlDeprecationWarning()
166 return TagBase.get_tag_list(tags)
170 return '/'.join((Tag.categories_dict[self.category], self.slug))
173 # TODO: why is this hard-coded ?
174 def book_upload_path(ext=None, maxlen=100):
175 def get_dynamic_path(media, filename, ext=ext):
176 # how to put related book's slug here?
178 if media.type == 'daisy':
183 name = slughifi(filename.split(".")[0])
185 name = slughifi(media.name)
186 return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
187 return get_dynamic_path
190 class BookMedia(models.Model):
191 type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100", editable=False)
192 name = models.CharField(_('name'), max_length="100")
193 file = OverwritingFileField(_('file'), upload_to=book_upload_path())
194 uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
195 extra_info = JSONField(_('extra information'), default='{}', editable=False)
196 book = models.ForeignKey('Book', related_name='media')
197 source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
199 def __unicode__(self):
200 return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
203 ordering = ('type', 'name')
204 verbose_name = _('book media')
205 verbose_name_plural = _('book media')
207 def save(self, *args, **kwargs):
209 old = BookMedia.objects.get(pk=self.pk)
210 except BookMedia.DoesNotExist, e:
213 # if name changed, change the file name, too
214 if slughifi(self.name) != slughifi(old.name):
215 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
217 super(BookMedia, self).save(*args, **kwargs)
218 extra_info = self.get_extra_info_value()
219 extra_info.update(self.read_meta())
220 self.set_extra_info_value(extra_info)
221 self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
222 return super(BookMedia, self).save(*args, **kwargs)
226 Reads some metadata from the audiobook.
229 artist_name = director_name = project = funded_by = ''
230 if self.type == 'mp3':
232 audio = id3.ID3(self.file.path)
233 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
234 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
235 project = ", ".join([t.data for t in audio.getall('PRIV')
236 if t.owner=='wolnelektury.pl?project'])
237 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
238 if t.owner=='wolnelektury.pl?funded_by'])
241 elif self.type == 'ogg':
243 audio = mutagen.File(self.file.path)
244 artist_name = ', '.join(audio.get('artist', []))
245 director_name = ', '.join(audio.get('conductor', []))
246 project = ", ".join(audio.get('project', []))
247 funded_by = ", ".join(audio.get('funded_by', []))
252 return {'artist_name': artist_name, 'director_name': director_name,
253 'project': project, 'funded_by': funded_by}
256 def read_source_sha1(filepath, filetype):
258 Reads source file SHA1 from audiobook metadata.
261 if filetype == 'mp3':
263 audio = id3.ID3(filepath)
264 return [t.data for t in audio.getall('PRIV')
265 if t.owner=='wolnelektury.pl?flac_sha1'][0]
268 elif filetype == 'ogg':
270 audio = mutagen.File(filepath)
271 return audio.get('flac_sha1', [None])[0]
278 class Book(models.Model):
279 title = models.CharField(_('title'), max_length=120)
280 sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
281 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
282 description = models.TextField(_('description'), blank=True)
283 created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
284 changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
285 _short_html = models.TextField(_('short HTML'), editable=False)
286 parent_number = models.IntegerField(_('parent number'), default=0)
287 extra_info = JSONField(_('extra information'), default='{}')
288 gazeta_link = models.CharField(blank=True, max_length=240)
289 wiki_link = models.CharField(blank=True, max_length=240)
290 # files generated during publication
291 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
292 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
293 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
294 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
295 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
297 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
298 objects = models.Manager()
299 tagged = managers.ModelTaggedItemManager(Tag)
300 tags = managers.TagDescriptor(Tag)
302 _tag_counter = JSONField(null=True, editable=False)
303 _theme_counter = JSONField(null=True, editable=False)
305 class AlreadyExists(Exception):
309 ordering = ('sort_key',)
310 verbose_name = _('book')
311 verbose_name_plural = _('books')
313 def __unicode__(self):
316 def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
317 self.sort_key = sortify(self.title)
320 # Reset _short_html during save
322 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
324 self.__setattr__(key, '')
325 # Fragment.short_html relies on book's tags, so reset it here too
326 self.fragments.all().update(**update)
328 return super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return a ``(view name, positional args)`` pair for the book detail view.

    NOTE(review): presumably wrapped by the ``permalink`` decorator, which
    would turn the pair into a URL -- the decorator line is not visible here.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
def book_tag_slug(self):
    """Return the slug of this book's own tag: ``'l-'`` plus the book slug.

    The result is cut to at most 120 characters.
    """
    prefixed_slug = 'l-' + self.slug
    return prefixed_slug[:120]
342 slug = self.book_tag_slug()
343 book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
345 book_tag.name = self.title[:50]
346 book_tag.sort_key = self.title.lower()
350 def has_media(self, type):
377 if self.media.filter(type=type).exists():
382 def get_media(self, type):
383 if self.has_media(type):
387 return self.html_file
389 return self.epub_file
395 return self.media.filter(type=type)
400 return self.get_media("mp3")
402 return self.get_media("odt")
404 return self.get_media("ogg")
406 return self.get_media("daisy")
408 def short_html(self):
409 key = '_short_html_%s' % get_language()
410 short_html = getattr(self, key)
412 if short_html and len(short_html):
413 return mark_safe(short_html)
415 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
416 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
419 # files generated during publication
420 if self.has_media("html"):
421 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
422 if self.has_media("pdf"):
423 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
424 if self.root_ancestor.has_media("epub"):
425 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
426 if self.has_media("txt"):
427 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
429 for m in self.media.order_by('type'):
430 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
432 formats = [mark_safe(format) for format in formats]
434 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
435 {'book': self, 'tags': tags, 'formats': formats})))
436 self.save(reset_short_html=False)
437 return mark_safe(getattr(self, key))
441 def root_ancestor(self):
442 """ returns the oldest ancestor """
444 if not hasattr(self, '_root_ancestor'):
448 self._root_ancestor = book
449 return self._root_ancestor
def has_description(self):
    """Return True when the book's description is non-empty."""
    description_length = len(self.description)
    return description_length > 0
# Function attributes: a translated label and a boolean-rendering flag.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_description.boolean = True
has_description.short_description = _('description')
def has_pdf_file(self):
    """Return True when ``pdf_file`` holds a truthy value."""
    pdf = self.pdf_file
    return bool(pdf)
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Return True when ``epub_file`` holds a truthy value."""
    if self.epub_file:
        return True
    return False
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_txt_file(self):
    """Return True when ``txt_file`` holds a truthy value."""
    return bool(self.txt_file)
# The label used to read 'HTML' (copied from has_html_file), which gave two
# columns the same caption; this one reports on the TXT file.
# NOTE(review): label/flag are presumably consumed by the Django admin.
has_txt_file.short_description = 'TXT'
has_txt_file.boolean = True
def has_html_file(self):
    """Return True when ``html_file`` holds a truthy value."""
    html_present = bool(self.html_file)
    return html_present
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
def has_odt_file(self):
    """Return the truthiness of ``self.has_media("odt")`` as a bool."""
    odt_available = self.has_media("odt")
    return bool(odt_available)
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_mp3_file(self):
    """Return the truthiness of ``self.has_media("mp3")`` as a bool."""
    mp3_available = self.has_media("mp3")
    return bool(mp3_available)
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_mp3_file.boolean = True
has_mp3_file.short_description = 'MP3'
def has_ogg_file(self):
    """Return the truthiness of ``self.has_media("ogg")`` as a bool."""
    if self.has_media("ogg"):
        return True
    return False
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_ogg_file.boolean = True
has_ogg_file.short_description = 'OGG'
def has_daisy_file(self):
    """Return the truthiness of ``self.has_media("daisy")`` as a bool."""
    if self.has_media("daisy"):
        return True
    return False
# Label and boolean flag attached to the function itself.
# NOTE(review): presumably consumed by the Django admin -- confirm.
has_daisy_file.boolean = True
has_daisy_file.short_description = 'DAISY'
498 def build_epub(self, remove_descendants=True):
499 """ (Re)builds the epub file.
500 If book has a parent, does nothing.
501 Unless remove_descendants is False, descendants' epubs are removed.
504 from StringIO import StringIO
505 from hashlib import sha1
506 from django.core.files.base import ContentFile
507 from librarian import DocProvider
509 class BookImportDocProvider(DocProvider):
510 """ used for joined EPUBs """
512 def __init__(self, book):
515 def by_slug(self, slug):
516 if slug == self.book.slug:
517 return self.book.xml_file
519 return Book.objects.get(slug=slug).xml_file
525 epub_file = StringIO()
527 epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
528 self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
529 FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
533 book_descendants = list(self.children.all())
534 while len(book_descendants) > 0:
535 child_book = book_descendants.pop(0)
536 if remove_descendants and child_book.has_epub_file():
537 child_book.epub_file.delete()
538 # save anyway, to refresh short_html
540 book_descendants += list(child_book.children.all())
543 from StringIO import StringIO
544 from django.core.files.base import ContentFile
545 from librarian import text
548 text.transform(open(self.xml_file.path), out)
549 self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
554 def from_xml_file(cls, xml_file, **kwargs):
555 # use librarian to parse meta-data
556 book_info = dcparser.parse(xml_file)
558 if not isinstance(xml_file, File):
559 xml_file = File(open(xml_file))
562 return cls.from_text_and_meta(xml_file, book_info, **kwargs)
567 def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
569 from tempfile import NamedTemporaryFile
570 from markupstring import MarkupString
571 from django.core.files.storage import default_storage
573 # check for parts before we do anything
575 if hasattr(book_info, 'parts'):
576 for part_url in book_info.parts:
577 base, slug = part_url.rsplit('/', 1)
579 children.append(Book.objects.get(slug=slug))
580 except Book.DoesNotExist, e:
581 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
585 book_base, book_slug = book_info.url.rsplit('/', 1)
586 if re.search(r'[^a-zA-Z0-9-]', book_slug):
587 raise ValueError('Invalid characters in slug')
588 book, created = Book.objects.get_or_create(slug=book_slug)
594 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
595 # Save shelves for this book
596 book_shelves = list(book.tags.filter(category='set'))
598 book.title = book_info.title
599 book.set_extra_info_value(book_info.to_dict())
600 book._short_html = ''
604 categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
605 for field_name, category in categories:
607 tag_names = getattr(book_info, field_name)
609 tag_names = [getattr(book_info, category)]
610 for tag_name in tag_names:
611 tag_sort_key = tag_name
612 if category == 'author':
613 tag_sort_key = tag_name.last_name
614 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
615 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
618 tag.sort_key = sortify(tag_sort_key.lower())
620 book_tags.append(tag)
622 book.tags = set(book_tags + book_shelves)
624 book_tag = book.book_tag()
626 for n, child_book in enumerate(children):
627 child_book.parent = book
628 child_book.parent_number = n
631 # Save XML and HTML files
632 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
634 # delete old fragments when overwriting
635 book.fragments.all().delete()
637 html_file = NamedTemporaryFile()
638 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
639 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
641 # get ancestor l-tags for adding to new fragments
645 ancestor_tags.append(p.book_tag())
649 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
650 for fragment in closed_fragments.values():
652 theme_names = [s.strip() for s in fragment.themes.split(',')]
653 except AttributeError:
656 for theme_name in theme_names:
659 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
661 tag.name = theme_name
662 tag.sort_key = theme_name.lower()
668 text = fragment.to_string()
670 if (len(MarkupString(text)) > 240):
671 short_text = unicode(MarkupString(text)[:160])
672 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
673 defaults={'text': text, 'short_text': short_text})
676 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
678 if not settings.NO_BUILD_TXT and build_txt:
681 if not settings.NO_BUILD_EPUB and build_epub:
682 book.root_ancestor.build_epub()
684 book_descendants = list(book.children.all())
685 # add l-tag to descendants and their fragments
686 # delete unnecessary EPUB files
687 while len(book_descendants) > 0:
688 child_book = book_descendants.pop(0)
689 child_book.tags = list(child_book.tags) + [book_tag]
691 for fragment in child_book.fragments.all():
692 fragment.tags = set(list(fragment.tags) + [book_tag])
693 book_descendants += list(child_book.children.all())
696 book.reset_tag_counter()
697 book.reset_theme_counter()
703 def refresh_tag_counter(self):
705 for child in self.children.all().order_by():
706 for tag_pk, value in child.tag_counter.iteritems():
707 tags[tag_pk] = tags.get(tag_pk, 0) + value
708 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
710 self.set__tag_counter_value(tags)
711 self.save(reset_short_html=False)
714 def reset_tag_counter(self):
715 self._tag_counter = None
716 self.save(reset_short_html=False)
718 self.parent.reset_tag_counter()
def tag_counter(self):
    """Return the tag counter as a dict keyed by integer.

    When ``_tag_counter`` is None the result of ``refresh_tag_counter()``
    is returned as-is; otherwise the stored mapping is read back with its
    keys converted through ``int``.
    """
    if self._tag_counter is not None:
        stored = self.get__tag_counter_value()
        # presumably keys come back as strings from JSON storage -- confirm
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_tag_counter()
726 def refresh_theme_counter(self):
728 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
729 for tag in fragment.tags.filter(category='theme').order_by():
730 tags[tag.pk] = tags.get(tag.pk, 0) + 1
731 self.set__theme_counter_value(tags)
732 self.save(reset_short_html=False)
735 def reset_theme_counter(self):
736 self._theme_counter = None
737 self.save(reset_short_html=False)
739 self.parent.reset_theme_counter()
def theme_counter(self):
    """Return the theme counter as a dict keyed by integer.

    When ``_theme_counter`` is None the result of ``refresh_theme_counter()``
    is returned as-is; otherwise the stored mapping is read back with its
    keys converted through ``int``.
    """
    if self._theme_counter is not None:
        stored = self.get__theme_counter_value()
        # presumably keys come back as strings from JSON storage -- confirm
        return dict((int(tag_pk), count) for tag_pk, count in stored.iteritems())
    return self.refresh_theme_counter()
747 def pretty_title(self, html_links=False):
749 names = list(book.tags.filter(category='author'))
755 names.extend(reversed(books))
758 names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
760 names = [tag.name for tag in names]
762 return ', '.join(names)
765 def tagged_top_level(cls, tags):
766 """ Returns top-level books tagged with `tags'.
768 It only returns those books which don't have ancestors which are
769 also tagged with those tags.
772 # get relevant books and their tags
773 objects = cls.tagged.with_all(tags)
774 # eliminate descendants
775 l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
776 descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
778 objects = objects.exclude(pk__in=descendants_keys)
783 class Fragment(models.Model):
784 text = models.TextField()
785 short_text = models.TextField(editable=False)
786 _short_html = models.TextField(editable=False)
787 anchor = models.CharField(max_length=120)
788 book = models.ForeignKey(Book, related_name='fragments')
790 objects = models.Manager()
791 tagged = managers.ModelTaggedItemManager(Tag)
792 tags = managers.TagDescriptor(Tag)
795 ordering = ('book', 'anchor',)
796 verbose_name = _('fragment')
797 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the ``book_text`` URL of the fragment's book plus ``#m<anchor>``."""
    book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (book_text_url, self.anchor)
802 def short_html(self):
803 key = '_short_html_%s' % get_language()
804 short_html = getattr(self, key)
805 if short_html and len(short_html):
806 return mark_safe(short_html)
808 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
809 {'fragment': self})))
811 return mark_safe(getattr(self, key))
814 class FileRecord(models.Model):
815 slug = models.SlugField(_('slug'), max_length=120, db_index=True)
816 type = models.CharField(_('type'), max_length=20, db_index=True)
817 sha1 = models.CharField(_('sha-1 hash'), max_length=40)
818 time = models.DateTimeField(_('time'), auto_now_add=True)
821 ordering = ('-time','-slug', '-type')
822 verbose_name = _('file record')
823 verbose_name_plural = _('file records')
825 def __unicode__(self):
826 return "%s %s.%s" % (self.sha1, self.slug, self.type)
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Connected to the ``tags_updated`` signal below.

    sender -- the object whose tags changed; it is used as an instance here
        (checked with ``isinstance`` against Book and Fragment).
    affected_tags -- iterable of the tags involved; only ``pk`` is read.
    """
    # Collect the keys once: they feed several queries, and a one-shot
    # iterable would otherwise be exhausted after the first pass.
    tag_pks = [tag.pk for tag in affected_tags]
    touched_tags = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched_tags.update(book_count=None, changed_at=datetime.now())

    # if book tags changed, reset book tag counter
    if isinstance(sender, Book) and \
            touched_tags.exclude(category__in=('book', 'theme', 'set')).exists():
        sender.reset_tag_counter()
    # if fragment theme changed, reset book theme counter
    elif isinstance(sender, Fragment) and \
            touched_tags.filter(category='theme').exists():
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
853 def _pre_delete_handler(sender, instance, **kwargs):
854 """ refresh Book on BookMedia delete """
855 if sender == BookMedia:
857 pre_delete.connect(_pre_delete_handler)
859 def _post_save_handler(sender, instance, **kwargs):
860 """ refresh all the short_html stuff on BookMedia update """
861 if sender == BookMedia:
863 post_save.connect(_post_save_handler)