apps/catalogue/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.db import models
   6 from django.db.models import permalink, Q
   7 from django.utils.translation import ugettext_lazy as _
   8 from django.contrib.auth.models import User
   9 from django.core.files import File
  10 from django.template.loader import render_to_string
  11 from django.utils.safestring import mark_safe
  12 from django.utils.translation import get_language
  13 from django.core.urlresolvers import reverse
  14 from datetime import datetime
  15
  16 from newtagging.models import TagBase
  17 from newtagging import managers
  18 from catalogue.fields import JSONField
  19
  20 from librarian import html, dcparser
  21 from mutagen import id3
  22
  23
# Allowed values for ``Tag.category`` as (stored value, translatable label)
# pairs; passed as ``choices`` to the ``category`` field of ``Tag``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
  33
  34
  35 class TagSubcategoryManager(models.Manager):
  36     def __init__(self, subcategory):
  37         super(TagSubcategoryManager, self).__init__()
  38         self.subcategory = subcategory
  39
  40     def get_query_set(self):
  41         return super(TagSubcategoryManager, self).get_query_set().filter(category=self.subcategory)
  42
  43
  44 class Tag(TagBase):
  45     name = models.CharField(_('name'), max_length=50, db_index=True)
  46     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
  47     sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
  48     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
  49         db_index=True, choices=TAG_CATEGORIES)
  50     description = models.TextField(_('description'), blank=True)
  51     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  52
  53     user = models.ForeignKey(User, blank=True, null=True)
  54     book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
  55     death = models.IntegerField(_(u'year of death'), blank=True, null=True)
  56     gazeta_link = models.CharField(blank=True, max_length=240)
  57     wiki_link = models.CharField(blank=True, max_length=240)
  58
  59     class Meta:
  60         ordering = ('sort_key',)
  61         verbose_name = _('tag')
  62         verbose_name_plural = _('tags')
  63
  64     def __unicode__(self):
  65         return self.name
  66
  67     def __repr__(self):
  68         return "Tag(slug=%r)" % self.slug
  69
  70     @permalink
  71     def get_absolute_url(self):
  72         return ('catalogue.views.tagged_object_list', [self.slug])
  73
  74     def has_description(self):
  75         return len(self.description) > 0
  76     has_description.short_description = _('description')
  77     has_description.boolean = True
  78
  79     def alive(self):
  80         return self.death is None
  81
  82     def in_pd(self):
  83         """ tests whether an author is in public domain """
  84         return self.death is not None and self.goes_to_pd() <= datetime.now().year
  85
  86     def goes_to_pd(self):
  87         """ calculates the year of public domain entry for an author """
  88         return self.death + 71 if self.death is not None else None
  89
  90     @staticmethod
  91     def get_tag_list(tags):
  92         if isinstance(tags, basestring):
  93             tag_slugs = tags.split('/')
  94             return [Tag.objects.get(slug=slug) for slug in tag_slugs]
  95         else:
  96             return TagBase.get_tag_list(tags)
  97
  98
  99 # TODO: why is this hard-coded ?
 100 def book_upload_path(ext):
 101     def get_dynamic_path(book, filename):
 102         return 'lektura/%s.%s' % (book.slug, ext)
 103     return get_dynamic_path
 104
 105
 106 class Book(models.Model):
 107     title = models.CharField(_('title'), max_length=120)
 108     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 109     description = models.TextField(_('description'), blank=True)
 110     created_at = models.DateTimeField(_('creation date'), auto_now=True)
 111     _short_html = models.TextField(_('short HTML'), editable=False)
 112     parent_number = models.IntegerField(_('parent number'), default=0)
 113     extra_info = JSONField(_('extra information'))
 114     gazeta_link = models.CharField(blank=True, max_length=240)
 115     wiki_link = models.CharField(blank=True, max_length=240)
 116
 117
 118     # Formats
 119     xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
 120     html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
 121     pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
 122     odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
 123     txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
 124     mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
 125     ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
 126
 127     parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
 128
 129     objects = models.Manager()
 130     tagged = managers.ModelTaggedItemManager(Tag)
 131     tags = managers.TagDescriptor(Tag)
 132
 133     class AlreadyExists(Exception):
 134         pass
 135
 136     class Meta:
 137         ordering = ('title',)
 138         verbose_name = _('book')
 139         verbose_name_plural = _('books')
 140
 141     def __unicode__(self):
 142         return self.title
 143
 144     def save(self, force_insert=False, force_update=False, reset_short_html=True):
 145         if reset_short_html:
 146             # Reset _short_html during save
 147             for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
 148                 self.__setattr__(key, '')
 149
 150         book = super(Book, self).save(force_insert, force_update)
 151
 152         if self.mp3_file:
 153             print self.mp3_file, self.mp3_file.path
 154             extra_info = self.get_extra_info_value()
 155             extra_info.update(self.get_mp3_info())
 156             self.set_extra_info_value(extra_info)
 157             book = super(Book, self).save(force_insert, force_update)
 158
 159         return book
 160
 161     @permalink
 162     def get_absolute_url(self):
 163         return ('catalogue.views.book_detail', [self.slug])
 164
 165     @property
 166     def name(self):
 167         return self.title
 168
 169     def short_html(self):
 170         key = '_short_html_%s' % get_language()
 171         short_html = getattr(self, key)
 172
 173         if short_html and len(short_html):
 174             return mark_safe(short_html)
 175         else:
 176             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 177             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 178
 179             formats = []
 180             if self.html_file:
 181                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 182             if self.pdf_file:
 183                 formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
 184             if self.odt_file:
 185                 formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
 186             if self.txt_file:
 187                 formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
 188             if self.mp3_file:
 189                 formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
 190             if self.ogg_file:
 191                 formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
 192
 193             formats = [mark_safe(format) for format in formats]
 194
 195             setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
 196                 {'book': self, 'tags': tags, 'formats': formats})))
 197             self.save(reset_short_html=False)
 198             return mark_safe(getattr(self, key))
 199
 200
 201     def get_mp3_info(self):
 202         """Retrieves artist and director names from audio ID3 tags."""
 203         audio = id3.ID3(self.mp3_file.path)
 204         artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
 205         director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
 206         return {'artist_name': artist_name, 'director_name': director_name}
 207
 208     def has_description(self):
 209         return len(self.description) > 0
 210     has_description.short_description = _('description')
 211     has_description.boolean = True
 212
 213     def has_pdf_file(self):
 214         return bool(self.pdf_file)
 215     has_pdf_file.short_description = 'PDF'
 216     has_pdf_file.boolean = True
 217
 218     def has_odt_file(self):
 219         return bool(self.odt_file)
 220     has_odt_file.short_description = 'ODT'
 221     has_odt_file.boolean = True
 222
 223     def has_html_file(self):
 224         return bool(self.html_file)
 225     has_html_file.short_description = 'HTML'
 226     has_html_file.boolean = True
 227
 228     @classmethod
 229     def from_xml_file(cls, xml_file, overwrite=False):
 230         # use librarian to parse meta-data
 231         book_info = dcparser.parse(xml_file)
 232
 233         if not isinstance(xml_file, File):
 234             xml_file = File(xml_file)
 235
 236         try:
 237             return cls.from_text_and_meta(xml_file, book_info, overwrite)
 238         finally:
 239             xml_file.close()
 240
 241     @classmethod
 242     def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
 243         from tempfile import NamedTemporaryFile
 244         from slughifi import slughifi
 245         from markupstring import MarkupString
 246
 247         # Read book metadata
 248         book_base, book_slug = book_info.url.rsplit('/', 1)
 249         book, created = Book.objects.get_or_create(slug=book_slug)
 250
 251         if created:
 252             book_shelves = []
 253         else:
 254             if not overwrite:
 255                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 256             # Save shelves for this book
 257             book_shelves = list(book.tags.filter(category='set'))
 258
 259         book.title = book_info.title
 260         book.set_extra_info_value(book_info.to_dict())
 261         book._short_html = ''
 262         book.save()
 263
 264         book_tags = []
 265         for category in ('kind', 'genre', 'author', 'epoch'):
 266             tag_name = getattr(book_info, category)
 267             tag_sort_key = tag_name
 268             if category == 'author':
 269                 tag_sort_key = tag_name.last_name
 270                 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 271             tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
 272             if created:
 273                 tag.name = tag_name
 274                 tag.sort_key = slughifi(tag_sort_key)
 275                 tag.category = category
 276                 tag.save()
 277             book_tags.append(tag)
 278
 279         book_tag, created = Tag.objects.get_or_create(slug=('l-' + book.slug)[:120])
 280         if created:
 281             book_tag.name = book.title[:50]
 282             book_tag.sort_key = ('l-' + book.slug)[:120]
 283             book_tag.category = 'book'
 284             book_tag.save()
 285         book_tags.append(book_tag)
 286
 287         book.tags = book_tags
 288
 289         if hasattr(book_info, 'parts'):
 290             for n, part_url in enumerate(book_info.parts):
 291                 base, slug = part_url.rsplit('/', 1)
 292                 try:
 293                     child_book = Book.objects.get(slug=slug)
 294                     child_book.parent = book
 295                     child_book.parent_number = n
 296                     child_book.save()
 297                 except Book.DoesNotExist, e:
 298                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 299
 300         book_descendants = list(book.children.all())
 301         while len(book_descendants) > 0:
 302             child_book = book_descendants.pop(0)
 303             for fragment in child_book.fragments.all():
 304                 fragment.tags = set(list(fragment.tags) + [book_tag])
 305             book_descendants += list(child_book.children.all())
 306
 307         # Save XML and HTML files
 308         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 309
 310         html_file = NamedTemporaryFile()
 311         if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
 312             book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
 313
 314             # Extract fragments
 315             closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
 316             book_themes = []
 317             for fragment in closed_fragments.values():
 318                 text = fragment.to_string()
 319                 short_text = ''
 320                 if (len(MarkupString(text)) > 240):
 321                     short_text = unicode(MarkupString(text)[:160])
 322                 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
 323                     defaults={'text': text, 'short_text': short_text})
 324
 325                 try:
 326                     theme_names = [s.strip() for s in fragment.themes.split(',')]
 327                 except AttributeError:
 328                     continue
 329                 themes = []
 330                 for theme_name in theme_names:
 331                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
 332                     if created:
 333                         tag.name = theme_name
 334                         tag.sort_key = slughifi(theme_name)
 335                         tag.category = 'theme'
 336                         tag.save()
 337                     themes.append(tag)
 338                 new_fragment.save()
 339                 new_fragment.tags = set(list(book.tags) + themes + [book_tag])
 340                 book_themes += themes
 341
 342             book_themes = set(book_themes)
 343             book.tags = list(book.tags) + list(book_themes) + book_shelves
 344
 345         book.save()
 346         return book
 347
 348
 349 class Fragment(models.Model):
 350     text = models.TextField()
 351     short_text = models.TextField(editable=False)
 352     _short_html = models.TextField(editable=False)
 353     anchor = models.CharField(max_length=120)
 354     book = models.ForeignKey(Book, related_name='fragments')
 355
 356     objects = models.Manager()
 357     tagged = managers.ModelTaggedItemManager(Tag)
 358     tags = managers.TagDescriptor(Tag)
 359
 360     class Meta:
 361         ordering = ('book', 'anchor',)
 362         verbose_name = _('fragment')
 363         verbose_name_plural = _('fragments')
 364
 365     def get_absolute_url(self):
 366         return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 367
 368     def short_html(self):
 369         key = '_short_html_%s' % get_language()
 370         short_html = getattr(self, key)
 371         if short_html and len(short_html):
 372             return mark_safe(short_html)
 373         else:
 374             book_authors = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))
 375                 for tag in self.book.tags if tag.category == 'author']
 376
 377             setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
 378                 {'fragment': self, 'book': self.book, 'book_authors': book_authors})))
 379             self.save()
 380             return mark_safe(getattr(self, key))
 381
 382
 383 class BookStub(models.Model):
 384     title = models.CharField(_('title'), max_length=120)
 385     author = models.CharField(_('author'), max_length=120)
 386     pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
 387     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 388     translator = models.TextField(_('translator'), blank=True)
 389     translator_death = models.TextField(_('year of translator\'s death'), blank=True)
 390
 391     class Meta:
 392         ordering = ('title',)
 393         verbose_name = _('book stub')
 394         verbose_name_plural = _('book stubs')
 395
 396     def __unicode__(self):
 397         return self.title
 398
 399     @permalink
 400     def get_absolute_url(self):
 401         return ('catalogue.views.book_detail', [self.slug])
 402
 403     def in_pd(self):
 404         return self.pd is not None and self.pd <= datetime.now().year
 405
 406     @property
 407     def name(self):
 408         return self.title
 409
 410