apps/catalogue/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.db import models
   6 from django.db.models import permalink, Q
   7 from django.utils.translation import ugettext_lazy as _
   8 from django.contrib.auth.models import User
   9 from django.core.files import File
  10 from django.template.loader import render_to_string
  11 from django.utils.safestring import mark_safe
  12 from django.utils.translation import get_language
  13 from django.core.urlresolvers import reverse
  14 from datetime import datetime
  15
  16 from newtagging.models import TagBase
  17 from newtagging import managers
  18 from catalogue.fields import JSONField
  19
  20 from librarian import html, dcparser
  21 from mutagen import id3
  22
  23
# Closed set of tag categories as (stored value, translatable label) pairs.
# Used as the ``choices`` of ``Tag.category``. The labels are kept as literal
# ``_()`` calls (lazy translation) rather than being generated in a loop.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
  33
  34
  35 class TagSubcategoryManager(models.Manager):
  36     def __init__(self, subcategory):
  37         super(TagSubcategoryManager, self).__init__()
  38         self.subcategory = subcategory
  39
  40     def get_query_set(self):
  41         return super(TagSubcategoryManager, self).get_query_set().filter(category=self.subcategory)
  42
  43
  44 class Tag(TagBase):
  45     name = models.CharField(_('name'), max_length=50, db_index=True)
  46     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
  47     sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
  48     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
  49         db_index=True, choices=TAG_CATEGORIES)
  50     description = models.TextField(_('description'), blank=True)
  51     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  52
  53     user = models.ForeignKey(User, blank=True, null=True)
  54     book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
  55     death = models.IntegerField(_(u'year of death'), blank=True, null=True)
  56     gazeta_link = models.CharField(blank=True, max_length=240)
  57     wiki_link = models.CharField(blank=True, max_length=240)
  58
  59     class Meta:
  60         ordering = ('sort_key',)
  61         verbose_name = _('tag')
  62         verbose_name_plural = _('tags')
  63
  64     def __unicode__(self):
  65         return self.name
  66
  67     def __repr__(self):
  68         return "Tag(slug=%r)" % self.slug
  69
  70     @permalink
  71     def get_absolute_url(self):
  72         return ('catalogue.views.tagged_object_list', [self.slug])
  73
  74     def has_description(self):
  75         return len(self.description) > 0
  76     has_description.short_description = _('description')
  77     has_description.boolean = True
  78
  79     def alive(self):
  80         return self.death is None
  81
  82     def in_pd(self):
  83         """ tests whether an author is in public domain """
  84         return self.death is not None and self.goes_to_pd() <= datetime.now().year
  85
  86     def goes_to_pd(self):
  87         """ calculates the year of public domain entry for an author """
  88         return self.death + 71 if self.death is not None else None
  89
  90     @staticmethod
  91     def get_tag_list(tags):
  92         if isinstance(tags, basestring):
  93             tag_slugs = tags.split('/')
  94             return [Tag.objects.get(slug=slug) for slug in tag_slugs]
  95         else:
  96             return TagBase.get_tag_list(tags)
  97
  98
  99 # TODO: why is this hard-coded ?
 100 def book_upload_path(ext):
 101     def get_dynamic_path(book, filename):
 102         return 'lektura/%s.%s' % (book.slug, ext)
 103     return get_dynamic_path
 104
 105
 106 class Book(models.Model):
 107     title = models.CharField(_('title'), max_length=120)
 108     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 109     description = models.TextField(_('description'), blank=True)
 110     created_at = models.DateTimeField(_('creation date'), auto_now=True)
 111     _short_html = models.TextField(_('short HTML'), editable=False)
 112     parent_number = models.IntegerField(_('parent number'), default=0)
 113     extra_info = JSONField(_('extra information'))
 114     gazeta_link = models.CharField(blank=True, max_length=240)
 115     wiki_link = models.CharField(blank=True, max_length=240)
 116
 117
 118     # Formats
 119     xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
 120     html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
 121     pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
 122     epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
 123     odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
 124     txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
 125     mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
 126     ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
 127
 128     parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
 129
 130     objects = models.Manager()
 131     tagged = managers.ModelTaggedItemManager(Tag)
 132     tags = managers.TagDescriptor(Tag)
 133
 134     _tag_counter = JSONField(editable=False, default='')
 135     _theme_counter = JSONField(editable=False, default='')
 136
 137     class AlreadyExists(Exception):
 138         pass
 139
 140     class Meta:
 141         ordering = ('title',)
 142         verbose_name = _('book')
 143         verbose_name_plural = _('books')
 144
 145     def __unicode__(self):
 146         return self.title
 147
 148     def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
 149         if reset_short_html:
 150             # Reset _short_html during save
 151             for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
 152                 self.__setattr__(key, '')
 153
 154         book = super(Book, self).save(force_insert, force_update)
 155
 156         if refresh_mp3 and self.mp3_file:
 157             print self.mp3_file, self.mp3_file.path
 158             extra_info = self.get_extra_info_value()
 159             extra_info.update(self.get_mp3_info())
 160             self.set_extra_info_value(extra_info)
 161             book = super(Book, self).save(force_insert, force_update)
 162
 163         return book
 164
 165     @permalink
 166     def get_absolute_url(self):
 167         return ('catalogue.views.book_detail', [self.slug])
 168
 169     @property
 170     def name(self):
 171         return self.title
 172
 173     def book_tag(self):
 174         slug = ('l-' + self.slug)[:120]
 175         book_tag, created = Tag.objects.get_or_create(slug=slug)
 176         if created:
 177             book_tag.name = self.title[:50]
 178             book_tag.sort_key = slug
 179             book_tag.category = 'book'
 180             book_tag.save()
 181         return book_tag
 182
 183     def short_html(self):
 184         key = '_short_html_%s' % get_language()
 185         short_html = getattr(self, key)
 186
 187         if short_html and len(short_html):
 188             return mark_safe(short_html)
 189         else:
 190             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 191             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 192
 193             formats = []
 194             if self.html_file:
 195                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 196             if self.pdf_file:
 197                 formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
 198             if self.epub_file:
 199                 formats.append(u'<a href="%s">EPUB</a>' % self.epub_file.url)
 200             if self.odt_file:
 201                 formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
 202             if self.txt_file:
 203                 formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
 204             if self.mp3_file:
 205                 formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
 206             if self.ogg_file:
 207                 formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
 208
 209             formats = [mark_safe(format) for format in formats]
 210
 211             setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
 212                 {'book': self, 'tags': tags, 'formats': formats})))
 213             self.save(reset_short_html=False)
 214             return mark_safe(getattr(self, key))
 215
 216
 217     def get_mp3_info(self):
 218         """Retrieves artist and director names from audio ID3 tags."""
 219         audio = id3.ID3(self.mp3_file.path)
 220         artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
 221         director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
 222         return {'artist_name': artist_name, 'director_name': director_name}
 223
 224     def has_description(self):
 225         return len(self.description) > 0
 226     has_description.short_description = _('description')
 227     has_description.boolean = True
 228
 229     def has_pdf_file(self):
 230         return bool(self.pdf_file)
 231     has_pdf_file.short_description = 'PDF'
 232     has_pdf_file.boolean = True
 233
 234     def has_epub_file(self):
 235         return bool(self.epub_file)
 236     has_epub_file.short_description = 'EPUB'
 237     has_epub_file.boolean = True
 238
 239     def has_odt_file(self):
 240         return bool(self.odt_file)
 241     has_odt_file.short_description = 'ODT'
 242     has_odt_file.boolean = True
 243
 244     def has_html_file(self):
 245         return bool(self.html_file)
 246     has_html_file.short_description = 'HTML'
 247     has_html_file.boolean = True
 248
 249     @classmethod
 250     def from_xml_file(cls, xml_file, overwrite=False):
 251         # use librarian to parse meta-data
 252         book_info = dcparser.parse(xml_file)
 253
 254         if not isinstance(xml_file, File):
 255             xml_file = File(xml_file)
 256
 257         try:
 258             return cls.from_text_and_meta(xml_file, book_info, overwrite)
 259         finally:
 260             xml_file.close()
 261
 262     @classmethod
 263     def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
 264         from tempfile import NamedTemporaryFile
 265         from slughifi import slughifi
 266         from markupstring import MarkupString
 267
 268         # Read book metadata
 269         book_base, book_slug = book_info.url.rsplit('/', 1)
 270         book, created = Book.objects.get_or_create(slug=book_slug)
 271
 272         if created:
 273             book_shelves = []
 274         else:
 275             if not overwrite:
 276                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 277             # Save shelves for this book
 278             book_shelves = list(book.tags.filter(category='set'))
 279
 280         book.title = book_info.title
 281         book.set_extra_info_value(book_info.to_dict())
 282         book._short_html = ''
 283         book.save()
 284
 285         book_tags = []
 286         for category in ('kind', 'genre', 'author', 'epoch'):
 287             tag_name = getattr(book_info, category)
 288             tag_sort_key = tag_name
 289             if category == 'author':
 290                 tag_sort_key = tag_name.last_name
 291                 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 292             tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
 293             if created:
 294                 tag.name = tag_name
 295                 tag.sort_key = slughifi(tag_sort_key)
 296                 tag.category = category
 297                 tag.save()
 298             book_tags.append(tag)
 299
 300         book.tags = book_tags
 301
 302         book_tag = book.book_tag()
 303
 304         if hasattr(book_info, 'parts'):
 305             for n, part_url in enumerate(book_info.parts):
 306                 base, slug = part_url.rsplit('/', 1)
 307                 try:
 308                     child_book = Book.objects.get(slug=slug)
 309                     child_book.parent = book
 310                     child_book.parent_number = n
 311                     child_book.save()
 312                 except Book.DoesNotExist, e:
 313                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 314
 315         book_descendants = list(book.children.all())
 316         while len(book_descendants) > 0:
 317             child_book = book_descendants.pop(0)
 318             child_book.tags = list(child_book.tags) + [book_tag]
 319             child_book.save()
 320             for fragment in child_book.fragments.all():
 321                 fragment.tags = set(list(fragment.tags) + [book_tag])
 322             book_descendants += list(child_book.children.all())
 323
 324         # Save XML and HTML files
 325         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 326
 327         html_file = NamedTemporaryFile()
 328         if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
 329             book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
 330
 331             # Extract fragments
 332             closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
 333             book_themes = []
 334             for fragment in closed_fragments.values():
 335                 text = fragment.to_string()
 336                 short_text = ''
 337                 if (len(MarkupString(text)) > 240):
 338                     short_text = unicode(MarkupString(text)[:160])
 339                 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
 340                     defaults={'text': text, 'short_text': short_text})
 341
 342                 try:
 343                     theme_names = [s.strip() for s in fragment.themes.split(',')]
 344                 except AttributeError:
 345                     continue
 346                 themes = []
 347                 for theme_name in theme_names:
 348                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
 349                     if created:
 350                         tag.name = theme_name
 351                         tag.sort_key = slughifi(theme_name)
 352                         tag.category = 'theme'
 353                         tag.save()
 354                     themes.append(tag)
 355                 new_fragment.save()
 356                 new_fragment.tags = set(list(book.tags) + themes + [book_tag])
 357                 book_themes += themes
 358
 359             book_themes = set(book_themes)
 360             book.tags = list(book.tags) + list(book_themes) + book_shelves
 361
 362         book.save()
 363         return book
 364
 365
 366     def refresh_tag_counter(self):
 367         tags = {}
 368         for child in self.children.all().order_by():
 369             for tag_pk, value in child.tag_counter.iteritems():
 370                 tags[tag_pk] = tags.get(tag_pk, 0) + value
 371         for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
 372             tags[tag.pk] = 1
 373         self.set__tag_counter_value(tags)
 374         self.save(reset_short_html=False, refresh_mp3=False)
 375         return tags
 376
 377     @property
 378     def tag_counter(self):
 379         if self._tag_counter == '':
 380             return self.refresh_tag_counter()
 381         return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())
 382         #return self.get__tag_counter_value()
 383
 384     def refresh_theme_counter(self):
 385         tags = {}
 386         for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
 387             for tag in fragment.tags.filter(category='theme').order_by():
 388                 tags[tag.pk] = tags.get(tag.pk, 0) + 1
 389         self.set__theme_counter_value(tags)
 390         self.save(reset_short_html=False, refresh_mp3=False)
 391         return tags
 392
 393     @property
 394     def theme_counter(self):
 395         if self._theme_counter == '':
 396             return self.refresh_theme_counter()
 397         return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
 398         return self.get__theme_counter_value()
 399
 400
 401
 402 class Fragment(models.Model):
 403     text = models.TextField()
 404     short_text = models.TextField(editable=False)
 405     _short_html = models.TextField(editable=False)
 406     anchor = models.CharField(max_length=120)
 407     book = models.ForeignKey(Book, related_name='fragments')
 408
 409     objects = models.Manager()
 410     tagged = managers.ModelTaggedItemManager(Tag)
 411     tags = managers.TagDescriptor(Tag)
 412
 413     class Meta:
 414         ordering = ('book', 'anchor',)
 415         verbose_name = _('fragment')
 416         verbose_name_plural = _('fragments')
 417
 418     def get_absolute_url(self):
 419         return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 420
 421     def short_html(self):
 422         key = '_short_html_%s' % get_language()
 423         short_html = getattr(self, key)
 424         if short_html and len(short_html):
 425             return mark_safe(short_html)
 426         else:
 427             book_authors = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))
 428                 for tag in self.book.tags if tag.category == 'author']
 429
 430             setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
 431                 {'fragment': self, 'book': self.book, 'book_authors': book_authors})))
 432             self.save()
 433             return mark_safe(getattr(self, key))
 434
 435
 436 class BookStub(models.Model):
 437     title = models.CharField(_('title'), max_length=120)
 438     author = models.CharField(_('author'), max_length=120)
 439     pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
 440     slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 441     translator = models.TextField(_('translator'), blank=True)
 442     translator_death = models.TextField(_('year of translator\'s death'), blank=True)
 443
 444     class Meta:
 445         ordering = ('title',)
 446         verbose_name = _('book stub')
 447         verbose_name_plural = _('book stubs')
 448
 449     def __unicode__(self):
 450         return self.title
 451
 452     @permalink
 453     def get_absolute_url(self):
 454         return ('catalogue.views.book_detail', [self.slug])
 455
 456     def in_pd(self):
 457         return self.pd is not None and self.pd <= datetime.now().year
 458
 459     @property
 460     def name(self):
 461         return self.title
 462
 463