apps/catalogue/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from datetime import datetime
   6
   7 from django.db import models
   8 from django.db.models import permalink, Q
   9 import django.dispatch
  10 from django.core.cache import cache
  11 from django.core.files.storage import DefaultStorage
  12 from django.utils.translation import ugettext_lazy as _
  13 from django.contrib.auth.models import User
  14 from django.template.loader import render_to_string
  15 from django.utils.datastructures import SortedDict
  16 from django.utils.safestring import mark_safe
  17 from django.utils.translation import get_language
  18 from django.core.urlresolvers import reverse
  19 from django.db.models.signals import post_save, m2m_changed, pre_delete
  20
  21 from django.conf import settings
  22
  23 from newtagging.models import TagBase, tags_updated
  24 from newtagging import managers
  25 from catalogue.fields import JSONField, OverwritingFileField
  26 from catalogue.utils import create_zip
  27 from shutil import copy
  28 from glob import glob
  29 import re
  30 from os import path
  31
  32
  33 TAG_CATEGORIES = (
  34     ('author', _('author')),
  35     ('epoch', _('epoch')),
  36     ('kind', _('kind')),
  37     ('genre', _('genre')),
  38     ('theme', _('theme')),
  39     ('set', _('set')),
  40     ('book', _('book')),
  41 )
  42
  43 MEDIA_FORMATS = (
  44     ('odt', _('ODT file')),
  45     ('mp3', _('MP3 file')),
  46     ('ogg', _('OGG file')),
  47     ('daisy', _('DAISY file')),
  48 )
  49
  50 # not quite, but Django wants you to set a timeout
  51 CACHE_FOREVER = 2419200  # 28 days
  52
  53
  54 class TagSubcategoryManager(models.Manager):
  55     def __init__(self, subcategory):
  56         super(TagSubcategoryManager, self).__init__()
  57         self.subcategory = subcategory
  58
  59     def get_query_set(self):
  60         return super(TagSubcategoryManager, self).get_query_set().filter(category=self.subcategory)
  61
  62
  63 class Tag(TagBase):
  64     name = models.CharField(_('name'), max_length=50, db_index=True)
  65     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  66     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
  67     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
  68         db_index=True, choices=TAG_CATEGORIES)
  69     description = models.TextField(_('description'), blank=True)
  70     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  71
  72     user = models.ForeignKey(User, blank=True, null=True)
  73     book_count = models.IntegerField(_('book count'), blank=True, null=True)
  74     gazeta_link = models.CharField(blank=True, max_length=240)
  75     wiki_link = models.CharField(blank=True, max_length=240)
  76
  77     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  78     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
  79
  80     class UrlDeprecationWarning(DeprecationWarning):
  81         pass
  82
  83     categories_rev = {
  84         'autor': 'author',
  85         'epoka': 'epoch',
  86         'rodzaj': 'kind',
  87         'gatunek': 'genre',
  88         'motyw': 'theme',
  89         'polka': 'set',
  90     }
  91     categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
  92
  93     class Meta:
  94         ordering = ('sort_key',)
  95         verbose_name = _('tag')
  96         verbose_name_plural = _('tags')
  97         unique_together = (("slug", "category"),)
  98
  99     def __unicode__(self):
 100         return self.name
 101
 102     def __repr__(self):
 103         return "Tag(slug=%r)" % self.slug
 104
 105     @permalink
 106     def get_absolute_url(self):
 107         return ('catalogue.views.tagged_object_list', [self.url_chunk])
 108
 109     def has_description(self):
 110         return len(self.description) > 0
 111     has_description.short_description = _('description')
 112     has_description.boolean = True
 113
 114     def get_count(self):
 115         """ returns global book count for book tags, fragment count for themes """
 116
 117         if self.book_count is None:
 118             if self.category == 'book':
 119                 # never used
 120                 objects = Book.objects.none()
 121             elif self.category == 'theme':
 122                 objects = Fragment.tagged.with_all((self,))
 123             else:
 124                 objects = Book.tagged.with_all((self,)).order_by()
 125                 if self.category != 'set':
 126                     # eliminate descendants
 127                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
 128                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 129                     if descendants_keys:
 130                         objects = objects.exclude(pk__in=descendants_keys)
 131             self.book_count = objects.count()
 132             self.save()
 133         return self.book_count
 134
 135     @staticmethod
 136     def get_tag_list(tags):
 137         if isinstance(tags, basestring):
 138             real_tags = []
 139             ambiguous_slugs = []
 140             category = None
 141             deprecated = False
 142             tags_splitted = tags.split('/')
 143             for name in tags_splitted:
 144                 if category:
 145                     real_tags.append(Tag.objects.get(slug=name, category=category))
 146                     category = None
 147                 elif name in Tag.categories_rev:
 148                     category = Tag.categories_rev[name]
 149                 else:
 150                     try:
 151                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
 152                         deprecated = True
 153                     except Tag.MultipleObjectsReturned, e:
 154                         ambiguous_slugs.append(name)
 155
 156             if category:
 157                 # something strange left off
 158                 raise Tag.DoesNotExist()
 159             if ambiguous_slugs:
 160                 # some tags should be qualified
 161                 e = Tag.MultipleObjectsReturned()
 162                 e.tags = real_tags
 163                 e.ambiguous_slugs = ambiguous_slugs
 164                 raise e
 165             if deprecated:
 166                 e = Tag.UrlDeprecationWarning()
 167                 e.tags = real_tags
 168                 raise e
 169             return real_tags
 170         else:
 171             return TagBase.get_tag_list(tags)
 172
 173     @property
 174     def url_chunk(self):
 175         return '/'.join((Tag.categories_dict[self.category], self.slug))
 176
 177
 178 def get_dynamic_path(media, filename, ext=None, maxlen=100):
 179     from slughifi import slughifi
 180
 181     # how to put related book's slug here?
 182     if not ext:
 183         if media.type == 'daisy':
 184             ext = 'daisy.zip'
 185         else:
 186             ext = media.type
 187     if media is None or not media.name:
 188         name = slughifi(filename.split(".")[0])
 189     else:
 190         name = slughifi(media.name)
 191     return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
 192
 193
 194 # TODO: why is this hard-coded ?
 195 def book_upload_path(ext=None, maxlen=100):
 196     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
 197
 198
 199 def get_customized_pdf_path(book, customizations):
 200     """
 201     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 202     """
 203     customizations.sort()
 204     h = hash(tuple(customizations))
 205     pdf_name = '%s-custom-%s' % (book.slug, h)
 206     pdf_file = get_dynamic_path(None, pdf_name, ext='pdf')
 207     return pdf_file
 208
 209
 210 def get_existing_customized_pdf(book):
 211     """
 212     Returns a list of paths to generated customized pdf of a book
 213     """
 214     pdf_glob = '%s-custom-' % (book.slug,)
 215     pdf_glob = get_dynamic_path(None, pdf_glob, ext='pdf')
 216     pdf_glob = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", pdf_glob)
 217     return glob(path.join(settings.MEDIA_ROOT, pdf_glob))
 218
 219
 220 class BookMedia(models.Model):
 221     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
 222     name        = models.CharField(_('name'), max_length="100")
 223     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
 224     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
 225     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
 226     book = models.ForeignKey('Book', related_name='media')
 227     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 228
 229     def __unicode__(self):
 230         return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
 231
 232     class Meta:
 233         ordering            = ('type', 'name')
 234         verbose_name        = _('book media')
 235         verbose_name_plural = _('book media')
 236
 237     def save(self, *args, **kwargs):
 238         from slughifi import slughifi
 239         from catalogue.utils import ExistingFile, remove_zip
 240
 241         try:
 242             old = BookMedia.objects.get(pk=self.pk)
 243         except BookMedia.DoesNotExist, e:
 244             pass
 245         else:
 246             # if name changed, change the file name, too
 247             if slughifi(self.name) != slughifi(old.name):
 248                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
 249
 250         super(BookMedia, self).save(*args, **kwargs)
 251
 252         # remove the zip package for book with modified media
 253         remove_zip(self.book.slug)
 254
 255         extra_info = self.get_extra_info_value()
 256         extra_info.update(self.read_meta())
 257         self.set_extra_info_value(extra_info)
 258         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
 259         return super(BookMedia, self).save(*args, **kwargs)
 260
 261     def read_meta(self):
 262         """
 263             Reads some metadata from the audiobook.
 264         """
 265         import mutagen
 266         from mutagen import id3
 267
 268         artist_name = director_name = project = funded_by = ''
 269         if self.type == 'mp3':
 270             try:
 271                 audio = id3.ID3(self.file.path)
 272                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
 273                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
 274                 project = ", ".join([t.data for t in audio.getall('PRIV')
 275                         if t.owner=='wolnelektury.pl?project'])
 276                 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
 277                         if t.owner=='wolnelektury.pl?funded_by'])
 278             except:
 279                 pass
 280         elif self.type == 'ogg':
 281             try:
 282                 audio = mutagen.File(self.file.path)
 283                 artist_name = ', '.join(audio.get('artist', []))
 284                 director_name = ', '.join(audio.get('conductor', []))
 285                 project = ", ".join(audio.get('project', []))
 286                 funded_by = ", ".join(audio.get('funded_by', []))
 287             except:
 288                 pass
 289         else:
 290             return {}
 291         return {'artist_name': artist_name, 'director_name': director_name,
 292                 'project': project, 'funded_by': funded_by}
 293
 294     @staticmethod
 295     def read_source_sha1(filepath, filetype):
 296         """
 297             Reads source file SHA1 from audiobok metadata.
 298         """
 299         import mutagen
 300         from mutagen import id3
 301
 302         if filetype == 'mp3':
 303             try:
 304                 audio = id3.ID3(filepath)
 305                 return [t.data for t in audio.getall('PRIV')
 306                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
 307             except:
 308                 return None
 309         elif filetype == 'ogg':
 310             try:
 311                 audio = mutagen.File(filepath)
 312                 return audio.get('flac_sha1', [None])[0]
 313             except:
 314                 return None
 315         else:
 316             return None
 317
 318
 319 class Book(models.Model):
 320     title         = models.CharField(_('title'), max_length=120)
 321     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
 322     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 323     description   = models.TextField(_('description'), blank=True)
 324     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
 325     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
 326     parent_number = models.IntegerField(_('parent number'), default=0)
 327     extra_info    = JSONField(_('extra information'), default='{}')
 328     gazeta_link   = models.CharField(blank=True, max_length=240)
 329     wiki_link     = models.CharField(blank=True, max_length=240)
 330     # files generated during publication
 331
 332     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
 333
 334     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
 335     objects  = models.Manager()
 336     tagged   = managers.ModelTaggedItemManager(Tag)
 337     tags     = managers.TagDescriptor(Tag)
 338
 339     html_built = django.dispatch.Signal()
 340     published = django.dispatch.Signal()
 341
 342     class AlreadyExists(Exception):
 343         pass
 344
 345     class Meta:
 346         ordering = ('sort_key',)
 347         verbose_name = _('book')
 348         verbose_name_plural = _('books')
 349
 350     def __unicode__(self):
 351         return self.title
 352
 353     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
 354         from sortify import sortify
 355
 356         self.sort_key = sortify(self.title)
 357
 358         ret = super(Book, self).save(force_insert, force_update)
 359
 360         if reset_short_html:
 361             self.reset_short_html()
 362
 363         return ret
 364
 365     @permalink
 366     def get_absolute_url(self):
 367         return ('catalogue.views.book_detail', [self.slug])
 368
 369     @property
 370     def name(self):
 371         return self.title
 372
 373     def book_tag_slug(self):
 374         return ('l-' + self.slug)[:120]
 375
 376     def book_tag(self):
 377         slug = self.book_tag_slug()
 378         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
 379         if created:
 380             book_tag.name = self.title[:50]
 381             book_tag.sort_key = self.title.lower()
 382             book_tag.save()
 383         return book_tag
 384
 385     def has_media(self, type):
 386         if type in Book.file_types:
 387             return bool(getattr(self, "%s_file" % type))
 388         else:
 389             return self.media.filter(type=type).exists()
 390
 391     def get_media(self, type):
 392         if self.has_media(type):
 393             if type in Book.file_types:
 394                 return getattr(self, "%s_file" % type)
 395             else:
 396                 return self.media.filter(type=type)
 397         else:
 398             return None
 399
 400     def get_mp3(self):
 401         return self.get_media("mp3")
 402     def get_odt(self):
 403         return self.get_media("odt")
 404     def get_ogg(self):
 405         return self.get_media("ogg")
 406     def get_daisy(self):
 407         return self.get_media("daisy")
 408
 409     def reset_short_html(self):
 410         if self.id is None:
 411             return
 412
 413         cache_key = "Book.short_html/%d/%s"
 414         for lang, langname in settings.LANGUAGES:
 415             cache.delete(cache_key % (self.id, lang))
 416         # Fragment.short_html relies on book's tags, so reset it here too
 417         for fragm in self.fragments.all():
 418             fragm.reset_short_html()
 419
 420     def short_html(self):
 421         if self.id:
 422             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
 423             short_html = cache.get(cache_key)
 424         else:
 425             short_html = None
 426
 427         if short_html is not None:
 428             return mark_safe(short_html)
 429         else:
 430             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 431             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 432
 433             formats = []
 434             # files generated during publication
 435             if self.has_media("html"):
 436                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 437             if self.has_media("pdf"):
 438                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
 439             if self.has_media("mobi"):
 440                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
 441             if self.root_ancestor.has_media("epub"):
 442                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
 443             if self.has_media("txt"):
 444                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
 445             # other files
 446             for m in self.media.order_by('type'):
 447                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
 448
 449             formats = [mark_safe(format) for format in formats]
 450
 451             short_html = unicode(render_to_string('catalogue/book_short.html',
 452                 {'book': self, 'tags': tags, 'formats': formats}))
 453
 454             if self.id:
 455                 cache.set(cache_key, short_html, CACHE_FOREVER)
 456             return mark_safe(short_html)
 457
 458     @property
 459     def root_ancestor(self):
 460         """ returns the oldest ancestor """
 461
 462         if not hasattr(self, '_root_ancestor'):
 463             book = self
 464             while book.parent:
 465                 book = book.parent
 466             self._root_ancestor = book
 467         return self._root_ancestor
 468
 469
 470     def has_description(self):
 471         return len(self.description) > 0
 472     has_description.short_description = _('description')
 473     has_description.boolean = True
 474
 475     # ugly ugly ugly
 476     def has_odt_file(self):
 477         return bool(self.has_media("odt"))
 478     has_odt_file.short_description = 'ODT'
 479     has_odt_file.boolean = True
 480
 481     def has_mp3_file(self):
 482         return bool(self.has_media("mp3"))
 483     has_mp3_file.short_description = 'MP3'
 484     has_mp3_file.boolean = True
 485
 486     def has_ogg_file(self):
 487         return bool(self.has_media("ogg"))
 488     has_ogg_file.short_description = 'OGG'
 489     has_ogg_file.boolean = True
 490
 491     def has_daisy_file(self):
 492         return bool(self.has_media("daisy"))
 493     has_daisy_file.short_description = 'DAISY'
 494     has_daisy_file.boolean = True
 495
 496     def build_pdf(self, customizations=None, file_name=None):
 497         """ (Re)builds the pdf file.
 498         customizations - customizations which are passed to LaTeX class file.
 499         file_name - save the pdf file under a different name and DO NOT save it in db.
 500         """
 501         from tempfile import NamedTemporaryFile
 502         from os import unlink
 503         from django.core.files import File
 504         from librarian import pdf
 505         from catalogue.utils import ORMDocProvider, remove_zip
 506
 507         try:
 508             pdf_file = NamedTemporaryFile(delete=False)
 509             pdf.transform(ORMDocProvider(self),
 510                       file_path=str(self.xml_file.path),
 511                       output_file=pdf_file,
 512                       customizations=customizations
 513                       )
 514
 515             if file_name is None:
 516                 # we'd like to be sure not to overwrite changes happening while
 517                 # (timely) pdf generation is taking place (async celery scenario)
 518                 current_self = Book.objects.get(id=self.id)
 519                 current_self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
 520                 self.pdf_file = current_self.pdf_file
 521
 522                 # remove cached downloadables
 523                 remove_zip(settings.ALL_PDF_ZIP)
 524
 525                 for customized_pdf in get_existing_customized_pdf(self):
 526                     unlink(customized_pdf)
 527             else:
 528                 print "save %s to: %s" % (file_name, DefaultStorage().path(file_name))
 529
 530                 pdf_django_file = File(open(pdf_file.name))
 531                 DefaultStorage().save(file_name, pdf_django_file)
 532                 pdf_django_file.close()
 533         finally:
 534             pass
 535             unlink(pdf_file.name)
 536
 537     def build_mobi(self):
 538         """ (Re)builds the MOBI file.
 539
 540         """
 541         from tempfile import NamedTemporaryFile
 542         from os import unlink
 543         from django.core.files import File
 544         from librarian import mobi
 545         from catalogue.utils import ORMDocProvider, remove_zip
 546
 547         try:
 548             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
 549             mobi.transform(ORMDocProvider(self), verbose=1,
 550                       file_path=str(self.xml_file.path),
 551                       output_file=mobi_file.name,
 552                       )
 553
 554             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
 555         finally:
 556             unlink(mobi_file.name)
 557
 558         # remove zip with all mobi files
 559         remove_zip(settings.ALL_MOBI_ZIP)
 560
 561     def build_epub(self, remove_descendants=True):
 562         """ (Re)builds the epub file.
 563             If book has a parent, does nothing.
 564             Unless remove_descendants is False, descendants' epubs are removed.
 565         """
 566         from StringIO import StringIO
 567         from hashlib import sha1
 568         from django.core.files.base import ContentFile
 569         from librarian import epub, NoDublinCore
 570         from catalogue.utils import ORMDocProvider, remove_zip
 571
 572         if self.parent:
 573             # don't need an epub
 574             return
 575
 576         epub_file = StringIO()
 577         try:
 578             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
 579             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
 580             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
 581         except NoDublinCore:
 582             pass
 583
 584         book_descendants = list(self.children.all())
 585         while len(book_descendants) > 0:
 586             child_book = book_descendants.pop(0)
 587             if remove_descendants and child_book.has_epub_file():
 588                 child_book.epub_file.delete()
 589             # save anyway, to refresh short_html
 590             child_book.save()
 591             book_descendants += list(child_book.children.all())
 592
 593         # remove zip package with all epub files
 594         remove_zip(settings.ALL_EPUB_ZIP)
 595
 596     def build_txt(self):
 597         from StringIO import StringIO
 598         from django.core.files.base import ContentFile
 599         from librarian import text
 600
 601         out = StringIO()
 602         text.transform(open(self.xml_file.path), out)
 603         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
 604
 605
 606     def build_html(self):
 607         from tempfile import NamedTemporaryFile
 608         from markupstring import MarkupString
 609         from django.core.files import File
 610         from slughifi import slughifi
 611         from librarian import html
 612
 613         meta_tags = list(self.tags.filter(
 614             category__in=('author', 'epoch', 'genre', 'kind')))
 615         book_tag = self.book_tag()
 616
 617         html_file = NamedTemporaryFile()
 618         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
 619             self.html_file.save('%s.html' % self.slug, File(html_file))
 620
 621             # get ancestor l-tags for adding to new fragments
 622             ancestor_tags = []
 623             p = self.parent
 624             while p:
 625                 ancestor_tags.append(p.book_tag())
 626                 p = p.parent
 627
 628             # Delete old fragments and create them from scratch
 629             self.fragments.all().delete()
 630             # Extract fragments
 631             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
 632             for fragment in closed_fragments.values():
 633                 try:
 634                     theme_names = [s.strip() for s in fragment.themes.split(',')]
 635                 except AttributeError:
 636                     continue
 637                 themes = []
 638                 for theme_name in theme_names:
 639                     if not theme_name:
 640                         continue
 641                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
 642                     if created:
 643                         tag.name = theme_name
 644                         tag.sort_key = theme_name.lower()
 645                         tag.save()
 646                     themes.append(tag)
 647                 if not themes:
 648                     continue
 649
 650                 text = fragment.to_string()
 651                 short_text = ''
 652                 if (len(MarkupString(text)) > 240):
 653                     short_text = unicode(MarkupString(text)[:160])
 654                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
 655                     text=text, short_text=short_text)
 656
 657                 new_fragment.save()
 658                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
 659             self.save()
 660             self.html_built.send(sender=self)
 661             return True
 662         return False
 663
 664     @staticmethod
 665     def zip_format(format_):
 666         def pretty_file_name(book):
 667             return "%s/%s.%s" % (
 668                 b.get_extra_info_value()['author'],
 669                 b.slug,
 670                 format_)
 671
 672         field_name = "%s_file" % format_
 673         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
 674         paths = [(pretty_file_name(b), getattr(b, field_name).path)
 675                     for b in books]
 676         result = create_zip.delay(paths,
 677                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 678         return result.wait()
 679
 680     def zip_audiobooks(self):
 681         bm = BookMedia.objects.filter(book=self, type='mp3')
 682         paths = map(lambda bm: (None, bm.file.path), bm)
 683         result = create_zip.delay(paths, self.slug)
 684         return result.wait()
 685
 686     @classmethod
 687     def from_xml_file(cls, xml_file, **kwargs):
 688         from django.core.files import File
 689         from librarian import dcparser
 690
 691         # use librarian to parse meta-data
 692         book_info = dcparser.parse(xml_file)
 693
 694         if not isinstance(xml_file, File):
 695             xml_file = File(open(xml_file))
 696
 697         try:
 698             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 699         finally:
 700             xml_file.close()
 701
 702     @classmethod
 703     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
 704             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
 705         import re
 706         from slughifi import slughifi
 707         from sortify import sortify
 708
 709         # check for parts before we do anything
 710         children = []
 711         if hasattr(book_info, 'parts'):
 712             for part_url in book_info.parts:
 713                 base, slug = part_url.rsplit('/', 1)
 714                 try:
 715                     children.append(Book.objects.get(slug=slug))
 716                 except Book.DoesNotExist, e:
 717                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 718
 719
 720         # Read book metadata
 721         book_base, book_slug = book_info.url.rsplit('/', 1)
 722         if re.search(r'[^a-zA-Z0-9-]', book_slug):
 723             raise ValueError('Invalid characters in slug')
 724         book, created = Book.objects.get_or_create(slug=book_slug)
 725
 726         if created:
 727             book_shelves = []
 728         else:
 729             if not overwrite:
 730                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 731             # Save shelves for this book
 732             book_shelves = list(book.tags.filter(category='set'))
 733
 734         book.title = book_info.title
 735         book.set_extra_info_value(book_info.to_dict())
 736         book.save()
 737
 738         meta_tags = []
 739         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
 740         for field_name, category in categories:
 741             try:
 742                 tag_names = getattr(book_info, field_name)
 743             except:
 744                 tag_names = [getattr(book_info, category)]
 745             for tag_name in tag_names:
 746                 tag_sort_key = tag_name
 747                 if category == 'author':
 748                     tag_sort_key = tag_name.last_name
 749                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 750                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
 751                 if created:
 752                     tag.name = tag_name
 753                     tag.sort_key = sortify(tag_sort_key.lower())
 754                     tag.save()
 755                 meta_tags.append(tag)
 756
 757         book.tags = set(meta_tags + book_shelves)
 758
 759         book_tag = book.book_tag()
 760
 761         for n, child_book in enumerate(children):
 762             child_book.parent = book
 763             child_book.parent_number = n
 764             child_book.save()
 765
 766         # Save XML and HTML files
 767         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 768
 769         # delete old fragments when overwriting
 770         book.fragments.all().delete()
 771
 772         if book.build_html():
 773             if not settings.NO_BUILD_TXT and build_txt:
 774                 book.build_txt()
 775
 776         if not settings.NO_BUILD_EPUB and build_epub:
 777             book.root_ancestor.build_epub()
 778
 779         if not settings.NO_BUILD_PDF and build_pdf:
 780             book.root_ancestor.build_pdf()
 781
 782         if not settings.NO_BUILD_MOBI and build_mobi:
 783             book.build_mobi()
 784
 785         book_descendants = list(book.children.all())
 786         # add l-tag to descendants and their fragments
 787         # delete unnecessary EPUB files
 788         while len(book_descendants) > 0:
 789             child_book = book_descendants.pop(0)
 790             child_book.tags = list(child_book.tags) + [book_tag]
 791             child_book.save()
 792             for fragment in child_book.fragments.all():
 793                 fragment.tags = set(list(fragment.tags) + [book_tag])
 794             book_descendants += list(child_book.children.all())
 795
 796         book.save()
 797
 798         # refresh cache
 799         book.reset_tag_counter()
 800         book.reset_theme_counter()
 801
 802         cls.published.send(sender=book)
 803         return book
 804
 805     def reset_tag_counter(self):
 806         if self.id is None:
 807             return
 808
 809         cache_key = "Book.tag_counter/%d" % self.id
 810         cache.delete(cache_key)
 811         if self.parent:
 812             self.parent.reset_tag_counter()
 813
 814     @property
 815     def tag_counter(self):
 816         if self.id:
 817             cache_key = "Book.tag_counter/%d" % self.id
 818             tags = cache.get(cache_key)
 819         else:
 820             tags = None
 821
 822         if tags is None:
 823             tags = {}
 824             for child in self.children.all().order_by():
 825                 for tag_pk, value in child.tag_counter.iteritems():
 826                     tags[tag_pk] = tags.get(tag_pk, 0) + value
 827             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
 828                 tags[tag.pk] = 1
 829
 830             if self.id:
 831                 cache.set(cache_key, tags, CACHE_FOREVER)
 832         return tags
 833
 834     def reset_theme_counter(self):
 835         if self.id is None:
 836             return
 837
 838         cache_key = "Book.theme_counter/%d" % self.id
 839         cache.delete(cache_key)
 840         if self.parent:
 841             self.parent.reset_theme_counter()
 842
 843     @property
 844     def theme_counter(self):
 845         if self.id:
 846             cache_key = "Book.theme_counter/%d" % self.id
 847             tags = cache.get(cache_key)
 848         else:
 849             tags = None
 850
 851         if tags is None:
 852             tags = {}
 853             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
 854                 for tag in fragment.tags.filter(category='theme').order_by():
 855                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 856
 857             if self.id:
 858                 cache.set(cache_key, tags, CACHE_FOREVER)
 859         return tags
 860
 861     def pretty_title(self, html_links=False):
 862         book = self
 863         names = list(book.tags.filter(category='author'))
 864
 865         books = []
 866         while book:
 867             books.append(book)
 868             book = book.parent
 869         names.extend(reversed(books))
 870
 871         if html_links:
 872             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
 873         else:
 874             names = [tag.name for tag in names]
 875
 876         return ', '.join(names)
 877
 878     @classmethod
 879     def tagged_top_level(cls, tags):
 880         """ Returns top-level books tagged with `tags'.
 881
 882         It only returns those books which don't have ancestors which are
 883         also tagged with those tags.
 884
 885         """
 886         # get relevant books and their tags
 887         objects = cls.tagged.with_all(tags)
 888         # eliminate descendants
 889         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
 890         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
 891         if descendants_keys:
 892             objects = objects.exclude(pk__in=descendants_keys)
 893
 894         return objects
 895
 896     @classmethod
 897     def book_list(cls, filter=None):
 898         """Generates a hierarchical listing of all books.
 899
 900         Books are optionally filtered with a test function.
 901
 902         """
 903
 904         books_by_parent = {}
 905         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
 906         if filter:
 907             books = books.filter(filter).distinct()
 908             book_ids = set((book.pk for book in books))
 909             for book in books:
 910                 parent = book.parent_id
 911                 if parent not in book_ids:
 912                     parent = None
 913                 books_by_parent.setdefault(parent, []).append(book)
 914         else:
 915             for book in books:
 916                 books_by_parent.setdefault(book.parent_id, []).append(book)
 917
 918         orphans = []
 919         books_by_author = SortedDict()
 920         for tag in Tag.objects.filter(category='author'):
 921             books_by_author[tag] = []
 922
 923         for book in books_by_parent.get(None,()):
 924             authors = list(book.tags.filter(category='author'))
 925             if authors:
 926                 for author in authors:
 927                     books_by_author[author].append(book)
 928             else:
 929                 orphans.append(book)
 930
 931         return books_by_author, orphans, books_by_parent
 932
 933     _audiences_pl = {
 934         "SP1": (1, u"szkoła podstawowa"),
 935         "SP2": (1, u"szkoła podstawowa"),
 936         "P": (1, u"szkoła podstawowa"),
 937         "G": (2, u"gimnazjum"),
 938         "L": (3, u"liceum"),
 939         "LP": (3, u"liceum"),
 940     }
 941     def audiences_pl(self):
 942         audiences = self.get_extra_info_value().get('audiences', [])
 943         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
 944         return [a[1] for a in audiences]
 945
 946
 947 def _has_factory(ftype):
 948     has = lambda self: bool(getattr(self, "%s_file" % ftype))
 949     has.short_description = t.upper()
 950     has.boolean = True
 951     has.__name__ = "has_%s_file" % ftype
 952     return has
 953
 954
 955 # add the file fields
 956 for t in Book.file_types:
 957     field_name = "%s_file" % t
 958     models.FileField(_("%s file" % t.upper()),
 959             upload_to=book_upload_path(t),
 960             blank=True).contribute_to_class(Book, field_name)
 961
 962     setattr(Book, "has_%s_file" % t, _has_factory(t))
 963
 964
 965 class Fragment(models.Model):
 966     text = models.TextField()
 967     short_text = models.TextField(editable=False)
 968     anchor = models.CharField(max_length=120)
 969     book = models.ForeignKey(Book, related_name='fragments')
 970
 971     objects = models.Manager()
 972     tagged = managers.ModelTaggedItemManager(Tag)
 973     tags = managers.TagDescriptor(Tag)
 974
 975     class Meta:
 976         ordering = ('book', 'anchor',)
 977         verbose_name = _('fragment')
 978         verbose_name_plural = _('fragments')
 979
 980     def get_absolute_url(self):
 981         return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 982
 983     def reset_short_html(self):
 984         if self.id is None:
 985             return
 986
 987         cache_key = "Fragment.short_html/%d/%s"
 988         for lang, langname in settings.LANGUAGES:
 989             cache.delete(cache_key % (self.id, lang))
 990
 991     def short_html(self):
 992         if self.id:
 993             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
 994             short_html = cache.get(cache_key)
 995         else:
 996             short_html = None
 997
 998         if short_html is not None:
 999             return mark_safe(short_html)
1000         else:
1001             short_html = unicode(render_to_string('catalogue/fragment_short.html',
1002                 {'fragment': self}))
1003             if self.id:
1004                 cache.set(cache_key, short_html, CACHE_FOREVER)
1005             return mark_safe(short_html)
1006
1007
class FileRecord(models.Model):
    """A timestamped record of a file's SHA-1 hash, keyed by slug and type."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # newest records first
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return '<sha1> <slug>.<type>'."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
1021
1022 ###########
1023 #
1024 # SIGNALS
1025 #
1026 ###########
1027
1028
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of an object have changed.

    Resets the global book count of every affected tag and touches its
    `changed_at`, so that the API knows the tag was modified. Then, when the
    sender is a Book and a tag outside the 'book', 'theme' and 'set'
    categories is affected, resets that book's tag counter; when the sender
    is a Fragment and a theme tag is affected, resets the theme counter of
    the fragment's book.
    """
    # Collect the pks once, as a list: the same set is needed for every query
    # below, and a list is safe even if `affected_tags` is a one-shot iterable.
    affected_pks = [tag.pk for tag in affected_tags]
    affected = Tag.objects.filter(pk__in=affected_pks)

    affected.update(book_count=None, changed_at=datetime.now())

    # Only existence matters here, so ask the database for that rather than
    # for a full count.
    if isinstance(sender, Book):
        if affected.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        if affected.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1045
1046
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when one of its BookMedia is about to be deleted."""
    # connected for every model, so ignore anything that is not a BookMedia
    if sender != BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
1052
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after one of its BookMedia has been saved."""
    # connected for every model, so ignore anything that is not a BookMedia
    if sender != BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)