apps/catalogue/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from datetime import datetime
   6
   7 from django.db import models
   8 from django.db.models import permalink, Q
   9 import django.dispatch
  10 from django.core.cache import cache
  11 from django.utils.translation import ugettext_lazy as _
  12 from django.contrib.auth.models import User
  13 from django.core.files import File
  14 from django.template.loader import render_to_string
  15 from django.utils.safestring import mark_safe
  16 from django.utils.translation import get_language
  17 from django.core.urlresolvers import reverse
  18 from django.db.models.signals import post_save, m2m_changed, pre_delete
  19
  20 from django.conf import settings
  21
  22 from newtagging.models import TagBase, tags_updated
  23 from newtagging import managers
  24 from catalogue.fields import JSONField, OverwritingFileField
  25 from catalogue.utils import ExistingFile, BookImportDocProvider, create_zip_task, remove_zip
  26
  27 from librarian import dcparser, html, epub, NoDublinCore
  28 import mutagen
  29 from mutagen import id3
  30 from slughifi import slughifi
  31 from sortify import sortify
  32 from os import unlink
  33
# Tag categories as (stored value, translatable label) pairs; used as the
# ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Types of additional media files as (stored value, translatable label) pairs;
# used as the ``choices`` of ``BookMedia.type``.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Not really "forever", but Django wants an explicit timeout for cache entries.
CACHE_FOREVER = 2419200  # 28 days, in seconds
  53
  54
  55 class TagSubcategoryManager(models.Manager):
  56     def __init__(self, subcategory):
  57         super(TagSubcategoryManager, self).__init__()
  58         self.subcategory = subcategory
  59
  60     def get_query_set(self):
  61         return super(TagSubcategoryManager, self).get_query_set().filter(category=self.subcategory)
  62
  63
  64 class Tag(TagBase):
  65     name = models.CharField(_('name'), max_length=50, db_index=True)
  66     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  67     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
  68     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
  69         db_index=True, choices=TAG_CATEGORIES)
  70     description = models.TextField(_('description'), blank=True)
  71     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  72
  73     user = models.ForeignKey(User, blank=True, null=True)
  74     book_count = models.IntegerField(_('book count'), blank=True, null=True)
  75     gazeta_link = models.CharField(blank=True, max_length=240)
  76     wiki_link = models.CharField(blank=True, max_length=240)
  77
  78     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  79     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
  80
  81     class UrlDeprecationWarning(DeprecationWarning):
  82         pass
  83
  84     categories_rev = {
  85         'autor': 'author',
  86         'epoka': 'epoch',
  87         'rodzaj': 'kind',
  88         'gatunek': 'genre',
  89         'motyw': 'theme',
  90         'polka': 'set',
  91     }
  92     categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
  93
  94     class Meta:
  95         ordering = ('sort_key',)
  96         verbose_name = _('tag')
  97         verbose_name_plural = _('tags')
  98         unique_together = (("slug", "category"),)
  99
 100     def __unicode__(self):
 101         return self.name
 102
 103     def __repr__(self):
 104         return "Tag(slug=%r)" % self.slug
 105
 106     @permalink
 107     def get_absolute_url(self):
 108         return ('catalogue.views.tagged_object_list', [self.url_chunk])
 109
 110     def has_description(self):
 111         return len(self.description) > 0
 112     has_description.short_description = _('description')
 113     has_description.boolean = True
 114
 115     def get_count(self):
 116         """ returns global book count for book tags, fragment count for themes """
 117
 118         if self.book_count is None:
 119             if self.category == 'book':
 120                 # never used
 121                 objects = Book.objects.none()
 122             elif self.category == 'theme':
 123                 objects = Fragment.tagged.with_all((self,))
 124             else:
 125                 objects = Book.tagged.with_all((self,)).order_by()
 126                 if self.category != 'set':
 127                     # eliminate descendants
 128                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
 129                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 130                     if descendants_keys:
 131                         objects = objects.exclude(pk__in=descendants_keys)
 132             self.book_count = objects.count()
 133             self.save()
 134         return self.book_count
 135
 136     @staticmethod
 137     def get_tag_list(tags):
 138         if isinstance(tags, basestring):
 139             real_tags = []
 140             ambiguous_slugs = []
 141             category = None
 142             deprecated = False
 143             tags_splitted = tags.split('/')
 144             for name in tags_splitted:
 145                 if category:
 146                     real_tags.append(Tag.objects.get(slug=name, category=category))
 147                     category = None
 148                 elif name in Tag.categories_rev:
 149                     category = Tag.categories_rev[name]
 150                 else:
 151                     try:
 152                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
 153                         deprecated = True
 154                     except Tag.MultipleObjectsReturned, e:
 155                         ambiguous_slugs.append(name)
 156
 157             if category:
 158                 # something strange left off
 159                 raise Tag.DoesNotExist()
 160             if ambiguous_slugs:
 161                 # some tags should be qualified
 162                 e = Tag.MultipleObjectsReturned()
 163                 e.tags = real_tags
 164                 e.ambiguous_slugs = ambiguous_slugs
 165                 raise e
 166             if deprecated:
 167                 e = Tag.UrlDeprecationWarning()
 168                 e.tags = real_tags
 169                 raise e
 170             return real_tags
 171         else:
 172             return TagBase.get_tag_list(tags)
 173
 174     @property
 175     def url_chunk(self):
 176         return '/'.join((Tag.categories_dict[self.category], self.slug))
 177
 178
 179 # TODO: why is this hard-coded ?
 180 def book_upload_path(ext=None, maxlen=100):
 181     def get_dynamic_path(media, filename, ext=ext):
 182         # how to put related book's slug here?
 183         if not ext:
 184             if media.type == 'daisy':
 185                 ext = 'daisy.zip'
 186             else:
 187                 ext = media.type
 188         if not media.name:
 189             name = slughifi(filename.split(".")[0])
 190         else:
 191             name = slughifi(media.name)
 192         return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
 193     return get_dynamic_path
 194
 195
 196 class BookMedia(models.Model):
 197     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
 198     name        = models.CharField(_('name'), max_length="100")
 199     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
 200     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
 201     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
 202     book = models.ForeignKey('Book', related_name='media')
 203     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 204
 205     def __unicode__(self):
 206         return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
 207
 208     class Meta:
 209         ordering            = ('type', 'name')
 210         verbose_name        = _('book media')
 211         verbose_name_plural = _('book media')
 212
 213     def save(self, *args, **kwargs):
 214         try:
 215             old = BookMedia.objects.get(pk=self.pk)
 216         except BookMedia.DoesNotExist, e:
 217             pass
 218         else:
 219             # if name changed, change the file name, too
 220             if slughifi(self.name) != slughifi(old.name):
 221                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
 222
 223         super(BookMedia, self).save(*args, **kwargs)
 224         extra_info = self.get_extra_info_value()
 225         extra_info.update(self.read_meta())
 226         self.set_extra_info_value(extra_info)
 227         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
 228         return super(BookMedia, self).save(*args, **kwargs)
 229
 230     def read_meta(self):
 231         """
 232             Reads some metadata from the audiobook.
 233         """
 234
 235         artist_name = director_name = project = funded_by = ''
 236         if self.type == 'mp3':
 237             try:
 238                 audio = id3.ID3(self.file.path)
 239                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
 240                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
 241                 project = ", ".join([t.data for t in audio.getall('PRIV')
 242                         if t.owner=='wolnelektury.pl?project'])
 243                 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
 244                         if t.owner=='wolnelektury.pl?funded_by'])
 245             except:
 246                 pass
 247         elif self.type == 'ogg':
 248             try:
 249                 audio = mutagen.File(self.file.path)
 250                 artist_name = ', '.join(audio.get('artist', []))
 251                 director_name = ', '.join(audio.get('conductor', []))
 252                 project = ", ".join(audio.get('project', []))
 253                 funded_by = ", ".join(audio.get('funded_by', []))
 254             except:
 255                 pass
 256         else:
 257             return {}
 258         return {'artist_name': artist_name, 'director_name': director_name,
 259                 'project': project, 'funded_by': funded_by}
 260
 261     @staticmethod
 262     def read_source_sha1(filepath, filetype):
 263         """
 264             Reads source file SHA1 from audiobok metadata.
 265         """
 266
 267         if filetype == 'mp3':
 268             try:
 269                 audio = id3.ID3(filepath)
 270                 return [t.data for t in audio.getall('PRIV')
 271                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
 272             except:
 273                 return None
 274         elif filetype == 'ogg':
 275             try:
 276                 audio = mutagen.File(filepath)
 277                 return audio.get('flac_sha1', [None])[0]
 278             except:
 279                 return None
 280         else:
 281             return None
 282
 283
class Book(models.Model):
    """A book in the catalogue, optionally nested under a parent book."""

    title         = models.CharField(_('title'), max_length=120)
    # Set from the title by save(); not editable by hand.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this field uses
    # auto_now -- confirm whether a separate label was intended.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position of this book among its parent's parts (set in from_text_and_meta).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() (with the book as sender) after the HTML file and
    # its fragments have been rebuilt.
    html_built = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug is taken and
        # overwrite is not requested.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
 319
 320     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
 321         self.sort_key = sortify(self.title)
 322
 323         ret = super(Book, self).save(force_insert, force_update)
 324
 325         if reset_short_html:
 326             self.reset_short_html()
 327
 328         return ret
 329
 330     @permalink
 331     def get_absolute_url(self):
 332         return ('catalogue.views.book_detail', [self.slug])
 333
 334     @property
 335     def name(self):
 336         return self.title
 337
 338     def book_tag_slug(self):
 339         return ('l-' + self.slug)[:120]
 340
 341     def book_tag(self):
 342         slug = self.book_tag_slug()
 343         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
 344         if created:
 345             book_tag.name = self.title[:50]
 346             book_tag.sort_key = self.title.lower()
 347             book_tag.save()
 348         return book_tag
 349
 350     def has_media(self, type):
 351         if   type == 'xml':
 352             if self.xml_file:
 353                 return True
 354             else:
 355                 return False
 356         elif type == 'html':
 357             if self.html_file:
 358                 return True
 359             else:
 360                 return False
 361         elif type == 'txt':
 362             if self.txt_file:
 363                 return True
 364             else:
 365                 return False
 366         elif type == 'pdf':
 367             if self.pdf_file:
 368                 return True
 369             else:
 370                 return False
 371         elif type == 'epub':
 372             if self.epub_file:
 373                 return True
 374             else:
 375                 return False
 376         else:
 377             if self.media.filter(type=type).exists():
 378                 return True
 379             else:
 380                 return False
 381
 382     def get_media(self, type):
 383         if self.has_media(type):
 384             if   type == "xml":
 385                 return self.xml_file
 386             elif type == "html":
 387                 return self.html_file
 388             elif type == "epub":
 389                 return self.epub_file
 390             elif type == "txt":
 391                 return self.txt_file
 392             elif type == "pdf":
 393                 return self.pdf_file
 394             else:
 395                 return self.media.filter(type=type)
 396         else:
 397             return None
 398
 399     def get_mp3(self):
 400         return self.get_media("mp3")
 401     def get_odt(self):
 402         return self.get_media("odt")
 403     def get_ogg(self):
 404         return self.get_media("ogg")
 405     def get_daisy(self):
 406         return self.get_media("daisy")
 407
 408     def reset_short_html(self):
 409         if self.id is None:
 410             return
 411
 412         cache_key = "Book.short_html/%d/%s"
 413         for lang, langname in settings.LANGUAGES:
 414             cache.delete(cache_key % (self.id, lang))
 415         # Fragment.short_html relies on book's tags, so reset it here too
 416         for fragm in self.fragments.all():
 417             fragm.reset_short_html()
 418
 419     def short_html(self):
 420         if self.id:
 421             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
 422             short_html = cache.get(cache_key)
 423         else:
 424             short_html = None
 425
 426         if short_html is not None:
 427             return mark_safe(short_html)
 428         else:
 429             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 430             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 431
 432             formats = []
 433             # files generated during publication
 434             if self.has_media("html"):
 435                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 436             if self.has_media("pdf"):
 437                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
 438             if self.root_ancestor.has_media("epub"):
 439                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
 440             if self.has_media("txt"):
 441                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
 442             # other files
 443             for m in self.media.order_by('type'):
 444                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
 445
 446             formats = [mark_safe(format) for format in formats]
 447
 448             short_html = unicode(render_to_string('catalogue/book_short.html',
 449                 {'book': self, 'tags': tags, 'formats': formats}))
 450
 451             if self.id:
 452                 cache.set(cache_key, short_html, CACHE_FOREVER)
 453             return mark_safe(short_html)
 454
 455     @property
 456     def root_ancestor(self):
 457         """ returns the oldest ancestor """
 458
 459         if not hasattr(self, '_root_ancestor'):
 460             book = self
 461             while book.parent:
 462                 book = book.parent
 463             self._root_ancestor = book
 464         return self._root_ancestor
 465
 466
 467     def has_description(self):
 468         return len(self.description) > 0
 469     has_description.short_description = _('description')
 470     has_description.boolean = True
 471
 472     # ugly ugly ugly
 473     def has_pdf_file(self):
 474         return bool(self.pdf_file)
 475     has_pdf_file.short_description = 'PDF'
 476     has_pdf_file.boolean = True
 477
 478     def has_epub_file(self):
 479         return bool(self.epub_file)
 480     has_epub_file.short_description = 'EPUB'
 481     has_epub_file.boolean = True
 482
 483     def has_txt_file(self):
 484         return bool(self.txt_file)
 485     has_txt_file.short_description = 'HTML'
 486     has_txt_file.boolean = True
 487
 488     def has_html_file(self):
 489         return bool(self.html_file)
 490     has_html_file.short_description = 'HTML'
 491     has_html_file.boolean = True
 492
 493     def has_odt_file(self):
 494         return bool(self.has_media("odt"))
 495     has_odt_file.short_description = 'ODT'
 496     has_odt_file.boolean = True
 497
 498     def has_mp3_file(self):
 499         return bool(self.has_media("mp3"))
 500     has_mp3_file.short_description = 'MP3'
 501     has_mp3_file.boolean = True
 502
 503     def has_ogg_file(self):
 504         return bool(self.has_media("ogg"))
 505     has_ogg_file.short_description = 'OGG'
 506     has_ogg_file.boolean = True
 507
 508     def has_daisy_file(self):
 509         return bool(self.has_media("daisy"))
 510     has_daisy_file.short_description = 'DAISY'
 511     has_daisy_file.boolean = True
 512
 513     def build_pdf(self):
 514         """ (Re)builds the pdf file.
 515
 516         """
 517         from librarian import pdf
 518         from tempfile import NamedTemporaryFile
 519         import os
 520
 521         path, fname = os.path.realpath(self.xml_file.path).rsplit('/', 1)
 522         try:
 523             pdf_file = NamedTemporaryFile(delete=False)
 524             print("%s -> %s" % (self.xml_file.path, pdf_file))
 525             pdf.transform(BookImportDocProvider(self),
 526                       file_path=str(self.xml_file.path),
 527                       output_file=pdf_file,
 528                       )
 529
 530             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
 531         finally:
 532             unlink(pdf_file.name)
 533
 534     def build_epub(self, remove_descendants=True):
 535         """ (Re)builds the epub file.
 536             If book has a parent, does nothing.
 537             Unless remove_descendants is False, descendants' epubs are removed.
 538         """
 539         from StringIO import StringIO
 540         from hashlib import sha1
 541         from django.core.files.base import ContentFile
 542
 543         if self.parent:
 544             # don't need an epub
 545             return
 546
 547         epub_file = StringIO()
 548         try:
 549             epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
 550             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
 551             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
 552         except NoDublinCore:
 553             pass
 554
 555         book_descendants = list(self.children.all())
 556         while len(book_descendants) > 0:
 557             child_book = book_descendants.pop(0)
 558             if remove_descendants and child_book.has_epub_file():
 559                 child_book.epub_file.delete()
 560             # save anyway, to refresh short_html
 561             child_book.save()
 562             book_descendants += list(child_book.children.all())
 563
 564     def build_txt(self):
 565         from StringIO import StringIO
 566         from django.core.files.base import ContentFile
 567         from librarian import text
 568
 569         out = StringIO()
 570         text.transform(open(self.xml_file.path), out)
 571         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
 572
 573
    def build_html(self):
        """(Re)build the HTML file and the book's fragments from its XML.

        Returns True when the HTML transformation succeeded (the HTML file is
        stored, fragments are recreated, the book is saved and ``html_built``
        is sent), False otherwise.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        # Tags every new fragment receives, besides its own themes.
        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        html_file = NamedTemporaryFile()
        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
            self.html_file.save('%s.html' % self.slug, File(html_file))

            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = self.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Delete old fragments and create them from scratch
            self.fragments.all().delete()
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # No usable ``themes`` on this fragment: skip it.
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    # Fragments without any theme are not stored.
                    continue

                text = fragment.to_string()
                # A short version (first 160 items of the MarkupString) is
                # kept only for texts longer than 240.
                short_text = ''
                if (len(MarkupString(text)) > 240):
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                    text=text, short_text=short_text)

                # NOTE(review): create() has already saved the fragment; this
                # second save looks redundant -- confirm before removing.
                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
            self.save()
            self.html_built.send(sender=self)
            return True
        return False
 628
 629     @staticmethod
 630     def zip_epub():
 631         books = Book.objects.all()
 632
 633         paths = filter(lambda x: x is not None,
 634                        map(lambda b: b.epub_file and b.epub_file.path or None, books))
 635         result = create_zip_task.delay(paths, settings.ALL_EPUB_ZIP)
 636         return settings.MEDIA_URL + result.wait()
 637
 638     @staticmethod
 639     def zip_pdf():
 640         books = Book.objects.all()
 641
 642         paths = filter(lambda x: x is not None,
 643                        map(lambda b: b.pdf_file and b.pdf_file.path or None, books))
 644         result = create_zip_task.delay(paths, settings.ALL_PDF_ZIP)
 645         return settings.MEDIA_URL + result.wait()
 646
 647     def zip_audiobooks(self):
 648         bm = BookMedia.objects.filter(book=self)
 649         paths = map(lambda bm: bm.file.path, bm)
 650         result = create_zip_task.delay(paths, self.slug)
 651
 652         return settings.MEDIA_URL + result.wait()
 653
 654     def clean_zip_files(self):
 655         remove_zip(self.slug)
 656         remove_zip(settings.ALL_EPUB_ZIP)
 657         remove_zip(settings.ALL_PDF_ZIP)
 658
 659     @classmethod
 660     def from_xml_file(cls, xml_file, **kwargs):
 661         # use librarian to parse meta-data
 662         book_info = dcparser.parse(xml_file)
 663
 664         if not isinstance(xml_file, File):
 665             xml_file = File(open(xml_file))
 666
 667         try:
 668             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 669         finally:
 670             xml_file.close()
 671
 672     @classmethod
 673     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True):
 674         import re
 675
 676         # check for parts before we do anything
 677         children = []
 678         if hasattr(book_info, 'parts'):
 679             for part_url in book_info.parts:
 680                 base, slug = part_url.rsplit('/', 1)
 681                 try:
 682                     children.append(Book.objects.get(slug=slug))
 683                 except Book.DoesNotExist, e:
 684                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 685
 686
 687         # Read book metadata
 688         book_base, book_slug = book_info.url.rsplit('/', 1)
 689         if re.search(r'[^a-zA-Z0-9-]', book_slug):
 690             raise ValueError('Invalid characters in slug')
 691         book, created = Book.objects.get_or_create(slug=book_slug)
 692
 693         if created:
 694             book_shelves = []
 695         else:
 696             if not overwrite:
 697                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 698             # Save shelves for this book
 699             book_shelves = list(book.tags.filter(category='set'))
 700
 701         book.title = book_info.title
 702         book.set_extra_info_value(book_info.to_dict())
 703         book.save()
 704
 705         meta_tags = []
 706         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
 707         for field_name, category in categories:
 708             try:
 709                 tag_names = getattr(book_info, field_name)
 710             except:
 711                 tag_names = [getattr(book_info, category)]
 712             for tag_name in tag_names:
 713                 tag_sort_key = tag_name
 714                 if category == 'author':
 715                     tag_sort_key = tag_name.last_name
 716                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 717                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
 718                 if created:
 719                     tag.name = tag_name
 720                     tag.sort_key = sortify(tag_sort_key.lower())
 721                     tag.save()
 722                 meta_tags.append(tag)
 723
 724         book.tags = set(meta_tags + book_shelves)
 725
 726         book_tag = book.book_tag()
 727
 728         for n, child_book in enumerate(children):
 729             child_book.parent = book
 730             child_book.parent_number = n
 731             child_book.save()
 732
 733         # Save XML and HTML files
 734         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 735
 736         # delete old fragments when overwriting
 737         book.fragments.all().delete()
 738
 739         if book.build_html():
 740             if not settings.NO_BUILD_TXT and build_txt:
 741                 book.build_txt()
 742
 743         if not settings.NO_BUILD_EPUB and build_epub:
 744             book.root_ancestor.build_epub()
 745
 746         if not settings.NO_BUILD_PDF and build_pdf:
 747             book.root_ancestor.build_pdf()
 748
 749         book_descendants = list(book.children.all())
 750         # add l-tag to descendants and their fragments
 751         # delete unnecessary EPUB files
 752         while len(book_descendants) > 0:
 753             child_book = book_descendants.pop(0)
 754             child_book.tags = list(child_book.tags) + [book_tag]
 755             child_book.save()
 756             for fragment in child_book.fragments.all():
 757                 fragment.tags = set(list(fragment.tags) + [book_tag])
 758             book_descendants += list(child_book.children.all())
 759
 760         book.save()
 761
 762         # refresh cache
 763         book.reset_tag_counter()
 764         book.reset_theme_counter()
 765
 766         return book
 767
 768     def reset_tag_counter(self):
 769         if self.id is None:
 770             return
 771
 772         cache_key = "Book.tag_counter/%d" % self.id
 773         cache.delete(cache_key)
 774         if self.parent:
 775             self.parent.reset_tag_counter()
 776
 777     @property
 778     def tag_counter(self):
 779         if self.id:
 780             cache_key = "Book.tag_counter/%d" % self.id
 781             tags = cache.get(cache_key)
 782         else:
 783             tags = None
 784
 785         if tags is None:
 786             tags = {}
 787             for child in self.children.all().order_by():
 788                 for tag_pk, value in child.tag_counter.iteritems():
 789                     tags[tag_pk] = tags.get(tag_pk, 0) + value
 790             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
 791                 tags[tag.pk] = 1
 792
 793             if self.id:
 794                 cache.set(cache_key, tags, CACHE_FOREVER)
 795         return tags
 796
 797     def reset_theme_counter(self):
 798         if self.id is None:
 799             return
 800
 801         cache_key = "Book.theme_counter/%d" % self.id
 802         cache.delete(cache_key)
 803         if self.parent:
 804             self.parent.reset_theme_counter()
 805
 806     @property
 807     def theme_counter(self):
 808         if self.id:
 809             cache_key = "Book.theme_counter/%d" % self.id
 810             tags = cache.get(cache_key)
 811         else:
 812             tags = None
 813
 814         if tags is None:
 815             tags = {}
 816             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
 817                 for tag in fragment.tags.filter(category='theme').order_by():
 818                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 819
 820             if self.id:
 821                 cache.set(cache_key, tags, CACHE_FOREVER)
 822         return tags
 823
 824     def pretty_title(self, html_links=False):
 825         book = self
 826         names = list(book.tags.filter(category='author'))
 827
 828         books = []
 829         while book:
 830             books.append(book)
 831             book = book.parent
 832         names.extend(reversed(books))
 833
 834         if html_links:
 835             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
 836         else:
 837             names = [tag.name for tag in names]
 838
 839         return ', '.join(names)
 840
 841     @classmethod
 842     def tagged_top_level(cls, tags):
 843         """ Returns top-level books tagged with `tags'.
 844
 845         It only returns those books which don't have ancestors which are
 846         also tagged with those tags.
 847
 848         """
 849         # get relevant books and their tags
 850         objects = cls.tagged.with_all(tags)
 851         # eliminate descendants
 852         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
 853         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
 854         if descendants_keys:
 855             objects = objects.exclude(pk__in=descendants_keys)
 856
 857         return objects
 858
 859
 860 class Fragment(models.Model):
 861     text = models.TextField()
 862     short_text = models.TextField(editable=False)
 863     anchor = models.CharField(max_length=120)
 864     book = models.ForeignKey(Book, related_name='fragments')
 865
 866     objects = models.Manager()
 867     tagged = managers.ModelTaggedItemManager(Tag)
 868     tags = managers.TagDescriptor(Tag)
 869
 870     class Meta:
 871         ordering = ('book', 'anchor',)
 872         verbose_name = _('fragment')
 873         verbose_name_plural = _('fragments')
 874
 875     def get_absolute_url(self):
 876         return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 877
 878     def reset_short_html(self):
 879         if self.id is None:
 880             return
 881
 882         cache_key = "Fragment.short_html/%d/%s"
 883         for lang, langname in settings.LANGUAGES:
 884             cache.delete(cache_key % (self.id, lang))
 885
 886     def short_html(self):
 887         if self.id:
 888             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
 889             short_html = cache.get(cache_key)
 890         else:
 891             short_html = None
 892
 893         if short_html is not None:
 894             return mark_safe(short_html)
 895         else:
 896             short_html = unicode(render_to_string('catalogue/fragment_short.html',
 897                 {'fragment': self}))
 898             if self.id:
 899                 cache.set(cache_key, short_html, CACHE_FOREVER)
 900             return mark_safe(short_html)
 901
 902
 903 class FileRecord(models.Model):
 904     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
 905     type = models.CharField(_('type'), max_length=20, db_index=True)
 906     sha1 = models.CharField(_('sha-1 hash'), max_length=40)
 907     time = models.DateTimeField(_('time'), auto_now_add=True)
 908
 909     class Meta:
 910         ordering = ('-time','-slug', '-type')
 911         verbose_name = _('file record')
 912         verbose_name_plural = _('file records')
 913
 914     def __unicode__(self):
 915         return "%s %s.%s" % (self.sha1,  self.slug, self.type)
 916
 917 ###########
 918 #
 919 # SIGNALS
 920 #
 921 ###########
 922
 923
 924 def _tags_updated_handler(sender, affected_tags, **kwargs):
 925     # reset tag global counter
 926     # we want Tag.changed_at updated for API to know the tag was touched
 927     Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None, changed_at=datetime.now())
 928
 929     # if book tags changed, reset book tag counter
 930     if isinstance(sender, Book) and \
 931                 Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags)).\
 932                     exclude(category__in=('book', 'theme', 'set')).count():
 933         sender.reset_tag_counter()
 934     # if fragment theme changed, reset book theme counter
 935     elif isinstance(sender, Fragment) and \
 936                 Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags)).\
 937                     filter(category='theme').count():
 938         sender.book.reset_theme_counter()
 939 tags_updated.connect(_tags_updated_handler)
 940
 941
 942 def _pre_delete_handler(sender, instance, **kwargs):
 943     """ refresh Book on BookMedia delete """
 944     if sender == BookMedia:
 945         instance.book.save()
 946 pre_delete.connect(_pre_delete_handler)
 947
 948 def _post_save_handler(sender, instance, **kwargs):
 949     """ refresh all the short_html stuff on BookMedia update """
 950     if sender == BookMedia:
 951         instance.book.save()
 952 post_save.connect(_post_save_handler)