apps/catalogue/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from datetime import datetime
   6
   7 from django.db import models
   8 from django.db.models import permalink, Q
   9 import django.dispatch
  10 from django.core.cache import cache
  11 from django.utils.translation import ugettext_lazy as _
  12 from django.contrib.auth.models import User
  13 from django.template.loader import render_to_string
  14 from django.utils.datastructures import SortedDict
  15 from django.utils.safestring import mark_safe
  16 from django.utils.translation import get_language
  17 from django.core.urlresolvers import reverse
  18 from django.db.models.signals import post_save, m2m_changed, pre_delete
  19
  20 from django.conf import settings
  21
  22 from newtagging.models import TagBase, tags_updated
  23 from newtagging import managers
  24 from catalogue.fields import JSONField, OverwritingFileField
  25 from catalogue.utils import create_zip
  26
  27
# Closed set of tag categories as (stored value, translatable label) pairs;
# used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media formats as (stored value, translatable label) pairs;
# used as the ``choices`` of ``BookMedia.type``.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Not really "forever": Django wants a finite cache timeout, so cached
# entries are kept for 28 days (28 * 24 * 3600 seconds).
CACHE_FOREVER = 2419200  # 28 days
  47
  48
  49 class TagSubcategoryManager(models.Manager):
  50     def __init__(self, subcategory):
  51         super(TagSubcategoryManager, self).__init__()
  52         self.subcategory = subcategory
  53
  54     def get_query_set(self):
  55         return super(TagSubcategoryManager, self).get_query_set().filter(category=self.subcategory)
  56
  57
  58 class Tag(TagBase):
  59     name = models.CharField(_('name'), max_length=50, db_index=True)
  60     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  61     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
  62     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
  63         db_index=True, choices=TAG_CATEGORIES)
  64     description = models.TextField(_('description'), blank=True)
  65     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  66
  67     user = models.ForeignKey(User, blank=True, null=True)
  68     book_count = models.IntegerField(_('book count'), blank=True, null=True)
  69     gazeta_link = models.CharField(blank=True, max_length=240)
  70     wiki_link = models.CharField(blank=True, max_length=240)
  71
  72     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
  73     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
  74
  75     class UrlDeprecationWarning(DeprecationWarning):
  76         pass
  77
  78     categories_rev = {
  79         'autor': 'author',
  80         'epoka': 'epoch',
  81         'rodzaj': 'kind',
  82         'gatunek': 'genre',
  83         'motyw': 'theme',
  84         'polka': 'set',
  85     }
  86     categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
  87
  88     class Meta:
  89         ordering = ('sort_key',)
  90         verbose_name = _('tag')
  91         verbose_name_plural = _('tags')
  92         unique_together = (("slug", "category"),)
  93
  94     def __unicode__(self):
  95         return self.name
  96
  97     def __repr__(self):
  98         return "Tag(slug=%r)" % self.slug
  99
 100     @permalink
 101     def get_absolute_url(self):
 102         return ('catalogue.views.tagged_object_list', [self.url_chunk])
 103
 104     def has_description(self):
 105         return len(self.description) > 0
 106     has_description.short_description = _('description')
 107     has_description.boolean = True
 108
 109     def get_count(self):
 110         """ returns global book count for book tags, fragment count for themes """
 111
 112         if self.book_count is None:
 113             if self.category == 'book':
 114                 # never used
 115                 objects = Book.objects.none()
 116             elif self.category == 'theme':
 117                 objects = Fragment.tagged.with_all((self,))
 118             else:
 119                 objects = Book.tagged.with_all((self,)).order_by()
 120                 if self.category != 'set':
 121                     # eliminate descendants
 122                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
 123                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 124                     if descendants_keys:
 125                         objects = objects.exclude(pk__in=descendants_keys)
 126             self.book_count = objects.count()
 127             self.save()
 128         return self.book_count
 129
 130     @staticmethod
 131     def get_tag_list(tags):
 132         if isinstance(tags, basestring):
 133             real_tags = []
 134             ambiguous_slugs = []
 135             category = None
 136             deprecated = False
 137             tags_splitted = tags.split('/')
 138             for name in tags_splitted:
 139                 if category:
 140                     real_tags.append(Tag.objects.get(slug=name, category=category))
 141                     category = None
 142                 elif name in Tag.categories_rev:
 143                     category = Tag.categories_rev[name]
 144                 else:
 145                     try:
 146                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
 147                         deprecated = True
 148                     except Tag.MultipleObjectsReturned, e:
 149                         ambiguous_slugs.append(name)
 150
 151             if category:
 152                 # something strange left off
 153                 raise Tag.DoesNotExist()
 154             if ambiguous_slugs:
 155                 # some tags should be qualified
 156                 e = Tag.MultipleObjectsReturned()
 157                 e.tags = real_tags
 158                 e.ambiguous_slugs = ambiguous_slugs
 159                 raise e
 160             if deprecated:
 161                 e = Tag.UrlDeprecationWarning()
 162                 e.tags = real_tags
 163                 raise e
 164             return real_tags
 165         else:
 166             return TagBase.get_tag_list(tags)
 167
 168     @property
 169     def url_chunk(self):
 170         return '/'.join((Tag.categories_dict[self.category], self.slug))
 171
 172
 173 # TODO: why is this hard-coded ?
 174 def book_upload_path(ext=None, maxlen=100):
 175     def get_dynamic_path(media, filename, ext=ext):
 176         from slughifi import slughifi
 177
 178         # how to put related book's slug here?
 179         if not ext:
 180             if media.type == 'daisy':
 181                 ext = 'daisy.zip'
 182             else:
 183                 ext = media.type
 184         if not media.name:
 185             name = slughifi(filename.split(".")[0])
 186         else:
 187             name = slughifi(media.name)
 188         return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
 189     return get_dynamic_path
 190
 191
 192 class BookMedia(models.Model):
 193     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
 194     name        = models.CharField(_('name'), max_length="100")
 195     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
 196     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
 197     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
 198     book = models.ForeignKey('Book', related_name='media')
 199     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 200
 201     def __unicode__(self):
 202         return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
 203
 204     class Meta:
 205         ordering            = ('type', 'name')
 206         verbose_name        = _('book media')
 207         verbose_name_plural = _('book media')
 208
 209     def save(self, *args, **kwargs):
 210         from slughifi import slughifi
 211         from catalogue.utils import ExistingFile, remove_zip
 212
 213         try:
 214             old = BookMedia.objects.get(pk=self.pk)
 215         except BookMedia.DoesNotExist, e:
 216             pass
 217         else:
 218             # if name changed, change the file name, too
 219             if slughifi(self.name) != slughifi(old.name):
 220                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
 221
 222         super(BookMedia, self).save(*args, **kwargs)
 223
 224         # remove the zip package for book with modified media
 225         remove_zip(self.book.slug)
 226
 227         extra_info = self.get_extra_info_value()
 228         extra_info.update(self.read_meta())
 229         self.set_extra_info_value(extra_info)
 230         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
 231         return super(BookMedia, self).save(*args, **kwargs)
 232
 233     def read_meta(self):
 234         """
 235             Reads some metadata from the audiobook.
 236         """
 237         import mutagen
 238         from mutagen import id3
 239
 240         artist_name = director_name = project = funded_by = ''
 241         if self.type == 'mp3':
 242             try:
 243                 audio = id3.ID3(self.file.path)
 244                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
 245                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
 246                 project = ", ".join([t.data for t in audio.getall('PRIV')
 247                         if t.owner=='wolnelektury.pl?project'])
 248                 funded_by = ", ".join([t.data for t in audio.getall('PRIV')
 249                         if t.owner=='wolnelektury.pl?funded_by'])
 250             except:
 251                 pass
 252         elif self.type == 'ogg':
 253             try:
 254                 audio = mutagen.File(self.file.path)
 255                 artist_name = ', '.join(audio.get('artist', []))
 256                 director_name = ', '.join(audio.get('conductor', []))
 257                 project = ", ".join(audio.get('project', []))
 258                 funded_by = ", ".join(audio.get('funded_by', []))
 259             except:
 260                 pass
 261         else:
 262             return {}
 263         return {'artist_name': artist_name, 'director_name': director_name,
 264                 'project': project, 'funded_by': funded_by}
 265
 266     @staticmethod
 267     def read_source_sha1(filepath, filetype):
 268         """
 269             Reads source file SHA1 from audiobok metadata.
 270         """
 271         import mutagen
 272         from mutagen import id3
 273
 274         if filetype == 'mp3':
 275             try:
 276                 audio = id3.ID3(filepath)
 277                 return [t.data for t in audio.getall('PRIV')
 278                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
 279             except:
 280                 return None
 281         elif filetype == 'ogg':
 282             try:
 283                 audio = mutagen.File(filepath)
 284                 return audio.get('flac_sha1', [None])[0]
 285             except:
 286                 return None
 287         else:
 288             return None
 289
 290
 291 class Book(models.Model):
 292     title         = models.CharField(_('title'), max_length=120)
 293     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
 294     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 295     description   = models.TextField(_('description'), blank=True)
 296     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
 297     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
 298     parent_number = models.IntegerField(_('parent number'), default=0)
 299     extra_info    = JSONField(_('extra information'), default='{}')
 300     gazeta_link   = models.CharField(blank=True, max_length=240)
 301     wiki_link     = models.CharField(blank=True, max_length=240)
 302     # files generated during publication
 303
 304     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
 305
 306     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
 307     objects  = models.Manager()
 308     tagged   = managers.ModelTaggedItemManager(Tag)
 309     tags     = managers.TagDescriptor(Tag)
 310
 311     html_built = django.dispatch.Signal()
 312     published = django.dispatch.Signal()
 313
 314     class AlreadyExists(Exception):
 315         pass
 316
 317     class Meta:
 318         ordering = ('sort_key',)
 319         verbose_name = _('book')
 320         verbose_name_plural = _('books')
 321
    def __unicode__(self):
        """Return the book's title."""
        return self.title
 324
 325     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
 326         from sortify import sortify
 327
 328         self.sort_key = sortify(self.title)
 329
 330         ret = super(Book, self).save(force_insert, force_update)
 331
 332         if reset_short_html:
 333             self.reset_short_html()
 334
 335         return ret
 336
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolving to the book detail page."""
        return ('catalogue.views.book_detail', [self.slug])
 340
    @property
    def name(self):
        """Alias for ``title``; pretty_title() reads ``.name`` on books and tags alike."""
        return self.title
 344
 345     def book_tag_slug(self):
 346         return ('l-' + self.slug)[:120]
 347
 348     def book_tag(self):
 349         slug = self.book_tag_slug()
 350         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
 351         if created:
 352             book_tag.name = self.title[:50]
 353             book_tag.sort_key = self.title.lower()
 354             book_tag.save()
 355         return book_tag
 356
 357     def has_media(self, type):
 358         if type in Book.file_types:
 359             return bool(getattr(self, "%s_file" % type))
 360         else:
 361             return self.media.filter(type=type).exists()
 362
 363     def get_media(self, type):
 364         if self.has_media(type):
 365             if type in Book.file_types:
 366                 return getattr(self, "%s_file" % type)
 367             else:
 368                 return self.media.filter(type=type)
 369         else:
 370             return None
 371
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
 380
 381     def reset_short_html(self):
 382         if self.id is None:
 383             return
 384
 385         cache_key = "Book.short_html/%d/%s"
 386         for lang, langname in settings.LANGUAGES:
 387             cache.delete(cache_key % (self.id, lang))
 388         # Fragment.short_html relies on book's tags, so reset it here too
 389         for fragm in self.fragments.all():
 390             fragm.reset_short_html()
 391
 392     def short_html(self):
 393         if self.id:
 394             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
 395             short_html = cache.get(cache_key)
 396         else:
 397             short_html = None
 398
 399         if short_html is not None:
 400             return mark_safe(short_html)
 401         else:
 402             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 403             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 404
 405             formats = []
 406             # files generated during publication
 407             if self.has_media("html"):
 408                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 409             if self.has_media("pdf"):
 410                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
 411             if self.has_media("mobi"):
 412                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
 413             if self.root_ancestor.has_media("epub"):
 414                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
 415             if self.has_media("txt"):
 416                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
 417             # other files
 418             for m in self.media.order_by('type'):
 419                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
 420
 421             formats = [mark_safe(format) for format in formats]
 422
 423             short_html = unicode(render_to_string('catalogue/book_short.html',
 424                 {'book': self, 'tags': tags, 'formats': formats}))
 425
 426             if self.id:
 427                 cache.set(cache_key, short_html, CACHE_FOREVER)
 428             return mark_safe(short_html)
 429
 430     @property
 431     def root_ancestor(self):
 432         """ returns the oldest ancestor """
 433
 434         if not hasattr(self, '_root_ancestor'):
 435             book = self
 436             while book.parent:
 437                 book = book.parent
 438             self._root_ancestor = book
 439         return self._root_ancestor
 440
 441
 442     def has_description(self):
 443         return len(self.description) > 0
 444     has_description.short_description = _('description')
 445     has_description.boolean = True
 446
 447     # ugly ugly ugly
    def has_odt_file(self):
        """Tell whether the book has any ODT media attached."""
        return bool(self.has_media("odt"))
    # Column label and boolean rendering hints (Django admin conventions).
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True
 452
    def has_mp3_file(self):
        """Tell whether the book has any MP3 media attached."""
        return bool(self.has_media("mp3"))
    # Column label and boolean rendering hints (Django admin conventions).
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
 457
    def has_ogg_file(self):
        """Tell whether the book has any OGG media attached."""
        return bool(self.has_media("ogg"))
    # Column label and boolean rendering hints (Django admin conventions).
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
 462
    def has_daisy_file(self):
        """Tell whether the book has any DAISY media attached."""
        return bool(self.has_media("daisy"))
    # Column label and boolean rendering hints (Django admin conventions).
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
 467
 468     def build_pdf(self):
 469         """ (Re)builds the pdf file.
 470
 471         """
 472         from tempfile import NamedTemporaryFile
 473         from os import unlink
 474         from django.core.files import File
 475         from librarian import pdf
 476         from catalogue.utils import ORMDocProvider, remove_zip
 477
 478         try:
 479             pdf_file = NamedTemporaryFile(delete=False)
 480             pdf.transform(ORMDocProvider(self),
 481                       file_path=str(self.xml_file.path),
 482                       output_file=pdf_file,
 483                       )
 484
 485             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
 486         finally:
 487             unlink(pdf_file.name)
 488
 489         # remove zip with all pdf files
 490         remove_zip(settings.ALL_PDF_ZIP)
 491
 492     def build_mobi(self):
 493         """ (Re)builds the MOBI file.
 494
 495         """
 496         from tempfile import NamedTemporaryFile
 497         from os import unlink
 498         from django.core.files import File
 499         from librarian import mobi
 500         from catalogue.utils import ORMDocProvider, remove_zip
 501
 502         try:
 503             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
 504             mobi.transform(ORMDocProvider(self), verbose=1,
 505                       file_path=str(self.xml_file.path),
 506                       output_file=mobi_file.name,
 507                       )
 508
 509             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
 510         finally:
 511             unlink(mobi_file.name)
 512
 513         # remove zip with all mobi files
 514         remove_zip(settings.ALL_MOBI_ZIP)
 515
 516     def build_epub(self, remove_descendants=True):
 517         """ (Re)builds the epub file.
 518             If book has a parent, does nothing.
 519             Unless remove_descendants is False, descendants' epubs are removed.
 520         """
 521         from StringIO import StringIO
 522         from hashlib import sha1
 523         from django.core.files.base import ContentFile
 524         from librarian import epub, NoDublinCore
 525         from catalogue.utils import ORMDocProvider, remove_zip
 526
 527         if self.parent:
 528             # don't need an epub
 529             return
 530
 531         epub_file = StringIO()
 532         try:
 533             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
 534             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
 535             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
 536         except NoDublinCore:
 537             pass
 538
 539         book_descendants = list(self.children.all())
 540         while len(book_descendants) > 0:
 541             child_book = book_descendants.pop(0)
 542             if remove_descendants and child_book.has_epub_file():
 543                 child_book.epub_file.delete()
 544             # save anyway, to refresh short_html
 545             child_book.save()
 546             book_descendants += list(child_book.children.all())
 547
 548         # remove zip package with all epub files
 549         remove_zip(settings.ALL_EPUB_ZIP)
 550
 551     def build_txt(self):
 552         from StringIO import StringIO
 553         from django.core.files.base import ContentFile
 554         from librarian import text
 555
 556         out = StringIO()
 557         text.transform(open(self.xml_file.path), out)
 558         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
 559
 560
    def build_html(self):
        """(Re)build the HTML file and recreate the book's themed fragments.

        Returns True when ``html.transform`` reported success; in that case
        the HTML is saved, all fragments are rebuilt, the book is saved and
        the ``html_built`` signal is sent. Returns False otherwise, leaving
        the book untouched.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString
        from django.core.files import File
        from slughifi import slughifi
        from librarian import html

        # Tags every new fragment inherits from the book.
        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        html_file = NamedTemporaryFile()
        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
            self.html_file.save('%s.html' % self.slug, File(html_file))

            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = self.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Delete old fragments and create them from scratch
            self.fragments.all().delete()
            # Extract fragments
            # (only the closed ones are used; open_fragments is ignored)
            closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # no usable ``themes`` on this fragment: skip it
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    # fragments without any theme are not stored
                    continue

                text = fragment.to_string()
                short_text = ''
                # Only texts longer than 240 get a short version (first 160).
                if (len(MarkupString(text)) > 240):
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                    text=text, short_text=short_text)

                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
            self.save()
            self.html_built.send(sender=self)
            return True
        return False
 618
 619     @staticmethod
 620     def zip_format(format_):
 621         def pretty_file_name(book):
 622             return "%s/%s.%s" % (
 623                 b.get_extra_info_value()['author'],
 624                 b.slug,
 625                 format_)
 626
 627         field_name = "%s_file" % format_
 628         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
 629         paths = [(pretty_file_name(b), getattr(b, field_name).path)
 630                     for b in books]
 631         result = create_zip.delay(paths,
 632                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
 633         return result.wait()
 634
 635     def zip_audiobooks(self):
 636         bm = BookMedia.objects.filter(book=self, type='mp3')
 637         paths = map(lambda bm: (None, bm.file.path), bm)
 638         result = create_zip.delay(paths, self.slug)
 639         return result.wait()
 640
 641     @classmethod
 642     def from_xml_file(cls, xml_file, **kwargs):
 643         from django.core.files import File
 644         from librarian import dcparser
 645
 646         # use librarian to parse meta-data
 647         book_info = dcparser.parse(xml_file)
 648
 649         if not isinstance(xml_file, File):
 650             xml_file = File(open(xml_file))
 651
 652         try:
 653             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
 654         finally:
 655             xml_file.close()
 656
 657     @classmethod
 658     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
 659             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
 660         import re
 661         from slughifi import slughifi
 662         from sortify import sortify
 663
 664         # check for parts before we do anything
 665         children = []
 666         if hasattr(book_info, 'parts'):
 667             for part_url in book_info.parts:
 668                 base, slug = part_url.rsplit('/', 1)
 669                 try:
 670                     children.append(Book.objects.get(slug=slug))
 671                 except Book.DoesNotExist, e:
 672                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 673
 674
 675         # Read book metadata
 676         book_base, book_slug = book_info.url.rsplit('/', 1)
 677         if re.search(r'[^a-zA-Z0-9-]', book_slug):
 678             raise ValueError('Invalid characters in slug')
 679         book, created = Book.objects.get_or_create(slug=book_slug)
 680
 681         if created:
 682             book_shelves = []
 683         else:
 684             if not overwrite:
 685                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 686             # Save shelves for this book
 687             book_shelves = list(book.tags.filter(category='set'))
 688
 689         book.title = book_info.title
 690         book.set_extra_info_value(book_info.to_dict())
 691         book.save()
 692
 693         meta_tags = []
 694         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
 695         for field_name, category in categories:
 696             try:
 697                 tag_names = getattr(book_info, field_name)
 698             except:
 699                 tag_names = [getattr(book_info, category)]
 700             for tag_name in tag_names:
 701                 tag_sort_key = tag_name
 702                 if category == 'author':
 703                     tag_sort_key = tag_name.last_name
 704                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 705                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
 706                 if created:
 707                     tag.name = tag_name
 708                     tag.sort_key = sortify(tag_sort_key.lower())
 709                     tag.save()
 710                 meta_tags.append(tag)
 711
 712         book.tags = set(meta_tags + book_shelves)
 713
 714         book_tag = book.book_tag()
 715
 716         for n, child_book in enumerate(children):
 717             child_book.parent = book
 718             child_book.parent_number = n
 719             child_book.save()
 720
 721         # Save XML and HTML files
 722         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
 723
 724         # delete old fragments when overwriting
 725         book.fragments.all().delete()
 726
 727         if book.build_html():
 728             if not settings.NO_BUILD_TXT and build_txt:
 729                 book.build_txt()
 730
 731         if not settings.NO_BUILD_EPUB and build_epub:
 732             book.root_ancestor.build_epub()
 733
 734         if not settings.NO_BUILD_PDF and build_pdf:
 735             book.root_ancestor.build_pdf()
 736
 737         if not settings.NO_BUILD_MOBI and build_mobi:
 738             book.build_mobi()
 739
 740         book_descendants = list(book.children.all())
 741         # add l-tag to descendants and their fragments
 742         # delete unnecessary EPUB files
 743         while len(book_descendants) > 0:
 744             child_book = book_descendants.pop(0)
 745             child_book.tags = list(child_book.tags) + [book_tag]
 746             child_book.save()
 747             for fragment in child_book.fragments.all():
 748                 fragment.tags = set(list(fragment.tags) + [book_tag])
 749             book_descendants += list(child_book.children.all())
 750
 751         book.save()
 752
 753         # refresh cache
 754         book.reset_tag_counter()
 755         book.reset_theme_counter()
 756
 757         cls.published.send(sender=book)
 758         return book
 759
 760     def reset_tag_counter(self):
 761         if self.id is None:
 762             return
 763
 764         cache_key = "Book.tag_counter/%d" % self.id
 765         cache.delete(cache_key)
 766         if self.parent:
 767             self.parent.reset_tag_counter()
 768
 769     @property
 770     def tag_counter(self):
 771         if self.id:
 772             cache_key = "Book.tag_counter/%d" % self.id
 773             tags = cache.get(cache_key)
 774         else:
 775             tags = None
 776
 777         if tags is None:
 778             tags = {}
 779             for child in self.children.all().order_by():
 780                 for tag_pk, value in child.tag_counter.iteritems():
 781                     tags[tag_pk] = tags.get(tag_pk, 0) + value
 782             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
 783                 tags[tag.pk] = 1
 784
 785             if self.id:
 786                 cache.set(cache_key, tags, CACHE_FOREVER)
 787         return tags
 788
 789     def reset_theme_counter(self):
 790         if self.id is None:
 791             return
 792
 793         cache_key = "Book.theme_counter/%d" % self.id
 794         cache.delete(cache_key)
 795         if self.parent:
 796             self.parent.reset_theme_counter()
 797
 798     @property
 799     def theme_counter(self):
 800         if self.id:
 801             cache_key = "Book.theme_counter/%d" % self.id
 802             tags = cache.get(cache_key)
 803         else:
 804             tags = None
 805
 806         if tags is None:
 807             tags = {}
 808             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
 809                 for tag in fragment.tags.filter(category='theme').order_by():
 810                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 811
 812             if self.id:
 813                 cache.set(cache_key, tags, CACHE_FOREVER)
 814         return tags
 815
 816     def pretty_title(self, html_links=False):
 817         book = self
 818         names = list(book.tags.filter(category='author'))
 819
 820         books = []
 821         while book:
 822             books.append(book)
 823             book = book.parent
 824         names.extend(reversed(books))
 825
 826         if html_links:
 827             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
 828         else:
 829             names = [tag.name for tag in names]
 830
 831         return ', '.join(names)
 832
 833     @classmethod
 834     def tagged_top_level(cls, tags):
 835         """ Returns top-level books tagged with `tags'.
 836
 837         It only returns those books which don't have ancestors which are
 838         also tagged with those tags.
 839
 840         """
 841         # get relevant books and their tags
 842         objects = cls.tagged.with_all(tags)
 843         # eliminate descendants
 844         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
 845         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
 846         if descendants_keys:
 847             objects = objects.exclude(pk__in=descendants_keys)
 848
 849         return objects
 850
 851     @classmethod
 852     def book_list(cls, filter=None):
 853         """Generates a hierarchical listing of all books.
 854
 855         Books are optionally filtered with a test function.
 856
 857         """
 858
 859         books_by_parent = {}
 860         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
 861         if filter:
 862             books = books.filter(filter).distinct()
 863             book_ids = set((book.pk for book in books))
 864             for book in books:
 865                 parent = book.parent_id
 866                 if parent not in book_ids:
 867                     parent = None
 868                 books_by_parent.setdefault(parent, []).append(book)
 869         else:
 870             for book in books:
 871                 books_by_parent.setdefault(book.parent_id, []).append(book)
 872
 873         orphans = []
 874         books_by_author = SortedDict()
 875         for tag in Tag.objects.filter(category='author'):
 876             books_by_author[tag] = []
 877
 878         for book in books_by_parent.get(None,()):
 879             authors = list(book.tags.filter(category='author'))
 880             if authors:
 881                 for author in authors:
 882                     books_by_author[author].append(book)
 883             else:
 884                 orphans.append(book)
 885
 886         return books_by_author, orphans, books_by_parent
 887
 888
 889 def _has_factory(ftype):
 890     has = lambda self: bool(getattr(self, "%s_file" % ftype))
 891     has.short_description = t.upper()
 892     has.boolean = True
 893     has.__name__ = "has_%s_file" % ftype
 894     return has
 895
 896
 897 # add the file fields
 898 for t in Book.file_types:
 899     field_name = "%s_file" % t
 900     models.FileField(_("%s file" % t.upper()),
 901             upload_to=book_upload_path(t),
 902             blank=True).contribute_to_class(Book, field_name)
 903
 904     setattr(Book, "has_%s_file" % t, _has_factory(t))
 905
 906
 907 class Fragment(models.Model):
 908     text = models.TextField()
 909     short_text = models.TextField(editable=False)
 910     anchor = models.CharField(max_length=120)
 911     book = models.ForeignKey(Book, related_name='fragments')
 912
 913     objects = models.Manager()
 914     tagged = managers.ModelTaggedItemManager(Tag)
 915     tags = managers.TagDescriptor(Tag)
 916
 917     class Meta:
 918         ordering = ('book', 'anchor',)
 919         verbose_name = _('fragment')
 920         verbose_name_plural = _('fragments')
 921
 922     def get_absolute_url(self):
 923         return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 924
 925     def reset_short_html(self):
 926         if self.id is None:
 927             return
 928
 929         cache_key = "Fragment.short_html/%d/%s"
 930         for lang, langname in settings.LANGUAGES:
 931             cache.delete(cache_key % (self.id, lang))
 932
 933     def short_html(self):
 934         if self.id:
 935             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
 936             short_html = cache.get(cache_key)
 937         else:
 938             short_html = None
 939
 940         if short_html is not None:
 941             return mark_safe(short_html)
 942         else:
 943             short_html = unicode(render_to_string('catalogue/fragment_short.html',
 944                 {'fragment': self}))
 945             if self.id:
 946                 cache.set(cache_key, short_html, CACHE_FOREVER)
 947             return mark_safe(short_html)
 948
 949
 950 class FileRecord(models.Model):
 951     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
 952     type = models.CharField(_('type'), max_length=20, db_index=True)
 953     sha1 = models.CharField(_('sha-1 hash'), max_length=40)
 954     time = models.DateTimeField(_('time'), auto_now_add=True)
 955
 956     class Meta:
 957         ordering = ('-time','-slug', '-type')
 958         verbose_name = _('file record')
 959         verbose_name_plural = _('file records')
 960
 961     def __unicode__(self):
 962         return "%s %s.%s" % (self.sha1,  self.slug, self.type)
 963
 964
 965 class Collection(models.Model):
 966     """A collection of books, which might be defined before publishing them."""
 967     title = models.CharField(_('title'), max_length=120, db_index=True)
 968     slug = models.SlugField(_('slug'), max_length=120, primary_key=True)
 969     description = models.TextField(_('description'), null=True, blank=True)
 970
 971     models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 972     book_slugs = models.TextField(_('book slugs'))
 973
 974     class Meta:
 975         ordering = ('title',)
 976         verbose_name = _('collection')
 977         verbose_name_plural = _('collections')
 978
 979     def __unicode__(self):
 980         return self.title
 981
 982
 983 ###########
 984 #
 985 # SIGNALS
 986 #
 987 ###########
 988
 989
 990 def _tags_updated_handler(sender, affected_tags, **kwargs):
 991     # reset tag global counter
 992     # we want Tag.changed_at updated for API to know the tag was touched
 993     Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None, changed_at=datetime.now())
 994
 995     # if book tags changed, reset book tag counter
 996     if isinstance(sender, Book) and \
 997                 Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags)).\
 998                     exclude(category__in=('book', 'theme', 'set')).count():
 999         sender.reset_tag_counter()
1000     # if fragment theme changed, reset book theme counter
1001     elif isinstance(sender, Fragment) and \
1002                 Tag.objects.filter(pk__in=(tag.pk for tag in affected_tags)).\
1003                     filter(category='theme').count():
1004         sender.book.reset_theme_counter()
1005 tags_updated.connect(_tags_updated_handler)
1006
1007
def _pre_delete_handler(sender, instance, **kwargs):
    """ Re-save the owning Book when a BookMedia is about to be deleted.

    The handler is connected without a sender filter, so deletions of any
    other model are ignored here.
    """
    if sender != BookMedia:
        return
    owner = instance.book
    owner.save()
pre_delete.connect(_pre_delete_handler)
1013
def _post_save_handler(sender, instance, **kwargs):
    """ Re-save the owning Book whenever a BookMedia has been saved.

    Presumably Book.save() refreshes derived data such as the cached
    short_html -- Book.save() is not visible here, confirm there. Saves of
    any other model are ignored.
    """
    if sender != BookMedia:
        return
    owner = instance.book
    owner.save()
post_save.connect(_post_save_handler)