Merge branch 'master' of https://github.com/fnp/wolnelektury
[wolnelektury.git] / apps / catalogue / models.py
index d902996..29106b1 100644 (file)
@@ -2,35 +2,33 @@
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from datetime import datetime
+from collections import namedtuple
 
 from django.db import models
 
 from django.db import models
-from django.db.models import permalink, Q
+from django.db.models import permalink
 import django.dispatch
 import django.dispatch
-from django.core.cache import cache
+from django.core.cache import get_cache
 from django.utils.translation import ugettext_lazy as _
 from django.contrib.auth.models import User
 from django.utils.translation import ugettext_lazy as _
 from django.contrib.auth.models import User
-from django.core.files import File
 from django.template.loader import render_to_string
 from django.template.loader import render_to_string
+from django.utils.datastructures import SortedDict
 from django.utils.safestring import mark_safe
 from django.utils.translation import get_language
 from django.core.urlresolvers import reverse
 from django.utils.safestring import mark_safe
 from django.utils.translation import get_language
 from django.core.urlresolvers import reverse
-from django.db.models.signals import post_save, m2m_changed, pre_delete
+from django.db.models.signals import post_save, pre_delete, post_delete
+import jsonfield
 
 from django.conf import settings
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
 
 from django.conf import settings
 
 from newtagging.models import TagBase, tags_updated
 from newtagging import managers
-from catalogue.fields import JSONField, OverwritingFileField
-from catalogue.utils import ExistingFile, ORMDocProvider, create_zip, remove_zip
+from catalogue.fields import OverwritingFileField
+from catalogue.utils import create_zip, split_tags, truncate_html_words
+from catalogue import tasks
+import re
 
 
-from librarian import dcparser, html, epub, NoDublinCore
-import mutagen
-from mutagen import id3
-from slughifi import slughifi
-from sortify import sortify
-from os import unlink
 
 
+# Those are hard-coded here so that makemessages sees them.
 TAG_CATEGORIES = (
     ('author', _('author')),
     ('epoch', _('epoch')),
 TAG_CATEGORIES = (
     ('author', _('author')),
     ('epoch', _('epoch')),
@@ -41,15 +39,8 @@ TAG_CATEGORIES = (
     ('book', _('book')),
 )
 
     ('book', _('book')),
 )
 
-MEDIA_FORMATS = (
-    ('odt', _('ODT file')),
-    ('mp3', _('MP3 file')),
-    ('ogg', _('OGG file')),
-    ('daisy', _('DAISY file')), 
-)
 
 
-# not quite, but Django wants you to set a timeout
-CACHE_FOREVER = 2419200  # 28 days
+permanent_cache = get_cache('permanent')
 
 
 class TagSubcategoryManager(models.Manager):
 
 
 class TagSubcategoryManager(models.Manager):
@@ -62,13 +53,16 @@ class TagSubcategoryManager(models.Manager):
 
 
 class Tag(TagBase):
 
 
 class Tag(TagBase):
+    """A tag attachable to books and fragments (and possibly anything).
+    
+    Used to represent searchable metadata (authors, epochs, genres, kinds),
+    fragment themes (motifs) and some book hierarchy related kludges."""
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
         db_index=True, choices=TAG_CATEGORIES)
     description = models.TextField(_('description'), blank=True)
     name = models.CharField(_('name'), max_length=50, db_index=True)
     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
         db_index=True, choices=TAG_CATEGORIES)
     description = models.TextField(_('description'), blank=True)
-    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
 
     user = models.ForeignKey(User, blank=True, null=True)
     book_count = models.IntegerField(_('book count'), blank=True, null=True)
 
     user = models.ForeignKey(User, blank=True, null=True)
     book_count = models.IntegerField(_('book count'), blank=True, null=True)
@@ -113,25 +107,22 @@ class Tag(TagBase):
     has_description.boolean = True
 
     def get_count(self):
     has_description.boolean = True
 
     def get_count(self):
-        """ returns global book count for book tags, fragment count for themes """
-
-        if self.book_count is None:
-            if self.category == 'book':
-                # never used
-                objects = Book.objects.none()
-            elif self.category == 'theme':
-                objects = Fragment.tagged.with_all((self,))
-            else:
-                objects = Book.tagged.with_all((self,)).order_by()
-                if self.category != 'set':
-                    # eliminate descendants
-                    l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
-                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
-                    if descendants_keys:
-                        objects = objects.exclude(pk__in=descendants_keys)
-            self.book_count = objects.count()
-            self.save()
-        return self.book_count
+        """Returns global book count for book tags, fragment count for themes."""
+
+        if self.category == 'book':
+            # never used
+            objects = Book.objects.none()
+        elif self.category == 'theme':
+            objects = Fragment.tagged.with_all((self,))
+        else:
+            objects = Book.tagged.with_all((self,)).order_by()
+            if self.category != 'set':
+                # eliminate descendants
+                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects.iterator()])
+                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
+                if descendants_keys:
+                    objects = objects.exclude(pk__in=descendants_keys)
+        return objects.count()
 
     @staticmethod
     def get_tag_list(tags):
 
     @staticmethod
     def get_tag_list(tags):
@@ -175,30 +166,71 @@ class Tag(TagBase):
     def url_chunk(self):
         return '/'.join((Tag.categories_dict[self.category], self.slug))
 
     def url_chunk(self):
         return '/'.join((Tag.categories_dict[self.category], self.slug))
 
+    @staticmethod
+    def tags_from_info(info):
+        from slughifi import slughifi
+        from sortify import sortify
+        meta_tags = []
+        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
+        for field_name, category in categories:
+            try:
+                tag_names = getattr(info, field_name)
+            except:
+                try:
+                    tag_names = [getattr(info, category)]
+                except:
+                    # For instance, Pictures do not have 'genre' field.
+                    continue
+            for tag_name in tag_names:
+                tag_sort_key = tag_name
+                if category == 'author':
+                    tag_sort_key = tag_name.last_name
+                    tag_name = tag_name.readable()
+                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
+                if created:
+                    tag.name = tag_name
+                    tag.sort_key = sortify(tag_sort_key.lower())
+                    tag.save()
+                meta_tags.append(tag)
+        return meta_tags
+
+
+
+def get_dynamic_path(media, filename, ext=None, maxlen=100):
+    from slughifi import slughifi
+
+    # how to put related book's slug here?
+    if not ext:
+        # BookMedia case
+        ext = media.formats[media.type].ext
+    if media is None or not media.name:
+        name = slughifi(filename.split(".")[0])
+    else:
+        name = slughifi(media.name)
+    return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
+
 
 # TODO: why is this hard-coded ?
 def book_upload_path(ext=None, maxlen=100):
 
 # TODO: why is this hard-coded ?
 def book_upload_path(ext=None, maxlen=100):
-    def get_dynamic_path(media, filename, ext=ext):
-        # how to put related book's slug here?
-        if not ext:
-            if media.type == 'daisy':
-                ext = 'daisy.zip'
-            else:
-                ext = media.type
-        if not media.name:
-            name = slughifi(filename.split(".")[0])
-        else:
-            name = slughifi(media.name)
-        return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
-    return get_dynamic_path
+    return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
 
 
 class BookMedia(models.Model):
 
 
 class BookMedia(models.Model):
-    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
+    """Represents media attached to a book."""
+    FileFormat = namedtuple("FileFormat", "name ext")
+    formats = SortedDict([
+        ('mp3', FileFormat(name='MP3', ext='mp3')),
+        ('ogg', FileFormat(name='Ogg Vorbis', ext='ogg')),
+        ('daisy', FileFormat(name='DAISY', ext='daisy.zip')),
+    ])
+    format_choices = [(k, _('%s file') % t.name)
+            for k, t in formats.items()]
+
+    type        = models.CharField(_('type'), choices=format_choices, max_length="100")
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
     name        = models.CharField(_('name'), max_length="100")
     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
-    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
+    extra_info  = jsonfield.JSONField(_('extra information'), default='{}', editable=False)
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
     book = models.ForeignKey('Book', related_name='media')
     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
 
@@ -211,10 +243,13 @@ class BookMedia(models.Model):
         verbose_name_plural = _('book media')
 
     def save(self, *args, **kwargs):
         verbose_name_plural = _('book media')
 
     def save(self, *args, **kwargs):
+        from slughifi import slughifi
+        from catalogue.utils import ExistingFile, remove_zip
+
         try:
             old = BookMedia.objects.get(pk=self.pk)
         try:
             old = BookMedia.objects.get(pk=self.pk)
-        except BookMedia.DoesNotExist, e:
-            pass
+        except BookMedia.DoesNotExist:
+            old = None
         else:
             # if name changed, change the file name, too
             if slughifi(self.name) != slughifi(old.name):
         else:
             # if name changed, change the file name, too
             if slughifi(self.name) != slughifi(old.name):
@@ -223,11 +258,13 @@ class BookMedia(models.Model):
         super(BookMedia, self).save(*args, **kwargs)
 
         # remove the zip package for book with modified media
         super(BookMedia, self).save(*args, **kwargs)
 
         # remove the zip package for book with modified media
-        remove_zip(self.book.slug)
+        if old:
+            remove_zip("%s_%s" % (old.book.slug, old.type))
+        remove_zip("%s_%s" % (self.book.slug, self.type))
 
 
-        extra_info = self.get_extra_info_value()
+        extra_info = self.extra_info
         extra_info.update(self.read_meta())
         extra_info.update(self.read_meta())
-        self.set_extra_info_value(extra_info)
+        self.extra_info = extra_info
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
         return super(BookMedia, self).save(*args, **kwargs)
 
@@ -235,6 +272,8 @@ class BookMedia(models.Model):
         """
             Reads some metadata from the audiobook.
         """
         """
             Reads some metadata from the audiobook.
         """
+        import mutagen
+        from mutagen import id3
 
         artist_name = director_name = project = funded_by = ''
         if self.type == 'mp3':
 
         artist_name = director_name = project = funded_by = ''
         if self.type == 'mp3':
@@ -267,6 +306,8 @@ class BookMedia(models.Model):
         """
             Reads source file SHA1 from audiobok metadata.
         """
         """
             Reads source file SHA1 from audiobok metadata.
         """
+        import mutagen
+        from mutagen import id3
 
         if filetype == 'mp3':
             try:
 
         if filetype == 'mp3':
             try:
@@ -286,26 +327,38 @@ class BookMedia(models.Model):
 
 
 class Book(models.Model):
 
 
 class Book(models.Model):
+    """Represents a book imported from WL-XML."""
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
     title         = models.CharField(_('title'), max_length=120)
     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
-    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
+    slug = models.SlugField(_('slug'), max_length=120, db_index=True,
+            unique=True)
+    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
+    language = models.CharField(_('language code'), max_length=3, db_index=True,
+                    default=settings.CATALOGUE_DEFAULT_LANGUAGE)
     description   = models.TextField(_('description'), blank=True)
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
     description   = models.TextField(_('description'), blank=True)
     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
-    extra_info    = JSONField(_('extra information'), default='{}')
+    extra_info    = jsonfield.JSONField(_('extra information'), default='{}')
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
     gazeta_link   = models.CharField(blank=True, max_length=240)
     wiki_link     = models.CharField(blank=True, max_length=240)
     # files generated during publication
 
-    file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
-    
+    cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
+                null=True, blank=True)
+    ebook_formats = ['pdf', 'epub', 'mobi', 'txt']
+    formats = ebook_formats + ['html', 'xml']
+
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
+
+    _related_info = jsonfield.JSONField(blank=True, null=True, editable=False)
+
     objects  = models.Manager()
     tagged   = managers.ModelTaggedItemManager(Tag)
     tags     = managers.TagDescriptor(Tag)
 
     html_built = django.dispatch.Signal()
     objects  = models.Manager()
     tagged   = managers.ModelTaggedItemManager(Tag)
     tags     = managers.TagDescriptor(Tag)
 
     html_built = django.dispatch.Signal()
+    published = django.dispatch.Signal()
 
     class AlreadyExists(Exception):
         pass
 
     class AlreadyExists(Exception):
         pass
@@ -319,6 +372,8 @@ class Book(models.Model):
         return self.title
 
     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
         return self.title
 
     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
+        from sortify import sortify
+
         self.sort_key = sortify(self.title)
 
         ret = super(Book, self).save(force_insert, force_update)
         self.sort_key = sortify(self.title)
 
         ret = super(Book, self).save(force_insert, force_update)
@@ -348,18 +403,18 @@ class Book(models.Model):
             book_tag.save()
         return book_tag
 
             book_tag.save()
         return book_tag
 
-    def has_media(self, type):
-        if type in Book.file_types:
-            return bool(getattr(self, "%s_file" % type))
+    def has_media(self, type_):
+        if type_ in Book.formats:
+            return bool(getattr(self, "%s_file" % type_))
         else:
         else:
-            return self.media.filter(type=type).exists()
+            return self.media.filter(type=type_).exists()
 
 
-    def get_media(self, type):
-        if self.has_media(type):
-            if type in Book.file_types:
-                return getattr(self, "%s_file" % type)
+    def get_media(self, type_):
+        if self.has_media(type_):
+            if type_ in Book.formats:
+                return getattr(self, "%s_file" % type_)
             else:                                             
             else:                                             
-                return self.media.filter(type=type)
+                return self.media.filter(type=type_)
         else:
             return None
 
         else:
             return None
 
@@ -376,74 +431,17 @@ class Book(models.Model):
         if self.id is None:
             return
 
         if self.id is None:
             return
 
-        cache_key = "Book.short_html/%d/%s"
-        for lang, langname in settings.LANGUAGES:
-            cache.delete(cache_key % (self.id, lang))
+        type(self).objects.filter(pk=self.pk).update(_related_info=None)
         # Fragment.short_html relies on book's tags, so reset it here too
         # Fragment.short_html relies on book's tags, so reset it here too
-        for fragm in self.fragments.all():
+        for fragm in self.fragments.all().iterator():
             fragm.reset_short_html()
 
             fragm.reset_short_html()
 
-    def short_html(self):
-        if self.id:
-            cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
-            short_html = cache.get(cache_key)
-        else:
-            short_html = None
-
-        if short_html is not None:
-            return mark_safe(short_html)
-        else:
-            tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
-            tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
-
-            formats = []
-            # files generated during publication
-            if self.has_media("html"):
-                formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
-            if self.has_media("pdf"):
-                formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
-            if self.has_media("mobi"):
-                formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
-            if self.root_ancestor.has_media("epub"):
-                formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
-            if self.has_media("txt"):
-                formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
-            # other files
-            for m in self.media.order_by('type'):
-                formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
-
-            formats = [mark_safe(format) for format in formats]
-
-            short_html = unicode(render_to_string('catalogue/book_short.html',
-                {'book': self, 'tags': tags, 'formats': formats}))
-
-            if self.id:
-                cache.set(cache_key, short_html, CACHE_FOREVER)
-            return mark_safe(short_html)
-
-    @property
-    def root_ancestor(self):
-        """ returns the oldest ancestor """
-
-        if not hasattr(self, '_root_ancestor'):
-            book = self
-            while book.parent:
-                book = book.parent
-            self._root_ancestor = book
-        return self._root_ancestor
-
-
     def has_description(self):
         return len(self.description) > 0
     has_description.short_description = _('description')
     has_description.boolean = True
 
     # ugly ugly ugly
     def has_description(self):
         return len(self.description) > 0
     has_description.short_description = _('description')
     has_description.boolean = True
 
     # ugly ugly ugly
-    def has_odt_file(self):
-        return bool(self.has_media("odt"))
-    has_odt_file.short_description = 'ODT'
-    has_odt_file.boolean = True
-
     def has_mp3_file(self):
         return bool(self.has_media("mp3"))
     has_mp3_file.short_description = 'MP3'
     def has_mp3_file(self):
         return bool(self.has_media("mp3"))
     has_mp3_file.short_description = 'MP3'
@@ -459,104 +457,41 @@ class Book(models.Model):
     has_daisy_file.short_description = 'DAISY'
     has_daisy_file.boolean = True
 
     has_daisy_file.short_description = 'DAISY'
     has_daisy_file.boolean = True
 
-    def build_pdf(self):
-        """ (Re)builds the pdf file.
-
-        """
-        from librarian import pdf
-        from tempfile import NamedTemporaryFile
-        import os
-
-        try:
-            pdf_file = NamedTemporaryFile(delete=False)
-            pdf.transform(ORMDocProvider(self),
-                      file_path=str(self.xml_file.path),
-                      output_file=pdf_file,
-                      )
-
-            self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
-        finally:
-            unlink(pdf_file.name)
-
-        # remove zip with all pdf files
-        remove_zip(settings.ALL_PDF_ZIP)
-
-    def build_mobi(self):
-        """ (Re)builds the MOBI file.
-
-        """
-        from librarian import mobi
-        from tempfile import NamedTemporaryFile
-        import os
-
-        try:
-            mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
-            mobi.transform(ORMDocProvider(self), verbose=1,
-                      file_path=str(self.xml_file.path),
-                      output_file=mobi_file.name,
-                      )
-
-            self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
-        finally:
-            unlink(mobi_file.name)
-
-        # remove zip with all mobi files
-        remove_zip(settings.ALL_MOBI_ZIP)
+    def wldocument(self, parse_dublincore=True):
+        from catalogue.import_utils import ORMDocProvider
+        from librarian.parser import WLDocument
 
 
-    def build_epub(self, remove_descendants=True):
-        """ (Re)builds the epub file.
-            If book has a parent, does nothing.
-            Unless remove_descendants is False, descendants' epubs are removed.
-        """
-        from StringIO import StringIO
-        from hashlib import sha1
-        from django.core.files.base import ContentFile
-
-        if self.parent:
-            # don't need an epub
-            return
-
-        epub_file = StringIO()
-        try:
-            epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
-            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
-            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
-        except NoDublinCore:
-            pass
+        return WLDocument.from_file(self.xml_file.path,
+                provider=ORMDocProvider(self),
+                parse_dublincore=parse_dublincore)
 
 
-        book_descendants = list(self.children.all())
-        while len(book_descendants) > 0:
-            child_book = book_descendants.pop(0)
-            if remove_descendants and child_book.has_epub_file():
-                child_book.epub_file.delete()
-            # save anyway, to refresh short_html
-            child_book.save()
-            book_descendants += list(child_book.children.all())
-
-        # remove zip package with all epub files
-        remove_zip(settings.ALL_EPUB_ZIP)
-
-    def build_txt(self):
+    def build_cover(self, book_info=None):
+        """(Re)builds the cover image."""
         from StringIO import StringIO
         from django.core.files.base import ContentFile
         from StringIO import StringIO
         from django.core.files.base import ContentFile
-        from librarian import text
+        from librarian.cover import WLCover
 
 
-        out = StringIO()
-        text.transform(open(self.xml_file.path), out)
-        self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
+        if book_info is None:
+            book_info = self.wldocument().book_info
 
 
+        cover = WLCover(book_info).image()
+        imgstr = StringIO()
+        cover.save(imgstr, 'png')
+        self.cover.save(None, ContentFile(imgstr.getvalue()))
 
     def build_html(self):
 
     def build_html(self):
-        from tempfile import NamedTemporaryFile
-        from markupstring import MarkupString
+        from django.core.files.base import ContentFile
+        from slughifi import slughifi
+        from librarian import html
 
         meta_tags = list(self.tags.filter(
             category__in=('author', 'epoch', 'genre', 'kind')))
         book_tag = self.book_tag()
 
 
         meta_tags = list(self.tags.filter(
             category__in=('author', 'epoch', 'genre', 'kind')))
         book_tag = self.book_tag()
 
-        html_file = NamedTemporaryFile()
-        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
-            self.html_file.save('%s.html' % self.slug, File(html_file))
+        html_output = self.wldocument(parse_dublincore=False).as_html()
+        if html_output:
+            self.html_file.save('%s.html' % self.slug,
+                    ContentFile(html_output.get_string()))
 
             # get ancestor l-tags for adding to new fragments
             ancestor_tags = []
 
             # get ancestor l-tags for adding to new fragments
             ancestor_tags = []
@@ -588,9 +523,9 @@ class Book(models.Model):
                     continue
 
                 text = fragment.to_string()
                     continue
 
                 text = fragment.to_string()
-                short_text = ''
-                if (len(MarkupString(text)) > 240):
-                    short_text = unicode(MarkupString(text)[:160])
+                short_text = truncate_html_words(text, 15)
+                if text == short_text:
+                    short_text = ''
                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                     text=text, short_text=short_text)
 
                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                     text=text, short_text=short_text)
 
@@ -601,30 +536,60 @@ class Book(models.Model):
             return True
         return False
 
             return True
         return False
 
+    # Thin wrappers for builder tasks
+    def build_pdf(self, *args, **kwargs):
+        """(Re)builds PDF."""
+        return tasks.build_pdf.delay(self.pk, *args, **kwargs)
+    def build_epub(self, *args, **kwargs):
+        """(Re)builds EPUB."""
+        return tasks.build_epub.delay(self.pk, *args, **kwargs)
+    def build_mobi(self, *args, **kwargs):
+        """(Re)builds MOBI."""
+        return tasks.build_mobi.delay(self.pk, *args, **kwargs)
+    def build_txt(self, *args, **kwargs):
+        """(Re)builds TXT."""
+        return tasks.build_txt.delay(self.pk, *args, **kwargs)
+
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
     @staticmethod
     def zip_format(format_):
         def pretty_file_name(book):
             return "%s/%s.%s" % (
-                b.get_extra_info_value()['author'],
+                b.extra_info['author'],
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
                 b.slug,
                 format_)
 
         field_name = "%s_file" % format_
         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
         paths = [(pretty_file_name(b), getattr(b, field_name).path)
-                    for b in books]
-        result = create_zip.delay(paths,
+                    for b in books.iterator()]
+        return create_zip(paths,
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
-        return result.wait()
 
 
-    def zip_audiobooks(self):
-        bm = BookMedia.objects.filter(book=self, type='mp3')
+    def zip_audiobooks(self, format_):
+        bm = BookMedia.objects.filter(book=self, type=format_)
         paths = map(lambda bm: (None, bm.file.path), bm)
         paths = map(lambda bm: (None, bm.file.path), bm)
-        result = create_zip.delay(paths, self.slug)
-        return result.wait()
+        return create_zip(paths, "%s_%s" % (self.slug, format_))
+
+    def search_index(self, book_info=None, reuse_index=False, index_tags=True):
+        import search
+        if reuse_index:
+            idx = search.ReusableIndex()
+        else:
+            idx = search.Index()
+            
+        idx.open()
+        try:
+            idx.index_book(self, book_info)
+            if index_tags:
+                idx.index_tags()
+        finally:
+            idx.close()
 
     @classmethod
     def from_xml_file(cls, xml_file, **kwargs):
 
     @classmethod
     def from_xml_file(cls, xml_file, **kwargs):
+        from django.core.files import File
+        from librarian import dcparser
+
         # use librarian to parse meta-data
         book_info = dcparser.parse(xml_file)
 
         # use librarian to parse meta-data
         book_info = dcparser.parse(xml_file)
 
@@ -638,23 +603,23 @@ class Book(models.Model):
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
-            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
-        import re
+            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
+            search_index=True, search_index_tags=True, search_index_reuse=False):
 
         # check for parts before we do anything
         children = []
         if hasattr(book_info, 'parts'):
             for part_url in book_info.parts:
 
         # check for parts before we do anything
         children = []
         if hasattr(book_info, 'parts'):
             for part_url in book_info.parts:
-                base, slug = part_url.rsplit('/', 1)
                 try:
                 try:
-                    children.append(Book.objects.get(slug=slug))
-                except Book.DoesNotExist, e:
-                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
+                    children.append(Book.objects.get(slug=part_url.slug))
+                except Book.DoesNotExist:
+                    raise Book.DoesNotExist(_('Book "%s" does not exist.') %
+                            part_url.slug)
 
 
         # Read book metadata
 
 
         # Read book metadata
-        book_base, book_slug = book_info.url.rsplit('/', 1)
-        if re.search(r'[^a-zA-Z0-9-]', book_slug):
+        book_slug = book_info.url.slug
+        if re.search(r'[^a-z0-9-]', book_slug):
             raise ValueError('Invalid characters in slug')
         book, created = Book.objects.get_or_create(slug=book_slug)
 
             raise ValueError('Invalid characters in slug')
         book, created = Book.objects.get_or_create(slug=book_slug)
 
@@ -662,32 +627,21 @@ class Book(models.Model):
             book_shelves = []
         else:
             if not overwrite:
             book_shelves = []
         else:
             if not overwrite:
-                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
+                raise Book.AlreadyExists(_('Book %s already exists') % (
+                        book_slug))
             # Save shelves for this book
             book_shelves = list(book.tags.filter(category='set'))
 
             # Save shelves for this book
             book_shelves = list(book.tags.filter(category='set'))
 
+        book.language = book_info.language
         book.title = book_info.title
         book.title = book_info.title
-        book.set_extra_info_value(book_info.to_dict())
+        if book_info.variant_of:
+            book.common_slug = book_info.variant_of.slug
+        else:
+            book.common_slug = book.slug
+        book.extra_info = book_info.to_dict()
         book.save()
 
         book.save()
 
-        meta_tags = []
-        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
-        for field_name, category in categories:
-            try:
-                tag_names = getattr(book_info, field_name)
-            except:
-                tag_names = [getattr(book_info, category)]
-            for tag_name in tag_names:
-                tag_sort_key = tag_name
-                if category == 'author':
-                    tag_sort_key = tag_name.last_name
-                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
-                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
-                if created:
-                    tag.name = tag_name
-                    tag.sort_key = sortify(tag_sort_key.lower())
-                    tag.save()
-                meta_tags.append(tag)
+        meta_tags = Tag.tags_from_info(book_info)
 
         book.tags = set(meta_tags + book_shelves)
 
 
         book.tags = set(meta_tags + book_shelves)
 
@@ -708,40 +662,88 @@ class Book(models.Model):
             if not settings.NO_BUILD_TXT and build_txt:
                 book.build_txt()
 
             if not settings.NO_BUILD_TXT and build_txt:
                 book.build_txt()
 
+        book.build_cover(book_info)
+
         if not settings.NO_BUILD_EPUB and build_epub:
         if not settings.NO_BUILD_EPUB and build_epub:
-            book.root_ancestor.build_epub()
+            book.build_epub()
 
         if not settings.NO_BUILD_PDF and build_pdf:
 
         if not settings.NO_BUILD_PDF and build_pdf:
-            book.root_ancestor.build_pdf()
+            book.build_pdf()
 
         if not settings.NO_BUILD_MOBI and build_mobi:
             book.build_mobi()
 
 
         if not settings.NO_BUILD_MOBI and build_mobi:
             book.build_mobi()
 
+        if not settings.NO_SEARCH_INDEX and search_index:
+            book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
+            #index_book.delay(book.id, book_info)
+
         book_descendants = list(book.children.all())
         book_descendants = list(book.children.all())
+        descendants_tags = set()
         # add l-tag to descendants and their fragments
         # add l-tag to descendants and their fragments
-        # delete unnecessary EPUB files
         while len(book_descendants) > 0:
             child_book = book_descendants.pop(0)
         while len(book_descendants) > 0:
             child_book = book_descendants.pop(0)
+            descendants_tags.update(child_book.tags)
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
             child_book.tags = list(child_book.tags) + [book_tag]
             child_book.save()
-            for fragment in child_book.fragments.all():
+            for fragment in child_book.fragments.all().iterator():
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
                 fragment.tags = set(list(fragment.tags) + [book_tag])
             book_descendants += list(child_book.children.all())
 
+        for tag in descendants_tags:
+            tasks.touch_tag(tag)
+
         book.save()
 
         # refresh cache
         book.reset_tag_counter()
         book.reset_theme_counter()
 
         book.save()
 
         # refresh cache
         book.reset_tag_counter()
         book.reset_theme_counter()
 
+        cls.published.send(sender=book)
         return book
 
         return book
 
+    def related_info(self):
+        """Keeps info about related objects (tags, media) in cache field."""
+        if self._related_info is not None:
+            return self._related_info
+        else:
+            rel = {'tags': {}, 'media': {}}
+
+            tags = self.tags.filter(category__in=(
+                    'author', 'kind', 'genre', 'epoch'))
+            tags = split_tags(tags)
+            for category in tags:
+                rel['tags'][category] = [
+                        (t.name, t.slug) for t in tags[category]]
+
+            for media_format in BookMedia.formats:
+                rel['media'][media_format] = self.has_media(media_format)
+
+            book = self
+            parents = []
+            while book.parent:
+                parents.append((book.parent.title, book.parent.slug))
+                book = book.parent
+            parents = parents[::-1]
+            if parents:
+                rel['parents'] = parents
+
+            if self.pk:
+                type(self).objects.filter(pk=self.pk).update(_related_info=rel)
+            return rel
+
+    def related_themes(self):
+        theme_counter = self.theme_counter
+        book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys()))
+        for tag in book_themes:
+            tag.count = theme_counter[tag.pk]
+        return book_themes
+
     def reset_tag_counter(self):
         if self.id is None:
             return
 
         cache_key = "Book.tag_counter/%d" % self.id
     def reset_tag_counter(self):
         if self.id is None:
             return
 
         cache_key = "Book.tag_counter/%d" % self.id
-        cache.delete(cache_key)
+        permanent_cache.delete(cache_key)
         if self.parent:
             self.parent.reset_tag_counter()
 
         if self.parent:
             self.parent.reset_tag_counter()
 
@@ -749,20 +751,20 @@ class Book(models.Model):
     def tag_counter(self):
         if self.id:
             cache_key = "Book.tag_counter/%d" % self.id
     def tag_counter(self):
         if self.id:
             cache_key = "Book.tag_counter/%d" % self.id
-            tags = cache.get(cache_key)
+            tags = permanent_cache.get(cache_key)
         else:
             tags = None
 
         if tags is None:
             tags = {}
         else:
             tags = None
 
         if tags is None:
             tags = {}
-            for child in self.children.all().order_by():
+            for child in self.children.all().order_by().iterator():
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
                 for tag_pk, value in child.tag_counter.iteritems():
                     tags[tag_pk] = tags.get(tag_pk, 0) + value
-            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
+            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator():
                 tags[tag.pk] = 1
 
             if self.id:
                 tags[tag.pk] = 1
 
             if self.id:
-                cache.set(cache_key, tags, CACHE_FOREVER)
+                permanent_cache.set(cache_key, tags)
         return tags
 
     def reset_theme_counter(self):
         return tags
 
     def reset_theme_counter(self):
@@ -770,7 +772,7 @@ class Book(models.Model):
             return
 
         cache_key = "Book.theme_counter/%d" % self.id
             return
 
         cache_key = "Book.theme_counter/%d" % self.id
-        cache.delete(cache_key)
+        permanent_cache.delete(cache_key)
         if self.parent:
             self.parent.reset_theme_counter()
 
         if self.parent:
             self.parent.reset_theme_counter()
 
@@ -778,18 +780,18 @@ class Book(models.Model):
     def theme_counter(self):
         if self.id:
             cache_key = "Book.theme_counter/%d" % self.id
     def theme_counter(self):
         if self.id:
             cache_key = "Book.theme_counter/%d" % self.id
-            tags = cache.get(cache_key)
+            tags = permanent_cache.get(cache_key)
         else:
             tags = None
 
         if tags is None:
             tags = {}
         else:
             tags = None
 
         if tags is None:
             tags = {}
-            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
-                for tag in fragment.tags.filter(category='theme').order_by():
+            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator():
+                for tag in fragment.tags.filter(category='theme').order_by().iterator():
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
 
             if self.id:
-                cache.set(cache_key, tags, CACHE_FOREVER)
+                permanent_cache.set(cache_key, tags)
         return tags
 
     def pretty_title(self, html_links=False):
         return tags
 
     def pretty_title(self, html_links=False):
@@ -811,7 +813,7 @@ class Book(models.Model):
 
     @classmethod
     def tagged_top_level(cls, tags):
 
     @classmethod
     def tagged_top_level(cls, tags):
-        """ Returns top-level books tagged with `tags'.
+        """ Returns top-level books tagged with `tags`.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
 
         It only returns those books which don't have ancestors which are
         also tagged with those tags.
@@ -820,24 +822,88 @@ class Book(models.Model):
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
         # get relevant books and their tags
         objects = cls.tagged.with_all(tags)
         # eliminate descendants
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
-        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book',
+            slug__in=[book.book_tag_slug() for book in objects.iterator()])
+        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()]
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
         return objects
 
         if descendants_keys:
             objects = objects.exclude(pk__in=descendants_keys)
 
         return objects
 
+    @classmethod
+    def book_list(cls, filter=None):
+        """Generates a hierarchical listing of all books.
+
+        Books are optionally filtered with a test function.
+
+        """
+
+        books_by_parent = {}
+        books = cls.objects.all().order_by('parent_number', 'sort_key').only(
+                'title', 'parent', 'slug')
+        if filter:
+            books = books.filter(filter).distinct()
+            
+            book_ids = set(b['pk'] for b in books.values("pk").iterator())
+            for book in books.iterator():
+                parent = book.parent_id
+                if parent not in book_ids:
+                    parent = None
+                books_by_parent.setdefault(parent, []).append(book)
+        else:
+            for book in books.iterator():
+                books_by_parent.setdefault(book.parent_id, []).append(book)
+
+        orphans = []
+        books_by_author = SortedDict()
+        for tag in Tag.objects.filter(category='author').iterator():
+            books_by_author[tag] = []
+
+        for book in books_by_parent.get(None,()):
+            authors = list(book.tags.filter(category='author'))
+            if authors:
+                for author in authors:
+                    books_by_author[author].append(book)
+            else:
+                orphans.append(book)
+
+        return books_by_author, orphans, books_by_parent
+
+    _audiences_pl = {
+        "SP1": (1, u"szkoła podstawowa"),
+        "SP2": (1, u"szkoła podstawowa"),
+        "P": (1, u"szkoła podstawowa"),
+        "G": (2, u"gimnazjum"),
+        "L": (3, u"liceum"),
+        "LP": (3, u"liceum"),
+    }
+    def audiences_pl(self):
+        audiences = self.extra_info.get('audiences', [])
+        audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
+        return [a[1] for a in audiences]
+
+    def choose_fragment(self):
+        tag = self.book_tag()
+        fragments = Fragment.tagged.with_any([tag])
+        if fragments.exists():
+            return fragments.order_by('?')[0]
+        elif self.parent:
+            return self.parent.choose_fragment()
+        else:
+            return None
+
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
 
 def _has_factory(ftype):
     has = lambda self: bool(getattr(self, "%s_file" % ftype))
-    has.short_description = t.upper()
+    has.short_description = ftype.upper()
+    has.__doc__ = None
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
 
     
 # add the file fields
     has.boolean = True
     has.__name__ = "has_%s_file" % ftype
     return has
 
     
 # add the file fields
-for t in Book.file_types:
+for t in Book.formats:
     field_name = "%s_file" % t
     models.FileField(_("%s file" % t.upper()),
             upload_to=book_upload_path(t),
     field_name = "%s_file" % t
     models.FileField(_("%s file" % t.upper()),
             upload_to=book_upload_path(t),
@@ -847,6 +913,7 @@ for t in Book.file_types:
 
 
 class Fragment(models.Model):
 
 
 class Fragment(models.Model):
+    """Represents a themed fragment of a book."""
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
     text = models.TextField()
     short_text = models.TextField(editable=False)
     anchor = models.CharField(max_length=120)
@@ -862,7 +929,7 @@ class Fragment(models.Model):
         verbose_name_plural = _('fragments')
 
     def get_absolute_url(self):
         verbose_name_plural = _('fragments')
 
     def get_absolute_url(self):
-        return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
+        return '%s#m%s' % (reverse('book_text', args=[self.book.slug]), self.anchor)
 
     def reset_short_html(self):
         if self.id is None:
 
     def reset_short_html(self):
         if self.id is None:
@@ -870,12 +937,16 @@ class Fragment(models.Model):
 
         cache_key = "Fragment.short_html/%d/%s"
         for lang, langname in settings.LANGUAGES:
 
         cache_key = "Fragment.short_html/%d/%s"
         for lang, langname in settings.LANGUAGES:
-            cache.delete(cache_key % (self.id, lang))
+            permanent_cache.delete(cache_key % (self.id, lang))
+
+    def get_short_text(self):
+        """Returns short version of the fragment."""
+        return self.short_text if self.short_text else self.text
 
     def short_html(self):
         if self.id:
             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
 
     def short_html(self):
         if self.id:
             cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
-            short_html = cache.get(cache_key)
+            short_html = permanent_cache.get(cache_key)
         else:
             short_html = None
 
         else:
             short_html = None
 
@@ -885,23 +956,27 @@ class Fragment(models.Model):
             short_html = unicode(render_to_string('catalogue/fragment_short.html',
                 {'fragment': self}))
             if self.id:
             short_html = unicode(render_to_string('catalogue/fragment_short.html',
                 {'fragment': self}))
             if self.id:
-                cache.set(cache_key, short_html, CACHE_FOREVER)
+                permanent_cache.set(cache_key, short_html)
             return mark_safe(short_html)
 
 
             return mark_safe(short_html)
 
 
-class FileRecord(models.Model):
-    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
-    type = models.CharField(_('type'), max_length=20, db_index=True)
-    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
-    time = models.DateTimeField(_('time'), auto_now_add=True)
+class Collection(models.Model):
+    """A collection of books, which might be defined before publishing them."""
+    title = models.CharField(_('title'), max_length=120, db_index=True)
+    slug = models.SlugField(_('slug'), max_length=120, primary_key=True)
+    description = models.TextField(_('description'), null=True, blank=True)
+
+    models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
+    book_slugs = models.TextField(_('book slugs'))
 
     class Meta:
 
     class Meta:
-        ordering = ('-time','-slug', '-type')
-        verbose_name = _('file record')
-        verbose_name_plural = _('file records')
+        ordering = ('title',)
+        verbose_name = _('collection')
+        verbose_name_plural = _('collections')
 
     def __unicode__(self):
 
     def __unicode__(self):
-        return "%s %s.%s" % (self.sha1,  self.slug, self.type)
+        return self.title
+
 
 ###########
 #
 
 ###########
 #
@@ -913,7 +988,8 @@ class FileRecord(models.Model):
 def _tags_updated_handler(sender, affected_tags, **kwargs):
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
 def _tags_updated_handler(sender, affected_tags, **kwargs):
     # reset tag global counter
     # we want Tag.changed_at updated for API to know the tag was touched
-    Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None, changed_at=datetime.now())
+    for tag in affected_tags:
+        tasks.touch_tag(tag)
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \
 
     # if book tags changed, reset book tag counter
     if isinstance(sender, Book) and \
@@ -934,8 +1010,24 @@ def _pre_delete_handler(sender, instance, **kwargs):
         instance.book.save()
 pre_delete.connect(_pre_delete_handler)
 
         instance.book.save()
 pre_delete.connect(_pre_delete_handler)
 
+
 def _post_save_handler(sender, instance, **kwargs):
     """ refresh all the short_html stuff on BookMedia update """
     if sender == BookMedia:
         instance.book.save()
 post_save.connect(_post_save_handler)
 def _post_save_handler(sender, instance, **kwargs):
     """ refresh all the short_html stuff on BookMedia update """
     if sender == BookMedia:
         instance.book.save()
 post_save.connect(_post_save_handler)
+
+
+if not settings.NO_SEARCH_INDEX:
+    @django.dispatch.receiver(post_delete, sender=Book)
+    def _remove_book_from_index_handler(sender, instance, **kwargs):
+        """ remove the book from search index, when it is deleted."""
+        import search
+        search.JVM.attachCurrentThread()
+        idx = search.Index()
+        idx.open(timeout=10000)  # 10 seconds timeout.
+        try:
+            idx.remove_book(instance)
+            idx.index_tags()
+        finally:
+            idx.close()