log parallel job exceptions
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, ORMDocProvider, create_zip, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
34 import search
35
# Choices for Tag.category: (stored value, translatable label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Choices for BookMedia.type: (stored value, translatable label) pairs.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')), 
)

# Not really forever, but Django wants a timeout to be set,
# so a long one is used for cache entries that are invalidated explicitly.
CACHE_FOREVER = 2419200  # 28 days, in seconds
55
56
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain objects of one category."""

    def __init__(self, subcategory):
        """Store the `category` value that every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to `self.subcategory`."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
64
65
class Tag(TagBase):
    """A catalogue tag; its `category` is one of TAG_CATEGORIES."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label below repeats 'creation date' although the
    # field is refreshed on every save (auto_now) -- confirm it is intended.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a path contains unqualified slugs."""

    # URL path segment -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: category value -> URL path segment.
    categories_dict = dict(
        (category, segment) for segment, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        """Return the tag's name."""
        return self.name

    def __repr__(self):
        """Return a debugging representation showing the slug."""
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by `permalink`."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """Return the cached object count for this tag, computing it if needed.

        Counts fragments for 'theme' tags and books for the other
        categories ('book' tags always count zero).  Except for 'set' tags,
        books that also carry the book tag of another counted book are
        excluded.  The computed value is stored in `book_count` and saved.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants
                book_tag_slugs = [book.book_tag_slug() for book in objects]
                l_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)

        self.book_count = objects.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        A string is treated as a '/'-separated path in which a category
        segment (a key of `categories_rev`) qualifies the slug that follows
        it; a bare slug is looked up among all non-'book' tags.  Anything
        that is not a string is delegated to `TagBase.get_tag_list`.

        Raises:
            Tag.DoesNotExist: a slug matches no tag, or the path ends with
                a category segment that has no slug after it.
            Tag.MultipleObjectsReturned: some bare slugs match several
                tags; the exception carries `tags` and `ambiguous_slugs`.
            Tag.UrlDeprecationWarning: every tag was resolved but at least
                one slug was unqualified; the exception carries `tags`.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                    deprecated = True
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            error = Tag.MultipleObjectsReturned()
            error.tags = real_tags
            error.ambiguous_slugs = ambiguous_slugs
            raise error
        if deprecated:
            warning = Tag.UrlDeprecationWarning()
            warning.tags = real_tags
            raise warning
        return real_tags

    @property
    def url_chunk(self):
        """The '<category segment>/<slug>' path fragment for this tag."""
        segment = Tag.categories_dict[self.category]
        return '/'.join((segment, self.slug))
179
180
181 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable storing files as 'book/<ext>/<name>.<ext>'.

    When `ext` is not given, it is derived from the media object's type
    ('daisy' becomes 'daisy.zip').  The file name is the slughified media
    name or, if that is empty, the slughified part of the uploaded file name
    before its first dot.  The name is truncated based on `maxlen` and the
    length of the fixed parts of the path.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        raw_name = media.name if media.name else filename.split(".")[0]
        name = slughifi(raw_name)
        fixed_length = len('book/%s/.%s' % (ext, ext))
        return 'book/%s/%s.%s' % (ext, name[:maxlen - fixed_length - 4], ext)
    return get_dynamic_path
196
197
class BookMedia(models.Model):
    """An additional media file (one of MEDIA_FORMATS) attached to a book."""

    # max_length must be an integer: with a string, the length comparison
    # done by the max-length validator is never true under Python 2.
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        """Return '<name> (<file base name>)'."""
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, keeping file name, zip package and metadata in sync.

        If the object already exists and its slughified name changed, the
        file is re-saved so that its name follows the new media name.  After
        the first save the book's zip package is removed, metadata and the
        source SHA1 are read from the stored file, and the object is saved
        again with them.

        Returns whatever the parent `save` returns for the second save.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            old = None

        # if name changed, change the file name, too
        if old is not None and slughifi(self.name) != slughifi(old.name):
            self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """Read some metadata from the audiobook file.

        Returns a dict with 'artist_name', 'director_name', 'project' and
        'funded_by' for 'mp3' and 'ogg' media, and an empty dict for other
        types.  Reading is best effort: if the file cannot be parsed, the
        values not read so far are left as empty strings.
        """
        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # Best effort: unreadable tags leave the defaults in place,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # Best effort, as above.
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """Read the source file SHA1 from audiobook metadata.

        For 'mp3' the value comes from the PRIV frame owned by
        'wolnelektury.pl?flac_sha1'; for 'ogg' from the 'flac_sha1' entry.
        Returns None for other types, when the value is missing, or when
        the file cannot be read.
        """
        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                # Covers unreadable files as well as a missing frame
                # (IndexError on the empty list).
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
288
289
290 class Book(models.Model):
291     title         = models.CharField(_('title'), max_length=120)
292     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
293     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
294     description   = models.TextField(_('description'), blank=True)
295     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
296     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
297     parent_number = models.IntegerField(_('parent number'), default=0)
298     extra_info    = JSONField(_('extra information'), default='{}')
299     gazeta_link   = models.CharField(blank=True, max_length=240)
300     wiki_link     = models.CharField(blank=True, max_length=240)
301     # files generated during publication
302
303     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
304     
305     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
306     objects  = models.Manager()
307     tagged   = managers.ModelTaggedItemManager(Tag)
308     tags     = managers.TagDescriptor(Tag)
309
310     html_built = django.dispatch.Signal()
311
312     class AlreadyExists(Exception):
313         pass
314
    class Meta:
        # Default ordering uses the sort key computed from the title in save().
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
319
320     def __unicode__(self):
321         return self.title
322
323     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
324         self.sort_key = sortify(self.title)
325
326         ret = super(Book, self).save(force_insert, force_update)
327
328         if reset_short_html:
329             self.reset_short_html()
330
331         return ret
332
333     @permalink
334     def get_absolute_url(self):
335         return ('catalogue.views.book_detail', [self.slug])
336
337     @property
338     def name(self):
339         return self.title
340
341     def book_tag_slug(self):
342         return ('l-' + self.slug)[:120]
343
344     def book_tag(self):
345         slug = self.book_tag_slug()
346         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
347         if created:
348             book_tag.name = self.title[:50]
349             book_tag.sort_key = self.title.lower()
350             book_tag.save()
351         return book_tag
352
353     def has_media(self, type):
354         if type in Book.file_types:
355             return bool(getattr(self, "%s_file" % type))
356         else:
357             return self.media.filter(type=type).exists()
358
359     def get_media(self, type):
360         if self.has_media(type):
361             if type in Book.file_types:
362                 return getattr(self, "%s_file" % type)
363             else:                                             
364                 return self.media.filter(type=type)
365         else:
366             return None
367
368     def get_mp3(self):
369         return self.get_media("mp3")
370     def get_odt(self):
371         return self.get_media("odt")
372     def get_ogg(self):
373         return self.get_media("ogg")
374     def get_daisy(self):
375         return self.get_media("daisy")                       
376
377     def reset_short_html(self):
378         if self.id is None:
379             return
380
381         cache_key = "Book.short_html/%d/%s"
382         for lang, langname in settings.LANGUAGES:
383             cache.delete(cache_key % (self.id, lang))
384         # Fragment.short_html relies on book's tags, so reset it here too
385         for fragm in self.fragments.all():
386             fragm.reset_short_html()
387
388     def short_html(self):
389         if self.id:
390             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
391             short_html = cache.get(cache_key)
392         else:
393             short_html = None
394
395         if short_html is not None:
396             return mark_safe(short_html)
397         else:
398             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
399             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
400
401             formats = []
402             # files generated during publication
403             if self.has_media("html"):
404                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
405             if self.has_media("pdf"):
406                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
407             if self.has_media("mobi"):
408                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
409             if self.root_ancestor.has_media("epub"):
410                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
411             if self.has_media("txt"):
412                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
413             # other files
414             for m in self.media.order_by('type'):
415                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
416
417             formats = [mark_safe(format) for format in formats]
418
419             short_html = unicode(render_to_string('catalogue/book_short.html',
420                 {'book': self, 'tags': tags, 'formats': formats}))
421
422             if self.id:
423                 cache.set(cache_key, short_html, CACHE_FOREVER)
424             return mark_safe(short_html)
425
426     @property
427     def root_ancestor(self):
428         """ returns the oldest ancestor """
429
430         if not hasattr(self, '_root_ancestor'):
431             book = self
432             while book.parent:
433                 book = book.parent
434             self._root_ancestor = book
435         return self._root_ancestor
436
437
438     def has_description(self):
439         return len(self.description) > 0
440     has_description.short_description = _('description')
441     has_description.boolean = True
442
443     # ugly ugly ugly
444     def has_odt_file(self):
445         return bool(self.has_media("odt"))
446     has_odt_file.short_description = 'ODT'
447     has_odt_file.boolean = True
448
449     def has_mp3_file(self):
450         return bool(self.has_media("mp3"))
451     has_mp3_file.short_description = 'MP3'
452     has_mp3_file.boolean = True
453
454     def has_ogg_file(self):
455         return bool(self.has_media("ogg"))
456     has_ogg_file.short_description = 'OGG'
457     has_ogg_file.boolean = True
458
459     def has_daisy_file(self):
460         return bool(self.has_media("daisy"))
461     has_daisy_file.short_description = 'DAISY'
462     has_daisy_file.boolean = True
463
464     def build_pdf(self):
465         """ (Re)builds the pdf file.
466
467         """
468         from librarian import pdf
469         from tempfile import NamedTemporaryFile
470         import os
471
472         try:
473             pdf_file = NamedTemporaryFile(delete=False)
474             pdf.transform(ORMDocProvider(self),
475                       file_path=str(self.xml_file.path),
476                       output_file=pdf_file,
477                       )
478
479             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
480         finally:
481             unlink(pdf_file.name)
482
483         # remove zip with all pdf files
484         remove_zip(settings.ALL_PDF_ZIP)
485
486     def build_mobi(self):
487         """ (Re)builds the MOBI file.
488
489         """
490         from librarian import mobi
491         from tempfile import NamedTemporaryFile
492         import os
493
494         try:
495             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
496             mobi.transform(ORMDocProvider(self), verbose=1,
497                       file_path=str(self.xml_file.path),
498                       output_file=mobi_file.name,
499                       )
500
501             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
502         finally:
503             unlink(mobi_file.name)
504
505         # remove zip with all mobi files
506         remove_zip(settings.ALL_MOBI_ZIP)
507
508     def build_epub(self, remove_descendants=True):
509         """ (Re)builds the epub file.
510             If book has a parent, does nothing.
511             Unless remove_descendants is False, descendants' epubs are removed.
512         """
513         from StringIO import StringIO
514         from hashlib import sha1
515         from django.core.files.base import ContentFile
516
517         if self.parent:
518             # don't need an epub
519             return
520
521         epub_file = StringIO()
522         try:
523             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
524             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
525             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
526         except NoDublinCore:
527             pass
528
529         book_descendants = list(self.children.all())
530         while len(book_descendants) > 0:
531             child_book = book_descendants.pop(0)
532             if remove_descendants and child_book.has_epub_file():
533                 child_book.epub_file.delete()
534             # save anyway, to refresh short_html
535             child_book.save()
536             book_descendants += list(child_book.children.all())
537
538         # remove zip package with all epub files
539         remove_zip(settings.ALL_EPUB_ZIP)
540
541     def build_txt(self):
542         from StringIO import StringIO
543         from django.core.files.base import ContentFile
544         from librarian import text
545
546         out = StringIO()
547         text.transform(open(self.xml_file.path), out)
548         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
549
550
551     def build_html(self):
552         from tempfile import NamedTemporaryFile
553         from markupstring import MarkupString
554
555         meta_tags = list(self.tags.filter(
556             category__in=('author', 'epoch', 'genre', 'kind')))
557         book_tag = self.book_tag()
558
559         html_file = NamedTemporaryFile()
560         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
561             self.html_file.save('%s.html' % self.slug, File(html_file))
562
563             # get ancestor l-tags for adding to new fragments
564             ancestor_tags = []
565             p = self.parent
566             while p:
567                 ancestor_tags.append(p.book_tag())
568                 p = p.parent
569
570             # Delete old fragments and create them from scratch
571             self.fragments.all().delete()
572             # Extract fragments
573             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
574             for fragment in closed_fragments.values():
575                 try:
576                     theme_names = [s.strip() for s in fragment.themes.split(',')]
577                 except AttributeError:
578                     continue
579                 themes = []
580                 for theme_name in theme_names:
581                     if not theme_name:
582                         continue
583                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
584                     if created:
585                         tag.name = theme_name
586                         tag.sort_key = theme_name.lower()
587                         tag.save()
588                     themes.append(tag)
589                 if not themes:
590                     continue
591
592                 text = fragment.to_string()
593                 short_text = ''
594                 if (len(MarkupString(text)) > 240):
595                     short_text = unicode(MarkupString(text)[:160])
596                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
597                     text=text, short_text=short_text)
598
599                 new_fragment.save()
600                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
601             self.save()
602             self.html_built.send(sender=self)
603             return True
604         return False
605
606     @staticmethod
607     def zip_format(format_):
608         def pretty_file_name(book):
609             return "%s/%s.%s" % (
610                 b.get_extra_info_value()['author'],
611                 b.slug,
612                 format_)
613
614         field_name = "%s_file" % format_
615         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
616         paths = [(pretty_file_name(b), getattr(b, field_name).path)
617                     for b in books]
618         result = create_zip.delay(paths,
619                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
620         return result.wait()
621
622     def zip_audiobooks(self):
623         bm = BookMedia.objects.filter(book=self, type='mp3')
624         paths = map(lambda bm: (None, bm.file.path), bm)
625         result = create_zip.delay(paths, self.slug)
626         return result.wait()
627
628     def search_index(self):
629         idx = search.ReusableIndex()
630         idx.open()
631         try:
632             idx.index_book(self)
633         finally:
634             idx.close()
635
636     @classmethod
637     def from_xml_file(cls, xml_file, **kwargs):
638         # use librarian to parse meta-data
639         book_info = dcparser.parse(xml_file)
640
641         if not isinstance(xml_file, File):
642             xml_file = File(open(xml_file))
643
644         try:
645             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
646         finally:
647             xml_file.close()
648
649     @classmethod
650     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
651             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
652             search_index=True):
653         import re
654
655         # check for parts before we do anything
656         children = []
657         if hasattr(book_info, 'parts'):
658             for part_url in book_info.parts:
659                 base, slug = part_url.rsplit('/', 1)
660                 try:
661                     children.append(Book.objects.get(slug=slug))
662                 except Book.DoesNotExist, e:
663                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
664
665
666         # Read book metadata
667         book_base, book_slug = book_info.url.rsplit('/', 1)
668         if re.search(r'[^a-zA-Z0-9-]', book_slug):
669             raise ValueError('Invalid characters in slug')
670         book, created = Book.objects.get_or_create(slug=book_slug)
671
672         if created:
673             book_shelves = []
674         else:
675             if not overwrite:
676                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
677             # Save shelves for this book
678             book_shelves = list(book.tags.filter(category='set'))
679
680         book.title = book_info.title
681         book.set_extra_info_value(book_info.to_dict())
682         book.save()
683
684         meta_tags = []
685         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
686         for field_name, category in categories:
687             try:
688                 tag_names = getattr(book_info, field_name)
689             except:
690                 tag_names = [getattr(book_info, category)]
691             for tag_name in tag_names:
692                 tag_sort_key = tag_name
693                 if category == 'author':
694                     tag_sort_key = tag_name.last_name
695                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
696                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
697                 if created:
698                     tag.name = tag_name
699                     tag.sort_key = sortify(tag_sort_key.lower())
700                     tag.save()
701                 meta_tags.append(tag)
702
703         book.tags = set(meta_tags + book_shelves)
704
705         book_tag = book.book_tag()
706
707         for n, child_book in enumerate(children):
708             child_book.parent = book
709             child_book.parent_number = n
710             child_book.save()
711
712         # Save XML and HTML files
713         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
714
715         # delete old fragments when overwriting
716         book.fragments.all().delete()
717
718         if book.build_html():
719             if not settings.NO_BUILD_TXT and build_txt:
720                 book.build_txt()
721
722         if not settings.NO_BUILD_EPUB and build_epub:
723             book.root_ancestor.build_epub()
724
725         if not settings.NO_BUILD_PDF and build_pdf:
726             book.root_ancestor.build_pdf()
727
728         if not settings.NO_BUILD_MOBI and build_mobi:
729             book.build_mobi()
730
731         if not settings.NO_SEARCH_INDEX and search_index:
732             book.search_index()
733
734         book_descendants = list(book.children.all())
735         # add l-tag to descendants and their fragments
736         # delete unnecessary EPUB files
737         while len(book_descendants) > 0:
738             child_book = book_descendants.pop(0)
739             child_book.tags = list(child_book.tags) + [book_tag]
740             child_book.save()
741             for fragment in child_book.fragments.all():
742                 fragment.tags = set(list(fragment.tags) + [book_tag])
743             book_descendants += list(child_book.children.all())
744
745         book.save()
746
747         # refresh cache
748         book.reset_tag_counter()
749         book.reset_theme_counter()
750
751         return book
752
753     def reset_tag_counter(self):
754         if self.id is None:
755             return
756
757         cache_key = "Book.tag_counter/%d" % self.id
758         cache.delete(cache_key)
759         if self.parent:
760             self.parent.reset_tag_counter()
761
762     @property
763     def tag_counter(self):
764         if self.id:
765             cache_key = "Book.tag_counter/%d" % self.id
766             tags = cache.get(cache_key)
767         else:
768             tags = None
769
770         if tags is None:
771             tags = {}
772             for child in self.children.all().order_by():
773                 for tag_pk, value in child.tag_counter.iteritems():
774                     tags[tag_pk] = tags.get(tag_pk, 0) + value
775             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
776                 tags[tag.pk] = 1
777
778             if self.id:
779                 cache.set(cache_key, tags, CACHE_FOREVER)
780         return tags
781
782     def reset_theme_counter(self):
783         if self.id is None:
784             return
785
786         cache_key = "Book.theme_counter/%d" % self.id
787         cache.delete(cache_key)
788         if self.parent:
789             self.parent.reset_theme_counter()
790
791     @property
792     def theme_counter(self):
793         if self.id:
794             cache_key = "Book.theme_counter/%d" % self.id
795             tags = cache.get(cache_key)
796         else:
797             tags = None
798
799         if tags is None:
800             tags = {}
801             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
802                 for tag in fragment.tags.filter(category='theme').order_by():
803                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
804
805             if self.id:
806                 cache.set(cache_key, tags, CACHE_FOREVER)
807         return tags
808
809     def pretty_title(self, html_links=False):
810         book = self
811         names = list(book.tags.filter(category='author'))
812
813         books = []
814         while book:
815             books.append(book)
816             book = book.parent
817         names.extend(reversed(books))
818
819         if html_links:
820             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
821         else:
822             names = [tag.name for tag in names]
823
824         return ', '.join(names)
825
826     @classmethod
827     def tagged_top_level(cls, tags):
828         """ Returns top-level books tagged with `tags'.
829
830         It only returns those books which don't have ancestors which are
831         also tagged with those tags.
832
833         """
834         # get relevant books and their tags
835         objects = cls.tagged.with_all(tags)
836         # eliminate descendants
837         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
838         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
839         if descendants_keys:
840             objects = objects.exclude(pk__in=descendants_keys)
841
842         return objects
843
844
845 def _has_factory(ftype):
846     has = lambda self: bool(getattr(self, "%s_file" % ftype))
847     has.short_description = t.upper()
848     has.boolean = True
849     has.__name__ = "has_%s_file" % ftype
850     return has
851
852     
# Give Book a "<type>_file" field and a "has_<type>_file" accessor for
# every supported file type.
# The loop variable is deliberately left named `t`: it is a module-level
# name, so renaming it affects any code which looks `t` up globally.
for t in Book.file_types:
    field_name = "%s_file" % t
    file_field = models.FileField(
        _("%s file" % t.upper()),
        upload_to=book_upload_path(t),
        blank=True)
    file_field.contribute_to_class(Book, field_name)

    setattr(Book, "has_%s_file" % t, _has_factory(t))
861
862
class Fragment(models.Model):
    """A taggable piece of a book's text, located by an anchor."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the 'book_text' URL of the book, pointing at '#m<anchor>'."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """Drop the cached short HTML for every language in settings.LANGUAGES.

        Does nothing for a fragment without an id.
        """
        if self.id is None:
            return

        for lang_code, lang_name in settings.LANGUAGES:
            cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """Return the fragment rendered with 'catalogue/fragment_short.html'.

        For a saved fragment the rendered text is cached per language
        under "Fragment.short_html/<id>/<language>".  The result is
        marked as safe.
        """
        cache_key = None
        rendered = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            rendered = cache.get(cache_key)

        if rendered is None:
            context = {'fragment': self}
            rendered = unicode(render_to_string('catalogue/fragment_short.html', context))
            # cache_key is only set for fragments which have an id
            if cache_key is not None:
                cache.set(cache_key, rendered, CACHE_FOREVER)

        return mark_safe(rendered)
904
905
class FileRecord(models.Model):
    """A file identified by slug and type, with its SHA-1 hash and a timestamp."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
919
920 ###########
921 #
922 # SIGNALS
923 #
924 ###########
925
926
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate tag counters after the tags of an object have changed.

    Connected to the `tags_updated` signal.

    :param sender: the object whose tags changed; only Book and Fragment
        instances get their book-level counters reset
    :param affected_tags: iterable of the tags which were touched
    """
    # Collect the primary keys once: the tags are needed for several
    # queries, and `affected_tags` may be an iterable which can only be
    # consumed a single time.
    tag_pks = [tag.pk for tag in affected_tags]
    touched = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
943
944
def _pre_delete_handler(sender, instance, **kwargs):
    """ save the related Book when a BookMedia is about to be deleted """
    # the handler is connected for every model; act on BookMedia only
    is_book_media = (sender == BookMedia)
    if not is_book_media:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
950
def _post_save_handler(sender, instance, **kwargs):
    """ save the related Book whenever a BookMedia has been saved """
    # the handler is connected for every model; act on BookMedia only
    is_book_media = (sender == BookMedia)
    if not is_book_media:
        return
    instance.book.save()
post_save.connect(_post_save_handler)