remove USE_CELERY option, use built-in CELERY_ALWAYS_EAGER instead
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, ORMDocProvider, create_zip, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
34 TAG_CATEGORIES = (
35     ('author', _('author')),
36     ('epoch', _('epoch')),
37     ('kind', _('kind')),
38     ('genre', _('genre')),
39     ('theme', _('theme')),
40     ('set', _('set')),
41     ('book', _('book')),
42 )
43
44 MEDIA_FORMATS = (
45     ('odt', _('ODT file')),
46     ('mp3', _('MP3 file')),
47     ('ogg', _('OGG file')),
48     ('daisy', _('DAISY file')), 
49 )
50
51 # not quite, but Django wants you to set a timeout
52 CACHE_FOREVER = 2419200  # 28 days
53
54
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are limited to tags of one category."""

    def __init__(self, subcategory):
        """Store the category value every queryset will be filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
62
63
class Tag(TagBase):
    """A catalogue tag; ``category`` is one of the keys of TAG_CATEGORIES."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # cached result of get_count(); None means "not computed yet"
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # auto_now=True makes this the last-modification time, so it must not
    # reuse the 'creation date' label of created_at.
    changed_at    = models.DateTimeField(_('change date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without its category."""
        pass

    # URL path segment -> category
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # category -> URL path segment (inverse of categories_rev)
    categories_dict = dict((category, chunk) for chunk, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description (admin boolean column)."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """Return the cached object count for this tag, computing it on first use.

        Themes count tagged fragments; 'book' tags always count zero; other
        categories count tagged books. Except for 'set' tags, books that carry
        the l-tag of another counted book (i.e. its descendants) are excluded.
        The computed value is stored in ``book_count`` and saved.
        """
        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants
                    l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of Tag objects.

        A string is parsed as a '/'-separated path of ``<category>/<slug>``
        pairs, where the category is a key of ``categories_rev``. A slug given
        without a category is looked up among non-book tags. Anything that is
        not a string is delegated to ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a lookup failed, or the path ended on a category
                name with no slug after it.
            Tag.MultipleObjectsReturned: some unqualified slugs are ambiguous;
                the exception carries ``tags`` and ``ambiguous_slugs``.
            Tag.UrlDeprecationWarning: every slug resolved, but at least one
                was unqualified; the exception carries ``tags``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                    deprecated = True
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        if deprecated:
            e = Tag.UrlDeprecationWarning()
            e.tags = real_tags
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """The ``<category-segment>/<slug>`` path fragment for this tag."""
        return '/'.join((Tag.categories_dict[self.category], self.slug))
177
178
179 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an ``upload_to`` callable producing ``book/<ext>/<name>.<ext>``.

    When ``ext`` is not given it is taken from ``media.type`` ('daisy' maps
    to 'daisy.zip'). The file name is the slug of ``media.name`` or, if that
    is empty, of the uploaded file name up to its first dot; it is truncated
    so that the directory, extension and 4 spare characters fit in ``maxlen``.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        raw_name = media.name if media.name else filename.split(".")[0]
        name = slughifi(raw_name)
        reserved = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
    return get_dynamic_path
194
195
class BookMedia(models.Model):
    """An extra media file (one of MEDIA_FORMATS) attached to a book."""

    # max_length has to be an int: with the string "100" the length
    # validator compares len(value) against a str and never fails on Python 2.
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, keeping file name, zip package and metadata in sync.

        The row is written twice: once so the file is stored, and again after
        ``extra_info`` and ``source_sha1`` have been read back from that file.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        # NOTE(review): both saves receive the same args; a caller passing
        # force_insert=True would presumably fail on the second one - confirm.
        super(BookMedia, self).save(*args, **kwargs)
        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """Read artist, director, project and funder from the audio file's tags.

        Returns an empty dict for types other than 'mp3' and 'ogg'. Reading is
        best-effort: if the file cannot be parsed, the values read so far
        (empty strings by default) are returned.
        """
        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # deliberately tolerant of unreadable files, but no longer
                # swallows KeyboardInterrupt / SystemExit
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """Read the source file's SHA1 from audiobook metadata.

        Looks at the 'wolnelektury.pl?flac_sha1' PRIV frame for 'mp3' and the
        'flac_sha1' key for 'ogg'. Returns None for other types, when the
        value is missing, or when the file cannot be read.
        """
        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
285
286
287 class Book(models.Model):
288     title         = models.CharField(_('title'), max_length=120)
289     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
290     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
291     description   = models.TextField(_('description'), blank=True)
292     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
293     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
294     parent_number = models.IntegerField(_('parent number'), default=0)
295     extra_info    = JSONField(_('extra information'), default='{}')
296     gazeta_link   = models.CharField(blank=True, max_length=240)
297     wiki_link     = models.CharField(blank=True, max_length=240)
298     # files generated during publication
299
300     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
301     
302     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
303     objects  = models.Manager()
304     tagged   = managers.ModelTaggedItemManager(Tag)
305     tags     = managers.TagDescriptor(Tag)
306
307     html_built = django.dispatch.Signal()
308
309     class AlreadyExists(Exception):
310         pass
311
312     class Meta:
313         ordering = ('sort_key',)
314         verbose_name = _('book')
315         verbose_name_plural = _('books')
316
317     def __unicode__(self):
318         return self.title
319
320     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
321         self.sort_key = sortify(self.title)
322
323         ret = super(Book, self).save(force_insert, force_update)
324
325         if reset_short_html:
326             self.reset_short_html()
327
328         return ret
329
330     @permalink
331     def get_absolute_url(self):
332         return ('catalogue.views.book_detail', [self.slug])
333
334     @property
335     def name(self):
336         return self.title
337
338     def book_tag_slug(self):
339         return ('l-' + self.slug)[:120]
340
341     def book_tag(self):
342         slug = self.book_tag_slug()
343         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
344         if created:
345             book_tag.name = self.title[:50]
346             book_tag.sort_key = self.title.lower()
347             book_tag.save()
348         return book_tag
349
350     def has_media(self, type):
351         if type in Book.file_types:
352             return bool(getattr(self, "%s_file" % type))
353         else:
354             return self.media.filter(type=type).exists()
355
356     def get_media(self, type):
357         if self.has_media(type):
358             if type in Book.file_types:
359                 return getattr(self, "%s_file" % type)
360             else:                                             
361                 return self.media.filter(type=type)
362         else:
363             return None
364
365     def get_mp3(self):
366         return self.get_media("mp3")
367     def get_odt(self):
368         return self.get_media("odt")
369     def get_ogg(self):
370         return self.get_media("ogg")
371     def get_daisy(self):
372         return self.get_media("daisy")                       
373
374     def reset_short_html(self):
375         if self.id is None:
376             return
377
378         cache_key = "Book.short_html/%d/%s"
379         for lang, langname in settings.LANGUAGES:
380             cache.delete(cache_key % (self.id, lang))
381         # Fragment.short_html relies on book's tags, so reset it here too
382         for fragm in self.fragments.all():
383             fragm.reset_short_html()
384
385     def short_html(self):
386         if self.id:
387             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
388             short_html = cache.get(cache_key)
389         else:
390             short_html = None
391
392         if short_html is not None:
393             return mark_safe(short_html)
394         else:
395             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
396             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
397
398             formats = []
399             # files generated during publication
400             if self.has_media("html"):
401                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
402             if self.has_media("pdf"):
403                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
404             if self.has_media("mobi"):
405                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
406             if self.root_ancestor.has_media("epub"):
407                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
408             if self.has_media("txt"):
409                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
410             # other files
411             for m in self.media.order_by('type'):
412                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
413
414             formats = [mark_safe(format) for format in formats]
415
416             short_html = unicode(render_to_string('catalogue/book_short.html',
417                 {'book': self, 'tags': tags, 'formats': formats}))
418
419             if self.id:
420                 cache.set(cache_key, short_html, CACHE_FOREVER)
421             return mark_safe(short_html)
422
423     @property
424     def root_ancestor(self):
425         """ returns the oldest ancestor """
426
427         if not hasattr(self, '_root_ancestor'):
428             book = self
429             while book.parent:
430                 book = book.parent
431             self._root_ancestor = book
432         return self._root_ancestor
433
434
435     def has_description(self):
436         return len(self.description) > 0
437     has_description.short_description = _('description')
438     has_description.boolean = True
439
440     # ugly ugly ugly
441     def has_odt_file(self):
442         return bool(self.has_media("odt"))
443     has_odt_file.short_description = 'ODT'
444     has_odt_file.boolean = True
445
446     def has_mp3_file(self):
447         return bool(self.has_media("mp3"))
448     has_mp3_file.short_description = 'MP3'
449     has_mp3_file.boolean = True
450
451     def has_ogg_file(self):
452         return bool(self.has_media("ogg"))
453     has_ogg_file.short_description = 'OGG'
454     has_ogg_file.boolean = True
455
456     def has_daisy_file(self):
457         return bool(self.has_media("daisy"))
458     has_daisy_file.short_description = 'DAISY'
459     has_daisy_file.boolean = True
460
461     def build_pdf(self):
462         """ (Re)builds the pdf file.
463
464         """
465         from librarian import pdf
466         from tempfile import NamedTemporaryFile
467         import os
468
469         # remove zip with all pdf files
470         remove_zip(settings.ALL_PDF_ZIP)
471
472         try:
473             pdf_file = NamedTemporaryFile(delete=False)
474             pdf.transform(ORMDocProvider(self),
475                       file_path=str(self.xml_file.path),
476                       output_file=pdf_file,
477                       )
478
479             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
480         finally:
481             unlink(pdf_file.name)
482
483     def build_mobi(self):
484         """ (Re)builds the MOBI file.
485
486         """
487         from librarian import mobi
488         from tempfile import NamedTemporaryFile
489         import os
490
491         # remove zip with all pdf files
492         remove_zip(settings.ALL_MOBI_ZIP)
493
494         try:
495             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
496             mobi.transform(ORMDocProvider(self), verbose=1,
497                       file_path=str(self.xml_file.path),
498                       output_file=mobi_file.name,
499                       )
500
501             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
502         finally:
503             unlink(mobi_file.name)
504
505     def build_epub(self, remove_descendants=True):
506         """ (Re)builds the epub file.
507             If book has a parent, does nothing.
508             Unless remove_descendants is False, descendants' epubs are removed.
509         """
510         from StringIO import StringIO
511         from hashlib import sha1
512         from django.core.files.base import ContentFile
513
514         if self.parent:
515             # don't need an epub
516             return
517
518         # remove zip package with all epub files
519         remove_zip(settings.ALL_EPUB_ZIP)
520
521         epub_file = StringIO()
522         try:
523             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
524             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
525             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
526         except NoDublinCore:
527             pass
528
529         book_descendants = list(self.children.all())
530         while len(book_descendants) > 0:
531             child_book = book_descendants.pop(0)
532             if remove_descendants and child_book.has_epub_file():
533                 child_book.epub_file.delete()
534             # save anyway, to refresh short_html
535             child_book.save()
536             book_descendants += list(child_book.children.all())
537
538     def build_txt(self):
539         from StringIO import StringIO
540         from django.core.files.base import ContentFile
541         from librarian import text
542
543         out = StringIO()
544         text.transform(open(self.xml_file.path), out)
545         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
546
547
548     def build_html(self):
549         from tempfile import NamedTemporaryFile
550         from markupstring import MarkupString
551
552         meta_tags = list(self.tags.filter(
553             category__in=('author', 'epoch', 'genre', 'kind')))
554         book_tag = self.book_tag()
555
556         html_file = NamedTemporaryFile()
557         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
558             self.html_file.save('%s.html' % self.slug, File(html_file))
559
560             # get ancestor l-tags for adding to new fragments
561             ancestor_tags = []
562             p = self.parent
563             while p:
564                 ancestor_tags.append(p.book_tag())
565                 p = p.parent
566
567             # Delete old fragments and create them from scratch
568             self.fragments.all().delete()
569             # Extract fragments
570             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
571             for fragment in closed_fragments.values():
572                 try:
573                     theme_names = [s.strip() for s in fragment.themes.split(',')]
574                 except AttributeError:
575                     continue
576                 themes = []
577                 for theme_name in theme_names:
578                     if not theme_name:
579                         continue
580                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
581                     if created:
582                         tag.name = theme_name
583                         tag.sort_key = theme_name.lower()
584                         tag.save()
585                     themes.append(tag)
586                 if not themes:
587                     continue
588
589                 text = fragment.to_string()
590                 short_text = ''
591                 if (len(MarkupString(text)) > 240):
592                     short_text = unicode(MarkupString(text)[:160])
593                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
594                     text=text, short_text=short_text)
595
596                 new_fragment.save()
597                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
598             self.save()
599             self.html_built.send(sender=self)
600             return True
601         return False
602
603     @staticmethod
604     def zip_epub():
605         books = Book.objects.all()
606
607         paths = filter(lambda x: x is not None,
608                        map(lambda b: b.epub_file and b.epub_file.path or None, books))
609         result = create_zip_task.delay(paths, settings.ALL_EPUB_ZIP)
610         return result.wait()
611
612     @staticmethod
613     def zip_pdf():
614         books = Book.objects.all()
615
616         paths = filter(lambda x: x is not None,
617                        map(lambda b: b.pdf_file and b.pdf_file.path or None, books))
618         result = create_zip_task.delay(paths, settings.ALL_PDF_ZIP)
619         return result.wait()
620
621     @staticmethod
622     def zip_mobi():
623         books = Book.objects.all()
624
625         paths = filter(lambda x: x is not None,
626                        map(lambda b: b.mobi_file and b.mobi_file.path or None, books))
627         result = create_zip_task.delay(paths, settings.ALL_MOBI_ZIP)
628         return settings.MEDIA_URL + result.wait()
629
630     def zip_audiobooks(self):
631         bm = BookMedia.objects.filter(book=self)
632         paths = map(lambda bm: bm.file.path, bm)
633         result = create_zip_task.delay(paths, self.slug)
634         return result.wait()
635
636     @classmethod
637     def from_xml_file(cls, xml_file, **kwargs):
638         # use librarian to parse meta-data
639         book_info = dcparser.parse(xml_file)
640
641         if not isinstance(xml_file, File):
642             xml_file = File(open(xml_file))
643
644         try:
645             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
646         finally:
647             xml_file.close()
648
649     @classmethod
650     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
651             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
652         import re
653
654         # check for parts before we do anything
655         children = []
656         if hasattr(book_info, 'parts'):
657             for part_url in book_info.parts:
658                 base, slug = part_url.rsplit('/', 1)
659                 try:
660                     children.append(Book.objects.get(slug=slug))
661                 except Book.DoesNotExist, e:
662                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
663
664
665         # Read book metadata
666         book_base, book_slug = book_info.url.rsplit('/', 1)
667         if re.search(r'[^a-zA-Z0-9-]', book_slug):
668             raise ValueError('Invalid characters in slug')
669         book, created = Book.objects.get_or_create(slug=book_slug)
670
671         if created:
672             book_shelves = []
673         else:
674             if not overwrite:
675                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
676             # Save shelves for this book
677             book_shelves = list(book.tags.filter(category='set'))
678
679         book.title = book_info.title
680         book.set_extra_info_value(book_info.to_dict())
681         book.save()
682
683         meta_tags = []
684         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
685         for field_name, category in categories:
686             try:
687                 tag_names = getattr(book_info, field_name)
688             except:
689                 tag_names = [getattr(book_info, category)]
690             for tag_name in tag_names:
691                 tag_sort_key = tag_name
692                 if category == 'author':
693                     tag_sort_key = tag_name.last_name
694                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
695                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
696                 if created:
697                     tag.name = tag_name
698                     tag.sort_key = sortify(tag_sort_key.lower())
699                     tag.save()
700                 meta_tags.append(tag)
701
702         book.tags = set(meta_tags + book_shelves)
703
704         book_tag = book.book_tag()
705
706         for n, child_book in enumerate(children):
707             child_book.parent = book
708             child_book.parent_number = n
709             child_book.save()
710
711         # Save XML and HTML files
712         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
713
714         # delete old fragments when overwriting
715         book.fragments.all().delete()
716
717         if book.build_html():
718             if not settings.NO_BUILD_TXT and build_txt:
719                 book.build_txt()
720
721         if not settings.NO_BUILD_EPUB and build_epub:
722             book.root_ancestor.build_epub()
723
724         if not settings.NO_BUILD_PDF and build_pdf:
725             book.root_ancestor.build_pdf()
726
727         if not settings.NO_BUILD_MOBI and build_mobi:
728             book.build_mobi()
729
730         book_descendants = list(book.children.all())
731         # add l-tag to descendants and their fragments
732         # delete unnecessary EPUB files
733         while len(book_descendants) > 0:
734             child_book = book_descendants.pop(0)
735             child_book.tags = list(child_book.tags) + [book_tag]
736             child_book.save()
737             for fragment in child_book.fragments.all():
738                 fragment.tags = set(list(fragment.tags) + [book_tag])
739             book_descendants += list(child_book.children.all())
740
741         book.save()
742
743         # refresh cache
744         book.reset_tag_counter()
745         book.reset_theme_counter()
746
747         return book
748
749     def reset_tag_counter(self):
750         if self.id is None:
751             return
752
753         cache_key = "Book.tag_counter/%d" % self.id
754         cache.delete(cache_key)
755         if self.parent:
756             self.parent.reset_tag_counter()
757
758     @property
759     def tag_counter(self):
760         if self.id:
761             cache_key = "Book.tag_counter/%d" % self.id
762             tags = cache.get(cache_key)
763         else:
764             tags = None
765
766         if tags is None:
767             tags = {}
768             for child in self.children.all().order_by():
769                 for tag_pk, value in child.tag_counter.iteritems():
770                     tags[tag_pk] = tags.get(tag_pk, 0) + value
771             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
772                 tags[tag.pk] = 1
773
774             if self.id:
775                 cache.set(cache_key, tags, CACHE_FOREVER)
776         return tags
777
778     def reset_theme_counter(self):
779         if self.id is None:
780             return
781
782         cache_key = "Book.theme_counter/%d" % self.id
783         cache.delete(cache_key)
784         if self.parent:
785             self.parent.reset_theme_counter()
786
787     @property
788     def theme_counter(self):
789         if self.id:
790             cache_key = "Book.theme_counter/%d" % self.id
791             tags = cache.get(cache_key)
792         else:
793             tags = None
794
795         if tags is None:
796             tags = {}
797             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
798                 for tag in fragment.tags.filter(category='theme').order_by():
799                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
800
801             if self.id:
802                 cache.set(cache_key, tags, CACHE_FOREVER)
803         return tags
804
805     def pretty_title(self, html_links=False):
806         book = self
807         names = list(book.tags.filter(category='author'))
808
809         books = []
810         while book:
811             books.append(book)
812             book = book.parent
813         names.extend(reversed(books))
814
815         if html_links:
816             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
817         else:
818             names = [tag.name for tag in names]
819
820         return ', '.join(names)
821
822     @classmethod
823     def tagged_top_level(cls, tags):
824         """ Returns top-level books tagged with `tags'.
825
826         It only returns those books which don't have ancestors which are
827         also tagged with those tags.
828
829         """
830         # get relevant books and their tags
831         objects = cls.tagged.with_all(tags)
832         # eliminate descendants
833         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
834         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
835         if descendants_keys:
836             objects = objects.exclude(pk__in=descendants_keys)
837
838         return objects
839
840
841 def _has_factory(ftype):
842     has = lambda self: bool(getattr(self, "%s_file" % ftype))
843     has.short_description = t.upper()
844     has.boolean = True
845     has.__name__ = "has_%s_file" % ftype
846     return has
847
848     
# For every supported file type, attach a "<type>_file" FileField to Book
# together with a matching "has_<type>_file" predicate.
for t in Book.file_types:
    field_name = "%s_file" % t
    models.FileField(
        _("%s file" % t.upper()),
        upload_to=book_upload_path(t),
        blank=True,
    ).contribute_to_class(Book, field_name)

    setattr(Book, "has_%s" % field_name, _has_factory(t))
857
858
class Fragment(models.Model):
    # A piece of a book's text, addressed by an anchor and taggable.
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text view with this fragment's anchor."""
        book_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every language."""
        if self.id is not None:
            for lang, langname in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang))

    def short_html(self):
        """Return the rendered short HTML snippet, marked as safe.

        For a fragment with an id the snippet is cached per active language;
        otherwise it is rendered on every call.
        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if self.id:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
900
901
class FileRecord(models.Model):
    # One record per (slug, type) file: its SHA-1 and the creation time.
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
915
916 ###########
917 #
918 # SIGNALS
919 #
920 ###########
921
922
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """React to a ``tags_updated`` signal.

    * Resets ``book_count`` of every affected tag and bumps its
      ``changed_at`` (the API uses it to know the tag was touched).
    * If the sender is a Book and any affected tag is outside the 'book',
      'theme' and 'set' categories, resets the book's tag counter.
    * If the sender is a Fragment and any affected tag is a 'theme',
      resets the theme counter of the fragment's book.
    """
    # Materialize the pks once: affected_tags may be a one-shot iterable,
    # and it is needed for several queries below.
    affected_pks = [tag.pk for tag in affected_tags]
    affected = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    affected.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if affected.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
939
940
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when a BookMedia is about to be deleted."""
    is_book_media = sender == BookMedia
    if not is_book_media:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
946
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after a BookMedia has been saved.

    Per the original author's note, this is meant to refresh the
    short_html data that depends on the book's media.
    """
    is_book_media = sender == BookMedia
    if not is_book_media:
        return
    instance.book.save()
post_save.connect(_post_save_handler)