565b759ce4ff77b6c40afa13972c59096f107a76
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, ORMDocProvider, create_zip_task, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
# Categories a Tag may belong to, as (stored value, translatable label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media types a BookMedia entry may have, as (stored value, translatable label) pairs.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Not quite forever, but Django wants you to set a timeout: 28 days, in seconds.
CACHE_FOREVER = 28 * 24 * 60 * 60
53
54
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets only contain tags of one category."""

    def __init__(self, subcategory):
        # Remember which category this manager is restricted to.
        self.subcategory = subcategory
        super(TagSubcategoryManager, self).__init__()

    def get_query_set(self):
        """Return the default query set narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
62
63
64 class Tag(TagBase):
65     name = models.CharField(_('name'), max_length=50, db_index=True)
66     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
67     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
68     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
69         db_index=True, choices=TAG_CATEGORIES)
70     description = models.TextField(_('description'), blank=True)
71     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
72
73     user = models.ForeignKey(User, blank=True, null=True)
74     book_count = models.IntegerField(_('book count'), blank=True, null=True)
75     gazeta_link = models.CharField(blank=True, max_length=240)
76     wiki_link = models.CharField(blank=True, max_length=240)
77
78     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
79     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
80
81     class UrlDeprecationWarning(DeprecationWarning):
82         pass
83
84     categories_rev = {
85         'autor': 'author',
86         'epoka': 'epoch',
87         'rodzaj': 'kind',
88         'gatunek': 'genre',
89         'motyw': 'theme',
90         'polka': 'set',
91     }
92     categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
93
94     class Meta:
95         ordering = ('sort_key',)
96         verbose_name = _('tag')
97         verbose_name_plural = _('tags')
98         unique_together = (("slug", "category"),)
99
100     def __unicode__(self):
101         return self.name
102
103     def __repr__(self):
104         return "Tag(slug=%r)" % self.slug
105
106     @permalink
107     def get_absolute_url(self):
108         return ('catalogue.views.tagged_object_list', [self.url_chunk])
109
110     def has_description(self):
111         return len(self.description) > 0
112     has_description.short_description = _('description')
113     has_description.boolean = True
114
115     def get_count(self):
116         """ returns global book count for book tags, fragment count for themes """
117
118         if self.book_count is None:
119             if self.category == 'book':
120                 # never used
121                 objects = Book.objects.none()
122             elif self.category == 'theme':
123                 objects = Fragment.tagged.with_all((self,))
124             else:
125                 objects = Book.tagged.with_all((self,)).order_by()
126                 if self.category != 'set':
127                     # eliminate descendants
128                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
129                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
130                     if descendants_keys:
131                         objects = objects.exclude(pk__in=descendants_keys)
132             self.book_count = objects.count()
133             self.save()
134         return self.book_count
135
136     @staticmethod
137     def get_tag_list(tags):
138         if isinstance(tags, basestring):
139             real_tags = []
140             ambiguous_slugs = []
141             category = None
142             deprecated = False
143             tags_splitted = tags.split('/')
144             for name in tags_splitted:
145                 if category:
146                     real_tags.append(Tag.objects.get(slug=name, category=category))
147                     category = None
148                 elif name in Tag.categories_rev:
149                     category = Tag.categories_rev[name]
150                 else:
151                     try:
152                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
153                         deprecated = True 
154                     except Tag.MultipleObjectsReturned, e:
155                         ambiguous_slugs.append(name)
156
157             if category:
158                 # something strange left off
159                 raise Tag.DoesNotExist()
160             if ambiguous_slugs:
161                 # some tags should be qualified
162                 e = Tag.MultipleObjectsReturned()
163                 e.tags = real_tags
164                 e.ambiguous_slugs = ambiguous_slugs
165                 raise e
166             if deprecated:
167                 e = Tag.UrlDeprecationWarning()
168                 e.tags = real_tags
169                 raise e
170             return real_tags
171         else:
172             return TagBase.get_tag_list(tags)
173
174     @property
175     def url_chunk(self):
176         return '/'.join((Tag.categories_dict[self.category], self.slug))
177
178
179 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build a callable that computes the storage path of a media file.

    ``ext`` fixes the extension (and sub-directory); when it is not given the
    media type decides, with 'daisy' mapped to 'daisy.zip'.  ``maxlen`` is the
    length budget used when truncating the slugified name.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type

        # Prefer the media's own name; fall back to the uploaded file's stem.
        if media.name:
            name = slughifi(media.name)
        else:
            name = slughifi(filename.split(".")[0])

        # Characters taken by the directory, separators and extension, plus 4 spare.
        overhead = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - overhead], ext)
    return get_dynamic_path
194
195
196 class BookMedia(models.Model):
197     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
198     name        = models.CharField(_('name'), max_length="100")
199     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
200     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
201     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
202     book = models.ForeignKey('Book', related_name='media')
203     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
204
205     def __unicode__(self):
206         return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
207
208     class Meta:
209         ordering            = ('type', 'name')
210         verbose_name        = _('book media')
211         verbose_name_plural = _('book media')
212
213     def save(self, *args, **kwargs):
214         try:
215             old = BookMedia.objects.get(pk=self.pk)
216         except BookMedia.DoesNotExist, e:
217             pass
218         else:
219             # if name changed, change the file name, too
220             if slughifi(self.name) != slughifi(old.name):
221                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
222
223         # remove the zip package for book with modified media
224         remove_zip(self.book.slug)
225
226         super(BookMedia, self).save(*args, **kwargs)
227         extra_info = self.get_extra_info_value()
228         extra_info.update(self.read_meta())
229         self.set_extra_info_value(extra_info)
230         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
231         return super(BookMedia, self).save(*args, **kwargs)
232
233     def read_meta(self):
234         """
235             Reads some metadata from the audiobook.
236         """
237
238         artist_name = director_name = project = funded_by = ''
239         if self.type == 'mp3':
240             try:
241                 audio = id3.ID3(self.file.path)
242                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
243                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
244                 project = ", ".join([t.data for t in audio.getall('PRIV') 
245                         if t.owner=='wolnelektury.pl?project'])
246                 funded_by = ", ".join([t.data for t in audio.getall('PRIV') 
247                         if t.owner=='wolnelektury.pl?funded_by'])
248             except:
249                 pass
250         elif self.type == 'ogg':
251             try:
252                 audio = mutagen.File(self.file.path)
253                 artist_name = ', '.join(audio.get('artist', []))
254                 director_name = ', '.join(audio.get('conductor', []))
255                 project = ", ".join(audio.get('project', []))
256                 funded_by = ", ".join(audio.get('funded_by', []))
257             except:
258                 pass
259         else:
260             return {}
261         return {'artist_name': artist_name, 'director_name': director_name,
262                 'project': project, 'funded_by': funded_by}
263
264     @staticmethod
265     def read_source_sha1(filepath, filetype):
266         """
267             Reads source file SHA1 from audiobok metadata.
268         """
269
270         if filetype == 'mp3':
271             try:
272                 audio = id3.ID3(filepath)
273                 return [t.data for t in audio.getall('PRIV') 
274                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
275             except:
276                 return None
277         elif filetype == 'ogg':
278             try:
279                 audio = mutagen.File(filepath)
280                 return audio.get('flac_sha1', [None])[0] 
281             except:
282                 return None
283         else:
284             return None
285
286
287 class Book(models.Model):
288     title         = models.CharField(_('title'), max_length=120)
289     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
290     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
291     description   = models.TextField(_('description'), blank=True)
292     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
293     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
294     parent_number = models.IntegerField(_('parent number'), default=0)
295     extra_info    = JSONField(_('extra information'), default='{}')
296     gazeta_link   = models.CharField(blank=True, max_length=240)
297     wiki_link     = models.CharField(blank=True, max_length=240)
298     # files generated during publication
299
300     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
301     
302     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
303     objects  = models.Manager()
304     tagged   = managers.ModelTaggedItemManager(Tag)
305     tags     = managers.TagDescriptor(Tag)
306
307     html_built = django.dispatch.Signal()
308
309     class AlreadyExists(Exception):
310         pass
311
312     class Meta:
313         ordering = ('sort_key',)
314         verbose_name = _('book')
315         verbose_name_plural = _('books')
316
317     def __unicode__(self):
318         return self.title
319
320     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
321         self.sort_key = sortify(self.title)
322
323         ret = super(Book, self).save(force_insert, force_update)
324
325         if reset_short_html:
326             self.reset_short_html()
327
328         return ret
329
330     @permalink
331     def get_absolute_url(self):
332         return ('catalogue.views.book_detail', [self.slug])
333
334     @property
335     def name(self):
336         return self.title
337
338     def book_tag_slug(self):
339         return ('l-' + self.slug)[:120]
340
341     def book_tag(self):
342         slug = self.book_tag_slug()
343         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
344         if created:
345             book_tag.name = self.title[:50]
346             book_tag.sort_key = self.title.lower()
347             book_tag.save()
348         return book_tag
349
350     def has_media(self, type):
351         if type in Book.file_types:
352             return bool(getattr(self, "%s_file" % type))
353         else:
354             return self.media.filter(type=type).exists()
355
356     def get_media(self, type):
357         if self.has_media(type):
358             if type in Book.file_types:
359                 return getattr(self, "%s_file" % type)
360             else:                                             
361                 return self.media.filter(type=type)
362         else:
363             return None
364
365     def get_mp3(self):
366         return self.get_media("mp3")
367     def get_odt(self):
368         return self.get_media("odt")
369     def get_ogg(self):
370         return self.get_media("ogg")
371     def get_daisy(self):
372         return self.get_media("daisy")                       
373
374     def reset_short_html(self):
375         if self.id is None:
376             return
377
378         cache_key = "Book.short_html/%d/%s"
379         for lang, langname in settings.LANGUAGES:
380             cache.delete(cache_key % (self.id, lang))
381         # Fragment.short_html relies on book's tags, so reset it here too
382         for fragm in self.fragments.all():
383             fragm.reset_short_html()
384
385     def short_html(self):
386         if self.id:
387             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
388             short_html = cache.get(cache_key)
389         else:
390             short_html = None
391
392         if short_html is not None:
393             return mark_safe(short_html)
394         else:
395             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
396             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
397
398             formats = []
399             # files generated during publication
400             if self.has_media("html"):
401                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
402             if self.has_media("pdf"):
403                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
404             if self.has_media("mobi"):
405                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
406             if self.root_ancestor.has_media("epub"):
407                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
408             if self.has_media("txt"):
409                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
410             # other files
411             for m in self.media.order_by('type'):
412                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
413
414             formats = [mark_safe(format) for format in formats]
415
416             short_html = unicode(render_to_string('catalogue/book_short.html',
417                 {'book': self, 'tags': tags, 'formats': formats}))
418
419             if self.id:
420                 cache.set(cache_key, short_html, CACHE_FOREVER)
421             return mark_safe(short_html)
422
423     @property
424     def root_ancestor(self):
425         """ returns the oldest ancestor """
426
427         if not hasattr(self, '_root_ancestor'):
428             book = self
429             while book.parent:
430                 book = book.parent
431             self._root_ancestor = book
432         return self._root_ancestor
433
434
435     def has_description(self):
436         return len(self.description) > 0
437     has_description.short_description = _('description')
438     has_description.boolean = True
439
440     # ugly ugly ugly
441     def has_odt_file(self):
442         return bool(self.has_media("odt"))
443     has_odt_file.short_description = 'ODT'
444     has_odt_file.boolean = True
445
446     def has_mp3_file(self):
447         return bool(self.has_media("mp3"))
448     has_mp3_file.short_description = 'MP3'
449     has_mp3_file.boolean = True
450
451     def has_ogg_file(self):
452         return bool(self.has_media("ogg"))
453     has_ogg_file.short_description = 'OGG'
454     has_ogg_file.boolean = True
455
456     def has_daisy_file(self):
457         return bool(self.has_media("daisy"))
458     has_daisy_file.short_description = 'DAISY'
459     has_daisy_file.boolean = True
460
461     def build_pdf(self):
462         """ (Re)builds the pdf file.
463
464         """
465         from librarian import pdf
466         from tempfile import NamedTemporaryFile
467         import os
468
469         # remove zip with all pdf files
470         remove_zip(settings.ALL_PDF_ZIP)
471
472         try:
473             pdf_file = NamedTemporaryFile(delete=False)
474             pdf.transform(ORMDocProvider(self),
475                       file_path=str(self.xml_file.path),
476                       output_file=pdf_file,
477                       )
478
479             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
480         finally:
481             unlink(pdf_file.name)
482
483     def build_mobi(self):
484         """ (Re)builds the MOBI file.
485
486         """
487         from librarian import mobi
488         from tempfile import NamedTemporaryFile
489         import os
490
491         # remove zip with all pdf files
492         remove_zip(settings.ALL_MOBI_ZIP)
493
494         try:
495             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
496             mobi.transform(ORMDocProvider(self), verbose=1,
497                       file_path=str(self.xml_file.path),
498                       output_file=mobi_file.name,
499                       )
500
501             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
502         finally:
503             unlink(mobi_file.name)
504
505     def build_epub(self, remove_descendants=True):
506         """ (Re)builds the epub file.
507             If book has a parent, does nothing.
508             Unless remove_descendants is False, descendants' epubs are removed.
509         """
510         from StringIO import StringIO
511         from hashlib import sha1
512         from django.core.files.base import ContentFile
513
514         if self.parent:
515             # don't need an epub
516             return
517
518         # remove zip package with all epub files
519         remove_zip(settings.ALL_EPUB_ZIP)
520
521         epub_file = StringIO()
522         try:
523             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
524             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
525             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
526         except NoDublinCore:
527             pass
528
529         book_descendants = list(self.children.all())
530         while len(book_descendants) > 0:
531             child_book = book_descendants.pop(0)
532             if remove_descendants and child_book.has_epub_file():
533                 child_book.epub_file.delete()
534             # save anyway, to refresh short_html
535             child_book.save()
536             book_descendants += list(child_book.children.all())
537
538     def build_txt(self):
539         from StringIO import StringIO
540         from django.core.files.base import ContentFile
541         from librarian import text
542
543         out = StringIO()
544         text.transform(open(self.xml_file.path), out)
545         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
546
547
548     def build_html(self):
549         from tempfile import NamedTemporaryFile
550         from markupstring import MarkupString
551
552         meta_tags = list(self.tags.filter(
553             category__in=('author', 'epoch', 'genre', 'kind')))
554         book_tag = self.book_tag()
555
556         html_file = NamedTemporaryFile()
557         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
558             self.html_file.save('%s.html' % self.slug, File(html_file))
559
560             # get ancestor l-tags for adding to new fragments
561             ancestor_tags = []
562             p = self.parent
563             while p:
564                 ancestor_tags.append(p.book_tag())
565                 p = p.parent
566
567             # Delete old fragments and create them from scratch
568             self.fragments.all().delete()
569             # Extract fragments
570             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
571             for fragment in closed_fragments.values():
572                 try:
573                     theme_names = [s.strip() for s in fragment.themes.split(',')]
574                 except AttributeError:
575                     continue
576                 themes = []
577                 for theme_name in theme_names:
578                     if not theme_name:
579                         continue
580                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
581                     if created:
582                         tag.name = theme_name
583                         tag.sort_key = theme_name.lower()
584                         tag.save()
585                     themes.append(tag)
586                 if not themes:
587                     continue
588
589                 text = fragment.to_string()
590                 short_text = ''
591                 if (len(MarkupString(text)) > 240):
592                     short_text = unicode(MarkupString(text)[:160])
593                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
594                     text=text, short_text=short_text)
595
596                 new_fragment.save()
597                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
598             self.save()
599             self.html_built.send(sender=self)
600             return True
601         return False
602
603     @staticmethod
604     def zip_epub():
605         books = Book.objects.all()
606
607         paths = filter(lambda x: x is not None,
608                        map(lambda b: b.epub_file and b.epub_file.path or None, books))
609         if settings.USE_CELERY:
610             result = create_zip_task.delay(paths, settings.ALL_EPUB_ZIP)
611             return result.wait()
612         else:
613             result = create_zip_task(paths, settings.ALL_EPUB_ZIP)
614             return result
615
616     @staticmethod
617     def zip_pdf():
618         books = Book.objects.all()
619
620         paths = filter(lambda x: x is not None,
621                        map(lambda b: b.pdf_file and b.pdf_file.path or None, books))
622         if settings.USE_CELERY:
623             result = create_zip_task.delay(paths, settings.ALL_PDF_ZIP)
624             return result.wait()
625         else:
626             result = create_zip_task(paths, settings.ALL_PDF_ZIP)
627             return result
628
629     @staticmethod
630     def zip_mobi():
631         books = Book.objects.all()
632
633         paths = filter(lambda x: x is not None,
634                        map(lambda b: b.mobi_file and b.mobi_file.path or None, books))
635         result = create_zip_task.delay(paths, settings.ALL_MOBI_ZIP)
636         return settings.MEDIA_URL + result.wait()
637
638     def zip_audiobooks(self):
639         bm = BookMedia.objects.filter(book=self)
640         paths = map(lambda bm: bm.file.path, bm)
641         if settings.USE_CELERY:
642             result = create_zip_task.delay(paths, self.slug)
643             return result.wait()
644         else:
645             result = create_zip_task(paths, self.slug)
646             return result
647
648     @classmethod
649     def from_xml_file(cls, xml_file, **kwargs):
650         # use librarian to parse meta-data
651         book_info = dcparser.parse(xml_file)
652
653         if not isinstance(xml_file, File):
654             xml_file = File(open(xml_file))
655
656         try:
657             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
658         finally:
659             xml_file.close()
660
661     @classmethod
662     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
663             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
664         import re
665
666         # check for parts before we do anything
667         children = []
668         if hasattr(book_info, 'parts'):
669             for part_url in book_info.parts:
670                 base, slug = part_url.rsplit('/', 1)
671                 try:
672                     children.append(Book.objects.get(slug=slug))
673                 except Book.DoesNotExist, e:
674                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
675
676
677         # Read book metadata
678         book_base, book_slug = book_info.url.rsplit('/', 1)
679         if re.search(r'[^a-zA-Z0-9-]', book_slug):
680             raise ValueError('Invalid characters in slug')
681         book, created = Book.objects.get_or_create(slug=book_slug)
682
683         if created:
684             book_shelves = []
685         else:
686             if not overwrite:
687                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
688             # Save shelves for this book
689             book_shelves = list(book.tags.filter(category='set'))
690
691         book.title = book_info.title
692         book.set_extra_info_value(book_info.to_dict())
693         book.save()
694
695         meta_tags = []
696         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
697         for field_name, category in categories:
698             try:
699                 tag_names = getattr(book_info, field_name)
700             except:
701                 tag_names = [getattr(book_info, category)]
702             for tag_name in tag_names:
703                 tag_sort_key = tag_name
704                 if category == 'author':
705                     tag_sort_key = tag_name.last_name
706                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
707                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
708                 if created:
709                     tag.name = tag_name
710                     tag.sort_key = sortify(tag_sort_key.lower())
711                     tag.save()
712                 meta_tags.append(tag)
713
714         book.tags = set(meta_tags + book_shelves)
715
716         book_tag = book.book_tag()
717
718         for n, child_book in enumerate(children):
719             child_book.parent = book
720             child_book.parent_number = n
721             child_book.save()
722
723         # Save XML and HTML files
724         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
725
726         # delete old fragments when overwriting
727         book.fragments.all().delete()
728
729         if book.build_html():
730             if not settings.NO_BUILD_TXT and build_txt:
731                 book.build_txt()
732
733         if not settings.NO_BUILD_EPUB and build_epub:
734             book.root_ancestor.build_epub()
735
736         if not settings.NO_BUILD_PDF and build_pdf:
737             book.root_ancestor.build_pdf()
738
739         if not settings.NO_BUILD_MOBI and build_mobi:
740             book.build_mobi()
741
742         book_descendants = list(book.children.all())
743         # add l-tag to descendants and their fragments
744         # delete unnecessary EPUB files
745         while len(book_descendants) > 0:
746             child_book = book_descendants.pop(0)
747             child_book.tags = list(child_book.tags) + [book_tag]
748             child_book.save()
749             for fragment in child_book.fragments.all():
750                 fragment.tags = set(list(fragment.tags) + [book_tag])
751             book_descendants += list(child_book.children.all())
752
753         book.save()
754
755         # refresh cache
756         book.reset_tag_counter()
757         book.reset_theme_counter()
758
759         return book
760
761     def reset_tag_counter(self):
762         if self.id is None:
763             return
764
765         cache_key = "Book.tag_counter/%d" % self.id
766         cache.delete(cache_key)
767         if self.parent:
768             self.parent.reset_tag_counter()
769
770     @property
771     def tag_counter(self):
772         if self.id:
773             cache_key = "Book.tag_counter/%d" % self.id
774             tags = cache.get(cache_key)
775         else:
776             tags = None
777
778         if tags is None:
779             tags = {}
780             for child in self.children.all().order_by():
781                 for tag_pk, value in child.tag_counter.iteritems():
782                     tags[tag_pk] = tags.get(tag_pk, 0) + value
783             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
784                 tags[tag.pk] = 1
785
786             if self.id:
787                 cache.set(cache_key, tags, CACHE_FOREVER)
788         return tags
789
790     def reset_theme_counter(self):
791         if self.id is None:
792             return
793
794         cache_key = "Book.theme_counter/%d" % self.id
795         cache.delete(cache_key)
796         if self.parent:
797             self.parent.reset_theme_counter()
798
799     @property
800     def theme_counter(self):
801         if self.id:
802             cache_key = "Book.theme_counter/%d" % self.id
803             tags = cache.get(cache_key)
804         else:
805             tags = None
806
807         if tags is None:
808             tags = {}
809             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
810                 for tag in fragment.tags.filter(category='theme').order_by():
811                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
812
813             if self.id:
814                 cache.set(cache_key, tags, CACHE_FOREVER)
815         return tags
816
817     def pretty_title(self, html_links=False):
818         book = self
819         names = list(book.tags.filter(category='author'))
820
821         books = []
822         while book:
823             books.append(book)
824             book = book.parent
825         names.extend(reversed(books))
826
827         if html_links:
828             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
829         else:
830             names = [tag.name for tag in names]
831
832         return ', '.join(names)
833
834     @classmethod
835     def tagged_top_level(cls, tags):
836         """ Returns top-level books tagged with `tags'.
837
838         It only returns those books which don't have ancestors which are
839         also tagged with those tags.
840
841         """
842         # get relevant books and their tags
843         objects = cls.tagged.with_all(tags)
844         # eliminate descendants
845         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
846         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
847         if descendants_keys:
848             objects = objects.exclude(pk__in=descendants_keys)
849
850         return objects
851
852
853 def _has_factory(ftype):
854     has = lambda self: bool(getattr(self, "%s_file" % ftype))
855     has.short_description = t.upper()
856     has.boolean = True
857     has.__name__ = "has_%s_file" % ftype
858     return has
859
860     
# add the file fields
# For every supported file type, attach a "<type>_file" FileField to Book
# together with a matching "has_<type>_file" predicate.
for t in Book.file_types:
    file_field = models.FileField(
        _("%s file" % t.upper()),
        upload_to=book_upload_path(t),
        blank=True,
    )
    file_field.contribute_to_class(Book, "%s_file" % t)

    setattr(Book, "has_%s_file" % t, _has_factory(t))
869
870
class Fragment(models.Model):
    """A piece of a book's text, located within the book by an anchor."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text with this fragment's anchor appended."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every configured language."""
        if self.id is None:
            return

        for lang_code, _lang_name in settings.LANGUAGES:
            cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """Return the short HTML of this fragment, marked safe.

        For saved fragments the rendered template is cached per active
        language; unsaved fragments are rendered on every call.
        """
        cache_key = None
        cached = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
        if cached is not None:
            return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
912
913
class FileRecord(models.Model):
    """A record of a file: its slug, type, SHA-1 hash and creation time."""

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    # filled in automatically when the record is first created
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # newest records first
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
927
928 ###########
929 #
930 # SIGNALS
931 #
932 ###########
933
934
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate tag counters after the tags of an object have changed.

    Every affected tag gets its global ``book_count`` reset and its
    ``changed_at`` touched.  Then, depending on the sender:

    * a Book whose non-book/theme/set tags changed has its tag counter reset;
    * a Fragment whose theme tags changed has its book's theme counter reset.
    """
    # Materialize the pks once: `affected_tags` may be a one-shot iterable
    # and is needed for several queries below.
    affected_pks = [tag.pk for tag in affected_tags]
    touched = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
951
952
def _pre_delete_handler(sender, instance, **kwargs):
    """ refresh Book on BookMedia delete """
    # The handler is connected for every model; only BookMedia is of interest.
    if sender != BookMedia:
        return
    media_book = instance.book
    media_book.save()
pre_delete.connect(_pre_delete_handler)
958
def _post_save_handler(sender, instance, **kwargs):
    """ refresh all the short_html stuff on BookMedia update """
    # The handler is connected for every model; only BookMedia is of interest.
    if sender != BookMedia:
        return
    media_book = instance.book
    media_book.save()
post_save.connect(_post_save_handler)