counter fixes
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32
33
# Categories a Tag may belong to, as (stored value, translated label) pairs.
# Used as the `choices` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# File formats a BookMedia may have, as (stored value, translated label) pairs.
# Used as the `choices` of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Timeout (in seconds) passed to cache.set() for entries that are invalidated
# explicitly instead of being left to expire.
# not quite, but Django wants you to set a timeout
CACHE_FOREVER = 2419200  # 28 days
53
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain tags of one fixed category."""

    def __init__(self, subcategory):
        """Remember `subcategory`, the Tag.category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to `self.subcategory`."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
61
62
class Tag(TagBase):
    """A catalogue tag: author, epoch, kind, genre, theme, shelf ('set') or
    a per-book tag ('book').

    Slugs are unique only within a category (see Meta.unique_together), so
    lookups by slug alone may match several tags.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although auto_now tracks
    # the last modification; left untouched because it is a translation msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a URL used unqualified tag slugs."""
        pass

    # URL path segment -> category name.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category name -> URL path segment (there is no entry for 'book').
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments for the tag's object list."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The result is computed once, stored in `book_count` and saved with
        the tag; later calls return the stored value.
        """

        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants
                    # Slugs are unique only per category, so restrict the
                    # lookup to book tags (as Book.tagged_top_level does);
                    # otherwise a tag of another category that happens to
                    # share an 'l-...' slug would be treated as a book tag.
                    l_tags = Tag.objects.filter(category='book',
                        slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        A string is treated as a '/'-separated URL path. A segment found in
        `categories_rev` names the category of the slug in the next segment;
        any other segment is looked up as a bare slug among non-book tags.
        Anything that is not a string is delegated to TagBase.get_tag_list.

        Raises:
            Tag.DoesNotExist: a slug matches no tag, or the path ends with a
                category segment that has no slug after it.
            Tag.MultipleObjectsReturned: a bare slug matches several tags;
                the exception carries `tags` (resolved so far) and
                `ambiguous_slugs`.
            Tag.UrlDeprecationWarning: every segment was resolved but at
                least one bare slug was used; carries `tags`.
        """
        if isinstance(tags, basestring):
            real_tags = []
            ambiguous_slugs = []
            category = None
            deprecated = False
            tags_splitted = tags.split('/')
            for name in tags_splitted:
                if category:
                    # previous segment was a category: this one is its slug
                    real_tags.append(Tag.objects.get(slug=name, category=category))
                    category = None
                elif name in Tag.categories_rev:
                    category = Tag.categories_rev[name]
                else:
                    try:
                        real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                        deprecated = True
                    except Tag.MultipleObjectsReturned:
                        ambiguous_slugs.append(name)

            if category:
                # something strange left off
                raise Tag.DoesNotExist()
            if ambiguous_slugs:
                # some tags should be qualified
                e = Tag.MultipleObjectsReturned()
                e.tags = real_tags
                e.ambiguous_slugs = ambiguous_slugs
                raise e
            if deprecated:
                e = Tag.UrlDeprecationWarning()
                e.tags = real_tags
                raise e
            return real_tags
        else:
            return TagBase.get_tag_list(tags)

    @property
    def url_chunk(self):
        """Return '<category segment>/<slug>' as used in tag URLs.

        Raises KeyError for categories missing from `categories_dict`
        (i.e. 'book').
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
176
177
178 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable storing files as 'book/<ext>/<name>.<ext>'.

    With `ext` left empty the extension is taken from the instance's `type`
    ('daisy' becomes 'daisy.zip'). The file name is the slughified instance
    name, or the slughified part of `filename` before the first dot when the
    instance has no name, cut so that the path stays within `maxlen`.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = media.type
            if ext == 'daisy':
                ext = 'daisy.zip'

        if media.name:
            raw_name = media.name
        else:
            raw_name = filename.split(".")[0]
        name = slughifi(raw_name)

        # room taken by the fixed parts of the path, plus a 4 character margin
        reserved = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
    return get_dynamic_path
193
194
class BookMedia(models.Model):
    """An additional file (ODT, MP3, OGG, DAISY) attached to a book."""
    # max_length must be an integer; it was previously passed as a string.
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    # Metadata read from the audio file by read_meta(), stored as JSON.
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    # Value returned by read_source_sha1() for the stored file.
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, renaming its file and refreshing its metadata.

        The object is saved twice: once so that the file is stored, then
        again after `extra_info` and `source_sha1` were read from that file.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)
        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project'
            and 'funded_by' for MP3 and OGG files (empty strings when the
            file cannot be read), and an empty dict for any other type.
        """

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: unreadable or untagged files yield empty values
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort: unreadable or untagged files yield empty values
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None for types other than 'mp3' and 'ogg', and when the
            value is missing or the file cannot be read.
        """

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
281
282
283 class Book(models.Model):
    # Core catalogue data. sort_key is recomputed from the title in save().
    title         = models.CharField(_('title'), max_length=120)
    sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although auto_now tracks
    # the last modification - confirm whether a separate label was intended.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's parts (set from the order of `parts` in
    # from_text_and_meta).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    # Book this one is a part of, if any; parts are reachable as `children`.
    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() with the book as sender once the HTML is rebuilt.
    html_built = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug is taken and
        # overwriting was not requested.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        """Return the book's title."""
        return self.title
318
319     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
320         self.sort_key = sortify(self.title)
321
322         ret = super(Book, self).save(force_insert, force_update)
323
324         if reset_short_html:
325             self.reset_short_html()
326
327         return ret
328
    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments of the book's detail page."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of `title`."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of this book's 'book' tag: 'l-' + slug, cut to 120 characters."""
        return ('l-' + self.slug)[:120]
339
340     def book_tag(self):
341         slug = self.book_tag_slug()
342         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
343         if created:
344             book_tag.name = self.title[:50]
345             book_tag.sort_key = self.title.lower()
346             book_tag.save()
347         return book_tag
348
349     def has_media(self, type):
350         if   type == 'xml':
351             if self.xml_file:
352                 return True
353             else:
354                 return False
355         elif type == 'html':
356             if self.html_file:
357                 return True
358             else:
359                 return False        
360         elif type == 'txt':
361             if self.txt_file:
362                 return True
363             else:
364                 return False        
365         elif type == 'pdf':
366             if self.pdf_file:
367                 return True
368             else:
369                 return False  
370         elif type == 'epub':
371             if self.epub_file:
372                 return True
373             else:
374                 return False                          
375         else:
376             if self.media.filter(type=type).exists():
377                 return True
378             else:
379                 return False
380
381     def get_media(self, type):
382         if self.has_media(type):
383             if   type == "xml":
384                 return self.xml_file
385             elif type == "html":
386                 return self.html_file
387             elif type == "epub":
388                 return self.epub_file
389             elif type == "txt":
390                 return self.txt_file
391             elif type == "pdf":
392                 return self.pdf_file
393             else:                                             
394                 return self.media.filter(type=type)
395         else:
396             return None
397
    # Shortcuts for get_media(); each returns a queryset of matching
    # BookMedia objects, or None when the book has no media of that type.
    def get_mp3(self):
        """Return the book's MP3 media (see get_media)."""
        return self.get_media("mp3")
    def get_odt(self):
        """Return the book's ODT media (see get_media)."""
        return self.get_media("odt")
    def get_ogg(self):
        """Return the book's OGG media (see get_media)."""
        return self.get_media("ogg")
    def get_daisy(self):
        """Return the book's DAISY media (see get_media)."""
        return self.get_media("daisy")
406
407     def reset_short_html(self):
408         if self.id is None:
409             return
410
411         cache_key = "Book.short_html/%d/%s"
412         for lang, langname in settings.LANGUAGES:
413             cache.delete(cache_key % (self.id, lang))
414         # Fragment.short_html relies on book's tags, so reset it here too
415         for fragm in self.fragments.all():
416             fragm.reset_short_html()
417
418     def short_html(self):
419         if self.id:
420             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
421             short_html = cache.get(cache_key)
422         else:
423             short_html = None
424
425         if short_html is not None:
426             return mark_safe(short_html)
427         else:
428             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
429             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
430
431             formats = []
432             # files generated during publication               
433             if self.has_media("html"):
434                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
435             if self.has_media("pdf"):
436                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
437             if self.root_ancestor.has_media("epub"):
438                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
439             if self.has_media("txt"):
440                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
441             # other files
442             for m in self.media.order_by('type'):
443                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
444
445             formats = [mark_safe(format) for format in formats]
446
447             short_html = unicode(render_to_string('catalogue/book_short.html',
448                 {'book': self, 'tags': tags, 'formats': formats}))
449
450             if self.id:
451                 cache.set(cache_key, short_html, CACHE_FOREVER)
452             return mark_safe(short_html)
453
454     @property
455     def root_ancestor(self):
456         """ returns the oldest ancestor """
457
458         if not hasattr(self, '_root_ancestor'):
459             book = self
460             while book.parent:
461                 book = book.parent
462             self._root_ancestor = book
463         return self._root_ancestor
464
465
466     def has_description(self):
467         return len(self.description) > 0
468     has_description.short_description = _('description')
469     has_description.boolean = True
470
471     # ugly ugly ugly
472     def has_pdf_file(self):
473         return bool(self.pdf_file)
474     has_pdf_file.short_description = 'PDF'
475     has_pdf_file.boolean = True
476
477     def has_epub_file(self):
478         return bool(self.epub_file)
479     has_epub_file.short_description = 'EPUB'
480     has_epub_file.boolean = True
481
482     def has_txt_file(self):
483         return bool(self.txt_file)
484     has_txt_file.short_description = 'HTML'
485     has_txt_file.boolean = True
486
487     def has_html_file(self):
488         return bool(self.html_file)
489     has_html_file.short_description = 'HTML'
490     has_html_file.boolean = True
491
492     def has_odt_file(self):
493         return bool(self.has_media("odt"))
494     has_odt_file.short_description = 'ODT'
495     has_odt_file.boolean = True
496
497     def has_mp3_file(self):
498         return bool(self.has_media("mp3"))
499     has_mp3_file.short_description = 'MP3'
500     has_mp3_file.boolean = True
501
502     def has_ogg_file(self):
503         return bool(self.has_media("ogg"))
504     has_ogg_file.short_description = 'OGG'
505     has_ogg_file.boolean = True
506     
507     def has_daisy_file(self):
508         return bool(self.has_media("daisy"))
509     has_daisy_file.short_description = 'DAISY'
510     has_daisy_file.boolean = True    
511     
512     def build_epub(self, remove_descendants=True):
513         """ (Re)builds the epub file.
514             If book has a parent, does nothing.
515             Unless remove_descendants is False, descendants' epubs are removed.
516         """
517     
518         from StringIO import StringIO
519         from hashlib import sha1
520         from django.core.files.base import ContentFile
521         from librarian import DocProvider
522
523         class BookImportDocProvider(DocProvider):
524             """ used for joined EPUBs """
525
526             def __init__(self, book):
527                 self.book = book
528
529             def by_slug(self, slug):
530                 if slug == self.book.slug:
531                     return self.book.xml_file
532                 else:
533                     return Book.objects.get(slug=slug).xml_file
534
535         if self.parent:
536             # don't need an epub
537             return
538
539         epub_file = StringIO()
540         try:
541             epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
542             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
543             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
544         except NoDublinCore:
545             pass
546
547         book_descendants = list(self.children.all())
548         while len(book_descendants) > 0:
549             child_book = book_descendants.pop(0)
550             if remove_descendants and child_book.has_epub_file():
551                 child_book.epub_file.delete()
552             # save anyway, to refresh short_html
553             child_book.save()
554             book_descendants += list(child_book.children.all())
555
556     def build_txt(self):
557         from StringIO import StringIO
558         from django.core.files.base import ContentFile
559         from librarian import text
560
561         out = StringIO()
562         text.transform(open(self.xml_file.path), out)
563         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
564
565
566     def build_html(self):
567         from tempfile import NamedTemporaryFile
568         from markupstring import MarkupString
569
570         meta_tags = list(self.tags.filter(
571             category__in=('author', 'epoch', 'genre', 'kind')))
572         book_tag = self.book_tag()
573
574         html_file = NamedTemporaryFile()
575         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
576             self.html_file.save('%s.html' % self.slug, File(html_file))
577
578             # get ancestor l-tags for adding to new fragments
579             ancestor_tags = []
580             p = self.parent
581             while p:
582                 ancestor_tags.append(p.book_tag())
583                 p = p.parent
584
585             # Delete old fragments and create them from scratch
586             self.fragments.all().delete()
587             # Extract fragments
588             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
589             for fragment in closed_fragments.values():
590                 try:
591                     theme_names = [s.strip() for s in fragment.themes.split(',')]
592                 except AttributeError:
593                     continue
594                 themes = []
595                 for theme_name in theme_names:
596                     if not theme_name:
597                         continue
598                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
599                     if created:
600                         tag.name = theme_name
601                         tag.sort_key = theme_name.lower()
602                         tag.save()
603                     themes.append(tag)
604                 if not themes:
605                     continue
606
607                 text = fragment.to_string()
608                 short_text = ''
609                 if (len(MarkupString(text)) > 240):
610                     short_text = unicode(MarkupString(text)[:160])
611                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
612                     text=text, short_text=short_text)
613
614                 new_fragment.save()
615                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
616             self.save()
617             self.html_built.send(sender=self)
618             return True
619         return False
620
621
622     @classmethod
623     def from_xml_file(cls, xml_file, **kwargs):
624         # use librarian to parse meta-data
625         book_info = dcparser.parse(xml_file)
626
627         if not isinstance(xml_file, File):
628             xml_file = File(open(xml_file))
629
630         try:
631             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
632         finally:
633             xml_file.close()
634
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
        """Create or update a book from its source file and parsed metadata.

        Args:
            raw_file: file saved as the book's XML file.
            book_info: metadata object; the code reads its `url`, `title`,
                `to_dict()`, the optional `parts`, and the kind/genre/
                author/epoch attributes (plural or singular form).
            overwrite: allow replacing an existing book with the same slug.
            build_epub: build the EPUB of the root ancestor, unless
                settings.NO_BUILD_EPUB is set.
            build_txt: build the TXT file after a successful HTML build,
                unless settings.NO_BUILD_TXT is set.

        Returns the saved Book.

        Raises:
            Book.DoesNotExist: a book listed in `parts` is missing.
            ValueError: the slug contains characters outside [a-zA-Z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        import re

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                # the slug is the last path segment of the part's URL
                base, slug = part_url.rsplit('/', 1)
                try:
                    children.append(Book.objects.get(slug=slug))
                except Book.DoesNotExist, e:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)


        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        if re.search(r'[^a-zA-Z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book.save()

        meta_tags = []
        # (plural attribute, tag category) pairs
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            # NOTE(review): bare except - any failure of the plural lookup,
            # not only a missing attribute, falls back to the singular one.
            except:
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # authors are objects with first_names / last_name;
                    # they are sorted by last name
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = sortify(tag_sort_key.lower())
                    tag.save()
                meta_tags.append(tag)

        # replace the book's tags, keeping the shelves it was on
        book.tags = set(meta_tags + book_shelves)

        book_tag = book.book_tag()

        # attach the parts, numbered in the order they were listed
        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        if book.build_html():
            if not settings.NO_BUILD_TXT and build_txt:
                book.build_txt()

        if not settings.NO_BUILD_EPUB and build_epub:
            book.root_ancestor.build_epub()

        book_descendants = list(book.children.all())
        # add l-tag to descendants and their fragments
        # (descendants' EPUB files are not touched by this loop)
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        book.save()

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        return book
727
728     def reset_tag_counter(self):
729         if self.id is None:
730             return
731
732         cache_key = "Book.tag_counter/%d" % self.id
733         cache.delete(cache_key)
734         if self.parent:
735             self.parent.reset_tag_counter()
736
737     @property
738     def tag_counter(self):
739         if self.id:
740             cache_key = "Book.tag_counter/%d" % self.id
741             tags = cache.get(cache_key)
742         else:
743             tags = None
744
745         if tags is None:
746             tags = {}
747             for child in self.children.all().order_by():
748                 for tag_pk, value in child.tag_counter.iteritems():
749                     tags[tag_pk] = tags.get(tag_pk, 0) + value
750             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
751                 tags[tag.pk] = 1
752
753             if self.id:
754                 cache.set(cache_key, tags, CACHE_FOREVER)
755         return tags
756
757     def reset_theme_counter(self):
758         if self.id is None:
759             return
760
761         cache_key = "Book.theme_counter/%d" % self.id
762         cache.delete(cache_key)
763         if self.parent:
764             self.parent.reset_theme_counter()
765
766     @property
767     def theme_counter(self):
768         if self.id:
769             cache_key = "Book.theme_counter/%d" % self.id
770             tags = cache.get(cache_key)
771         else:
772             tags = None
773
774         if tags is None:
775             tags = {}
776             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
777                 for tag in fragment.tags.filter(category='theme').order_by():
778                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
779
780             if self.id:
781                 cache.set(cache_key, tags, CACHE_FOREVER)
782         return tags
783
784     def pretty_title(self, html_links=False):
785         book = self
786         names = list(book.tags.filter(category='author'))
787
788         books = []
789         while book:
790             books.append(book)
791             book = book.parent
792         names.extend(reversed(books))
793
794         if html_links:
795             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
796         else:
797             names = [tag.name for tag in names]
798
799         return ', '.join(names)
800
801     @classmethod
802     def tagged_top_level(cls, tags):
803         """ Returns top-level books tagged with `tags'.
804
805         It only returns those books which don't have ancestors which are
806         also tagged with those tags.
807
808         """
809         # get relevant books and their tags
810         objects = cls.tagged.with_all(tags)
811         # eliminate descendants
812         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
813         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
814         if descendants_keys:
815             objects = objects.exclude(pk__in=descendants_keys)
816
817         return objects
818
819
class Fragment(models.Model):
    """ A taggable piece of text belonging to a Book, located by an anchor. """
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """ Returns the book's 'book_text' URL with '#m<anchor>' appended. """
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """ Deletes the cached short HTML for every language in settings.LANGUAGES. """
        if self.id is not None:
            for lang_code, _lang_name in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """ Returns the rendered 'catalogue/fragment_short.html' marked as safe.

        For a fragment with an id the rendered text is cached per active
        language; a fragment without an id is rendered on every call.

        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        context = {'fragment': self}
        rendered = unicode(render_to_string('catalogue/fragment_short.html', context))
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
861
862
class FileRecord(models.Model):
    """ A timestamped record of a file's SHA-1 hash, keyed by slug and type. """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    # filled in automatically when the record is first created
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # newest records first
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """ Returns '<sha1> <slug>.<type>'. """
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
876
877 ###########
878 #
879 # SIGNALS
880 #
881 ###########
882
883
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """ Invalidates tag counters after an object's tags have changed.

    Connected to the `tags_updated' signal from newtagging.

    `sender' is the object whose tags changed; only Book and Fragment
    senders trigger a per-book counter reset.  `affected_tags' is an
    iterable of the tags that were touched.

    """
    # Collect the pks once: affected_tags may be a one-shot iterable, and
    # every query below needs the same set of keys.
    tag_pks = [tag.pk for tag in affected_tags]
    if not tag_pks:
        # nothing was touched, so there is nothing to invalidate
        return

    touched = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
900
901
def _pre_delete_handler(sender, instance, **kwargs):
    """ refresh Book on BookMedia delete

    The handler is connected for every sender and filters on BookMedia
    itself.  It runs on pre_delete, i.e. before the media row is removed.

    """
    if not sender == BookMedia:
        return
    # re-save the owning book
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
907
def _post_save_handler(sender, instance, **kwargs):
    """ refresh all the short_html stuff on BookMedia update

    The handler is connected for every sender and filters on BookMedia
    itself; the refresh is done by re-saving the media's book.

    """
    if not sender == BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)