books, tags, fragments api
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.core.cache import cache
10 from django.utils.translation import ugettext_lazy as _
11 from django.contrib.auth.models import User
12 from django.core.files import File
13 from django.template.loader import render_to_string
14 from django.utils.safestring import mark_safe
15 from django.utils.translation import get_language
16 from django.core.urlresolvers import reverse
17 from django.db.models.signals import post_save, m2m_changed, pre_delete
18
19 from django.conf import settings
20
21 from newtagging.models import TagBase, tags_updated
22 from newtagging import managers
23 from catalogue.fields import JSONField, OverwritingFileField
24 from catalogue.utils import ExistingFile
25
26 from librarian import dcparser, html, epub, NoDublinCore
27 import mutagen
28 from mutagen import id3
29 from slughifi import slughifi
30 from sortify import sortify
31
32
# Allowed values of Tag.category, as (stored value, translatable label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Allowed values of BookMedia.type, as (stored value, translatable label) pairs.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')), 
)

# Timeout (in seconds) passed to cache.set() for entries that are only ever
# invalidated explicitly.
# not quite forever, but Django wants you to set a timeout
CACHE_FOREVER = 2419200  # 28 days
52
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are restricted to a single tag category."""

    def __init__(self, subcategory):
        """Remember `subcategory`, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by the stored category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
60
61
class Tag(TagBase):
    """A catalogue tag.

    Every tag belongs to one category from TAG_CATEGORIES; the pair
    (slug, category) is unique.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # The label used to be a copy of the created_at one.
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without its category prefix."""
        pass

    # URL path prefix -> tag category.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Tag category -> URL path prefix (inverse of categories_rev).
    categories_dict = dict((category, prefix) for prefix, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed once, stored in `book_count` (saving the tag)
        and returned from there until `book_count` is reset to None.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants: books carrying the book tag of another
                # matching book; restricted to the 'book' category, the same
                # way Book.tagged_top_level() does it
                l_tags = Tag.objects.filter(category='book',
                    slug__in=[book.book_tag_slug() for book in objects])
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)
        self.book_count = objects.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` to a list of Tag objects.

        A string is treated as a '/'-separated path in which a slug may be
        preceded by a category prefix from `categories_rev`
        (e.g. ``autor/<slug>/epoka/<slug>``). Anything else is handed over to
        TagBase.get_tag_list().

        Raises:
            Tag.DoesNotExist: a tag was not found, or the path ends with a
                category prefix that has no slug after it.
            Tag.MultipleObjectsReturned: some unqualified slugs match more
                than one tag; the exception carries `tags` (those resolved so
                far) and `ambiguous_slugs`.
            Tag.UrlDeprecationWarning: everything was resolved, but at least
                one slug was unqualified; the exception carries `tags`.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                # previous chunk was a category prefix, this one is the slug
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                # unqualified slug: look it up in every category except 'book'
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                    deprecated = True
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        if deprecated:
            e = Tag.UrlDeprecationWarning()
            e.tags = real_tags
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """The ``<category prefix>/<slug>`` path chunk of this tag."""
        return '/'.join((Tag.categories_dict[self.category], self.slug))
175
176
# TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Return an `upload_to` callable producing ``book/<ext>/<name>.<ext>`` paths.

    When `ext` is not given, the returned callable derives it from
    ``media.type`` ('daisy' becomes 'daisy.zip'). The name part is the
    slughified ``media.name``, or the slughified part of `filename` before
    its first dot when the name is empty; it is cut so that the path fits
    `maxlen` with 4 characters to spare.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        raw_name = media.name or filename.split(".")[0]
        name = slughifi(raw_name)
        fixed_part = 'book/%s/.%s' % (ext, ext)
        name_limit = maxlen - len(fixed_part) - 4
        return 'book/%s/%s.%s' % (ext, name[:name_limit], ext)
    return get_dynamic_path
192
193
class BookMedia(models.Model):
    """An additional media file (one of MEDIA_FORMATS) attached to a book."""
    # max_length has to be an integer, not the string "100"
    type = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name = models.CharField(_('name'), max_length=100)
    file = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering = ('type', 'name')
        verbose_name = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media and refresh the metadata read from its file.

        The row is saved twice: once before the file is inspected and once
        more after `extra_info` and `source_sha1` have been filled in.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        # NOTE(review): presumably this first save is what makes
        # self.file.path readable for the calls below -- confirm.
        super(BookMedia, self).save(*args, **kwargs)
        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project'
            and 'funded_by' for mp3 and ogg files (empty strings for whatever
            could not be read) and an empty dict for any other type.
        """

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner == 'wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner == 'wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: keep whatever has been read so far
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort: keep whatever has been read so far
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None for types other than mp3 and ogg, and whenever
            the value is missing or the file cannot be read.
        """

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner == 'wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
280
281
class Book(models.Model):
    """A book in the catalogue, together with the files generated for it.

    Books form a tree through `parent` / `children`. Tags are assigned
    through the `tags` descriptor and queried through the `tagged` manager.
    """
    title = models.CharField(_('title'), max_length=120)
    sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # The label used to be a copy of the created_at one.
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'), default='{}')
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Formats kept in the book's own `<format>_file` fields; every other
    # media type is looked up in the related BookMedia rows (`self.media`).
    _GENERATED_FORMATS = ('xml', 'html', 'txt', 'pdf', 'epub')

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
        """Save the book, recomputing `sort_key` from the title.

        Unless `reset_short_html` is false, cached short HTML of the book and
        of its fragments is dropped afterwards. Extra keyword arguments are
        forwarded to Model.save() (they used to be silently discarded).
        """
        self.sort_key = sortify(self.title)

        ret = super(Book, self).save(force_insert, force_update, **kwargs)

        if reset_short_html:
            self.reset_short_html()

        return ret

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of `title`, so books can be used where a `.name` is expected."""
        return self.title

    def book_tag_slug(self):
        """Slug of the book's own 'book'-category tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """Return the book's own 'book'-category tag, creating it when missing."""
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = self.title.lower()
            book_tag.save()
        return book_tag

    def has_media(self, type):
        """Tell whether the book has a file of the given format.

        For formats from `_GENERATED_FORMATS` the book's own file field is
        checked; for anything else, the related BookMedia rows.
        """
        if type in self._GENERATED_FORMATS:
            return bool(getattr(self, '%s_file' % type))
        return self.media.filter(type=type).exists()

    def get_media(self, type):
        """Return the media of the given format, or None if there is none.

        The result is the file field value for formats from
        `_GENERATED_FORMATS` and a BookMedia queryset for any other format.
        """
        if not self.has_media(type):
            return None
        if type in self._GENERATED_FORMATS:
            return getattr(self, '%s_file' % type)
        return self.media.filter(type=type)

    def get_mp3(self):
        return self.get_media("mp3")

    def get_odt(self):
        return self.get_media("odt")

    def get_ogg(self):
        return self.get_media("ogg")

    def get_daisy(self):
        return self.get_media("daisy")

    def reset_short_html(self):
        """Drop cached short HTML of the book (all languages) and of its fragments."""
        if self.id is None:
            return

        cache_key = "Book.short_html/%d/%s"
        for lang, langname in settings.LANGUAGES:
            cache.delete(cache_key % (self.id, lang))
        # Fragment.short_html relies on book's tags, so reset it here too
        for fragm in self.fragments.all():
            fragm.reset_short_html()

    def short_html(self):
        """Return the rendered 'catalogue/book_short.html' snippet for the book.

        For saved books the result is cached per book and language.
        """
        if self.id:
            cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
            short_html = cache.get(cache_key)
        else:
            short_html = None

        if short_html is not None:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        # files generated during publication
        if self.has_media("html"):
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        if self.has_media("pdf"):
            formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
        # the EPUB link points at the root ancestor's file
        if self.root_ancestor.has_media("epub"):
            formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
        if self.has_media("txt"):
            formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
        # other files
        for m in self.media.order_by('type'):
            formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))

        formats = [mark_safe(link) for link in formats]

        short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))

        if self.id:
            cache.set(cache_key, short_html, CACHE_FOREVER)
        return mark_safe(short_html)

    @property
    def root_ancestor(self):
        """ returns the oldest ancestor (memoized on the instance) """

        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_txt_file(self):
        return bool(self.txt_file)
    # used to say 'HTML' (copy-paste from has_html_file)
    has_txt_file.short_description = 'TXT'
    has_txt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def has_odt_file(self):
        return bool(self.has_media("odt"))
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_mp3_file(self):
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.

            A NoDublinCore error from the transformation is ignored; every
            descendant is saved afterwards in either case.
        """

        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
            epub_data = epub_file.getvalue()
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_data))
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_data).hexdigest()).save()
        except NoDublinCore:
            pass

        # breadth-first walk over all descendants
        book_descendants = list(self.children.all())
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            book_descendants += list(child_book.children.all())

    def build_txt(self):
        """(Re)build the plain text file from the book's XML file."""
        from StringIO import StringIO
        from django.core.files.base import ContentFile
        from librarian import text

        out = StringIO()
        xml = open(self.xml_file.path)
        try:
            text.transform(xml, out)
        finally:
            # the source file used to be left open
            xml.close()
        self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))

    def build_html(self):
        """(Re)build the HTML file and the book's fragments from the XML file.

        Fragments without any theme are skipped; each created fragment is
        tagged with its themes, the book's author/epoch/genre/kind tags,
        the book's own tag and the own tags of all its ancestors.

        Returns True when the HTML was generated, False when html.transform()
        reported failure (in which case nothing else is touched).
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        html_file = NamedTemporaryFile()
        try:
            if not html.transform(self.xml_file.path, html_file, parse_dublincore=False):
                return False
            self.html_file.save('%s.html' % self.slug, File(html_file))
        finally:
            # the temporary file is not used past this point
            html_file.close()

        # get ancestor l-tags for adding to new fragments
        ancestor_tags = []
        p = self.parent
        while p:
            ancestor_tags.append(p.book_tag())
            p = p.parent

        # Delete old fragments and create them from scratch
        self.fragments.all().delete()
        # Extract fragments
        closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
        for fragment in closed_fragments.values():
            try:
                theme_names = [s.strip() for s in fragment.themes.split(',')]
            except AttributeError:
                continue
            themes = []
            for theme_name in theme_names:
                if not theme_name:
                    continue
                tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                if created:
                    tag.name = theme_name
                    tag.sort_key = theme_name.lower()
                    tag.save()
                themes.append(tag)
            if not themes:
                continue

            text = fragment.to_string()
            short_text = ''
            if len(MarkupString(text)) > 240:
                short_text = unicode(MarkupString(text)[:160])
            # create() already saves the row, no extra save() is needed
            new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                text=text, short_text=short_text)
            new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
        self.save()
        return True

    @classmethod
    def from_xml_file(cls, xml_file, **kwargs):
        """Import a book from an XML file given as a path or a Django File.

        Keyword arguments are passed on to from_text_and_meta(). The file is
        closed afterwards, also when a File object was passed in.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, **kwargs)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
        """Create or update a book from its XML file and parsed metadata.

        Args:
            raw_file: file object with the book's XML, stored as `xml_file`.
            book_info: parsed metadata; `url`, `title`, `to_dict()`, the tag
                fields and, optionally, `parts` are read from it.
            overwrite: allow updating a book that already exists.
            build_epub: build the EPUB, unless settings.NO_BUILD_EPUB is set.
            build_txt: build the TXT, unless settings.NO_BUILD_TXT is set.

        Returns:
            The saved Book.

        Raises:
            Book.DoesNotExist: a book listed in `book_info.parts` is missing.
            ValueError: the slug contains characters other than [a-zA-Z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        import re

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                _base, slug = part_url.rsplit('/', 1)
                try:
                    children.append(Book.objects.get(slug=slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)

        # Read book metadata
        _book_base, book_slug = book_info.url.rsplit('/', 1)
        if re.search(r'[^a-zA-Z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book.save()

        meta_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            # prefer the plural field; fall back to the singular one
            try:
                tag_names = getattr(book_info, field_name)
            except Exception:
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = sortify(tag_sort_key.lower())
                    tag.save()
                meta_tags.append(tag)

        book.tags = set(meta_tags + book_shelves)

        book_tag = book.book_tag()

        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        if book.build_html():
            if not settings.NO_BUILD_TXT and build_txt:
                book.build_txt()

        if not settings.NO_BUILD_EPUB and build_epub:
            book.root_ancestor.build_epub()

        book_descendants = list(book.children.all())
        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        book.save()

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        return book

    def reset_tag_counter(self):
        """Drop the cached tag counter of the book and of all its ancestors."""
        if self.id is None:
            return

        cache_key = "Book.tag_counter/%d" % self.id
        cache.delete(cache_key)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """Dict mapping tag pk to a count, for tags other than book/theme/set.

        Children's counters are summed up; each of the book's own tags is
        then set to 1. Cached for saved books.
        """
        if self.id:
            cache_key = "Book.tag_counter/%d" % self.id
            tags = cache.get(cache_key)
        else:
            tags = None

        if tags is None:
            tags = {}
            for child in self.children.all().order_by():
                for tag_pk, value in child.tag_counter.items():
                    tags[tag_pk] = tags.get(tag_pk, 0) + value
            for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
                tags[tag.pk] = 1

            if self.id:
                cache.set(cache_key, tags, CACHE_FOREVER)
        return tags

    def reset_theme_counter(self):
        """Drop the cached theme counter of the book and of all its ancestors."""
        if self.id is None:
            return

        cache_key = "Book.theme_counter/%d" % self.id
        cache.delete(cache_key)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """Dict mapping theme tag pk to the number of fragments carrying it.

        Counted over the fragments tagged with the book's own tag. Cached
        for saved books.
        """
        if self.id:
            cache_key = "Book.theme_counter/%d" % self.id
            tags = cache.get(cache_key)
        else:
            tags = None

        if tags is None:
            tags = {}
            for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
                for tag in fragment.tags.filter(category='theme').order_by():
                    tags[tag.pk] = tags.get(tag.pk, 0) + 1

            if self.id:
                cache.set(cache_key, tags, CACHE_FOREVER)
        return tags

    def pretty_title(self, html_links=False):
        """Return "author(s), oldest ancestor, ..., this book" joined with ', '.

        With `html_links`, every name is wrapped in a link to its object.
        """
        book = self
        names = list(book.tags.filter(category='author'))

        books = []
        while book:
            books.append(book)
            book = book.parent
        names.extend(reversed(books))

        if html_links:
            names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
        else:
            names = [tag.name for tag in names]

        return ', '.join(names)

    @classmethod
    def tagged_top_level(cls, tags):
        """ Returns top-level books tagged with `tags'.

        It only returns those books which don't have ancestors which are
        also tagged with those tags.

        """
        # get relevant books and their tags
        objects = cls.tagged.with_all(tags)
        # eliminate descendants
        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
        descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
        if descendants_keys:
            objects = objects.exclude(pk__in=descendants_keys)

        return objects
814
815
class Fragment(models.Model):
    """A fragment of a book's text, located by an anchor within the book."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """URL of the book's text page, pointing at this fragment's anchor."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def reset_short_html(self):
        """Drop the cached short HTML of the fragment in every language."""
        if self.id is not None:
            key_pattern = "Fragment.short_html/%d/%s"
            for lang_code, lang_name in settings.LANGUAGES:
                cache.delete(key_pattern % (self.id, lang_code))

    def short_html(self):
        """Return the rendered 'catalogue/fragment_short.html' snippet.

        For saved fragments the result is cached per fragment and language.
        """
        cached = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
        if cached is not None:
            return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if self.id:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
857
858
class FileRecord(models.Model):
    """Record of a file's SHA-1 hash, kept per slug and file type with a timestamp."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # newest records first
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
872
873 ###########
874 #
875 # SIGNALS
876 #
877 ###########
878
879
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of `sender` have changed.

    Every affected tag gets its `book_count` cleared. A Book sender has its
    tag counter reset when any affected tag is outside the book/theme/set
    categories; a Fragment sender has its book's theme counter reset when
    any affected tag is a theme.
    """
    def affected_qs():
        return Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags])

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    affected_qs().update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if affected_qs().exclude(category__in=('book', 'theme', 'set')).count():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected_qs().filter(category='theme').count():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
896
897
def _pre_delete_handler(sender, instance, **kwargs):
    """ refresh Book on BookMedia delete """
    # connected for every model, so other senders are ignored
    if not sender == BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
903
def _post_save_handler(sender, instance, **kwargs):
    """ refresh all the short_html stuff on BookMedia update """
    # connected for every model, so other senders are ignored
    if not sender == BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)