change insane default
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.core.cache import cache
10 from django.utils.translation import ugettext_lazy as _
11 from django.contrib.auth.models import User
12 from django.core.files import File
13 from django.template.loader import render_to_string
14 from django.utils.safestring import mark_safe
15 from django.utils.translation import get_language
16 from django.core.urlresolvers import reverse
17 from django.db.models.signals import post_save, m2m_changed, pre_delete
18
19 from django.conf import settings
20
21 from newtagging.models import TagBase, tags_updated
22 from newtagging import managers
23 from catalogue.fields import JSONField, OverwritingFileField
24 from catalogue.utils import ExistingFile
25
26 from librarian import dcparser, html, epub, NoDublinCore
27 import mutagen
28 from mutagen import id3
29 from slughifi import slughifi
30 from sortify import sortify
31
32
# Allowed values for Tag.category, as (stored value, translatable label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Allowed values for BookMedia.type, as (stored value, translatable label) pairs.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)
49
class TagSubcategoryManager(models.Manager):
    """Manager whose default queryset is limited to one tag category."""

    def __init__(self, subcategory):
        # Initialise the base manager first, then remember the category to filter by.
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the base queryset narrowed to ``category == self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
57
58
59 class Tag(TagBase):
60     name = models.CharField(_('name'), max_length=50, db_index=True)
61     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
62     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
63     category = models.CharField(_('category'), max_length=50, blank=False, null=False,
64         db_index=True, choices=TAG_CATEGORIES)
65     description = models.TextField(_('description'), blank=True)
66     main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
67
68     user = models.ForeignKey(User, blank=True, null=True)
69     book_count = models.IntegerField(_('book count'), blank=True, null=True)
70     gazeta_link = models.CharField(blank=True, max_length=240)
71     wiki_link = models.CharField(blank=True, max_length=240)
72
73     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
74     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
75
    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a tag path contains unqualified slugs.

        get_tag_list() attaches the resolved tags to the instance as ``tags``.
        """
        pass
78
79     categories_rev = {
80         'autor': 'author',
81         'epoka': 'epoch',
82         'rodzaj': 'kind',
83         'gatunek': 'genre',
84         'motyw': 'theme',
85         'polka': 'set',
86     }
87     categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))
88
    class Meta:
        # Default ordering by sort key; a slug may repeat across categories
        # but must be unique within one.
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)
94
    def __unicode__(self):
        """Return the tag's display name."""
        return self.name
97
    def __repr__(self):
        """Return a debugging representation that shows only the slug."""
        return "Tag(slug=%r)" % self.slug
100
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair for this tag's listing page.

        The single URL argument is ``url_chunk``.
        """
        return ('catalogue.views.tagged_object_list', [self.url_chunk])
104
    def has_description(self):
        """Return True when the description is non-empty."""
        return len(self.description) > 0
    # NOTE(review): short_description/boolean look like Django admin
    # list-display options -- confirm against the admin module.
    has_description.short_description = _('description')
    has_description.boolean = True
109
110     def get_count(self):
111         """ returns global book count for book tags, fragment count for themes """
112
113         if self.book_count is None:
114             if self.category == 'book':
115                 # never used
116                 objects = Book.objects.none()
117             elif self.category == 'theme':
118                 objects = Fragment.tagged.with_all((self,))
119             else:
120                 objects = Book.tagged.with_all((self,)).order_by()
121                 if self.category != 'set':
122                     # eliminate descendants
123                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
124                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
125                     if descendants_keys:
126                         objects = objects.exclude(pk__in=descendants_keys)
127             self.book_count = objects.count()
128             self.save()
129         return self.book_count
130
131     @staticmethod
132     def get_tag_list(tags):
133         if isinstance(tags, basestring):
134             real_tags = []
135             ambiguous_slugs = []
136             category = None
137             deprecated = False
138             tags_splitted = tags.split('/')
139             for name in tags_splitted:
140                 if category:
141                     real_tags.append(Tag.objects.get(slug=name, category=category))
142                     category = None
143                 elif name in Tag.categories_rev:
144                     category = Tag.categories_rev[name]
145                 else:
146                     try:
147                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
148                         deprecated = True 
149                     except Tag.MultipleObjectsReturned, e:
150                         ambiguous_slugs.append(name)
151
152             if category:
153                 # something strange left off
154                 raise Tag.DoesNotExist()
155             if ambiguous_slugs:
156                 # some tags should be qualified
157                 e = Tag.MultipleObjectsReturned()
158                 e.tags = real_tags
159                 e.ambiguous_slugs = ambiguous_slugs
160                 raise e
161             if deprecated:
162                 e = Tag.UrlDeprecationWarning()
163                 e.tags = real_tags
164                 raise e
165             return real_tags
166         else:
167             return TagBase.get_tag_list(tags)
168
    @property
    def url_chunk(self):
        """Return '<category url name>/<slug>' for this tag.

        The category name comes from ``categories_dict``, which has no entry
        for 'book', so this raises KeyError for book tags.
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
172
173
174 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an ``upload_to`` callable producing 'book/<ext>/<name>.<ext>'.

    When ``ext`` is not given it is taken from ``media.type`` ('daisy' maps
    to 'daisy.zip').  The name is the slughifi-ed ``media.name`` or, if that
    is empty, the part of the uploaded filename before the first dot.  It is
    cut so the whole path stays 4 characters under ``maxlen``.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        stem = slughifi(media.name or filename.split(".")[0])
        # room taken by everything except the name, plus a 4-character margin
        overhead = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, stem[:maxlen - overhead], ext)
    return get_dynamic_path
189
190
191 class BookMedia(models.Model):
192     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
193     name        = models.CharField(_('name'), max_length="100")
194     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
195     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
196     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
197     book = models.ForeignKey('Book', related_name='media')
198     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
199
    def __unicode__(self):
        """Return '<name> (<file base name>)'."""
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
202
    class Meta:
        # Default ordering: by media type, then by name.
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')
207
208     def save(self, *args, **kwargs):
209         try:
210             old = BookMedia.objects.get(pk=self.pk)
211         except BookMedia.DoesNotExist, e:
212             pass
213         else:
214             # if name changed, change the file name, too
215             if slughifi(self.name) != slughifi(old.name):
216                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
217
218         super(BookMedia, self).save(*args, **kwargs)
219         extra_info = self.get_extra_info_value()
220         extra_info.update(self.read_meta())
221         self.set_extra_info_value(extra_info)
222         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
223         return super(BookMedia, self).save(*args, **kwargs)
224
225     def read_meta(self):
226         """
227             Reads some metadata from the audiobook.
228         """
229
230         artist_name = director_name = project = funded_by = ''
231         if self.type == 'mp3':
232             try:
233                 audio = id3.ID3(self.file.path)
234                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
235                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
236                 project = ", ".join([t.data for t in audio.getall('PRIV') 
237                         if t.owner=='wolnelektury.pl?project'])
238                 funded_by = ", ".join([t.data for t in audio.getall('PRIV') 
239                         if t.owner=='wolnelektury.pl?funded_by'])
240             except:
241                 pass
242         elif self.type == 'ogg':
243             try:
244                 audio = mutagen.File(self.file.path)
245                 artist_name = ', '.join(audio.get('artist', []))
246                 director_name = ', '.join(audio.get('conductor', []))
247                 project = ", ".join(audio.get('project', []))
248                 funded_by = ", ".join(audio.get('funded_by', []))
249             except:
250                 pass
251         else:
252             return {}
253         return {'artist_name': artist_name, 'director_name': director_name,
254                 'project': project, 'funded_by': funded_by}
255
256     @staticmethod
257     def read_source_sha1(filepath, filetype):
258         """
259             Reads source file SHA1 from audiobok metadata.
260         """
261
262         if filetype == 'mp3':
263             try:
264                 audio = id3.ID3(filepath)
265                 return [t.data for t in audio.getall('PRIV') 
266                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
267             except:
268                 return None
269         elif filetype == 'ogg':
270             try:
271                 audio = mutagen.File(filepath)
272                 return audio.get('flac_sha1', [None])[0] 
273             except:
274                 return None
275         else:
276             return None
277
278
279 class Book(models.Model):
280     title         = models.CharField(_('title'), max_length=120)
281     sort_key = models.CharField(_('sort_key'), max_length=120, db_index=True, editable=False)
282     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
283     description   = models.TextField(_('description'), blank=True)
284     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
285     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
286     parent_number = models.IntegerField(_('parent number'), default=0)
287     extra_info    = JSONField(_('extra information'), default='{}')
288     gazeta_link   = models.CharField(blank=True, max_length=240)
289     wiki_link     = models.CharField(blank=True, max_length=240)
290     # files generated during publication
291     xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
292     html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
293     pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
294     epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)    
295     txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)        
296
297     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
298     objects  = models.Manager()
299     tagged   = managers.ModelTaggedItemManager(Tag)
300     tags     = managers.TagDescriptor(Tag)
301
    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass
304
    class Meta:
        # Default ordering by the sort key derived from the title.
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
309
    def __unicode__(self):
        """Return the book title."""
        return self.title
312
313     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
314         self.sort_key = sortify(self.title)
315
316         if reset_short_html:
317             self.reset_short_html()
318
319         return super(Book, self).save(force_insert, force_update)
320
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair for the book detail page, keyed by slug."""
        return ('catalogue.views.book_detail', [self.slug])
324
    @property
    def name(self):
        """Alias for ``title``, so books and tags can both be read via ``.name``."""
        return self.title
328
    def book_tag_slug(self):
        """Return the slug of this book's own tag: 'l-' + slug, cut to 120 characters."""
        return ('l-' + self.slug)[:120]
331
332     def book_tag(self):
333         slug = self.book_tag_slug()
334         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
335         if created:
336             book_tag.name = self.title[:50]
337             book_tag.sort_key = self.title.lower()
338             book_tag.save()
339         return book_tag
340
341     def has_media(self, type):
342         if   type == 'xml':
343             if self.xml_file:
344                 return True
345             else:
346                 return False
347         elif type == 'html':
348             if self.html_file:
349                 return True
350             else:
351                 return False        
352         elif type == 'txt':
353             if self.txt_file:
354                 return True
355             else:
356                 return False        
357         elif type == 'pdf':
358             if self.pdf_file:
359                 return True
360             else:
361                 return False  
362         elif type == 'epub':
363             if self.epub_file:
364                 return True
365             else:
366                 return False                          
367         else:
368             if self.media.filter(type=type).exists():
369                 return True
370             else:
371                 return False
372
373     def get_media(self, type):
374         if self.has_media(type):
375             if   type == "xml":
376                 return self.xml_file
377             elif type == "html":
378                 return self.html_file
379             elif type == "epub":
380                 return self.epub_file
381             elif type == "txt":
382                 return self.txt_file
383             elif type == "pdf":
384                 return self.pdf_file
385             else:                                             
386                 return self.media.filter(type=type)
387         else:
388             return None
389
    def get_mp3(self):
        """Shortcut for get_media("mp3")."""
        return self.get_media("mp3")
    def get_odt(self):
        """Shortcut for get_media("odt")."""
        return self.get_media("odt")
    def get_ogg(self):
        """Shortcut for get_media("ogg")."""
        return self.get_media("ogg")
    def get_daisy(self):
        """Shortcut for get_media("daisy")."""
        return self.get_media("daisy")
398
399     def reset_short_html(self):
400         cache_key = "Book.short_html/%d/%s"
401         for lang, langname in settings.LANGUAGES:
402             cache.delete(cache_key % (self.id, lang))
403         # Fragment.short_html relies on book's tags, so reset it here too
404         for fragm in self.fragments.all():
405             fragm.reset_short_html()
406
407     def short_html(self):
408         cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
409         short_html = cache.get(cache_key)
410
411         if short_html is not None:
412             return mark_safe(short_html)
413         else:
414             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
415             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
416
417             formats = []
418             # files generated during publication               
419             if self.has_media("html"):
420                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
421             if self.has_media("pdf"):
422                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
423             if self.root_ancestor.has_media("epub"):
424                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
425             if self.has_media("txt"):
426                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
427             # other files
428             for m in self.media.order_by('type'):
429                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
430
431             formats = [mark_safe(format) for format in formats]
432
433             short_html = unicode(render_to_string('catalogue/book_short.html',
434                 {'book': self, 'tags': tags, 'formats': formats}))
435             cache.set(cache_key, short_html)
436             return mark_safe(short_html)
437
438     @property
439     def root_ancestor(self):
440         """ returns the oldest ancestor """
441
442         if not hasattr(self, '_root_ancestor'):
443             book = self
444             while book.parent:
445                 book = book.parent
446             self._root_ancestor = book
447         return self._root_ancestor
448
449
    def has_description(self):
        """Return True when the description is non-empty."""
        return len(self.description) > 0
    # NOTE(review): short_description/boolean look like Django admin
    # list-display options -- confirm against the admin module.
    has_description.short_description = _('description')
    has_description.boolean = True
454
455     # ugly ugly ugly
    def has_pdf_file(self):
        """Return True when ``pdf_file`` is set."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True
460
    def has_epub_file(self):
        """Return True when ``epub_file`` is set."""
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True
465
466     def has_txt_file(self):
467         return bool(self.txt_file)
468     has_txt_file.short_description = 'HTML'
469     has_txt_file.boolean = True
470
    def has_html_file(self):
        """Return True when ``html_file`` is set."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True
475
    def has_odt_file(self):
        """Return True when the book has at least one 'odt' media row."""
        return bool(self.has_media("odt"))
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True
480
    def has_mp3_file(self):
        """Return True when the book has at least one 'mp3' media row."""
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True
485
    def has_ogg_file(self):
        """Return True when the book has at least one 'ogg' media row."""
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True
490     
    def has_daisy_file(self):
        """Return True when the book has at least one 'daisy' media row."""
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
495     
496     def build_epub(self, remove_descendants=True):
497         """ (Re)builds the epub file.
498             If book has a parent, does nothing.
499             Unless remove_descendants is False, descendants' epubs are removed.
500         """
501     
502         from StringIO import StringIO
503         from hashlib import sha1
504         from django.core.files.base import ContentFile
505         from librarian import DocProvider
506
507         class BookImportDocProvider(DocProvider):
508             """ used for joined EPUBs """
509
510             def __init__(self, book):
511                 self.book = book
512
513             def by_slug(self, slug):
514                 if slug == self.book.slug:
515                     return self.book.xml_file
516                 else:
517                     return Book.objects.get(slug=slug).xml_file
518
519         if self.parent:
520             # don't need an epub
521             return
522
523         epub_file = StringIO()
524         try:
525             epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
526             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
527             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
528         except NoDublinCore:
529             pass
530
531         book_descendants = list(self.children.all())
532         while len(book_descendants) > 0:
533             child_book = book_descendants.pop(0)
534             if remove_descendants and child_book.has_epub_file():
535                 child_book.epub_file.delete()
536             # save anyway, to refresh short_html
537             child_book.save()
538             book_descendants += list(child_book.children.all())
539
540     def build_txt(self):
541         from StringIO import StringIO
542         from django.core.files.base import ContentFile
543         from librarian import text
544
545         out = StringIO()
546         text.transform(open(self.xml_file.path), out)
547         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
548
549
550     def build_html(self):
551         from tempfile import NamedTemporaryFile
552         from markupstring import MarkupString
553
554         meta_tags = list(self.tags.filter(
555             category__in=('author', 'epoch', 'genre', 'kind')))
556         book_tag = self.book_tag()
557
558         html_file = NamedTemporaryFile()
559         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
560             self.html_file.save('%s.html' % self.slug, File(html_file))
561
562             # get ancestor l-tags for adding to new fragments
563             ancestor_tags = []
564             p = self.parent
565             while p:
566                 ancestor_tags.append(p.book_tag())
567                 p = p.parent
568
569             # Delete old fragments and create them from scratch
570             self.fragments.all().delete()
571             # Extract fragments
572             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
573             for fragment in closed_fragments.values():
574                 try:
575                     theme_names = [s.strip() for s in fragment.themes.split(',')]
576                 except AttributeError:
577                     continue
578                 themes = []
579                 for theme_name in theme_names:
580                     if not theme_name:
581                         continue
582                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
583                     if created:
584                         tag.name = theme_name
585                         tag.sort_key = theme_name.lower()
586                         tag.save()
587                     themes.append(tag)
588                 if not themes:
589                     continue
590
591                 text = fragment.to_string()
592                 short_text = ''
593                 if (len(MarkupString(text)) > 240):
594                     short_text = unicode(MarkupString(text)[:160])
595                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
596                     text=text, short_text=short_text)
597
598                 new_fragment.save()
599                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
600             self.save()
601             return True
602         return False
603
604
605     @classmethod
606     def from_xml_file(cls, xml_file, **kwargs):
607         # use librarian to parse meta-data
608         book_info = dcparser.parse(xml_file)
609
610         if not isinstance(xml_file, File):
611             xml_file = File(open(xml_file))
612
613         try:
614             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
615         finally:
616             xml_file.close()
617
618     @classmethod
619     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
620         import re
621
622         # check for parts before we do anything
623         children = []
624         if hasattr(book_info, 'parts'):
625             for part_url in book_info.parts:
626                 base, slug = part_url.rsplit('/', 1)
627                 try:
628                     children.append(Book.objects.get(slug=slug))
629                 except Book.DoesNotExist, e:
630                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
631
632
633         # Read book metadata
634         book_base, book_slug = book_info.url.rsplit('/', 1)
635         if re.search(r'[^a-zA-Z0-9-]', book_slug):
636             raise ValueError('Invalid characters in slug')
637         book, created = Book.objects.get_or_create(slug=book_slug)
638
639         if created:
640             book_shelves = []
641         else:
642             if not overwrite:
643                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
644             # Save shelves for this book
645             book_shelves = list(book.tags.filter(category='set'))
646
647         book.title = book_info.title
648         book.set_extra_info_value(book_info.to_dict())
649         book.save()
650
651         meta_tags = []
652         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
653         for field_name, category in categories:
654             try:
655                 tag_names = getattr(book_info, field_name)
656             except:
657                 tag_names = [getattr(book_info, category)]
658             for tag_name in tag_names:
659                 tag_sort_key = tag_name
660                 if category == 'author':
661                     tag_sort_key = tag_name.last_name
662                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
663                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
664                 if created:
665                     tag.name = tag_name
666                     tag.sort_key = sortify(tag_sort_key.lower())
667                     tag.save()
668                 meta_tags.append(tag)
669
670         book.tags = set(meta_tags + book_shelves)
671
672         book_tag = book.book_tag()
673
674         for n, child_book in enumerate(children):
675             child_book.parent = book
676             child_book.parent_number = n
677             child_book.save()
678
679         # Save XML and HTML files
680         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
681
682         # delete old fragments when overwriting
683         book.fragments.all().delete()
684
685         if book.build_html():
686             if not settings.NO_BUILD_TXT and build_txt:
687                 book.build_txt()
688
689         if not settings.NO_BUILD_EPUB and build_epub:
690             book.root_ancestor.build_epub()
691
692         book_descendants = list(book.children.all())
693         # add l-tag to descendants and their fragments
694         # delete unnecessary EPUB files
695         while len(book_descendants) > 0:
696             child_book = book_descendants.pop(0)
697             child_book.tags = list(child_book.tags) + [book_tag]
698             child_book.save()
699             for fragment in child_book.fragments.all():
700                 fragment.tags = set(list(fragment.tags) + [book_tag])
701             book_descendants += list(child_book.children.all())
702
703         # refresh cache
704         book.reset_tag_counter()
705         book.reset_theme_counter()
706
707         book.save()
708         return book
709
710     def reset_tag_counter(self):
711         cache_key = "Book.tag_counter/%d" % self.id
712         cache.delete(cache_key)
713         if self.parent:
714             self.parent.reset_tag_counter()
715
716     @property
717     def tag_counter(self):
718         cache_key = "Book.tag_counter/%d" % self.id
719         tags = cache.get(cache_key)
720         if tags is None:
721             tags = {}
722             for child in self.children.all().order_by():
723                 for tag_pk, value in child.tag_counter.iteritems():
724                     tags[tag_pk] = tags.get(tag_pk, 0) + value
725             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
726                 tags[tag.pk] = 1
727
728             cache.set(cache_key, tags)
729         return tags
730
731     def reset_theme_counter(self):
732         cache_key = "Book.theme_counter/%d" % self.id
733         cache.delete(cache_key)
734         if self.parent:
735             self.parent.reset_theme_counter()
736
737     @property
738     def theme_counter(self):
739         cache_key = "Book.theme_counter/%d" % self.id
740         tags = cache.get(cache_key)
741         if tags is None:
742             tags = {}
743             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
744                 for tag in fragment.tags.filter(category='theme').order_by():
745                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
746
747             cache.set(cache_key, tags)
748         return tags
749
750     def pretty_title(self, html_links=False):
751         book = self
752         names = list(book.tags.filter(category='author'))
753
754         books = []
755         while book:
756             books.append(book)
757             book = book.parent
758         names.extend(reversed(books))
759
760         if html_links:
761             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
762         else:
763             names = [tag.name for tag in names]
764
765         return ', '.join(names)
766
767     @classmethod
768     def tagged_top_level(cls, tags):
769         """ Returns top-level books tagged with `tags'.
770
771         It only returns those books which don't have ancestors which are
772         also tagged with those tags.
773
774         """
775         # get relevant books and their tags
776         objects = cls.tagged.with_all(tags)
777         # eliminate descendants
778         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
779         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
780         if descendants_keys:
781             objects = objects.exclude(pk__in=descendants_keys)
782
783         return objects
784
785
class Fragment(models.Model):
    """A piece of a book's text, addressed by an anchor in the book's HTML."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text view plus '#m<anchor>'."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every configured language."""
        key_template = "Fragment.short_html/%d/%s"
        for lang, langname in settings.LANGUAGES:
            cache.delete(key_template % (self.id, lang))

    def short_html(self):
        """Return the fragment's short HTML, cached per fragment id and language.

        On a cache miss 'catalogue/fragment_short.html' is rendered and stored.
        """
        cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
        cached = cache.get(cache_key)
        if cached is not None:
            return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        cache.set(cache_key, rendered)
        return mark_safe(rendered)
820
821
class FileRecord(models.Model):
    """Record of a generated file: slug, type, SHA-1 hash and creation time.

    Book.build_epub() stores one for every EPUB it generates.
    """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # Newest records first.
        ordering = ('-time','-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return '<sha1> <slug>.<type>'."""
        return "%s %s.%s" % (self.sha1,  self.slug, self.type)
835
836 ###########
837 #
838 # SIGNALS
839 #
840 ###########
841
842
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Clears ``book_count`` and bumps ``changed_at`` on every affected tag.
    When the sender is a Book and any affected tag lies outside the 'book',
    'theme' and 'set' categories, the book's tag counter is reset.  When the
    sender is a Fragment and any affected tag is a theme, the theme counter
    of the fragment's book is reset.
    """
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None, changed_at=datetime.now())

    touched = Tag.objects.filter(pk__in=affected_pks)
    # exists() answers the yes/no question without counting every row
    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
859
860
def _pre_delete_handler(sender, instance, **kwargs):
    """ refresh Book on BookMedia delete """
    # connected for every model; only BookMedia deletions are of interest
    if not sender == BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
866
def _post_save_handler(sender, instance, **kwargs):
    """ refresh all the short_html stuff on BookMedia update """
    # connected for every model; only BookMedia saves are of interest
    if not sender == BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)