Merge branch 'api'
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 from django.utils.translation import ugettext_lazy as _
10 from django.contrib.auth.models import User
11 from django.core.files import File
12 from django.template.loader import render_to_string
13 from django.utils.safestring import mark_safe
14 from django.utils.translation import get_language
15 from django.core.urlresolvers import reverse
16 from django.db.models.signals import post_save, m2m_changed, pre_delete
17
18 from django.conf import settings
19
20 from newtagging.models import TagBase, tags_updated
21 from newtagging import managers
22 from catalogue.fields import JSONField, OverwritingFileField
23 from catalogue.utils import ExistingFile
24
25 from librarian import dcparser, html, epub, NoDublinCore
26 import mutagen
27 from mutagen import id3
28 from slughifi import slughifi
29 from sortify import sortify
30
31
# Tag categories as (stored value, translated label) pairs.
# Used below as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media formats as (stored value, translated label) pairs.
# Used below as the ``choices`` of ``BookMedia.type``.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')), 
)
48
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset only contains tags of one category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``; it is used as the ``category`` filter."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
56
57
class Tag(TagBase):
    """A tag that can be attached to books and fragments.

    A tag is identified by its ``slug`` within a ``category`` (see
    ``TAG_CATEGORIES``); the pair is declared unique in ``Meta``.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached object count; None means "not computed yet" (see get_count()).
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # auto_now: updated on every save, so this is the modification time; the
    # label used to be a copy of the 'creation date' one above.
    changed_at    = models.DateTimeField(_('change date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a tag was given without its category."""
        pass

    # URL path segment -> category name.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category name -> URL path segment (inverse of categories_rev).
    # Note: the 'book' category has no URL segment.
    categories_dict = dict((category, segment) for segment, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        """Return the tag's display name."""
        return self.name

    def __repr__(self):
        """Return a debugging representation based on the slug."""
        return "Tag(slug=%r)" % self.slug
99
100     @permalink
101     def get_absolute_url(self):
102         return ('catalogue.views.tagged_object_list', [self.url_chunk])
103
104     def has_description(self):
105         return len(self.description) > 0
106     has_description.short_description = _('description')
107     has_description.boolean = True
108
109     def get_count(self):
110         """ returns global book count for book tags, fragment count for themes """
111
112         if self.book_count is None:
113             if self.category == 'book':
114                 # never used
115                 objects = Book.objects.none()
116             elif self.category == 'theme':
117                 objects = Fragment.tagged.with_all((self,))
118             else:
119                 objects = Book.tagged.with_all((self,)).order_by()
120                 if self.category != 'set':
121                     # eliminate descendants
122                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
123                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
124                     if descendants_keys:
125                         objects = objects.exclude(pk__in=descendants_keys)
126             self.book_count = objects.count()
127             self.save()
128         return self.book_count
129
130     @staticmethod
131     def get_tag_list(tags):
132         if isinstance(tags, basestring):
133             real_tags = []
134             ambiguous_slugs = []
135             category = None
136             deprecated = False
137             tags_splitted = tags.split('/')
138             for name in tags_splitted:
139                 if category:
140                     real_tags.append(Tag.objects.get(slug=name, category=category))
141                     category = None
142                 elif name in Tag.categories_rev:
143                     category = Tag.categories_rev[name]
144                 else:
145                     try:
146                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
147                         deprecated = True 
148                     except Tag.MultipleObjectsReturned, e:
149                         ambiguous_slugs.append(name)
150
151             if category:
152                 # something strange left off
153                 raise Tag.DoesNotExist()
154             if ambiguous_slugs:
155                 # some tags should be qualified
156                 e = Tag.MultipleObjectsReturned()
157                 e.tags = real_tags
158                 e.ambiguous_slugs = ambiguous_slugs
159                 raise e
160             if deprecated:
161                 e = Tag.UrlDeprecationWarning()
162                 e.tags = real_tags
163                 raise e
164             return real_tags
165         else:
166             return TagBase.get_tag_list(tags)
167
168     @property
169     def url_chunk(self):
170         return '/'.join((Tag.categories_dict[self.category], self.slug))
171
172
# TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build a callable that computes a storage path for an uploaded file.

    ext: fixed extension (also used as the directory name); when empty it
        is derived from ``media.type`` for each call.
    maxlen: length budget used when truncating the slugified name.

    The returned function takes ``(media, filename)`` and returns
    ``'book/<ext>/<name>.<ext>'``.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            # DAISY media are stored with a 'daisy.zip' extension.
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        # Prefer the object's own name; fall back to the upload's base name.
        raw_name = media.name if media.name else filename.split(".")[0]
        name = slughifi(raw_name)
        # Room taken by the directory prefix and extension, plus 4 more chars.
        reserved = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
    return get_dynamic_path
188
189
class BookMedia(models.Model):
    """An additional media file (see ``MEDIA_FORMATS``) attached to a Book."""
    # max_length must be an integer; it was previously the string "100".
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100, editable=False)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    # Metadata read from the file itself; filled in by save().
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    # Source-file SHA1 read from the file's metadata; filled in by save().
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        """Return ``"<name> (<file base name>)"``."""
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')
206
207     def save(self, *args, **kwargs):
208         try:
209             old = BookMedia.objects.get(pk=self.pk)
210         except BookMedia.DoesNotExist, e:
211             pass
212         else:
213             # if name changed, change the file name, too
214             if slughifi(self.name) != slughifi(old.name):
215                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
216
217         super(BookMedia, self).save(*args, **kwargs)
218         extra_info = self.get_extra_info_value()
219         extra_info.update(self.read_meta())
220         self.set_extra_info_value(extra_info)
221         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
222         return super(BookMedia, self).save(*args, **kwargs)
223
224     def read_meta(self):
225         """
226             Reads some metadata from the audiobook.
227         """
228
229         artist_name = director_name = project = funded_by = ''
230         if self.type == 'mp3':
231             try:
232                 audio = id3.ID3(self.file.path)
233                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
234                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
235                 project = ", ".join([t.data for t in audio.getall('PRIV') 
236                         if t.owner=='wolnelektury.pl?project'])
237                 funded_by = ", ".join([t.data for t in audio.getall('PRIV') 
238                         if t.owner=='wolnelektury.pl?funded_by'])
239             except:
240                 pass
241         elif self.type == 'ogg':
242             try:
243                 audio = mutagen.File(self.file.path)
244                 artist_name = ', '.join(audio.get('artist', []))
245                 director_name = ', '.join(audio.get('conductor', []))
246                 project = ", ".join(audio.get('project', []))
247                 funded_by = ", ".join(audio.get('funded_by', []))
248             except:
249                 pass
250         else:
251             return {}
252         return {'artist_name': artist_name, 'director_name': director_name,
253                 'project': project, 'funded_by': funded_by}
254
255     @staticmethod
256     def read_source_sha1(filepath, filetype):
257         """
258             Reads source file SHA1 from audiobok metadata.
259         """
260
261         if filetype == 'mp3':
262             try:
263                 audio = id3.ID3(filepath)
264                 return [t.data for t in audio.getall('PRIV') 
265                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
266             except:
267                 return None
268         elif filetype == 'ogg':
269             try:
270                 audio = mutagen.File(filepath)
271                 return audio.get('flac_sha1', [None])[0] 
272             except:
273                 return None
274         else:
275             return None
276
277
class Book(models.Model):
    """A published book, optionally part of a parent book.

    Files generated during publication are stored in the ``*_file`` fields;
    other formats are attached as ``BookMedia`` (reverse name ``media``).
    """
    title         = models.CharField(_('title'), max_length=120)
    # Label made consistent with Tag.sort_key ('sort key').
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # auto_now: updated on every save, so this is the modification time; the
    # label used to be a copy of the 'creation date' one above.
    changed_at    = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    _short_html   = models.TextField(_('short HTML'), editable=False)
    # Position among the parent's parts (set in from_text_and_meta).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Cached per-tag counts; None means "not computed yet"
    # (see tag_counter / theme_counter).
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised when importing a book whose slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        """Return the book's title."""
        return self.title
315
316     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
317         self.sort_key = sortify(self.title)
318
319         if reset_short_html:
320             # Reset _short_html during save
321             update = {}
322             for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
323                 update[key] = ''
324                 self.__setattr__(key, '')
325             # Fragment.short_html relies on book's tags, so reset it here too
326             self.fragments.all().update(**update)
327
328         return super(Book, self).save(force_insert, force_update)
329
330     @permalink
331     def get_absolute_url(self):
332         return ('catalogue.views.book_detail', [self.slug])
333
334     @property
335     def name(self):
336         return self.title
337
338     def book_tag_slug(self):
339         return ('l-' + self.slug)[:120]
340
341     def book_tag(self):
342         slug = self.book_tag_slug()
343         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
344         if created:
345             book_tag.name = self.title[:50]
346             book_tag.sort_key = self.title.lower()
347             book_tag.save()
348         return book_tag
349
350     def has_media(self, type):
351         if   type == 'xml':
352             if self.xml_file:
353                 return True
354             else:
355                 return False
356         elif type == 'html':
357             if self.html_file:
358                 return True
359             else:
360                 return False        
361         elif type == 'txt':
362             if self.txt_file:
363                 return True
364             else:
365                 return False        
366         elif type == 'pdf':
367             if self.pdf_file:
368                 return True
369             else:
370                 return False  
371         elif type == 'epub':
372             if self.epub_file:
373                 return True
374             else:
375                 return False                          
376         else:
377             if self.media.filter(type=type).exists():
378                 return True
379             else:
380                 return False
381
382     def get_media(self, type):
383         if self.has_media(type):
384             if   type == "xml":
385                 return self.xml_file
386             elif type == "html":
387                 return self.html_file
388             elif type == "epub":
389                 return self.epub_file
390             elif type == "txt":
391                 return self.txt_file
392             elif type == "pdf":
393                 return self.pdf_file
394             else:                                             
395                 return self.media.filter(type=type)
396         else:
397             return None
398
399     def get_mp3(self):
400         return self.get_media("mp3")
401     def get_odt(self):
402         return self.get_media("odt")
403     def get_ogg(self):
404         return self.get_media("ogg")
405     def get_daisy(self):
406         return self.get_media("daisy")                       
407
408     def short_html(self):
409         key = '_short_html_%s' % get_language()
410         short_html = getattr(self, key)
411
412         if short_html and len(short_html):
413             return mark_safe(short_html)
414         else:
415             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
416             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
417
418             formats = []
419             # files generated during publication               
420             if self.has_media("html"):
421                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
422             if self.has_media("pdf"):
423                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
424             if self.root_ancestor.has_media("epub"):
425                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
426             if self.has_media("txt"):
427                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
428             # other files
429             for m in self.media.order_by('type'):
430                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
431
432             formats = [mark_safe(format) for format in formats]
433
434             setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
435                 {'book': self, 'tags': tags, 'formats': formats})))
436             self.save(reset_short_html=False)
437             return mark_safe(getattr(self, key))
438
439
440     @property
441     def root_ancestor(self):
442         """ returns the oldest ancestor """
443
444         if not hasattr(self, '_root_ancestor'):
445             book = self
446             while book.parent:
447                 book = book.parent
448             self._root_ancestor = book
449         return self._root_ancestor
450
451
452     def has_description(self):
453         return len(self.description) > 0
454     has_description.short_description = _('description')
455     has_description.boolean = True
456
457     # ugly ugly ugly
458     def has_pdf_file(self):
459         return bool(self.pdf_file)
460     has_pdf_file.short_description = 'PDF'
461     has_pdf_file.boolean = True
462
463     def has_epub_file(self):
464         return bool(self.epub_file)
465     has_epub_file.short_description = 'EPUB'
466     has_epub_file.boolean = True
467
468     def has_txt_file(self):
469         return bool(self.txt_file)
470     has_txt_file.short_description = 'HTML'
471     has_txt_file.boolean = True
472
473     def has_html_file(self):
474         return bool(self.html_file)
475     has_html_file.short_description = 'HTML'
476     has_html_file.boolean = True
477
478     def has_odt_file(self):
479         return bool(self.has_media("odt"))
480     has_odt_file.short_description = 'ODT'
481     has_odt_file.boolean = True
482
483     def has_mp3_file(self):
484         return bool(self.has_media("mp3"))
485     has_mp3_file.short_description = 'MP3'
486     has_mp3_file.boolean = True
487
488     def has_ogg_file(self):
489         return bool(self.has_media("ogg"))
490     has_ogg_file.short_description = 'OGG'
491     has_ogg_file.boolean = True
492     
493     def has_daisy_file(self):
494         return bool(self.has_media("daisy"))
495     has_daisy_file.short_description = 'DAISY'
496     has_daisy_file.boolean = True    
497     
498     def build_epub(self, remove_descendants=True):
499         """ (Re)builds the epub file.
500             If book has a parent, does nothing.
501             Unless remove_descendants is False, descendants' epubs are removed.
502         """
503     
504         from StringIO import StringIO
505         from hashlib import sha1
506         from django.core.files.base import ContentFile
507         from librarian import DocProvider
508
509         class BookImportDocProvider(DocProvider):
510             """ used for joined EPUBs """
511
512             def __init__(self, book):
513                 self.book = book
514
515             def by_slug(self, slug):
516                 if slug == self.book.slug:
517                     return self.book.xml_file
518                 else:
519                     return Book.objects.get(slug=slug).xml_file
520
521         if self.parent:
522             # don't need an epub
523             return
524
525         epub_file = StringIO()
526         try:
527             epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
528             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
529             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
530         except NoDublinCore:
531             pass
532
533         book_descendants = list(self.children.all())
534         while len(book_descendants) > 0:
535             child_book = book_descendants.pop(0)
536             if remove_descendants and child_book.has_epub_file():
537                 child_book.epub_file.delete()
538             # save anyway, to refresh short_html
539             child_book.save()
540             book_descendants += list(child_book.children.all())
541
542     def build_txt(self):
543         from StringIO import StringIO
544         from django.core.files.base import ContentFile
545         from librarian import text
546
547         out = StringIO()
548         text.transform(open(self.xml_file.path), out)
549         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
550         self.save()
551
552
553     @classmethod
554     def from_xml_file(cls, xml_file, **kwargs):
555         # use librarian to parse meta-data
556         book_info = dcparser.parse(xml_file)
557
558         if not isinstance(xml_file, File):
559             xml_file = File(open(xml_file))
560
561         try:
562             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
563         finally:
564             xml_file.close()
565
566     @classmethod
567     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True):
568         import re
569         from tempfile import NamedTemporaryFile
570         from markupstring import MarkupString
571         from django.core.files.storage import default_storage
572
573         # check for parts before we do anything
574         children = []
575         if hasattr(book_info, 'parts'):
576             for part_url in book_info.parts:
577                 base, slug = part_url.rsplit('/', 1)
578                 try:
579                     children.append(Book.objects.get(slug=slug))
580                 except Book.DoesNotExist, e:
581                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
582
583
584         # Read book metadata
585         book_base, book_slug = book_info.url.rsplit('/', 1)
586         if re.search(r'[^a-zA-Z0-9-]', book_slug):
587             raise ValueError('Invalid characters in slug')
588         book, created = Book.objects.get_or_create(slug=book_slug)
589
590         if created:
591             book_shelves = []
592         else:
593             if not overwrite:
594                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
595             # Save shelves for this book
596             book_shelves = list(book.tags.filter(category='set'))
597
598         book.title = book_info.title
599         book.set_extra_info_value(book_info.to_dict())
600         book._short_html = ''
601         book.save()
602
603         book_tags = []
604         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
605         for field_name, category in categories:
606             try:
607                 tag_names = getattr(book_info, field_name)
608             except:
609                 tag_names = [getattr(book_info, category)]
610             for tag_name in tag_names:
611                 tag_sort_key = tag_name
612                 if category == 'author':
613                     tag_sort_key = tag_name.last_name
614                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
615                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
616                 if created:
617                     tag.name = tag_name
618                     tag.sort_key = sortify(tag_sort_key.lower())
619                     tag.save()
620                 book_tags.append(tag)
621
622         book.tags = set(book_tags + book_shelves)
623
624         book_tag = book.book_tag()
625
626         for n, child_book in enumerate(children):
627             child_book.parent = book
628             child_book.parent_number = n
629             child_book.save()
630
631         # Save XML and HTML files
632         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
633
634         # delete old fragments when overwriting
635         book.fragments.all().delete()
636
637         html_file = NamedTemporaryFile()
638         if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
639             book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
640
641             # get ancestor l-tags for adding to new fragments
642             ancestor_tags = []
643             p = book.parent
644             while p:
645                 ancestor_tags.append(p.book_tag())
646                 p = p.parent
647
648             # Extract fragments
649             closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
650             for fragment in closed_fragments.values():
651                 try:
652                     theme_names = [s.strip() for s in fragment.themes.split(',')]
653                 except AttributeError:
654                     continue
655                 themes = []
656                 for theme_name in theme_names:
657                     if not theme_name:
658                         continue
659                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
660                     if created:
661                         tag.name = theme_name
662                         tag.sort_key = theme_name.lower()
663                         tag.save()
664                     themes.append(tag)
665                 if not themes:
666                     continue
667
668                 text = fragment.to_string()
669                 short_text = ''
670                 if (len(MarkupString(text)) > 240):
671                     short_text = unicode(MarkupString(text)[:160])
672                 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
673                     defaults={'text': text, 'short_text': short_text})
674
675                 new_fragment.save()
676                 new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)
677
678             if not settings.NO_BUILD_TXT and build_txt:
679                 book.build_txt()
680
681         if not settings.NO_BUILD_EPUB and build_epub:
682             book.root_ancestor.build_epub()
683
684         book_descendants = list(book.children.all())
685         # add l-tag to descendants and their fragments
686         # delete unnecessary EPUB files
687         while len(book_descendants) > 0:
688             child_book = book_descendants.pop(0)
689             child_book.tags = list(child_book.tags) + [book_tag]
690             child_book.save()
691             for fragment in child_book.fragments.all():
692                 fragment.tags = set(list(fragment.tags) + [book_tag])
693             book_descendants += list(child_book.children.all())
694
695         # refresh cache
696         book.reset_tag_counter()
697         book.reset_theme_counter()
698
699         book.save()
700         return book
701
702
703     def refresh_tag_counter(self):
704         tags = {}
705         for child in self.children.all().order_by():
706             for tag_pk, value in child.tag_counter.iteritems():
707                 tags[tag_pk] = tags.get(tag_pk, 0) + value
708         for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
709             tags[tag.pk] = 1
710         self.set__tag_counter_value(tags)
711         self.save(reset_short_html=False)
712         return tags
713
714     def reset_tag_counter(self):
715         self._tag_counter = None
716         self.save(reset_short_html=False)
717         if self.parent:
718             self.parent.reset_tag_counter()
719
720     @property
721     def tag_counter(self):
722         if self._tag_counter is None:
723             return self.refresh_tag_counter()
724         return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())
725
726     def refresh_theme_counter(self):
727         tags = {}
728         for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
729             for tag in fragment.tags.filter(category='theme').order_by():
730                 tags[tag.pk] = tags.get(tag.pk, 0) + 1
731         self.set__theme_counter_value(tags)
732         self.save(reset_short_html=False)
733         return tags
734
735     def reset_theme_counter(self):
736         self._theme_counter = None
737         self.save(reset_short_html=False)
738         if self.parent:
739             self.parent.reset_theme_counter()
740
741     @property
742     def theme_counter(self):
743         if self._theme_counter is None:
744             return self.refresh_theme_counter()
745         return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
746
747     def pretty_title(self, html_links=False):
748         book = self
749         names = list(book.tags.filter(category='author'))
750
751         books = []
752         while book:
753             books.append(book)
754             book = book.parent
755         names.extend(reversed(books))
756
757         if html_links:
758             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
759         else:
760             names = [tag.name for tag in names]
761
762         return ', '.join(names)
763
764     @classmethod
765     def tagged_top_level(cls, tags):
766         """ Returns top-level books tagged with `tags'.
767
768         It only returns those books which don't have ancestors which are
769         also tagged with those tags.
770
771         """
772         # get relevant books and their tags
773         objects = cls.tagged.with_all(tags)
774         # eliminate descendants
775         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
776         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
777         if descendants_keys:
778             objects = objects.exclude(pk__in=descendants_keys)
779
780         return objects
781
782
class Fragment(models.Model):
    """A piece of a book's text, identified by an anchor within that book."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book's text URL with an ``#m<anchor>`` suffix."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the fragment's short HTML box for the active language.

        The HTML is cached on the attribute ``_short_html_<language>``
        (read by name, so it must exist on the instance).  On a cache miss
        it is rendered from 'catalogue/fragment_short.html', stored, and the
        fragment is saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached and len(cached):
            return mark_safe(cached)
        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        setattr(self, key, rendered)
        self.save()
        return mark_safe(getattr(self, key))
812
813
class FileRecord(models.Model):
    """A timestamped record of a file's SHA-1 hash, keyed by slug and type."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # Newest records first.
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return ``"<sha1> <slug>.<type>"``."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
827
828 ###########
829 #
830 # SIGNALS
831 #
832 ###########
833
834
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters when an object's tags change.

    sender: the object whose tags changed (handled: Book and Fragment).
    affected_tags: iterable of the tags involved in the change.
    """
    # Collect the keys once, so a one-shot iterable is not consumed by the
    # first query and found empty by the later ones.
    tag_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    Tag.objects.filter(pk__in=tag_pks).update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        counted_tags = Tag.objects.filter(pk__in=tag_pks).exclude(
            category__in=('book', 'theme', 'set'))
        if counted_tags.exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if Tag.objects.filter(pk__in=tag_pks).filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
851
852
def _pre_delete_handler(sender, instance, **kwargs):
    """ refresh Book on BookMedia delete

    Connected for every model; deletions of other models are ignored.
    """
    if not (sender == BookMedia):
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
858
def _post_save_handler(sender, instance, **kwargs):
    """ refresh all the short_html stuff on BookMedia update

    Connected for every model; saves of other models are ignored.
    """
    if not (sender == BookMedia):
        return
    instance.book.save()
post_save.connect(_post_save_handler)