basic query using Dublin core fields works
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, ORMDocProvider, create_zip, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
# Tag categories as (stored value, translatable label) pairs;
# used as the `choices` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media formats as (stored value, translatable label) pairs;
# used as the `choices` of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# not quite, but Django wants you to set a timeout
# (value is in seconds: 28 * 24 * 3600)
CACHE_FOREVER = 2419200  # 28 days
53
54
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is limited to one tag category."""

    def __init__(self, subcategory):
        """Remember `subcategory`; it is used as the `category` filter value."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by `self.subcategory`."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
62
63
class Tag(TagBase):
    """A catalogue tag.

    `category` is one of TAG_CATEGORIES and `(slug, category)` is unique.
    `book_count` is a lazily computed, persisted counter (see get_count()).
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this field uses
    # auto_now -- looks like a copy-paste slip; confirm before changing the msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without its category."""
        pass

    # URL path segment -> category value.  Note that 'book' has no URL segment.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # category value -> URL path segment (inverse of categories_rev).
    categories_dict = dict((category, chunk) for chunk, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by `permalink`."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description (admin list column)."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed only while `book_count` is None; it is then
        stored on the instance and persisted with save().
        """

        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants
                    # Only 'book' tags are l-tags; restricting the category keeps
                    # this lookup consistent with Book.tagged_top_level() and
                    # prevents a same-slug tag of another category from matching.
                    l_tags = Tag.objects.filter(category='book',
                        slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        When `tags` is a string it is split on '/' and read as alternating
        category chunk / slug pairs (e.g. 'autor/<slug>/epoka/<slug>'); a chunk
        that is not a known category is looked up as a bare slug among
        non-'book' tags.  Any other input is delegated to TagBase.get_tag_list().

        Raises:
            Tag.DoesNotExist: a slug was not found, or the string ended with a
                category chunk that had no slug after it.
            Tag.MultipleObjectsReturned: at least one bare slug matched several
                tags; the exception carries `.tags` and `.ambiguous_slugs`.
            Tag.UrlDeprecationWarning: every tag was resolved but at least one
                bare slug was used; the exception carries `.tags`.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                    deprecated = True
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        if deprecated:
            e = Tag.UrlDeprecationWarning()
            e.tags = real_tags
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """'<category chunk>/<slug>' as used in URLs.

        Raises KeyError for categories missing from categories_dict ('book').
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
177
178
179 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable producing 'book/<ext>/<name>.<ext>' paths.

    When `ext` is empty the extension is derived from `media.type`
    ('daisy' maps to 'daisy.zip').  The slugified name is cut so that the
    whole path stays within `maxlen`, less a 4 character margin.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        if media.name:
            base = slughifi(media.name)
        else:
            base = slughifi(filename.split(".")[0])
        # Characters taken by everything except the name itself.
        overhead = len('book/%s/.%s' % (ext, ext))
        return 'book/%s/%s.%s' % (ext, base[:maxlen - overhead - 4], ext)
    return get_dynamic_path
194
195
class BookMedia(models.Model):
    """A media file (one of MEDIA_FORMATS) attached to a Book."""
    # max_length must be an integer, not a string.
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, then refresh metadata read from the stored file.

        The row is written twice: first so that the file exists on disk, then
        again after `extra_info` and `source_sha1` were read back from it.
        The zip package of the related book is removed in between.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        # NOTE(review): the same args are reused for the second write, so a
        # caller passing force_insert=True would presumably fail here -- confirm.
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project' and
            'funded_by' for mp3/ogg media (empty strings when the file cannot
            be read), and an empty dict for any other type.
        """

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # Best effort: unreadable tags leave the defaults in place, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # Best effort, as above.
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None when the type is neither 'mp3' nor 'ogg', when the
            tag is missing, or when the file cannot be read.
        """

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                # [0] raises IndexError when the tag is absent; handled below.
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
286
287
class Book(models.Model):
    """A published book, optionally nested under a parent book.

    The per-format file fields (`<type>_file` for each entry of `file_types`)
    and the matching `has_<type>_file` helpers are attached to this class at
    module level, after the class body.
    """
    title         = models.CharField(_('title'), max_length=120)
    # Derived from the title in save(); not editable.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this field uses
    # auto_now -- looks like a copy-paste slip; confirm.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children (set in from_text_and_meta()).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication

    file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']

    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() with the book instance as sender.
    html_built = django.dispatch.Signal()

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the book exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
320
321     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
322         self.sort_key = sortify(self.title)
323
324         ret = super(Book, self).save(force_insert, force_update)
325
326         if reset_short_html:
327             self.reset_short_html()
328
329         return ret
330
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by `permalink`."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias for `title`."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of this book's 'book' tag: 'l-' + slug, cut to 120 characters."""
        return ('l-' + self.slug)[:120]
341
342     def book_tag(self):
343         slug = self.book_tag_slug()
344         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
345         if created:
346             book_tag.name = self.title[:50]
347             book_tag.sort_key = self.title.lower()
348             book_tag.save()
349         return book_tag
350
351     def has_media(self, type):
352         if type in Book.file_types:
353             return bool(getattr(self, "%s_file" % type))
354         else:
355             return self.media.filter(type=type).exists()
356
357     def get_media(self, type):
358         if self.has_media(type):
359             if type in Book.file_types:
360                 return getattr(self, "%s_file" % type)
361             else:                                             
362                 return self.media.filter(type=type)
363         else:
364             return None
365
    # Shortcuts for get_media() with a fixed media type.
    def get_mp3(self):
        return self.get_media("mp3")
    def get_odt(self):
        return self.get_media("odt")
    def get_ogg(self):
        return self.get_media("ogg")
    def get_daisy(self):
        return self.get_media("daisy")
374
375     def reset_short_html(self):
376         if self.id is None:
377             return
378
379         cache_key = "Book.short_html/%d/%s"
380         for lang, langname in settings.LANGUAGES:
381             cache.delete(cache_key % (self.id, lang))
382         # Fragment.short_html relies on book's tags, so reset it here too
383         for fragm in self.fragments.all():
384             fragm.reset_short_html()
385
386     def short_html(self):
387         if self.id:
388             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
389             short_html = cache.get(cache_key)
390         else:
391             short_html = None
392
393         if short_html is not None:
394             return mark_safe(short_html)
395         else:
396             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
397             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
398
399             formats = []
400             # files generated during publication
401             if self.has_media("html"):
402                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
403             if self.has_media("pdf"):
404                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
405             if self.has_media("mobi"):
406                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
407             if self.root_ancestor.has_media("epub"):
408                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
409             if self.has_media("txt"):
410                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
411             # other files
412             for m in self.media.order_by('type'):
413                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
414
415             formats = [mark_safe(format) for format in formats]
416
417             short_html = unicode(render_to_string('catalogue/book_short.html',
418                 {'book': self, 'tags': tags, 'formats': formats}))
419
420             if self.id:
421                 cache.set(cache_key, short_html, CACHE_FOREVER)
422             return mark_safe(short_html)
423
424     @property
425     def root_ancestor(self):
426         """ returns the oldest ancestor """
427
428         if not hasattr(self, '_root_ancestor'):
429             book = self
430             while book.parent:
431                 book = book.parent
432             self._root_ancestor = book
433         return self._root_ancestor
434
435
    # The helpers below carry `short_description` / `boolean` attributes,
    # presumably for use as admin list columns -- confirm against the admin config.
    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    # ugly ugly ugly
    def has_odt_file(self):
        return bool(self.has_media("odt"))
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_mp3_file(self):
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
461
462     def build_pdf(self):
463         """ (Re)builds the pdf file.
464
465         """
466         from librarian import pdf
467         from tempfile import NamedTemporaryFile
468         import os
469
470         try:
471             pdf_file = NamedTemporaryFile(delete=False)
472             pdf.transform(ORMDocProvider(self),
473                       file_path=str(self.xml_file.path),
474                       output_file=pdf_file,
475                       )
476
477             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
478         finally:
479             unlink(pdf_file.name)
480
481         # remove zip with all pdf files
482         remove_zip(settings.ALL_PDF_ZIP)
483
484     def build_mobi(self):
485         """ (Re)builds the MOBI file.
486
487         """
488         from librarian import mobi
489         from tempfile import NamedTemporaryFile
490         import os
491
492         try:
493             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
494             mobi.transform(ORMDocProvider(self), verbose=1,
495                       file_path=str(self.xml_file.path),
496                       output_file=mobi_file.name,
497                       )
498
499             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
500         finally:
501             unlink(mobi_file.name)
502
503         # remove zip with all mobi files
504         remove_zip(settings.ALL_MOBI_ZIP)
505
506     def build_epub(self, remove_descendants=True):
507         """ (Re)builds the epub file.
508             If book has a parent, does nothing.
509             Unless remove_descendants is False, descendants' epubs are removed.
510         """
511         from StringIO import StringIO
512         from hashlib import sha1
513         from django.core.files.base import ContentFile
514
515         if self.parent:
516             # don't need an epub
517             return
518
519         epub_file = StringIO()
520         try:
521             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
522             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
523             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
524         except NoDublinCore:
525             pass
526
527         book_descendants = list(self.children.all())
528         while len(book_descendants) > 0:
529             child_book = book_descendants.pop(0)
530             if remove_descendants and child_book.has_epub_file():
531                 child_book.epub_file.delete()
532             # save anyway, to refresh short_html
533             child_book.save()
534             book_descendants += list(child_book.children.all())
535
536         # remove zip package with all epub files
537         remove_zip(settings.ALL_EPUB_ZIP)
538
539     def build_txt(self):
540         from StringIO import StringIO
541         from django.core.files.base import ContentFile
542         from librarian import text
543
544         out = StringIO()
545         text.transform(open(self.xml_file.path), out)
546         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
547
548
549     def build_html(self):
550         from tempfile import NamedTemporaryFile
551         from markupstring import MarkupString
552
553         meta_tags = list(self.tags.filter(
554             category__in=('author', 'epoch', 'genre', 'kind')))
555         book_tag = self.book_tag()
556
557         html_file = NamedTemporaryFile()
558         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
559             self.html_file.save('%s.html' % self.slug, File(html_file))
560
561             # get ancestor l-tags for adding to new fragments
562             ancestor_tags = []
563             p = self.parent
564             while p:
565                 ancestor_tags.append(p.book_tag())
566                 p = p.parent
567
568             # Delete old fragments and create them from scratch
569             self.fragments.all().delete()
570             # Extract fragments
571             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
572             for fragment in closed_fragments.values():
573                 try:
574                     theme_names = [s.strip() for s in fragment.themes.split(',')]
575                 except AttributeError:
576                     continue
577                 themes = []
578                 for theme_name in theme_names:
579                     if not theme_name:
580                         continue
581                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
582                     if created:
583                         tag.name = theme_name
584                         tag.sort_key = theme_name.lower()
585                         tag.save()
586                     themes.append(tag)
587                 if not themes:
588                     continue
589
590                 text = fragment.to_string()
591                 short_text = ''
592                 if (len(MarkupString(text)) > 240):
593                     short_text = unicode(MarkupString(text)[:160])
594                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
595                     text=text, short_text=short_text)
596
597                 new_fragment.save()
598                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
599             self.save()
600             self.html_built.send(sender=self)
601             return True
602         return False
603
604     @staticmethod
605     def zip_format(format_):
606         def pretty_file_name(book):
607             return "%s/%s.%s" % (
608                 b.get_extra_info_value()['author'],
609                 b.slug,
610                 format_)
611
612         field_name = "%s_file" % format_
613         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
614         paths = [(pretty_file_name(b), getattr(b, field_name).path)
615                     for b in books]
616         result = create_zip.delay(paths,
617                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
618         return result.wait()
619
620     def zip_audiobooks(self):
621         bm = BookMedia.objects.filter(book=self, type='mp3')
622         paths = map(lambda bm: (None, bm.file.path), bm)
623         result = create_zip.delay(paths, self.slug)
624         return result.wait()
625
626     @classmethod
627     def from_xml_file(cls, xml_file, **kwargs):
628         # use librarian to parse meta-data
629         book_info = dcparser.parse(xml_file)
630
631         if not isinstance(xml_file, File):
632             xml_file = File(open(xml_file))
633
634         try:
635             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
636         finally:
637             xml_file.close()
638
639     @classmethod
640     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
641             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
642         import re
643
644         # check for parts before we do anything
645         children = []
646         if hasattr(book_info, 'parts'):
647             for part_url in book_info.parts:
648                 base, slug = part_url.rsplit('/', 1)
649                 try:
650                     children.append(Book.objects.get(slug=slug))
651                 except Book.DoesNotExist, e:
652                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
653
654
655         # Read book metadata
656         book_base, book_slug = book_info.url.rsplit('/', 1)
657         if re.search(r'[^a-zA-Z0-9-]', book_slug):
658             raise ValueError('Invalid characters in slug')
659         book, created = Book.objects.get_or_create(slug=book_slug)
660
661         if created:
662             book_shelves = []
663         else:
664             if not overwrite:
665                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
666             # Save shelves for this book
667             book_shelves = list(book.tags.filter(category='set'))
668
669         book.title = book_info.title
670         book.set_extra_info_value(book_info.to_dict())
671         book.save()
672
673         meta_tags = []
674         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
675         for field_name, category in categories:
676             try:
677                 tag_names = getattr(book_info, field_name)
678             except:
679                 tag_names = [getattr(book_info, category)]
680             for tag_name in tag_names:
681                 tag_sort_key = tag_name
682                 if category == 'author':
683                     tag_sort_key = tag_name.last_name
684                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
685                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
686                 if created:
687                     tag.name = tag_name
688                     tag.sort_key = sortify(tag_sort_key.lower())
689                     tag.save()
690                 meta_tags.append(tag)
691
692         book.tags = set(meta_tags + book_shelves)
693
694         book_tag = book.book_tag()
695
696         for n, child_book in enumerate(children):
697             child_book.parent = book
698             child_book.parent_number = n
699             child_book.save()
700
701         # Save XML and HTML files
702         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
703
704         # delete old fragments when overwriting
705         book.fragments.all().delete()
706
707         if book.build_html():
708             if not settings.NO_BUILD_TXT and build_txt:
709                 book.build_txt()
710
711         if not settings.NO_BUILD_EPUB and build_epub:
712             book.root_ancestor.build_epub()
713
714         if not settings.NO_BUILD_PDF and build_pdf:
715             book.root_ancestor.build_pdf()
716
717         if not settings.NO_BUILD_MOBI and build_mobi:
718             book.build_mobi()
719
720         book_descendants = list(book.children.all())
721         # add l-tag to descendants and their fragments
722         # delete unnecessary EPUB files
723         while len(book_descendants) > 0:
724             child_book = book_descendants.pop(0)
725             child_book.tags = list(child_book.tags) + [book_tag]
726             child_book.save()
727             for fragment in child_book.fragments.all():
728                 fragment.tags = set(list(fragment.tags) + [book_tag])
729             book_descendants += list(child_book.children.all())
730
731         book.save()
732
733         # refresh cache
734         book.reset_tag_counter()
735         book.reset_theme_counter()
736
737         return book
738
739     def reset_tag_counter(self):
740         if self.id is None:
741             return
742
743         cache_key = "Book.tag_counter/%d" % self.id
744         cache.delete(cache_key)
745         if self.parent:
746             self.parent.reset_tag_counter()
747
748     @property
749     def tag_counter(self):
750         if self.id:
751             cache_key = "Book.tag_counter/%d" % self.id
752             tags = cache.get(cache_key)
753         else:
754             tags = None
755
756         if tags is None:
757             tags = {}
758             for child in self.children.all().order_by():
759                 for tag_pk, value in child.tag_counter.iteritems():
760                     tags[tag_pk] = tags.get(tag_pk, 0) + value
761             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
762                 tags[tag.pk] = 1
763
764             if self.id:
765                 cache.set(cache_key, tags, CACHE_FOREVER)
766         return tags
767
768     def reset_theme_counter(self):
769         if self.id is None:
770             return
771
772         cache_key = "Book.theme_counter/%d" % self.id
773         cache.delete(cache_key)
774         if self.parent:
775             self.parent.reset_theme_counter()
776
777     @property
778     def theme_counter(self):
779         if self.id:
780             cache_key = "Book.theme_counter/%d" % self.id
781             tags = cache.get(cache_key)
782         else:
783             tags = None
784
785         if tags is None:
786             tags = {}
787             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
788                 for tag in fragment.tags.filter(category='theme').order_by():
789                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
790
791             if self.id:
792                 cache.set(cache_key, tags, CACHE_FOREVER)
793         return tags
794
795     def pretty_title(self, html_links=False):
796         book = self
797         names = list(book.tags.filter(category='author'))
798
799         books = []
800         while book:
801             books.append(book)
802             book = book.parent
803         names.extend(reversed(books))
804
805         if html_links:
806             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
807         else:
808             names = [tag.name for tag in names]
809
810         return ', '.join(names)
811
812     @classmethod
813     def tagged_top_level(cls, tags):
814         """ Returns top-level books tagged with `tags'.
815
816         It only returns those books which don't have ancestors which are
817         also tagged with those tags.
818
819         """
820         # get relevant books and their tags
821         objects = cls.tagged.with_all(tags)
822         # eliminate descendants
823         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
824         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
825         if descendants_keys:
826             objects = objects.exclude(pk__in=descendants_keys)
827
828         return objects
829
830
def _has_factory(ftype):
    """Build a `has_<ftype>_file` predicate meant to be attached to a class.

    The returned function takes an instance and reports whether its
    `<ftype>_file` attribute is truthy. It carries `short_description`
    (the upper-cased file type), `boolean = True` and a matching
    `__name__`.

    The description is derived from the `ftype` argument rather than from
    any module-level variable, so the factory gives the right label no
    matter where or when it is called.
    """
    attr_name = "%s_file" % ftype

    def has(self):
        return bool(getattr(self, attr_name))

    has.short_description = ftype.upper()
    has.boolean = True
    has.__name__ = "has_%s_file" % ftype
    return has
837
838     
# Add the file fields: for every entry of Book.file_types, attach a
# "<type>_file" FileField and a "has_<type>_file" method to Book.
# This runs at import time; `t` and `field_name` stay bound at module level
# after the loop finishes.
for t in Book.file_types:
    field_name = "%s_file" % t
    # The verbose name is interpolated first and only then passed to the
    # translation function, so the lookup key is e.g. "PDF file".
    models.FileField(_("%s file" % t.upper()),
            upload_to=book_upload_path(t),
            blank=True).contribute_to_class(Book, field_name)

    setattr(Book, "has_%s_file" % t, _has_factory(t))
847
848
class Fragment(models.Model):
    """A piece of a Book's text, located by an anchor within that book."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    # Plain manager first, then the tagging helpers.
    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL with a '#m<anchor>' suffix."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML for every language in settings.LANGUAGES.

        Does nothing for a fragment without an id.
        """
        if self.id is not None:
            for lang, langname in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang))

    def short_html(self):
        """Return the rendered 'catalogue/fragment_short.html', marked safe.

        When the fragment has an id, the rendering is cached per active
        language under "Fragment.short_html/<id>/<language>".
        """
        cache_key = None
        cached = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)

        if cached is not None:
            return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        # cache_key is only set when the fragment has an id.
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
890
891
class FileRecord(models.Model):
    """A slug/type pair with the SHA-1 hash recorded for it and a timestamp."""

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    # Filled in automatically when the record is first created.
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # Newest first; ties broken by slug, then type, both descending.
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
905
906 ###########
907 #
908 # SIGNALS
909 #
910 ###########
911
912
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """React to a `tags_updated` signal.

    For every affected tag, `book_count` is reset to None and `changed_at`
    is set to the current time (so the API can tell the tag was touched).

    In addition:
    - if `sender` is a Book and any affected tag is outside the 'book',
      'theme' and 'set' categories, the book's tag counter is reset;
    - if `sender` is a Fragment and any affected tag is a 'theme', the
      theme counter of the fragment's book is reset.
    """
    # Collect the pks once: the same set is needed by every query below,
    # and `affected_tags` may be an iterable that can only be walked once.
    affected_pks = [tag.pk for tag in affected_tags]
    affected = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    affected.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if affected.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
929
930
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when a BookMedia is about to be deleted.

    Signals from any other sender are ignored.
    """
    if sender != BookMedia:
        return
    owner = instance.book
    owner.save()
pre_delete.connect(_pre_delete_handler)
936
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after a BookMedia has been saved.

    Saving the book is what refreshes the short_html data; signals from
    any other sender are ignored.
    """
    if sender != BookMedia:
        return
    owner = instance.book
    owner.save()
post_save.connect(_post_save_handler)