url links to zip packages
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, BookImportDocProvider, create_zip_task, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
# Categories a tag can belong to, as (stored value, translatable label)
# pairs; used as the `choices` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Formats of additional book media, as (stored value, translatable label)
# pairs; used as the `choices` of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# not quite, but Django wants you to set a timeout
# (value is in seconds: 28 * 24 * 60 * 60)
CACHE_FOREVER = 2419200  # 28 days
53
54
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets are limited to a single tag category."""

    def __init__(self, subcategory):
        """Store `subcategory`, the `category` value every result must have."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default query set narrowed to `self.subcategory`."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
62
63
64 class Tag(TagBase):
    # Display name, URL slug and ordering key of the tag.  A slug only has
    # to be unique within its category (see Meta.unique_together).
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means it has not been computed yet.
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): labelled 'creation date' although auto_now makes this a
    # last-modification timestamp -- looks like a copy-paste; confirm before
    # changing the label.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    # Raised by get_tag_list() when a tag path contained slugs that were not
    # preceded by a category segment; carries the resolved tags in `.tags`.
    class UrlDeprecationWarning(DeprecationWarning):
        pass

    # URL path segment -> category value.  Note there is no entry for the
    # 'book' category.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse of categories_rev: category value -> URL path segment.
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)
99
100     def __unicode__(self):
101         return self.name
102
103     def __repr__(self):
104         return "Tag(slug=%r)" % self.slug
105
106     @permalink
107     def get_absolute_url(self):
108         return ('catalogue.views.tagged_object_list', [self.url_chunk])
109
110     def has_description(self):
111         return len(self.description) > 0
112     has_description.short_description = _('description')
113     has_description.boolean = True
114
115     def get_count(self):
116         """ returns global book count for book tags, fragment count for themes """
117
118         if self.book_count is None:
119             if self.category == 'book':
120                 # never used
121                 objects = Book.objects.none()
122             elif self.category == 'theme':
123                 objects = Fragment.tagged.with_all((self,))
124             else:
125                 objects = Book.tagged.with_all((self,)).order_by()
126                 if self.category != 'set':
127                     # eliminate descendants
128                     l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
129                     descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
130                     if descendants_keys:
131                         objects = objects.exclude(pk__in=descendants_keys)
132             self.book_count = objects.count()
133             self.save()
134         return self.book_count
135
136     @staticmethod
137     def get_tag_list(tags):
138         if isinstance(tags, basestring):
139             real_tags = []
140             ambiguous_slugs = []
141             category = None
142             deprecated = False
143             tags_splitted = tags.split('/')
144             for name in tags_splitted:
145                 if category:
146                     real_tags.append(Tag.objects.get(slug=name, category=category))
147                     category = None
148                 elif name in Tag.categories_rev:
149                     category = Tag.categories_rev[name]
150                 else:
151                     try:
152                         real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
153                         deprecated = True 
154                     except Tag.MultipleObjectsReturned, e:
155                         ambiguous_slugs.append(name)
156
157             if category:
158                 # something strange left off
159                 raise Tag.DoesNotExist()
160             if ambiguous_slugs:
161                 # some tags should be qualified
162                 e = Tag.MultipleObjectsReturned()
163                 e.tags = real_tags
164                 e.ambiguous_slugs = ambiguous_slugs
165                 raise e
166             if deprecated:
167                 e = Tag.UrlDeprecationWarning()
168                 e.tags = real_tags
169                 raise e
170             return real_tags
171         else:
172             return TagBase.get_tag_list(tags)
173
174     @property
175     def url_chunk(self):
176         return '/'.join((Tag.categories_dict[self.category], self.slug))
177
178
179 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable producing 'book/<ext>/<name>.<ext>'.

    When `ext` is not given it is derived from the object's `type`
    ('daisy' becomes 'daisy.zip').  The name is the slugified `name` of
    the object, or the part of the uploaded file name before its first
    dot when the object has no name; it is truncated with respect to
    `maxlen`.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type
        if media.name:
            name = slughifi(media.name)
        else:
            name = slughifi(filename.split(".")[0])
        # room taken by everything except the name, plus 4 spare characters
        reserved = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
    return get_dynamic_path
194
195
196 class BookMedia(models.Model):
197     type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
198     name        = models.CharField(_('name'), max_length="100")
199     file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
200     uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
201     extra_info  = JSONField(_('extra information'), default='{}', editable=False)
202     book = models.ForeignKey('Book', related_name='media')
203     source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)
204
205     def __unicode__(self):
206         return "%s (%s)" % (self.name, self.file.name.split("/")[-1])
207
208     class Meta:
209         ordering            = ('type', 'name')
210         verbose_name        = _('book media')
211         verbose_name_plural = _('book media')
212
213     def save(self, *args, **kwargs):
214         try:
215             old = BookMedia.objects.get(pk=self.pk)
216         except BookMedia.DoesNotExist, e:
217             pass
218         else:
219             # if name changed, change the file name, too
220             if slughifi(self.name) != slughifi(old.name):
221                 self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)
222
223         super(BookMedia, self).save(*args, **kwargs)
224         extra_info = self.get_extra_info_value()
225         extra_info.update(self.read_meta())
226         self.set_extra_info_value(extra_info)
227         self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
228         return super(BookMedia, self).save(*args, **kwargs)
229
230     def read_meta(self):
231         """
232             Reads some metadata from the audiobook.
233         """
234
235         artist_name = director_name = project = funded_by = ''
236         if self.type == 'mp3':
237             try:
238                 audio = id3.ID3(self.file.path)
239                 artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
240                 director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
241                 project = ", ".join([t.data for t in audio.getall('PRIV') 
242                         if t.owner=='wolnelektury.pl?project'])
243                 funded_by = ", ".join([t.data for t in audio.getall('PRIV') 
244                         if t.owner=='wolnelektury.pl?funded_by'])
245             except:
246                 pass
247         elif self.type == 'ogg':
248             try:
249                 audio = mutagen.File(self.file.path)
250                 artist_name = ', '.join(audio.get('artist', []))
251                 director_name = ', '.join(audio.get('conductor', []))
252                 project = ", ".join(audio.get('project', []))
253                 funded_by = ", ".join(audio.get('funded_by', []))
254             except:
255                 pass
256         else:
257             return {}
258         return {'artist_name': artist_name, 'director_name': director_name,
259                 'project': project, 'funded_by': funded_by}
260
261     @staticmethod
262     def read_source_sha1(filepath, filetype):
263         """
264             Reads source file SHA1 from audiobok metadata.
265         """
266
267         if filetype == 'mp3':
268             try:
269                 audio = id3.ID3(filepath)
270                 return [t.data for t in audio.getall('PRIV') 
271                         if t.owner=='wolnelektury.pl?flac_sha1'][0]
272             except:
273                 return None
274         elif filetype == 'ogg':
275             try:
276                 audio = mutagen.File(filepath)
277                 return audio.get('flac_sha1', [None])[0] 
278             except:
279                 return None
280         else:
281             return None
282
283
284 class Book(models.Model):
    title         = models.CharField(_('title'), max_length=120)
    # Derived from the title by save(); not editable by hand.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): labelled 'creation date' although auto_now makes this a
    # last-modification timestamp -- looks like a copy-paste; confirm before
    # changing the label.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children (set from the order of parts in
    # from_text_and_meta).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    
    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Plain manager plus the tagging helpers bound to the Tag model.
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() with the book as sender after the HTML file and
    # its fragments have been rebuilt.
    html_built = django.dispatch.Signal()

    # Raised by from_text_and_meta() when the book exists and overwriting
    # was not requested.
    class AlreadyExists(Exception):
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
316
317     def __unicode__(self):
318         return self.title
319
320     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
321         self.sort_key = sortify(self.title)
322
323         ret = super(Book, self).save(force_insert, force_update)
324
325         if reset_short_html:
326             self.reset_short_html()
327
328         return ret
329
330     @permalink
331     def get_absolute_url(self):
332         return ('catalogue.views.book_detail', [self.slug])
333
334     @property
335     def name(self):
336         return self.title
337
338     def book_tag_slug(self):
339         return ('l-' + self.slug)[:120]
340
341     def book_tag(self):
342         slug = self.book_tag_slug()
343         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
344         if created:
345             book_tag.name = self.title[:50]
346             book_tag.sort_key = self.title.lower()
347             book_tag.save()
348         return book_tag
349
350     def has_media(self, type):
351         if   type == 'xml':
352             if self.xml_file:
353                 return True
354             else:
355                 return False
356         elif type == 'html':
357             if self.html_file:
358                 return True
359             else:
360                 return False        
361         elif type == 'txt':
362             if self.txt_file:
363                 return True
364             else:
365                 return False        
366         elif type == 'pdf':
367             if self.pdf_file:
368                 return True
369             else:
370                 return False  
371         elif type == 'epub':
372             if self.epub_file:
373                 return True
374             else:
375                 return False                          
376         else:
377             if self.media.filter(type=type).exists():
378                 return True
379             else:
380                 return False
381
382     def get_media(self, type):
383         if self.has_media(type):
384             if   type == "xml":
385                 return self.xml_file
386             elif type == "html":
387                 return self.html_file
388             elif type == "epub":
389                 return self.epub_file
390             elif type == "txt":
391                 return self.txt_file
392             elif type == "pdf":
393                 return self.pdf_file
394             else:                                             
395                 return self.media.filter(type=type)
396         else:
397             return None
398
399     def get_mp3(self):
400         return self.get_media("mp3")
401     def get_odt(self):
402         return self.get_media("odt")
403     def get_ogg(self):
404         return self.get_media("ogg")
405     def get_daisy(self):
406         return self.get_media("daisy")                       
407
408     def reset_short_html(self):
409         if self.id is None:
410             return
411
412         cache_key = "Book.short_html/%d/%s"
413         for lang, langname in settings.LANGUAGES:
414             cache.delete(cache_key % (self.id, lang))
415         # Fragment.short_html relies on book's tags, so reset it here too
416         for fragm in self.fragments.all():
417             fragm.reset_short_html()
418
    def short_html(self):
        """Return the rendered 'catalogue/book_short.html' snippet, marked safe.

        For a book with an id the snippet is cached per language under
        "Book.short_html/<id>/<language>" and served from the cache when
        present.  Otherwise it is rendered from the book's tags (all
        categories except 'set', 'theme' and 'book') and a list of links
        to the available formats.
        """
        if self.id:
            cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
            short_html = cache.get(cache_key)
        else:
            # unsaved book: nothing to look up (cache_key stays undefined,
            # which is why cache.set below is guarded by self.id as well)
            short_html = None

        if short_html is not None:
            return mark_safe(short_html)
        else:
            tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
            # NOTE(review): tag names are interpolated into HTML without
            # escaping and then marked safe -- confirm names are trusted.
            tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

            formats = []
            # files generated during publication
            if self.has_media("html"):
                formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
            if self.has_media("pdf"):
                formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
            # the EPUB link points at the root ancestor's file
            if self.root_ancestor.has_media("epub"):
                formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
            if self.has_media("txt"):
                formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
            # other files
            for m in self.media.order_by('type'):
                formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))

            formats = [mark_safe(format) for format in formats]

            short_html = unicode(render_to_string('catalogue/book_short.html',
                {'book': self, 'tags': tags, 'formats': formats}))

            if self.id:
                cache.set(cache_key, short_html, CACHE_FOREVER)
            return mark_safe(short_html)
454
455     @property
456     def root_ancestor(self):
457         """ returns the oldest ancestor """
458
459         if not hasattr(self, '_root_ancestor'):
460             book = self
461             while book.parent:
462                 book = book.parent
463             self._root_ancestor = book
464         return self._root_ancestor
465
466
467     def has_description(self):
468         return len(self.description) > 0
469     has_description.short_description = _('description')
470     has_description.boolean = True
471
472     # ugly ugly ugly
473     def has_pdf_file(self):
474         return bool(self.pdf_file)
475     has_pdf_file.short_description = 'PDF'
476     has_pdf_file.boolean = True
477
478     def has_epub_file(self):
479         return bool(self.epub_file)
480     has_epub_file.short_description = 'EPUB'
481     has_epub_file.boolean = True
482
483     def has_txt_file(self):
484         return bool(self.txt_file)
485     has_txt_file.short_description = 'HTML'
486     has_txt_file.boolean = True
487
488     def has_html_file(self):
489         return bool(self.html_file)
490     has_html_file.short_description = 'HTML'
491     has_html_file.boolean = True
492
493     def has_odt_file(self):
494         return bool(self.has_media("odt"))
495     has_odt_file.short_description = 'ODT'
496     has_odt_file.boolean = True
497
498     def has_mp3_file(self):
499         return bool(self.has_media("mp3"))
500     has_mp3_file.short_description = 'MP3'
501     has_mp3_file.boolean = True
502
503     def has_ogg_file(self):
504         return bool(self.has_media("ogg"))
505     has_ogg_file.short_description = 'OGG'
506     has_ogg_file.boolean = True
507
508     def has_daisy_file(self):
509         return bool(self.has_media("daisy"))
510     has_daisy_file.short_description = 'DAISY'
511     has_daisy_file.boolean = True
512
513     def build_pdf(self):
514         """ (Re)builds the pdf file.
515
516         """
517         from librarian import pdf
518         from tempfile import NamedTemporaryFile
519         import os
520
521         path, fname = os.path.realpath(self.xml_file.path).rsplit('/', 1)
522         try:
523             pdf_file = NamedTemporaryFile(delete=False)
524             pdf.transform(BookImportDocProvider(self),
525                       file_path=str(self.xml_file.path),
526                       output_file=pdf_file,
527                       )
528
529             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
530         finally:
531             unlink(pdf_file.name)
532
    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.

            A FileRecord with the SHA1 of the generated file is saved along
            with it.  If the transformation raises NoDublinCore, no file is
            stored, but descendants are still processed.  Every descendant
            is saved, whether or not its epub was removed.
        """
        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
            # NOTE(review): FileRecord is not among the imports visible at
            # the top of this module -- presumably defined further down;
            # verify.
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
        except NoDublinCore:
            pass

        # walk all descendants breadth-first, using the list as a queue
        book_descendants = list(self.children.all())
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            book_descendants += list(child_book.children.all())
562
563     def build_txt(self):
564         from StringIO import StringIO
565         from django.core.files.base import ContentFile
566         from librarian import text
567
568         out = StringIO()
569         text.transform(open(self.xml_file.path), out)
570         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
571
572
    def build_html(self):
        """(Re)build the HTML file and the book's fragments from its XML file.

        Returns True when html.transform reported success, in which case
        the HTML file is stored as '<slug>.html', all existing fragments
        of the book are replaced with freshly extracted ones, the book is
        saved and the `html_built` signal is sent.  Returns False
        otherwise, leaving the book untouched.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        # tags copied onto every new fragment
        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        html_file = NamedTemporaryFile()
        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
            self.html_file.save('%s.html' % self.slug, File(html_file))

            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = self.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Delete old fragments and create them from scratch
            self.fragments.all().delete()
            # Extract fragments
            # (only the closed fragments are used below)
            closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # themes is not a string here; skip the fragment
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    # fragments without any theme are not stored
                    continue

                text = fragment.to_string()
                short_text = ''
                # only texts longer than 240 get a shortened version, which
                # is the first 160 items of the MarkupString
                if (len(MarkupString(text)) > 240):
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                    text=text, short_text=short_text)

                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
            self.save()
            self.html_built.send(sender=self)
            return True
        return False
627
628     @staticmethod
629     def zip_epub():
630         books = Book.objects.all()
631
632         paths = filter(lambda x: x is not None,
633                        map(lambda b: b.epub_file and b.epub_file.path or None, books))
634         result = create_zip_task.delay(paths, settings.ALL_EPUB_ZIP)
635         return settings.MEDIA_URL + result.wait()
636
637     @staticmethod
638     def zip_pdf():
639         books = Book.objects.all()
640
641         paths = filter(lambda x: x is not None,
642                        map(lambda b: b.pdf_file and b.pdf_file.path or None, books))
643         result = create_zip_task.delay(paths, settings.ALL_PDF_ZIP)
644         return settings.MEDIA_URL + result.wait()
645
646     def zip_audiobooks(self):
647         bm = BookMedia.objects.filter(book=self)
648         paths = map(lambda bm: bm.file.path, bm)
649         result = create_zip_task.delay(paths, self.slug)
650
651         return settings.MEDIA_URL + result.wait()
652
653     def clean_zip_files(self):
654         remove_zip(self.slug)
655         remove_zip(settings.ALL_EPUB_ZIP)
656         remove_zip(settings.ALL_PDF_ZIP)
657
658     @classmethod
659     def from_xml_file(cls, xml_file, **kwargs):
660         # use librarian to parse meta-data
661         book_info = dcparser.parse(xml_file)
662
663         if not isinstance(xml_file, File):
664             xml_file = File(open(xml_file))
665
666         try:
667             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
668         finally:
669             xml_file.close()
670
671     @classmethod
672     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True):
673         import re
674
675         # check for parts before we do anything
676         children = []
677         if hasattr(book_info, 'parts'):
678             for part_url in book_info.parts:
679                 base, slug = part_url.rsplit('/', 1)
680                 try:
681                     children.append(Book.objects.get(slug=slug))
682                 except Book.DoesNotExist, e:
683                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
684
685
686         # Read book metadata
687         book_base, book_slug = book_info.url.rsplit('/', 1)
688         if re.search(r'[^a-zA-Z0-9-]', book_slug):
689             raise ValueError('Invalid characters in slug')
690         book, created = Book.objects.get_or_create(slug=book_slug)
691
692         if created:
693             book_shelves = []
694         else:
695             if not overwrite:
696                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
697             # Save shelves for this book
698             book_shelves = list(book.tags.filter(category='set'))
699
700         book.title = book_info.title
701         book.set_extra_info_value(book_info.to_dict())
702         book.save()
703
704         meta_tags = []
705         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
706         for field_name, category in categories:
707             try:
708                 tag_names = getattr(book_info, field_name)
709             except:
710                 tag_names = [getattr(book_info, category)]
711             for tag_name in tag_names:
712                 tag_sort_key = tag_name
713                 if category == 'author':
714                     tag_sort_key = tag_name.last_name
715                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
716                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
717                 if created:
718                     tag.name = tag_name
719                     tag.sort_key = sortify(tag_sort_key.lower())
720                     tag.save()
721                 meta_tags.append(tag)
722
723         book.tags = set(meta_tags + book_shelves)
724
725         book_tag = book.book_tag()
726
727         for n, child_book in enumerate(children):
728             child_book.parent = book
729             child_book.parent_number = n
730             child_book.save()
731
732         # Save XML and HTML files
733         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
734
735         # delete old fragments when overwriting
736         book.fragments.all().delete()
737
738         if book.build_html():
739             if not settings.NO_BUILD_TXT and build_txt:
740                 book.build_txt()
741
742         if not settings.NO_BUILD_EPUB and build_epub:
743             book.root_ancestor.build_epub()
744
745         if not settings.NO_BUILD_PDF and build_pdf:
746             book.root_ancestor.build_pdf()
747
748         book_descendants = list(book.children.all())
749         # add l-tag to descendants and their fragments
750         # delete unnecessary EPUB files
751         while len(book_descendants) > 0:
752             child_book = book_descendants.pop(0)
753             child_book.tags = list(child_book.tags) + [book_tag]
754             child_book.save()
755             for fragment in child_book.fragments.all():
756                 fragment.tags = set(list(fragment.tags) + [book_tag])
757             book_descendants += list(child_book.children.all())
758
759         book.save()
760
761         # refresh cache
762         book.reset_tag_counter()
763         book.reset_theme_counter()
764
765         return book
766
767     def reset_tag_counter(self):
768         if self.id is None:
769             return
770
771         cache_key = "Book.tag_counter/%d" % self.id
772         cache.delete(cache_key)
773         if self.parent:
774             self.parent.reset_tag_counter()
775
776     @property
777     def tag_counter(self):
778         if self.id:
779             cache_key = "Book.tag_counter/%d" % self.id
780             tags = cache.get(cache_key)
781         else:
782             tags = None
783
784         if tags is None:
785             tags = {}
786             for child in self.children.all().order_by():
787                 for tag_pk, value in child.tag_counter.iteritems():
788                     tags[tag_pk] = tags.get(tag_pk, 0) + value
789             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
790                 tags[tag.pk] = 1
791
792             if self.id:
793                 cache.set(cache_key, tags, CACHE_FOREVER)
794         return tags
795
796     def reset_theme_counter(self):
797         if self.id is None:
798             return
799
800         cache_key = "Book.theme_counter/%d" % self.id
801         cache.delete(cache_key)
802         if self.parent:
803             self.parent.reset_theme_counter()
804
805     @property
806     def theme_counter(self):
807         if self.id:
808             cache_key = "Book.theme_counter/%d" % self.id
809             tags = cache.get(cache_key)
810         else:
811             tags = None
812
813         if tags is None:
814             tags = {}
815             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
816                 for tag in fragment.tags.filter(category='theme').order_by():
817                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
818
819             if self.id:
820                 cache.set(cache_key, tags, CACHE_FOREVER)
821         return tags
822
823     def pretty_title(self, html_links=False):
824         book = self
825         names = list(book.tags.filter(category='author'))
826
827         books = []
828         while book:
829             books.append(book)
830             book = book.parent
831         names.extend(reversed(books))
832
833         if html_links:
834             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
835         else:
836             names = [tag.name for tag in names]
837
838         return ', '.join(names)
839
840     @classmethod
841     def tagged_top_level(cls, tags):
842         """ Returns top-level books tagged with `tags'.
843
844         It only returns those books which don't have ancestors which are
845         also tagged with those tags.
846
847         """
848         # get relevant books and their tags
849         objects = cls.tagged.with_all(tags)
850         # eliminate descendants
851         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
852         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
853         if descendants_keys:
854             objects = objects.exclude(pk__in=descendants_keys)
855
856         return objects
857
858
class Fragment(models.Model):
    """A taggable piece of a book's text, addressed by an anchor."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL with this fragment's anchor appended."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML for every configured language.

        Does nothing when the fragment has no id yet.
        """
        if self.id is not None:
            for lang_code, lang_name in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """Return the rendered 'catalogue/fragment_short.html' snippet,
        marked safe.

        For fragments with an id the rendering is cached per id and
        active language.
        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        # cache_key is only set for fragments that have an id
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
900
901
class FileRecord(models.Model):
    """A SHA-1 hash recorded for a (slug, type) pair, timestamped on creation."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        # rendered as "<sha1> <slug>.<type>"
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
915
916 ###########
917 #
918 # SIGNALS
919 #
920 ###########
921
922
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate tag-derived counters after an object's tags changed.

    sender -- the object whose tags changed; only Book and Fragment
        instances trigger a per-book cache reset.
    affected_tags -- iterable of the tags involved in the change.
    """
    # Materialise the keys once: affected_tags is needed for several
    # queries and may be a one-shot iterable.
    affected_pks = [tag.pk for tag in affected_tags]
    touched = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
939
940
def _pre_delete_handler(sender, instance, **kwargs):
    """ re-save the owning Book when a BookMedia is about to be deleted """
    # connected without a sender filter, so other models are ignored here
    if sender != BookMedia:
        return
    media_book = instance.book
    media_book.save()
pre_delete.connect(_pre_delete_handler)
946
def _post_save_handler(sender, instance, **kwargs):
    """ re-save the owning Book after a BookMedia has been saved

    NOTE(review): presumably Book.save() is what refreshes the cached
    short_html data -- confirm against Book.save().
    """
    # connected without a sender filter, so other models are ignored here
    if sender != BookMedia:
        return
    media_book = instance.book
    media_book.save()
post_save.connect(_post_save_handler)