reset cache on chunk save
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.template.loader import render_to_string
14 from django.utils.datastructures import SortedDict
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import create_zip
26
27
# Categories a Tag can belong to, as (stored value, translatable label) pairs.
# Used as the `choices` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media formats a BookMedia can hold, as (stored value, translatable label)
# pairs. Used as the `choices` of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Timeout (in seconds) passed to cache.set() for entries that are only ever
# invalidated explicitly.
# not quite, but Django wants you to set a timeout
CACHE_FOREVER = 2419200  # 28 days
47
48
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset contains only tags of one category."""

    def __init__(self, subcategory):
        """Remember `subcategory`, the Tag.category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to `self.subcategory`."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
56
57
class Tag(TagBase):
    """A catalogue tag, identified by its (slug, category) pair.

    The category is one of TAG_CATEGORIES. Tags are addressed in URLs as
    '<polish category name>/<slug>' chunks (see `categories_rev`,
    `url_chunk` and `get_tag_list`).
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this is the
    # auto_now (modification) timestamp -- looks like a copy-paste slip;
    # confirm before touching the translatable string.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a URL used unqualified tag slugs.

        The resolved tags are attached to the instance as `.tags`.
        """
        pass

    # URL category name -> internal category name.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Internal category name -> URL category name (inverse of the above).
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the reversible (view, args) pair for this tag's listing."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description (admin column)."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed once, stored in `book_count` and saved; later
        calls return the stored number.
        """

        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants
                    # Restrict the lookup to 'book' tags, as
                    # Book.tagged_top_level does, so that a tag of another
                    # category which happens to share a slug is not picked up.
                    l_tags = Tag.objects.filter(
                        category='book',
                        slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        A string is treated as a '/'-separated URL path of
        '<category name>/<slug>' pairs; bare slugs (without a category name)
        are also looked up among non-book tags. Anything else is delegated
        to TagBase.get_tag_list.

        Raises:
            Tag.DoesNotExist: a slug does not resolve, or the path ends with
                a dangling category name.
            Tag.MultipleObjectsReturned: some bare slugs match several tags;
                the exception carries `.tags` (resolved so far) and
                `.ambiguous_slugs`.
            Tag.UrlDeprecationWarning: every tag resolved, but at least one
                was given as a bare slug; carries `.tags`.
        """
        if isinstance(tags, basestring):
            real_tags = []
            ambiguous_slugs = []
            category = None
            deprecated = False
            tags_splitted = tags.split('/')
            for name in tags_splitted:
                if category:
                    # previous chunk named the category, this one is the slug
                    real_tags.append(Tag.objects.get(slug=name, category=category))
                    category = None
                elif name in Tag.categories_rev:
                    category = Tag.categories_rev[name]
                else:
                    try:
                        real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                        deprecated = True
                    except Tag.MultipleObjectsReturned:
                        ambiguous_slugs.append(name)

            if category:
                # something strange left off
                raise Tag.DoesNotExist()
            if ambiguous_slugs:
                # some tags should be qualified
                e = Tag.MultipleObjectsReturned()
                e.tags = real_tags
                e.ambiguous_slugs = ambiguous_slugs
                raise e
            if deprecated:
                e = Tag.UrlDeprecationWarning()
                e.tags = real_tags
                raise e
            return real_tags
        else:
            return TagBase.get_tag_list(tags)

    @property
    def url_chunk(self):
        """URL path fragment for this tag: '<category name>/<slug>'.

        Raises KeyError for categories missing from `categories_dict`
        (the 'book' category has no entry there).
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
171
172
173 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable for book media files.

    The returned function maps (media, filename) to
    'book/<ext>/<slug>.<ext>'. When `ext` is empty it is taken from
    `media.type` ('daisy' becomes 'daisy.zip'). The slug comes from
    `media.name`, or from the part of `filename` before the first dot when
    the media has no name, and is cut so that the result stays within
    `maxlen` minus a 4-character margin.
    """
    def get_dynamic_path(media, filename, ext=ext):
        from slughifi import slughifi

        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type

        raw_name = media.name if media.name else filename.split(".")[0]
        stem = slughifi(raw_name)

        # characters taken by everything except the slug itself
        fixed_part = 'book/%s/.%s' % (ext, ext)
        room = maxlen - len(fixed_part) - 4
        return 'book/%s/%s.%s' % (ext, stem[:room], ext)
    return get_dynamic_path
190
191
class BookMedia(models.Model):
    """An additional media file (ODT, MP3, OGG, DAISY) attached to a Book.

    Saving a media object also refreshes `extra_info` and `source_sha1`
    from the file's own metadata and removes the book's zip package.
    """
    # max_length must be an integer; the original passed the string "100".
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, keeping file name and metadata in sync.

        The object is saved twice: once to store the file, and again after
        `extra_info` and `source_sha1` have been read back from it.
        """
        from slughifi import slughifi
        from catalogue.utils import ExistingFile, remove_zip

        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        # NOTE(review): the caller's args are reused for this second save;
        # presumably force_insert=True would attempt a duplicate insert here
        # -- confirm against callers.
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project'
            and 'funded_by' for mp3 and ogg files (empty strings when the
            file cannot be read), and an empty dict for other types.
        """
        import mutagen
        from mutagen import id3

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                priv_frames = audio.getall('PRIV')
                project = ", ".join([t.data for t in priv_frames
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in priv_frames
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: unreadable tags leave the fields empty
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort: unreadable tags leave the fields empty
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None for types other than mp3/ogg, and when the value
            is missing or the file cannot be read.
        """
        import mutagen
        from mutagen import id3

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                # IndexError on a missing frame is handled below as "no value"
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
289
290
291 class Book(models.Model):
    title         = models.CharField(_('title'), max_length=120)
    # Recomputed from the title on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): label repeats 'creation date' on the auto_now field --
    # looks like a copy-paste slip; confirm before changing the string.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's parts; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication

    # Formats that has_media()/get_media() look up as '<type>_file'
    # attributes of the book rather than as BookMedia rows.
    file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
    
    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() after the HTML file and fragments are rebuilt.
    html_built = django.dispatch.Signal()
    # Sent by from_text_and_meta() once a book has been fully imported.
    published = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug is taken and
        # overwrite is not requested.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
324
325     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
326         from sortify import sortify
327
328         self.sort_key = sortify(self.title)
329
330         ret = super(Book, self).save(force_insert, force_update)
331
332         if reset_short_html:
333             self.reset_short_html()
334
335         return ret
336
337     @permalink
338     def get_absolute_url(self):
339         return ('catalogue.views.book_detail', [self.slug])
340
341     @property
342     def name(self):
343         return self.title
344
345     def book_tag_slug(self):
346         return ('l-' + self.slug)[:120]
347
348     def book_tag(self):
349         slug = self.book_tag_slug()
350         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
351         if created:
352             book_tag.name = self.title[:50]
353             book_tag.sort_key = self.title.lower()
354             book_tag.save()
355         return book_tag
356
357     def has_media(self, type):
358         if type in Book.file_types:
359             return bool(getattr(self, "%s_file" % type))
360         else:
361             return self.media.filter(type=type).exists()
362
363     def get_media(self, type):
364         if self.has_media(type):
365             if type in Book.file_types:
366                 return getattr(self, "%s_file" % type)
367             else:                                             
368                 return self.media.filter(type=type)
369         else:
370             return None
371
372     def get_mp3(self):
373         return self.get_media("mp3")
374     def get_odt(self):
375         return self.get_media("odt")
376     def get_ogg(self):
377         return self.get_media("ogg")
378     def get_daisy(self):
379         return self.get_media("daisy")                       
380
381     def reset_short_html(self):
382         if self.id is None:
383             return
384
385         cache_key = "Book.short_html/%d/%s"
386         for lang, langname in settings.LANGUAGES:
387             cache.delete(cache_key % (self.id, lang))
388         # Fragment.short_html relies on book's tags, so reset it here too
389         for fragm in self.fragments.all():
390             fragm.reset_short_html()
391
392     def short_html(self):
393         if self.id:
394             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
395             short_html = cache.get(cache_key)
396         else:
397             short_html = None
398
399         if short_html is not None:
400             return mark_safe(short_html)
401         else:
402             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
403             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
404
405             formats = []
406             # files generated during publication
407             if self.has_media("html"):
408                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
409             if self.has_media("pdf"):
410                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
411             if self.has_media("mobi"):
412                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
413             if self.root_ancestor.has_media("epub"):
414                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
415             if self.has_media("txt"):
416                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
417             # other files
418             for m in self.media.order_by('type'):
419                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
420
421             formats = [mark_safe(format) for format in formats]
422
423             short_html = unicode(render_to_string('catalogue/book_short.html',
424                 {'book': self, 'tags': tags, 'formats': formats}))
425
426             if self.id:
427                 cache.set(cache_key, short_html, CACHE_FOREVER)
428             return mark_safe(short_html)
429
430     @property
431     def root_ancestor(self):
432         """ returns the oldest ancestor """
433
434         if not hasattr(self, '_root_ancestor'):
435             book = self
436             while book.parent:
437                 book = book.parent
438             self._root_ancestor = book
439         return self._root_ancestor
440
441
442     def has_description(self):
443         return len(self.description) > 0
444     has_description.short_description = _('description')
445     has_description.boolean = True
446
447     # ugly ugly ugly
448     def has_odt_file(self):
449         return bool(self.has_media("odt"))
450     has_odt_file.short_description = 'ODT'
451     has_odt_file.boolean = True
452
453     def has_mp3_file(self):
454         return bool(self.has_media("mp3"))
455     has_mp3_file.short_description = 'MP3'
456     has_mp3_file.boolean = True
457
458     def has_ogg_file(self):
459         return bool(self.has_media("ogg"))
460     has_ogg_file.short_description = 'OGG'
461     has_ogg_file.boolean = True
462
463     def has_daisy_file(self):
464         return bool(self.has_media("daisy"))
465     has_daisy_file.short_description = 'DAISY'
466     has_daisy_file.boolean = True
467
468     def build_pdf(self):
469         """ (Re)builds the pdf file.
470
471         """
472         from tempfile import NamedTemporaryFile
473         from os import unlink
474         from django.core.files import File
475         from librarian import pdf
476         from catalogue.utils import ORMDocProvider, remove_zip
477
478         try:
479             pdf_file = NamedTemporaryFile(delete=False)
480             pdf.transform(ORMDocProvider(self),
481                       file_path=str(self.xml_file.path),
482                       output_file=pdf_file,
483                       )
484
485             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
486         finally:
487             unlink(pdf_file.name)
488
489         # remove zip with all pdf files
490         remove_zip(settings.ALL_PDF_ZIP)
491
492     def build_mobi(self):
493         """ (Re)builds the MOBI file.
494
495         """
496         from tempfile import NamedTemporaryFile
497         from os import unlink
498         from django.core.files import File
499         from librarian import mobi
500         from catalogue.utils import ORMDocProvider, remove_zip
501
502         try:
503             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
504             mobi.transform(ORMDocProvider(self), verbose=1,
505                       file_path=str(self.xml_file.path),
506                       output_file=mobi_file.name,
507                       )
508
509             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
510         finally:
511             unlink(mobi_file.name)
512
513         # remove zip with all mobi files
514         remove_zip(settings.ALL_MOBI_ZIP)
515
516     def build_epub(self, remove_descendants=True):
517         """ (Re)builds the epub file.
518             If book has a parent, does nothing.
519             Unless remove_descendants is False, descendants' epubs are removed.
520         """
521         from StringIO import StringIO
522         from hashlib import sha1
523         from django.core.files.base import ContentFile
524         from librarian import epub, NoDublinCore
525         from catalogue.utils import ORMDocProvider, remove_zip
526
527         if self.parent:
528             # don't need an epub
529             return
530
531         epub_file = StringIO()
532         try:
533             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
534             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
535             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
536         except NoDublinCore:
537             pass
538
539         book_descendants = list(self.children.all())
540         while len(book_descendants) > 0:
541             child_book = book_descendants.pop(0)
542             if remove_descendants and child_book.has_epub_file():
543                 child_book.epub_file.delete()
544             # save anyway, to refresh short_html
545             child_book.save()
546             book_descendants += list(child_book.children.all())
547
548         # remove zip package with all epub files
549         remove_zip(settings.ALL_EPUB_ZIP)
550
551     def build_txt(self):
552         from StringIO import StringIO
553         from django.core.files.base import ContentFile
554         from librarian import text
555
556         out = StringIO()
557         text.transform(open(self.xml_file.path), out)
558         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
559
560
    def build_html(self):
        """(Re)build the HTML file and the book's themed fragments.

        Transforms the XML file to HTML; on success stores it in
        `html_file`, deletes all existing fragments, recreates one Fragment
        per closed fragment that has at least one theme, saves the book and
        sends the `html_built` signal.

        Returns True when the transform succeeded, False otherwise.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString
        from django.core.files import File
        from slughifi import slughifi
        from librarian import html

        # tags copied from the book onto every new fragment
        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        # NOTE(review): this temporary file is never closed explicitly.
        html_file = NamedTemporaryFile()
        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
            self.html_file.save('%s.html' % self.slug, File(html_file))

            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = self.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Delete old fragments and create them from scratch
            self.fragments.all().delete()
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # fragment.themes cannot be split (e.g. it is None): skip
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    # fragments without any theme are not stored
                    continue

                text = fragment.to_string()
                # short_text stays empty unless the markup is longer than
                # 240; then it holds the first 160 of it
                short_text = ''
                if (len(MarkupString(text)) > 240):
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                    text=text, short_text=short_text)

                # NOTE(review): create() above already saved the object;
                # this second save looks redundant -- confirm no handler
                # relies on it.
                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
            self.save()
            self.html_built.send(sender=self)
            return True
        return False
618
619     @staticmethod
620     def zip_format(format_):
621         def pretty_file_name(book):
622             return "%s/%s.%s" % (
623                 b.get_extra_info_value()['author'],
624                 b.slug,
625                 format_)
626
627         field_name = "%s_file" % format_
628         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
629         paths = [(pretty_file_name(b), getattr(b, field_name).path)
630                     for b in books]
631         result = create_zip.delay(paths,
632                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
633         return result.wait()
634
635     def zip_audiobooks(self):
636         bm = BookMedia.objects.filter(book=self, type='mp3')
637         paths = map(lambda bm: (None, bm.file.path), bm)
638         result = create_zip.delay(paths, self.slug)
639         return result.wait()
640
641     @classmethod
642     def from_xml_file(cls, xml_file, **kwargs):
643         from django.core.files import File
644         from librarian import dcparser
645
646         # use librarian to parse meta-data
647         book_info = dcparser.parse(xml_file)
648
649         if not isinstance(xml_file, File):
650             xml_file = File(open(xml_file))
651
652         try:
653             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
654         finally:
655             xml_file.close()
656
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
        """Create or overwrite a book from its XML file and parsed metadata.

        Sets the title, extra info and tags from `book_info`, attaches the
        books listed in `book_info.parts` as children, stores the XML,
        rebuilds HTML and (unless disabled by the arguments or the
        NO_BUILD_* settings) the other formats, tags all descendants with
        this book's l-tag, resets the counters' cache and sends the
        `published` signal.

        Returns the saved book.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside [a-zA-Z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.
        """
        import re
        from slughifi import slughifi
        from sortify import sortify

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                # the slug is the last path segment of the part's URL
                base, slug = part_url.rsplit('/', 1)
                try:
                    children.append(Book.objects.get(slug=slug))
                except Book.DoesNotExist, e:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)


        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        if re.search(r'[^a-zA-Z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book.save()

        meta_tags = []
        # (plural attribute, category) pairs; the singular attribute is the
        # fallback when the plural one cannot be read
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except:
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # authors are objects: sort by last name, display
                    # "first names last name"
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = sortify(tag_sort_key.lower())
                    tag.save()
                meta_tags.append(tag)

        # replaces the book's tags; shelves saved above are kept
        book.tags = set(meta_tags + book_shelves)

        book_tag = book.book_tag()

        # parts become children, numbered in the order they were listed
        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        # the text file is only built when the HTML build succeeded
        if book.build_html():
            if not settings.NO_BUILD_TXT and build_txt:
                book.build_txt()

        # epub and pdf are built on the root ancestor, mobi on the book itself
        if not settings.NO_BUILD_EPUB and build_epub:
            book.root_ancestor.build_epub()

        if not settings.NO_BUILD_PDF and build_pdf:
            book.root_ancestor.build_pdf()

        if not settings.NO_BUILD_MOBI and build_mobi:
            book.build_mobi()

        book_descendants = list(book.children.all())
        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        book.save()

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        cls.published.send(sender=book)
        return book
759
760     def reset_tag_counter(self):
761         if self.id is None:
762             return
763
764         cache_key = "Book.tag_counter/%d" % self.id
765         cache.delete(cache_key)
766         if self.parent:
767             self.parent.reset_tag_counter()
768
769     @property
770     def tag_counter(self):
771         if self.id:
772             cache_key = "Book.tag_counter/%d" % self.id
773             tags = cache.get(cache_key)
774         else:
775             tags = None
776
777         if tags is None:
778             tags = {}
779             for child in self.children.all().order_by():
780                 for tag_pk, value in child.tag_counter.iteritems():
781                     tags[tag_pk] = tags.get(tag_pk, 0) + value
782             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
783                 tags[tag.pk] = 1
784
785             if self.id:
786                 cache.set(cache_key, tags, CACHE_FOREVER)
787         return tags
788
789     def reset_theme_counter(self):
790         if self.id is None:
791             return
792
793         cache_key = "Book.theme_counter/%d" % self.id
794         cache.delete(cache_key)
795         if self.parent:
796             self.parent.reset_theme_counter()
797
798     @property
799     def theme_counter(self):
800         if self.id:
801             cache_key = "Book.theme_counter/%d" % self.id
802             tags = cache.get(cache_key)
803         else:
804             tags = None
805
806         if tags is None:
807             tags = {}
808             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
809                 for tag in fragment.tags.filter(category='theme').order_by():
810                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
811
812             if self.id:
813                 cache.set(cache_key, tags, CACHE_FOREVER)
814         return tags
815
816     def pretty_title(self, html_links=False):
817         book = self
818         names = list(book.tags.filter(category='author'))
819
820         books = []
821         while book:
822             books.append(book)
823             book = book.parent
824         names.extend(reversed(books))
825
826         if html_links:
827             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
828         else:
829             names = [tag.name for tag in names]
830
831         return ', '.join(names)
832
833     @classmethod
834     def tagged_top_level(cls, tags):
835         """ Returns top-level books tagged with `tags'.
836
837         It only returns those books which don't have ancestors which are
838         also tagged with those tags.
839
840         """
841         # get relevant books and their tags
842         objects = cls.tagged.with_all(tags)
843         # eliminate descendants
844         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
845         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
846         if descendants_keys:
847             objects = objects.exclude(pk__in=descendants_keys)
848
849         return objects
850
851     @classmethod
852     def book_list(cls, filter=None):
853         """Generates a hierarchical listing of all books.
854
855         Books are optionally filtered with a test function.
856
857         """
858
859         books_by_parent = {}
860         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
861         if filter:
862             books = books.filter(filter).distinct()
863             book_ids = set((book.pk for book in books))
864             for book in books:
865                 parent = book.parent_id
866                 if parent not in book_ids:
867                     parent = None
868                 books_by_parent.setdefault(parent, []).append(book)
869         else:
870             for book in books:
871                 books_by_parent.setdefault(book.parent_id, []).append(book)
872
873         orphans = []
874         books_by_author = SortedDict()
875         for tag in Tag.objects.filter(category='author'):
876             books_by_author[tag] = []
877
878         for book in books_by_parent.get(None,()):
879             authors = list(book.tags.filter(category='author'))
880             if authors:
881                 for author in authors:
882                     books_by_author[author].append(book)
883             else:
884                 orphans.append(book)
885
886         return books_by_author, orphans, books_by_parent
887
888
889 def _has_factory(ftype):
890     has = lambda self: bool(getattr(self, "%s_file" % ftype))
891     has.short_description = t.upper()
892     has.boolean = True
893     has.__name__ = "has_%s_file" % ftype
894     return has
895
896     
# add the file fields
#
# For every file type listed in Book.file_types, attach a "<type>_file"
# FileField to Book together with a "has_<type>_file" method built by
# _has_factory.  NOTE: `t` and `field_name` are module-level names and
# stay bound after the loop has finished.
for t in Book.file_types:
    field_name = "%s_file" % t
    # the field is created outside the class body, so it has to be added
    # to Book explicitly under `field_name`
    models.FileField(_("%s file" % t.upper()),
            upload_to=book_upload_path(t),
            blank=True).contribute_to_class(Book, field_name)

    setattr(Book, "has_%s_file" % t, _has_factory(t))
905
906
class Fragment(models.Model):
    """A fragment of a book's text, located by an anchor and taggable with Tag."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text view, pointing at this fragment's anchor."""
        book_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every configured language."""
        if self.id is not None:
            for lang_code, _lang_name in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """Return the fragment rendered with 'catalogue/fragment_short.html'.

        For a fragment with an id the rendered text is cached per language;
        the result is always marked as safe.
        """
        cache_key = None
        rendered = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            rendered = cache.get(cache_key)

        if rendered is None:
            rendered = unicode(render_to_string('catalogue/fragment_short.html',
                {'fragment': self}))
            if cache_key is not None:
                cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
948
949
class FileRecord(models.Model):
    """Record of a file: its slug, type, SHA-1 hash and creation time."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    # filled in automatically when the record is first saved
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        verbose_name = _('file record')
        verbose_name_plural = _('file records')
        # newest records first
        ordering = ('-time', '-slug', '-type')

    def __unicode__(self):
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
963
964
class Collection(models.Model):
    """A collection of books, which might be defined before publishing them."""
    title = models.CharField(_('title'), max_length=120, db_index=True)
    # the slug doubles as the primary key
    slug = models.SlugField(_('slug'), max_length=120, primary_key=True)
    description = models.TextField(_('description'), null=True, blank=True)

    # books are referenced by slug in a text field rather than by relation
    # (format of the text is not visible here -- check the code reading it)
    book_slugs = models.TextField(_('book slugs'))

    class Meta:
        ordering = ('title',)
        verbose_name = _('collection')
        verbose_name_plural = _('collections')

    def __unicode__(self):
        return self.title
981
982
983 ###########
984 #
985 # SIGNALS
986 #
987 ###########
988
989
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after the tags of an object have changed.

    Every affected tag gets its `book_count` reset and `changed_at` set to
    the current time.  Then, depending on the sender:

    * a Book whose affected tags include any outside the 'book', 'theme'
      and 'set' categories gets its tag counter reset,
    * otherwise, a Fragment whose affected tags include a theme gets its
      book's theme counter reset.
    """
    # Materialise the pks once, so that `affected_tags` may be any iterable
    # (including a one-shot generator) and is walked a single time.
    tag_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    Tag.objects.filter(pk__in=tag_pks).update(book_count=None, changed_at=datetime.now())

    # if book tags changed, reset book tag counter
    if isinstance(sender, Book) and \
                Tag.objects.filter(pk__in=tag_pks).\
                    exclude(category__in=('book', 'theme', 'set')).exists():
        sender.reset_tag_counter()
    # if fragment theme changed, reset book theme counter
    elif isinstance(sender, Fragment) and \
                Tag.objects.filter(pk__in=tag_pks).\
                    filter(category='theme').exists():
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1006
1007
def _pre_delete_handler(sender, instance, **kwargs):
    """ Re-save the owning Book when a BookMedia is about to be deleted.

    The handler is connected for every model, so other senders are ignored.
    """
    if sender != BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
1013
def _post_save_handler(sender, instance, **kwargs):
    """ Re-save the owning Book after a BookMedia has been saved.

    The handler is connected for every model, so other senders are ignored.
    """
    if sender != BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)