removing zip packages in Book/BookMedia + test
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.core.files import File
14 from django.template.loader import render_to_string
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import ExistingFile, BookImportDocProvider, create_zip_task, remove_zip
26
27 from librarian import dcparser, html, epub, NoDublinCore
28 import mutagen
29 from mutagen import id3
30 from slughifi import slughifi
31 from sortify import sortify
32 from os import unlink
33
# Tag categories as (stored value, translatable label) pairs; used as the
# `choices` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Media types as (stored value, translatable label) pairs; used as the
# `choices` of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# Timeout (in seconds) passed to cache.set() for values meant to live "forever":
# not quite, but Django wants you to set a timeout
CACHE_FOREVER = 2419200  # 28 days
53
54
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain tags of one given category."""

    def __init__(self, subcategory):
        """Remember `subcategory`, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to `self.subcategory`."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
62
63
class Tag(TagBase):
    """A catalogue tag (author, epoch, kind, genre, theme, set or book).

    Tags are unique per (slug, category) and are addressed in URLs as
    ``<localized category>/<slug>`` chunks (see `url_chunk`, `get_tag_list`).
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this is the
    # modification timestamp -- confirm before changing the msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without its category."""
        pass

    # URL category name -> internal category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Internal category value -> URL category name (inverse of categories_rev).
    categories_dict = dict((internal, url_name) for url_name, internal in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by @permalink."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty (admin boolean column)."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed once, stored in `book_count` and saved.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            matching = Book.objects.none()
        elif self.category == 'theme':
            matching = Fragment.tagged.with_all((self,))
        else:
            matching = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants: books carrying the l-tag of any
                # matching book are excluded from the count
                book_tag_slugs = [book.book_tag_slug() for book in matching]
                book_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendant_pks = [book.pk for book in Book.tagged.with_any(book_tags)]
                if descendant_pks:
                    matching = matching.exclude(pk__in=descendant_pks)

        self.book_count = matching.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        A string is read as '/'-separated chunks: a URL category name (key of
        `categories_rev`) qualifies the slug that follows it; a bare slug is
        looked up among non-book tags.  Anything that is not a string is
        delegated to TagBase.get_tag_list().

        Raises:
            Tag.DoesNotExist: a slug was not found, or the string ends with a
                category name that has no slug after it.
            Tag.MultipleObjectsReturned: some bare slugs matched several tags;
                the exception carries `.tags` and `.ambiguous_slugs`.
            Tag.UrlDeprecationWarning: everything resolved, but at least one
                bare slug was used; the exception carries `.tags`.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        resolved = []
        unqualified_ambiguous = []
        pending_category = None
        used_bare_slug = False

        for chunk in tags.split('/'):
            if pending_category:
                resolved.append(Tag.objects.get(slug=chunk, category=pending_category))
                pending_category = None
            elif chunk in Tag.categories_rev:
                pending_category = Tag.categories_rev[chunk]
            else:
                try:
                    resolved.append(Tag.objects.exclude(category='book').get(slug=chunk))
                    used_bare_slug = True
                except Tag.MultipleObjectsReturned:
                    unqualified_ambiguous.append(chunk)

        if pending_category:
            # something strange left off
            raise Tag.DoesNotExist()

        if unqualified_ambiguous:
            # some tags should be qualified
            ambiguity = Tag.MultipleObjectsReturned()
            ambiguity.tags = resolved
            ambiguity.ambiguous_slugs = unqualified_ambiguous
            raise ambiguity

        if used_bare_slug:
            deprecation = Tag.UrlDeprecationWarning()
            deprecation.tags = resolved
            raise deprecation

        return resolved

    @property
    def url_chunk(self):
        """URL fragment of this tag: '<URL category name>/<slug>'."""
        url_category = Tag.categories_dict[self.category]
        return '/'.join((url_category, self.slug))
177
178
179 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Build an `upload_to` callable yielding 'book/<ext>/<name>.<ext>' paths.

    ext    -- fixed extension; when empty it is derived from `media.type`
              ('daisy' maps to 'daisy.zip').
    maxlen -- length budget used to truncate the slugified name.
    """
    def get_dynamic_path(media, filename, ext=ext):
        # how to put related book's slug here?
        if not ext:
            ext = 'daisy.zip' if media.type == 'daisy' else media.type

        # Prefer the object's own name; fall back to the part of the uploaded
        # file name before the first dot.
        if media.name:
            raw_name = media.name
        else:
            raw_name = filename.split(".")[0]
        name = slughifi(raw_name)

        # Characters taken by the fixed parts of the path, plus a margin of 4.
        reserved = len('book/%s/.%s' % (ext, ext)) + 4
        return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
    return get_dynamic_path
194
195
class BookMedia(models.Model):
    """A media file of one of MEDIA_FORMATS attached to a Book.

    Saving removes the book's zip package and refreshes `extra_info` and
    `source_sha1` from metadata embedded in the file.
    """
    # max_length is an integer (it was the string "100").
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, renaming its file and refreshing derived metadata.

        The record is saved twice: once before the metadata is read from
        `self.file.path`, and once more to persist `extra_info` and
        `source_sha1`.  Both calls receive the same `*args`/`**kwargs`.
        """
        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            # New object: there is no previous name to compare with.
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        super(BookMedia, self).save(*args, **kwargs)
        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project' and
            'funded_by' for mp3/ogg files (empty strings for whatever could not
            be read), and an empty dict for any other type.
        """

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # Best effort: unreadable tags leave the values gathered so far.
                # (Exception, not a bare except, so that KeyboardInterrupt and
                # SystemExit still propagate.)
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # Best effort, as above.
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns the 'flac_sha1' value stored in the file for mp3/ogg, or
            None when it is missing, unreadable, or the type is different.
        """

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                # Covers unreadable files as well as a missing PRIV frame
                # (IndexError on the empty list).
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
285
286
class Book(models.Model):
    """A book in the catalogue, optionally nested under a parent book.

    Holds the source XML and the files generated from it; other media are
    attached through BookMedia (related name `media`).
    """
    title         = models.CharField(_('title'), max_length=120)
    # Recomputed from the title on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this is the
    # modification timestamp -- confirm before changing the msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's children (set in from_text_and_meta()).
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication
    xml_file      = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file     = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file      = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file     = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    txt_file      = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent by build_html() (sender is the book instance) after the HTML file
    # and fragments have been rebuilt.
    html_built = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # Raised by from_text_and_meta() when the slug exists and overwrite is off.
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        """Return the book title."""
        return self.title
322
323     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
324         self.sort_key = sortify(self.title)
325
326         ret = super(Book, self).save(force_insert, force_update)
327
328         if reset_short_html:
329             self.reset_short_html()
330
331         return ret
332
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair for the book detail view, keyed by slug."""
        return ('catalogue.views.book_detail', [self.slug])
336
    @property
    def name(self):
        """Alias of `title`."""
        return self.title
340
341     def book_tag_slug(self):
342         return ('l-' + self.slug)[:120]
343
344     def book_tag(self):
345         slug = self.book_tag_slug()
346         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
347         if created:
348             book_tag.name = self.title[:50]
349             book_tag.sort_key = self.title.lower()
350             book_tag.save()
351         return book_tag
352
353     def has_media(self, type):
354         if   type == 'xml':
355             if self.xml_file:
356                 return True
357             else:
358                 return False
359         elif type == 'html':
360             if self.html_file:
361                 return True
362             else:
363                 return False        
364         elif type == 'txt':
365             if self.txt_file:
366                 return True
367             else:
368                 return False        
369         elif type == 'pdf':
370             if self.pdf_file:
371                 return True
372             else:
373                 return False  
374         elif type == 'epub':
375             if self.epub_file:
376                 return True
377             else:
378                 return False                          
379         else:
380             if self.media.filter(type=type).exists():
381                 return True
382             else:
383                 return False
384
385     def get_media(self, type):
386         if self.has_media(type):
387             if   type == "xml":
388                 return self.xml_file
389             elif type == "html":
390                 return self.html_file
391             elif type == "epub":
392                 return self.epub_file
393             elif type == "txt":
394                 return self.txt_file
395             elif type == "pdf":
396                 return self.pdf_file
397             else:                                             
398                 return self.media.filter(type=type)
399         else:
400             return None
401
    def get_mp3(self):
        """Shortcut for get_media("mp3")."""
        return self.get_media("mp3")
    def get_odt(self):
        """Shortcut for get_media("odt")."""
        return self.get_media("odt")
    def get_ogg(self):
        """Shortcut for get_media("ogg")."""
        return self.get_media("ogg")
    def get_daisy(self):
        """Shortcut for get_media("daisy")."""
        return self.get_media("daisy")
410
411     def reset_short_html(self):
412         if self.id is None:
413             return
414
415         cache_key = "Book.short_html/%d/%s"
416         for lang, langname in settings.LANGUAGES:
417             cache.delete(cache_key % (self.id, lang))
418         # Fragment.short_html relies on book's tags, so reset it here too
419         for fragm in self.fragments.all():
420             fragm.reset_short_html()
421
422     def short_html(self):
423         if self.id:
424             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
425             short_html = cache.get(cache_key)
426         else:
427             short_html = None
428
429         if short_html is not None:
430             return mark_safe(short_html)
431         else:
432             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
433             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
434
435             formats = []
436             # files generated during publication               
437             if self.has_media("html"):
438                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
439             if self.has_media("pdf"):
440                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
441             if self.root_ancestor.has_media("epub"):
442                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
443             if self.has_media("txt"):
444                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
445             # other files
446             for m in self.media.order_by('type'):
447                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
448
449             formats = [mark_safe(format) for format in formats]
450
451             short_html = unicode(render_to_string('catalogue/book_short.html',
452                 {'book': self, 'tags': tags, 'formats': formats}))
453
454             if self.id:
455                 cache.set(cache_key, short_html, CACHE_FOREVER)
456             return mark_safe(short_html)
457
458     @property
459     def root_ancestor(self):
460         """ returns the oldest ancestor """
461
462         if not hasattr(self, '_root_ancestor'):
463             book = self
464             while book.parent:
465                 book = book.parent
466             self._root_ancestor = book
467         return self._root_ancestor
468
469
    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    # Function attributes: a column label and a flag marking the value as boolean.
    has_description.short_description = _('description')
    has_description.boolean = True
474
    # ugly ugly ugly
    # One boolean method per file type, each with a short label attached.
    def has_pdf_file(self):
        """Tell whether a PDF file is set."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        """Tell whether an EPUB file is set."""
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True
485
486     def has_txt_file(self):
487         return bool(self.txt_file)
488     has_txt_file.short_description = 'HTML'
489     has_txt_file.boolean = True
490
    def has_html_file(self):
        """Tell whether an HTML file is set."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def has_odt_file(self):
        """Tell whether any ODT media is attached (via has_media)."""
        return bool(self.has_media("odt"))
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_mp3_file(self):
        """Tell whether any MP3 media is attached (via has_media)."""
        return bool(self.has_media("mp3"))
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        """Tell whether any OGG media is attached (via has_media)."""
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        """Tell whether any DAISY media is attached (via has_media)."""
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
515
516     def build_pdf(self):
517         """ (Re)builds the pdf file.
518
519         """
520         from librarian import pdf
521         from tempfile import NamedTemporaryFile
522         import os
523
524         # remove zip with all pdf files
525         remove_zip(settings.ALL_PDF_ZIP)
526
527         path, fname = os.path.realpath(self.xml_file.path).rsplit('/', 1)
528         try:
529             pdf_file = NamedTemporaryFile(delete=False)
530             pdf.transform(BookImportDocProvider(self),
531                       file_path=str(self.xml_file.path),
532                       output_file=pdf_file,
533                       )
534
535             self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
536         finally:
537             unlink(pdf_file.name)
538
539     def build_epub(self, remove_descendants=True):
540         """ (Re)builds the epub file.
541             If book has a parent, does nothing.
542             Unless remove_descendants is False, descendants' epubs are removed.
543         """
544         from StringIO import StringIO
545         from hashlib import sha1
546         from django.core.files.base import ContentFile
547
548         if self.parent:
549             # don't need an epub
550             return
551
552         # remove zip package with all epub files
553         remove_zip(settings.ALL_EPUB_ZIP)
554
555         epub_file = StringIO()
556         try:
557             epub.transform(BookImportDocProvider(self), self.slug, output_file=epub_file)
558             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
559             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
560         except NoDublinCore:
561             pass
562
563         book_descendants = list(self.children.all())
564         while len(book_descendants) > 0:
565             child_book = book_descendants.pop(0)
566             if remove_descendants and child_book.has_epub_file():
567                 child_book.epub_file.delete()
568             # save anyway, to refresh short_html
569             child_book.save()
570             book_descendants += list(child_book.children.all())
571
572     def build_txt(self):
573         from StringIO import StringIO
574         from django.core.files.base import ContentFile
575         from librarian import text
576
577         out = StringIO()
578         text.transform(open(self.xml_file.path), out)
579         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
580
581
    def build_html(self):
        """(Re)build the HTML file and recreate the book's themed fragments.

        Returns True when html.transform() reported success (the HTML file was
        stored, fragments were recreated, the book was saved and the
        `html_built` signal was sent), False otherwise.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        # Tags every new fragment gets besides its own themes.
        meta_tags = list(self.tags.filter(
            category__in=('author', 'epoch', 'genre', 'kind')))
        book_tag = self.book_tag()

        html_file = NamedTemporaryFile()
        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
            self.html_file.save('%s.html' % self.slug, File(html_file))

            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = self.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Delete old fragments and create them from scratch
            self.fragments.all().delete()
            # Extract fragments
            # (only the closed ones are used below)
            closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # fragment.themes has no split() -- skip the fragment
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    # fragments without any theme are not stored
                    continue

                text = fragment.to_string()
                # short_text is filled in only for fragments longer than 240
                # (MarkupString length) and then holds the first 160.
                short_text = ''
                if (len(MarkupString(text)) > 240):
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
                    text=text, short_text=short_text)

                new_fragment.save()
                new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
            self.save()
            self.html_built.send(sender=self)
            return True
        return False
636
637     @staticmethod
638     def zip_epub():
639         books = Book.objects.all()
640
641         paths = filter(lambda x: x is not None,
642                        map(lambda b: b.epub_file and b.epub_file.path or None, books))
643         if settings.USE_CELERY:
644             result = create_zip_task.delay(paths, settings.ALL_EPUB_ZIP)
645             return result.wait()
646         else:
647             result = create_zip_task(paths, settings.ALL_EPUB_ZIP)
648             return result
649
650     @staticmethod
651     def zip_pdf():
652         books = Book.objects.all()
653
654         paths = filter(lambda x: x is not None,
655                        map(lambda b: b.pdf_file and b.pdf_file.path or None, books))
656         if settings.USE_CELERY:
657             result = create_zip_task.delay(paths, settings.ALL_PDF_ZIP)
658             return result.wait()
659         else:
660             result = create_zip_task(paths, settings.ALL_PDF_ZIP)
661             return result
662
663     def zip_audiobooks(self):
664         bm = BookMedia.objects.filter(book=self)
665         paths = map(lambda bm: bm.file.path, bm)
666         if settings.USE_CELERY:
667             result = create_zip_task.delay(paths, self.slug)
668             return result.wait()
669         else:
670             result = create_zip_task(paths, self.slug)
671             return result
672
673
674     @classmethod
675     def from_xml_file(cls, xml_file, **kwargs):
676         # use librarian to parse meta-data
677         book_info = dcparser.parse(xml_file)
678
679         if not isinstance(xml_file, File):
680             xml_file = File(open(xml_file))
681
682         try:
683             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
684         finally:
685             xml_file.close()
686
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True):
        """Create or update a Book from its source file and parsed metadata.

        raw_file  -- file object stored as the book's XML file.
        book_info -- metadata object; this method reads its `url`, `title`,
                     `to_dict()`, optional `parts` and the kind/genre/author/
                     epoch attributes.
        overwrite -- allow updating a book whose slug already exists.
        build_epub, build_txt, build_pdf -- build the given format unless the
                     matching settings.NO_BUILD_* switch is on.

        Raises:
            Book.DoesNotExist: a part listed in `book_info.parts` is missing.
            ValueError: the slug contains characters outside [a-zA-Z0-9-].
            Book.AlreadyExists: the book exists and `overwrite` is false.

        Returns the saved Book.
        """
        import re

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                # the slug is the last path segment of the part's URL
                base, slug = part_url.rsplit('/', 1)
                try:
                    children.append(Book.objects.get(slug=slug))
                except Book.DoesNotExist, e:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)


        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        if re.search(r'[^a-zA-Z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            # (the tags are replaced wholesale below, so 'set' tags must be kept aside)
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book.save()

        meta_tags = []
        # (plural attribute on book_info, tag category)
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except:
                # no plural attribute: fall back to the singular one
                # NOTE(review): bare except also hides unrelated errors -- consider narrowing
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # author entries are objects: sort by last name, display full name
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = sortify(tag_sort_key.lower())
                    tag.save()
                meta_tags.append(tag)

        book.tags = set(meta_tags + book_shelves)

        book_tag = book.book_tag()

        # attach the parts, numbered in the order they were listed
        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        # TXT is only built when the HTML build succeeded
        if book.build_html():
            if not settings.NO_BUILD_TXT and build_txt:
                book.build_txt()

        # EPUB and PDF are built for the root ancestor, not for this book
        if not settings.NO_BUILD_EPUB and build_epub:
            book.root_ancestor.build_epub()

        if not settings.NO_BUILD_PDF and build_pdf:
            book.root_ancestor.build_pdf()

        book_descendants = list(book.children.all())
        # add l-tag to descendants and their fragments
        # (this loop deletes no EPUB files; build_epub() removes descendants' epubs)
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        book.save()

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        return book
782
783     def reset_tag_counter(self):
784         if self.id is None:
785             return
786
787         cache_key = "Book.tag_counter/%d" % self.id
788         cache.delete(cache_key)
789         if self.parent:
790             self.parent.reset_tag_counter()
791
792     @property
793     def tag_counter(self):
794         if self.id:
795             cache_key = "Book.tag_counter/%d" % self.id
796             tags = cache.get(cache_key)
797         else:
798             tags = None
799
800         if tags is None:
801             tags = {}
802             for child in self.children.all().order_by():
803                 for tag_pk, value in child.tag_counter.iteritems():
804                     tags[tag_pk] = tags.get(tag_pk, 0) + value
805             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
806                 tags[tag.pk] = 1
807
808             if self.id:
809                 cache.set(cache_key, tags, CACHE_FOREVER)
810         return tags
811
812     def reset_theme_counter(self):
813         if self.id is None:
814             return
815
816         cache_key = "Book.theme_counter/%d" % self.id
817         cache.delete(cache_key)
818         if self.parent:
819             self.parent.reset_theme_counter()
820
821     @property
822     def theme_counter(self):
823         if self.id:
824             cache_key = "Book.theme_counter/%d" % self.id
825             tags = cache.get(cache_key)
826         else:
827             tags = None
828
829         if tags is None:
830             tags = {}
831             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
832                 for tag in fragment.tags.filter(category='theme').order_by():
833                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
834
835             if self.id:
836                 cache.set(cache_key, tags, CACHE_FOREVER)
837         return tags
838
839     def pretty_title(self, html_links=False):
840         book = self
841         names = list(book.tags.filter(category='author'))
842
843         books = []
844         while book:
845             books.append(book)
846             book = book.parent
847         names.extend(reversed(books))
848
849         if html_links:
850             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
851         else:
852             names = [tag.name for tag in names]
853
854         return ', '.join(names)
855
856     @classmethod
857     def tagged_top_level(cls, tags):
858         """ Returns top-level books tagged with `tags'.
859
860         It only returns those books which don't have ancestors which are
861         also tagged with those tags.
862
863         """
864         # get relevant books and their tags
865         objects = cls.tagged.with_all(tags)
866         # eliminate descendants
867         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
868         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
869         if descendants_keys:
870             objects = objects.exclude(pk__in=descendants_keys)
871
872         return objects
873
874
class Fragment(models.Model):
    """A piece of a book's text, addressed by an anchor and taggable.

    Stores the fragment text, a non-editable short version of it, the
    anchor and the owning book (reachable from the book as `fragments`).
    """
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text view with '#m<anchor>' appended."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every language.

        Does nothing for a fragment without an id.
        """
        if self.id is not None:
            # one cache entry exists per language code in settings.LANGUAGES
            for lang_code, _lang_name in settings.LANGUAGES:
                cache.delete("Fragment.short_html/%d/%s" % (self.id, lang_code))

    def short_html(self):
        """Return the fragment rendered with 'catalogue/fragment_short.html'.

        The result is marked as safe HTML. For a fragment with an id the
        rendered markup is cached per active language under
        "Fragment.short_html/<id>/<language>".
        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
916
917
class FileRecord(models.Model):
    """Record of a file: its slug, type and SHA-1 hash, stamped on creation."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    # filled in automatically when the record is first saved (auto_now_add)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # newest records first
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
931
932 ###########
933 #
934 # SIGNALS
935 #
936 ###########
937
938
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached tag statistics after an object's tags have changed.

    Connected to the `tags_updated` signal.

    sender        -- the object whose tags changed (checked with isinstance
                     against Book and Fragment)
    affected_tags -- iterable of the tags that were touched

    For every affected tag, `book_count` is reset to None and `changed_at`
    is set to the current time. Additionally:

    * for a Book sender, the book's tag counter cache is reset when any
      affected tag is outside the 'book', 'theme' and 'set' categories;
    * for a Fragment sender, the owning book's theme counter cache is reset
      when any affected tag is of category 'theme'.
    """
    # Collect the primary keys once: `affected_tags` may be a one-shot
    # iterable and the same keys are needed by every query below.
    tag_pks = [tag.pk for tag in affected_tags]
    if not tag_pks:
        # nothing was touched, so there is nothing to update or invalidate
        return

    touched = Tag.objects.filter(pk__in=tag_pks)

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        # (exists() only asks whether a row is there, no full COUNT needed)
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
955
956
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when a BookMedia is about to be deleted.

    The handler is connected for every model, so anything whose sender is
    not BookMedia is ignored.
    """
    is_book_media = sender == BookMedia
    if is_book_media:
        owner = instance.book
        owner.save()
pre_delete.connect(_pre_delete_handler)
962
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after a BookMedia has been saved.

    The handler is connected for every model, so anything whose sender is
    not BookMedia is ignored. Presumably the book's save() refreshes its
    cached short_html data -- verify against Book.save.
    """
    is_book_media = sender == BookMedia
    if is_book_media:
        owner = instance.book
        owner.save()
post_save.connect(_post_save_handler)