2e20717fcd55f90f5d0f3ff090c43cb551132f06
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.template.loader import render_to_string
14 from django.utils.datastructures import SortedDict
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, m2m_changed, pre_delete
19
20 from django.conf import settings
21
22 from newtagging.models import TagBase, tags_updated
23 from newtagging import managers
24 from catalogue.fields import JSONField, OverwritingFileField
25 from catalogue.utils import create_zip
26 from shutil import copy
27
28 from os import path
29
30
# Categories a tag may belong to, as (stored value, translated label) pairs.
# Used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Formats of uploaded book media, as (stored value, translated label) pairs.
# Used as the ``choices`` of ``BookMedia.type``.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')),
)

# not quite, but Django wants you to set a timeout
CACHE_FOREVER = 28 * 24 * 60 * 60  # 28 days, expressed in seconds
50
51
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is restricted to a single tag category."""

    def __init__(self, subcategory):
        # Category value every queryset produced by this manager is filtered on.
        self.subcategory = subcategory
        super(TagSubcategoryManager, self).__init__()

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
59
60
class Tag(TagBase):
    """A catalogue tag.

    ``category`` is one of ``TAG_CATEGORIES`` and the (slug, category) pair
    is unique.  ``book_count`` stores the number computed by ``get_count()``
    so that it is only calculated once.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): this label duplicates the one on ``created_at`` although
    # the field is updated on every save (auto_now).  It is kept as-is here
    # because changing the msgid would drop existing translations.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by ``get_tag_list`` for URLs using unqualified tag slugs."""
        pass

    # URL category prefix -> internal category name.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Internal category name -> URL category prefix (inverse of the above).
    categories_dict = dict((value, key) for key, value in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed only when ``book_count`` is ``None``; it is then
        stored on the instance and saved to the database.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants: any book carrying the l-tag of one
                # of the matched books is a descendant of that book.
                # Only 'book' tags are l-tags, so restrict the lookup to that
                # category (as Book.tagged_top_level does); otherwise a tag
                # of another category with a colliding slug would be used.
                l_tags = Tag.objects.filter(
                    category='book',
                    slug__in=[book.book_tag_slug() for book in objects])
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)

        self.book_count = objects.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of Tag objects.

        When ``tags`` is a string it is treated as a '/'-separated URL chunk
        in which a category prefix from ``categories_rev`` qualifies the slug
        that follows it.  Anything else is delegated to
        ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a slug could not be found, or the string ends
                with a category prefix that has no slug after it.
            Tag.MultipleObjectsReturned: some unqualified slugs match more
                than one tag; the exception carries ``tags`` (the tags
                resolved so far) and ``ambiguous_slugs``.
            Tag.UrlDeprecationWarning: every tag was resolved but at least
                one slug was given without its category prefix; the exception
                carries the resolved ``tags``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                # previous chunk was a category prefix; this one is its slug
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                # unqualified slug: accepted, but flagged as deprecated
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                    deprecated = True
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        if deprecated:
            e = Tag.UrlDeprecationWarning()
            e.tags = real_tags
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """URL fragment '<category prefix>/<slug>' identifying this tag.

        Looks the prefix up in ``categories_dict``, which has no entry for
        the 'book' category.
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
174
175
def get_dynamic_path(media, filename, ext=None, maxlen=100):
    """Build the relative storage path ``book/<ext>/<name>.<ext>``.

    Args:
        media: object providing ``type`` and ``name`` attributes, or ``None``
            (in which case ``ext`` must be given).
        filename: original file name; its part before the first dot is used
            as the name when ``media`` is ``None`` or has an empty name.
        ext: extension / directory name; when empty it is derived from
            ``media.type`` ('daisy' becomes 'daisy.zip').
        maxlen: length budget used when truncating the slugified name.

    Returns:
        The path as a string.

    Raises:
        ValueError: neither ``ext`` nor ``media`` was provided.
    """
    # how to put related book's slug here?
    if not ext:
        if media is None:
            # Previously this dereferenced ``media.type`` on None and failed
            # with an unhelpful AttributeError.
            raise ValueError("get_dynamic_path() needs either `media` or `ext`")
        if media.type == 'daisy':
            ext = 'daisy.zip'
        else:
            ext = media.type

    from slughifi import slughifi

    if media is None or not media.name:
        name = slughifi(filename.split(".")[0])
    else:
        name = slughifi(media.name)

    # Characters of the budget taken by the fixed parts of the path, plus a
    # 4-character margin (same arithmetic as before: maxlen - len(...) - 4).
    overhead = len('book/%s/.%s' % (ext, ext)) + 4
    return 'book/%s/%s.%s' % (ext, name[:maxlen - overhead], ext)
190
191
# TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Return a callable that forwards its positional arguments to
    ``get_dynamic_path`` together with the given ``ext`` and ``maxlen``.
    """
    def upload_path(*args):
        return get_dynamic_path(*args, ext=ext, maxlen=maxlen)

    return upload_path
195
196
class BookMedia(models.Model):
    """A media file (see ``MEDIA_FORMATS``) attached to a book."""
    # max_length must be an integer; it used to be the string "100".
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, then refresh the data derived from the file.

        The object is saved once so that the file is stored, then metadata
        and the source SHA1 are read from the file and the object is saved
        again.  The zip package of the related book is removed in between.
        """
        from slughifi import slughifi
        from catalogue.utils import ExistingFile, remove_zip

        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)

        # The row already exists after the first save, so the second one must
        # not force an INSERT (as e.g. ``objects.create()`` requests).
        # ``force_insert`` is the first positional parameter of Model.save().
        if args:
            args = (False,) + tuple(args[1:])
        kwargs.pop('force_insert', None)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project' and
            'funded_by' for mp3 and ogg files (empty strings when the file
            cannot be read), and an empty dict for every other type.
        """
        import mutagen
        from mutagen import id3

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: unreadable or missing tags leave the defaults
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort: unreadable or missing tags leave the defaults
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns the value for mp3 and ogg files, or None when the type is
            different, the file cannot be read or the value is not present.
        """
        import mutagen
        from mutagen import id3

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                # IndexError on a missing frame is handled below, too
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
294
295
296 class Book(models.Model):
297     title         = models.CharField(_('title'), max_length=120)
298     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
299     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
300     description   = models.TextField(_('description'), blank=True)
301     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
302     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
303     parent_number = models.IntegerField(_('parent number'), default=0)
304     extra_info    = JSONField(_('extra information'), default='{}')
305     gazeta_link   = models.CharField(blank=True, max_length=240)
306     wiki_link     = models.CharField(blank=True, max_length=240)
307     # files generated during publication
308
309     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
310     
311     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
312     objects  = models.Manager()
313     tagged   = managers.ModelTaggedItemManager(Tag)
314     tags     = managers.TagDescriptor(Tag)
315
316     html_built = django.dispatch.Signal()
317     published = django.dispatch.Signal()
318
319     class AlreadyExists(Exception):
320         pass
321
322     class Meta:
323         ordering = ('sort_key',)
324         verbose_name = _('book')
325         verbose_name_plural = _('books')
326
327     def __unicode__(self):
328         return self.title
329
330     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
331         from sortify import sortify
332
333         self.sort_key = sortify(self.title)
334
335         ret = super(Book, self).save(force_insert, force_update)
336
337         if reset_short_html:
338             self.reset_short_html()
339
340         return ret
341
342     @permalink
343     def get_absolute_url(self):
344         return ('catalogue.views.book_detail', [self.slug])
345
346     @property
347     def name(self):
348         return self.title
349
350     def book_tag_slug(self):
351         return ('l-' + self.slug)[:120]
352
353     def book_tag(self):
354         slug = self.book_tag_slug()
355         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
356         if created:
357             book_tag.name = self.title[:50]
358             book_tag.sort_key = self.title.lower()
359             book_tag.save()
360         return book_tag
361
362     def has_media(self, type):
363         if type in Book.file_types:
364             return bool(getattr(self, "%s_file" % type))
365         else:
366             return self.media.filter(type=type).exists()
367
368     def get_media(self, type):
369         if self.has_media(type):
370             if type in Book.file_types:
371                 return getattr(self, "%s_file" % type)
372             else:                                             
373                 return self.media.filter(type=type)
374         else:
375             return None
376
377     def get_mp3(self):
378         return self.get_media("mp3")
379     def get_odt(self):
380         return self.get_media("odt")
381     def get_ogg(self):
382         return self.get_media("ogg")
383     def get_daisy(self):
384         return self.get_media("daisy")                       
385
386     def reset_short_html(self):
387         if self.id is None:
388             return
389
390         cache_key = "Book.short_html/%d/%s"
391         for lang, langname in settings.LANGUAGES:
392             cache.delete(cache_key % (self.id, lang))
393         # Fragment.short_html relies on book's tags, so reset it here too
394         for fragm in self.fragments.all():
395             fragm.reset_short_html()
396
397     def short_html(self):
398         if self.id:
399             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
400             short_html = cache.get(cache_key)
401         else:
402             short_html = None
403
404         if short_html is not None:
405             return mark_safe(short_html)
406         else:
407             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
408             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
409
410             formats = []
411             # files generated during publication
412             if self.has_media("html"):
413                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
414             if self.has_media("pdf"):
415                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
416             if self.has_media("mobi"):
417                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
418             if self.root_ancestor.has_media("epub"):
419                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
420             if self.has_media("txt"):
421                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
422             # other files
423             for m in self.media.order_by('type'):
424                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
425
426             formats = [mark_safe(format) for format in formats]
427
428             short_html = unicode(render_to_string('catalogue/book_short.html',
429                 {'book': self, 'tags': tags, 'formats': formats}))
430
431             if self.id:
432                 cache.set(cache_key, short_html, CACHE_FOREVER)
433             return mark_safe(short_html)
434
435     @property
436     def root_ancestor(self):
437         """ returns the oldest ancestor """
438
439         if not hasattr(self, '_root_ancestor'):
440             book = self
441             while book.parent:
442                 book = book.parent
443             self._root_ancestor = book
444         return self._root_ancestor
445
446
447     def has_description(self):
448         return len(self.description) > 0
449     has_description.short_description = _('description')
450     has_description.boolean = True
451
452     # ugly ugly ugly
453     def has_odt_file(self):
454         return bool(self.has_media("odt"))
455     has_odt_file.short_description = 'ODT'
456     has_odt_file.boolean = True
457
458     def has_mp3_file(self):
459         return bool(self.has_media("mp3"))
460     has_mp3_file.short_description = 'MP3'
461     has_mp3_file.boolean = True
462
463     def has_ogg_file(self):
464         return bool(self.has_media("ogg"))
465     has_ogg_file.short_description = 'OGG'
466     has_ogg_file.boolean = True
467
468     def has_daisy_file(self):
469         return bool(self.has_media("daisy"))
470     has_daisy_file.short_description = 'DAISY'
471     has_daisy_file.boolean = True
472
473     def build_pdf(self, customizations=None, file_name=None):
474         """ (Re)builds the pdf file.
475         customizations - customizations which are passed to LaTeX class file.
476         file_name - save the pdf file under a different name and DO NOT save it in db.
477         """
478         from tempfile import NamedTemporaryFile
479         from os import unlink
480         from django.core.files import File
481         from librarian import pdf
482         from catalogue.utils import ORMDocProvider, remove_zip
483         from django.core.files.move import file_move_safe
484
485         try:
486             pdf_file = NamedTemporaryFile(delete=False)
487             pdf.transform(ORMDocProvider(self),
488                       file_path=str(self.xml_file.path),
489                       output_file=pdf_file,
490                       customizations=customizations
491                       )
492
493             if file_name is None:
494                 self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
495             else:
496                 copy(pdf_file.name, path.join(settings.MEDIA_ROOT, get_dynamic_path(None, file_name, ext='pdf')))
497         finally:
498             unlink(pdf_file.name)
499
500         # remove zip with all pdf files
501         remove_zip(settings.ALL_PDF_ZIP)
502
503     def build_mobi(self):
504         """ (Re)builds the MOBI file.
505
506         """
507         from tempfile import NamedTemporaryFile
508         from os import unlink
509         from django.core.files import File
510         from librarian import mobi
511         from catalogue.utils import ORMDocProvider, remove_zip
512
513         try:
514             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
515             mobi.transform(ORMDocProvider(self), verbose=1,
516                       file_path=str(self.xml_file.path),
517                       output_file=mobi_file.name,
518                       )
519
520             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
521         finally:
522             unlink(mobi_file.name)
523
524         # remove zip with all mobi files
525         remove_zip(settings.ALL_MOBI_ZIP)
526
527     def build_epub(self, remove_descendants=True):
528         """ (Re)builds the epub file.
529             If book has a parent, does nothing.
530             Unless remove_descendants is False, descendants' epubs are removed.
531         """
532         from StringIO import StringIO
533         from hashlib import sha1
534         from django.core.files.base import ContentFile
535         from librarian import epub, NoDublinCore
536         from catalogue.utils import ORMDocProvider, remove_zip
537
538         if self.parent:
539             # don't need an epub
540             return
541
542         epub_file = StringIO()
543         try:
544             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
545             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
546             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
547         except NoDublinCore:
548             pass
549
550         book_descendants = list(self.children.all())
551         while len(book_descendants) > 0:
552             child_book = book_descendants.pop(0)
553             if remove_descendants and child_book.has_epub_file():
554                 child_book.epub_file.delete()
555             # save anyway, to refresh short_html
556             child_book.save()
557             book_descendants += list(child_book.children.all())
558
559         # remove zip package with all epub files
560         remove_zip(settings.ALL_EPUB_ZIP)
561
562     def build_txt(self):
563         from StringIO import StringIO
564         from django.core.files.base import ContentFile
565         from librarian import text
566
567         out = StringIO()
568         text.transform(open(self.xml_file.path), out)
569         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
570
571
572     def build_html(self):
573         from tempfile import NamedTemporaryFile
574         from markupstring import MarkupString
575         from django.core.files import File
576         from slughifi import slughifi
577         from librarian import html
578
579         meta_tags = list(self.tags.filter(
580             category__in=('author', 'epoch', 'genre', 'kind')))
581         book_tag = self.book_tag()
582
583         html_file = NamedTemporaryFile()
584         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
585             self.html_file.save('%s.html' % self.slug, File(html_file))
586
587             # get ancestor l-tags for adding to new fragments
588             ancestor_tags = []
589             p = self.parent
590             while p:
591                 ancestor_tags.append(p.book_tag())
592                 p = p.parent
593
594             # Delete old fragments and create them from scratch
595             self.fragments.all().delete()
596             # Extract fragments
597             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
598             for fragment in closed_fragments.values():
599                 try:
600                     theme_names = [s.strip() for s in fragment.themes.split(',')]
601                 except AttributeError:
602                     continue
603                 themes = []
604                 for theme_name in theme_names:
605                     if not theme_name:
606                         continue
607                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
608                     if created:
609                         tag.name = theme_name
610                         tag.sort_key = theme_name.lower()
611                         tag.save()
612                     themes.append(tag)
613                 if not themes:
614                     continue
615
616                 text = fragment.to_string()
617                 short_text = ''
618                 if (len(MarkupString(text)) > 240):
619                     short_text = unicode(MarkupString(text)[:160])
620                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
621                     text=text, short_text=short_text)
622
623                 new_fragment.save()
624                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
625             self.save()
626             self.html_built.send(sender=self)
627             return True
628         return False
629
630     @staticmethod
631     def zip_format(format_):
632         def pretty_file_name(book):
633             return "%s/%s.%s" % (
634                 b.get_extra_info_value()['author'],
635                 b.slug,
636                 format_)
637
638         field_name = "%s_file" % format_
639         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
640         paths = [(pretty_file_name(b), getattr(b, field_name).path)
641                     for b in books]
642         result = create_zip.delay(paths,
643                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
644         return result.wait()
645
646     def zip_audiobooks(self):
647         bm = BookMedia.objects.filter(book=self, type='mp3')
648         paths = map(lambda bm: (None, bm.file.path), bm)
649         result = create_zip.delay(paths, self.slug)
650         return result.wait()
651
652     @classmethod
653     def from_xml_file(cls, xml_file, **kwargs):
654         from django.core.files import File
655         from librarian import dcparser
656
657         # use librarian to parse meta-data
658         book_info = dcparser.parse(xml_file)
659
660         if not isinstance(xml_file, File):
661             xml_file = File(open(xml_file))
662
663         try:
664             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
665         finally:
666             xml_file.close()
667
668     @classmethod
669     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
670             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
671         import re
672         from slughifi import slughifi
673         from sortify import sortify
674
675         # check for parts before we do anything
676         children = []
677         if hasattr(book_info, 'parts'):
678             for part_url in book_info.parts:
679                 base, slug = part_url.rsplit('/', 1)
680                 try:
681                     children.append(Book.objects.get(slug=slug))
682                 except Book.DoesNotExist, e:
683                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
684
685
686         # Read book metadata
687         book_base, book_slug = book_info.url.rsplit('/', 1)
688         if re.search(r'[^a-zA-Z0-9-]', book_slug):
689             raise ValueError('Invalid characters in slug')
690         book, created = Book.objects.get_or_create(slug=book_slug)
691
692         if created:
693             book_shelves = []
694         else:
695             if not overwrite:
696                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
697             # Save shelves for this book
698             book_shelves = list(book.tags.filter(category='set'))
699
700         book.title = book_info.title
701         book.set_extra_info_value(book_info.to_dict())
702         book.save()
703
704         meta_tags = []
705         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
706         for field_name, category in categories:
707             try:
708                 tag_names = getattr(book_info, field_name)
709             except:
710                 tag_names = [getattr(book_info, category)]
711             for tag_name in tag_names:
712                 tag_sort_key = tag_name
713                 if category == 'author':
714                     tag_sort_key = tag_name.last_name
715                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
716                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
717                 if created:
718                     tag.name = tag_name
719                     tag.sort_key = sortify(tag_sort_key.lower())
720                     tag.save()
721                 meta_tags.append(tag)
722
723         book.tags = set(meta_tags + book_shelves)
724
725         book_tag = book.book_tag()
726
727         for n, child_book in enumerate(children):
728             child_book.parent = book
729             child_book.parent_number = n
730             child_book.save()
731
732         # Save XML and HTML files
733         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
734
735         # delete old fragments when overwriting
736         book.fragments.all().delete()
737
738         if book.build_html():
739             if not settings.NO_BUILD_TXT and build_txt:
740                 book.build_txt()
741
742         if not settings.NO_BUILD_EPUB and build_epub:
743             book.root_ancestor.build_epub()
744
745         if not settings.NO_BUILD_PDF and build_pdf:
746             book.root_ancestor.build_pdf()
747
748         if not settings.NO_BUILD_MOBI and build_mobi:
749             book.build_mobi()
750
751         book_descendants = list(book.children.all())
752         # add l-tag to descendants and their fragments
753         # delete unnecessary EPUB files
754         while len(book_descendants) > 0:
755             child_book = book_descendants.pop(0)
756             child_book.tags = list(child_book.tags) + [book_tag]
757             child_book.save()
758             for fragment in child_book.fragments.all():
759                 fragment.tags = set(list(fragment.tags) + [book_tag])
760             book_descendants += list(child_book.children.all())
761
762         book.save()
763
764         # refresh cache
765         book.reset_tag_counter()
766         book.reset_theme_counter()
767
768         cls.published.send(sender=book)
769         return book
770
771     def reset_tag_counter(self):
772         if self.id is None:
773             return
774
775         cache_key = "Book.tag_counter/%d" % self.id
776         cache.delete(cache_key)
777         if self.parent:
778             self.parent.reset_tag_counter()
779
780     @property
781     def tag_counter(self):
782         if self.id:
783             cache_key = "Book.tag_counter/%d" % self.id
784             tags = cache.get(cache_key)
785         else:
786             tags = None
787
788         if tags is None:
789             tags = {}
790             for child in self.children.all().order_by():
791                 for tag_pk, value in child.tag_counter.iteritems():
792                     tags[tag_pk] = tags.get(tag_pk, 0) + value
793             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
794                 tags[tag.pk] = 1
795
796             if self.id:
797                 cache.set(cache_key, tags, CACHE_FOREVER)
798         return tags
799
800     def reset_theme_counter(self):
801         if self.id is None:
802             return
803
804         cache_key = "Book.theme_counter/%d" % self.id
805         cache.delete(cache_key)
806         if self.parent:
807             self.parent.reset_theme_counter()
808
809     @property
810     def theme_counter(self):
811         if self.id:
812             cache_key = "Book.theme_counter/%d" % self.id
813             tags = cache.get(cache_key)
814         else:
815             tags = None
816
817         if tags is None:
818             tags = {}
819             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
820                 for tag in fragment.tags.filter(category='theme').order_by():
821                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
822
823             if self.id:
824                 cache.set(cache_key, tags, CACHE_FOREVER)
825         return tags
826
827     def pretty_title(self, html_links=False):
828         book = self
829         names = list(book.tags.filter(category='author'))
830
831         books = []
832         while book:
833             books.append(book)
834             book = book.parent
835         names.extend(reversed(books))
836
837         if html_links:
838             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
839         else:
840             names = [tag.name for tag in names]
841
842         return ', '.join(names)
843
844     @classmethod
845     def tagged_top_level(cls, tags):
846         """ Returns top-level books tagged with `tags'.
847
848         It only returns those books which don't have ancestors which are
849         also tagged with those tags.
850
851         """
852         # get relevant books and their tags
853         objects = cls.tagged.with_all(tags)
854         # eliminate descendants
855         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
856         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
857         if descendants_keys:
858             objects = objects.exclude(pk__in=descendants_keys)
859
860         return objects
861
862     @classmethod
863     def book_list(cls, filter=None):
864         """Generates a hierarchical listing of all books.
865
866         Books are optionally filtered with a test function.
867
868         """
869
870         books_by_parent = {}
871         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
872         if filter:
873             books = books.filter(filter).distinct()
874             book_ids = set((book.pk for book in books))
875             for book in books:
876                 parent = book.parent_id
877                 if parent not in book_ids:
878                     parent = None
879                 books_by_parent.setdefault(parent, []).append(book)
880         else:
881             for book in books:
882                 books_by_parent.setdefault(book.parent_id, []).append(book)
883
884         orphans = []
885         books_by_author = SortedDict()
886         for tag in Tag.objects.filter(category='author'):
887             books_by_author[tag] = []
888
889         for book in books_by_parent.get(None,()):
890             authors = list(book.tags.filter(category='author'))
891             if authors:
892                 for author in authors:
893                     books_by_author[author].append(book)
894             else:
895                 orphans.append(book)
896
897         return books_by_author, orphans, books_by_parent
898
899     _audiences_pl = {
900         "SP1": (1, u"szkoła podstawowa"),
901         "SP2": (1, u"szkoła podstawowa"),
902         "P": (1, u"szkoła podstawowa"),
903         "G": (2, u"gimnazjum"),
904         "L": (3, u"liceum"),
905         "LP": (3, u"liceum"),
906     }
907     def audiences_pl(self):
908         audiences = self.get_extra_info_value().get('audiences', [])
909         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
910         return [a[1] for a in audiences]
911
912
913 def _has_factory(ftype):
914     has = lambda self: bool(getattr(self, "%s_file" % ftype))
915     has.short_description = t.upper()
916     has.boolean = True
917     has.__name__ = "has_%s_file" % ftype
918     return has
919
920     
# Give Book one FileField and one boolean `has_<type>_file` helper for
# every entry in Book.file_types.  The loop names `t` and `field_name` are
# module-level and are left unchanged on purpose.
for t in Book.file_types:
    field_name = "%s_file" % t
    verbose_name = _("%s file" % t.upper())
    file_field = models.FileField(verbose_name, upload_to=book_upload_path(t), blank=True)
    file_field.contribute_to_class(Book, field_name)

    setattr(Book, "has_" + field_name, _has_factory(t))
929
930
class Fragment(models.Model):
    """A taggable, anchored piece of a book's text."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book text with this fragment's anchor appended."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every language."""
        if self.id is None:
            return

        for lang, _langname in settings.LANGUAGES:
            cache.delete("Fragment.short_html/%d/%s" % (self.id, lang))

    def short_html(self):
        """Return the short HTML of this fragment for the active language.

        The rendered 'catalogue/fragment_short.html' template is cached per
        fragment id and language; fragments without an id are rendered
        every time and never cached.
        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if cache_key is not None:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
972
973
class FileRecord(models.Model):
    """A timestamped record of a file's SHA-1 hash, keyed by slug and type."""

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
987
988 ###########
989 #
990 # SIGNALS
991 #
992 ###########
993
994
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate tag counters after the tags of an object have changed.

    For every affected tag the global `book_count` is reset and
    `changed_at` is bumped, so the API can tell the tag was touched.
    When `sender` is a Book and a tag outside the 'book', 'theme' and
    'set' categories is affected, the book's tag counter is reset.  When
    `sender` is a Fragment and a 'theme' tag is affected, the theme
    counter of the fragment's book is reset.
    """
    # Materialise the pks once: `affected_tags` is needed for several
    # queries and may be an iterable that can only be consumed once.
    tag_pks = [tag.pk for tag in affected_tags]
    touched = Tag.objects.filter(pk__in=tag_pks)

    touched.update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # Only existence matters here, so avoid a full COUNT.
        if touched.exclude(category__in=('book', 'theme', 'set')).exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        if touched.filter(category='theme').exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1011
1012
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when a BookMedia is about to be deleted."""
    if sender != BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
1018
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after a BookMedia was saved.

    Per the original note, this refreshes the short_html data.
    """
    if sender != BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)