customized pdf generation.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.core.files.storage import DefaultStorage
12 from django.utils.translation import ugettext_lazy as _
13 from django.contrib.auth.models import User
14 from django.template.loader import render_to_string
15 from django.utils.datastructures import SortedDict
16 from django.utils.safestring import mark_safe
17 from django.utils.translation import get_language
18 from django.core.urlresolvers import reverse
19 from django.db.models.signals import post_save, m2m_changed, pre_delete
20
21 from django.conf import settings
22
23 from newtagging.models import TagBase, tags_updated
24 from newtagging import managers
25 from catalogue.fields import JSONField, OverwritingFileField
26 from catalogue.utils import create_zip
27 from shutil import copy
28 from glob import glob
29 import re
30 from os import path
31
32
# Allowed values of Tag.category, as (stored value, translatable label) pairs.
# The labels are kept as literal _() calls so gettext can extract them.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# Allowed values of BookMedia.type, as (stored value, translatable label) pairs.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')), 
)

# not quite, but Django wants you to set a timeout
# (value is in seconds: 28 * 24 * 60 * 60)
CACHE_FOREVER = 2419200  # 28 days
52
53
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets contain only tags of one category."""

    def __init__(self, subcategory):
        """Store the category value every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
61
62
class Tag(TagBase):
    """A catalogue tag in one of the ``TAG_CATEGORIES``.

    The (slug, category) pair is unique.  ``book_count`` stores the value
    computed by ``get_count()`` so that it is calculated only once.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' although this field is
    # auto_now (refreshed on every save) -- looks like a copy/paste; confirm
    # before changing, since the string is a translation key.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a path contained unqualified slugs.

        The resolved tags are attached to the instance as ``tags``.
        """
        pass

    # URL path segment -> category value; used to parse "segment/slug" paths.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # category value -> URL path segment (inverse of categories_rev).
    # Note there is no entry for the 'book' category.
    categories_dict = dict((category, segment)
                           for segment, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the tagged-object-list URL built from ``url_chunk``."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty (admin list column)."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed only when ``book_count`` is None; it is then
        stored on the instance and saved to the database.
        """

        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants: books carrying the l-tag of
                    # another matching book are children of that book
                    l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of Tag objects.

        When ``tags`` is a string it is split on '/' and read as a sequence
        of "<category segment>/<slug>" pairs (segments are the keys of
        ``categories_rev``).  A slug that is not preceded by a category
        segment is looked up among all non-book tags.  Any other input is
        delegated to ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist - a tag was not found, or the path ended with
                a category segment that has no slug after it.
            Tag.MultipleObjectsReturned - some unqualified slugs matched
                more than one tag; the exception carries ``tags`` (those
                resolved so far) and ``ambiguous_slugs``.
            Tag.UrlDeprecationWarning - every tag was resolved, but at least
                one slug was unqualified; the exception carries ``tags``.
        """
        if isinstance(tags, basestring):
            real_tags = []
            ambiguous_slugs = []
            category = None
            deprecated = False
            tags_splitted = tags.split('/')
            for name in tags_splitted:
                if category:
                    # previous segment named the category of this slug
                    real_tags.append(Tag.objects.get(slug=name, category=category))
                    category = None
                elif name in Tag.categories_rev:
                    category = Tag.categories_rev[name]
                else:
                    # old-style URL: slug without a category segment
                    try:
                        real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                        deprecated = True
                    except Tag.MultipleObjectsReturned:
                        ambiguous_slugs.append(name)

            if category:
                # something strange left off
                raise Tag.DoesNotExist()
            if ambiguous_slugs:
                # some tags should be qualified
                e = Tag.MultipleObjectsReturned()
                e.tags = real_tags
                e.ambiguous_slugs = ambiguous_slugs
                raise e
            if deprecated:
                e = Tag.UrlDeprecationWarning()
                e.tags = real_tags
                raise e
            return real_tags
        else:
            return TagBase.get_tag_list(tags)

    @property
    def url_chunk(self):
        """Return "<category segment>/<slug>" for this tag.

        Raises KeyError for categories missing from ``categories_dict``.
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
176
177
def get_dynamic_path(media, filename, ext=None, maxlen=100):
    """Build a storage path of the form ``book/<ext>/<name>.<ext>``.

    media - object with ``type`` and ``name`` attributes, or None.
    filename - used for the base name (its part before the first dot)
        when ``media`` is None or has an empty name.
    ext - extension / subdirectory name; when empty it is taken from
        ``media.type`` ('daisy' becomes 'daisy.zip').
    maxlen - length budget used to truncate the slugified base name.

    Raises ValueError when neither ``ext`` nor ``media`` is given, because
    there is then nothing to derive the extension from.
    """
    from slughifi import slughifi

    # how to put related book's slug here?
    if not ext:
        if media is None:
            # Previously this crashed with an AttributeError on None.
            raise ValueError('ext is required when media is None')
        if media.type == 'daisy':
            ext = 'daisy.zip'
        else:
            ext = media.type
    if media is None or not media.name:
        name = slughifi(filename.split(".")[0])
    else:
        name = slughifi(media.name)
    return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
192
193
194 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Return a callable that forwards to get_dynamic_path().

    The returned callable passes its positional arguments through and adds
    the ``ext`` and ``maxlen`` values fixed here.
    """
    def upload_path(*args):
        return get_dynamic_path(*args, ext=ext, maxlen=maxlen)

    return upload_path
197
198
def get_customized_pdf_path(book, customizations):
    """
    Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.

    book - object with a ``slug`` attribute.
    customizations - iterable of hashable, sortable options; their order
        does not influence the result and the argument is not modified.
    """
    # sorted() rather than list.sort(): do not reorder the caller's list.
    h = hash(tuple(sorted(customizations)))
    pdf_name = '%s-custom-%s' % (book.slug, h)
    # get_dynamic_path lives in this module; ``models`` is django.db.models
    # and has no such attribute.
    return get_dynamic_path(None, pdf_name, ext='pdf')
208
209
def get_existing_customized_pdf(book):
    """
    Lists files under MEDIA_ROOT whose path matches the customized pdf
    naming scheme for the given book.
    """
    name_prefix = '%s-custom-' % (book.slug,)
    relative_path = get_dynamic_path(None, name_prefix, ext='pdf')
    # put a wildcard right before the extension
    pattern = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", relative_path)
    absolute_pattern = path.join(settings.MEDIA_ROOT, pattern)
    return glob(absolute_pattern)
218
219
class BookMedia(models.Model):
    """An additional media file (see MEDIA_FORMATS) attached to a Book."""
    # max_length must be an integer: a string limit breaks length comparison.
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, keeping file name and metadata in sync.

        If the slugified name differs from the stored one, the file is
        re-saved so that its name follows.  After the first save the book's
        zip package is removed, metadata and source SHA1 are read from the
        file, and the object is saved again with those values.
        """
        from slughifi import slughifi
        from catalogue.utils import ExistingFile, remove_zip

        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        # first save: the file path is read below
        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project'
            and 'funded_by' for mp3 and ogg files (empty strings when the
            file cannot be read), and an empty dict for other types.
        """
        import mutagen
        from mutagen import id3

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: unreadable tags leave the defaults in place
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort: unreadable tags leave the defaults in place
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None for types other than mp3/ogg, and when the value
            is missing or the file cannot be read.
        """
        import mutagen
        from mutagen import id3

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                # includes IndexError when no matching PRIV frame exists
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
315         else:
316             return None
317
318
319 class Book(models.Model):
320     title         = models.CharField(_('title'), max_length=120)
321     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
322     slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
323     description   = models.TextField(_('description'), blank=True)
324     created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
325     changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
326     parent_number = models.IntegerField(_('parent number'), default=0)
327     extra_info    = JSONField(_('extra information'), default='{}')
328     gazeta_link   = models.CharField(blank=True, max_length=240)
329     wiki_link     = models.CharField(blank=True, max_length=240)
330     # files generated during publication
331
332     file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
333     
334     parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
335     objects  = models.Manager()
336     tagged   = managers.ModelTaggedItemManager(Tag)
337     tags     = managers.TagDescriptor(Tag)
338
339     html_built = django.dispatch.Signal()
340     published = django.dispatch.Signal()
341
342     class AlreadyExists(Exception):
343         pass
344
345     class Meta:
346         ordering = ('sort_key',)
347         verbose_name = _('book')
348         verbose_name_plural = _('books')
349
    def __unicode__(self):
        """Represent the book by its title."""
        return self.title
352
353     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
354         from sortify import sortify
355
356         self.sort_key = sortify(self.title)
357
358         ret = super(Book, self).save(force_insert, force_update)
359
360         if reset_short_html:
361             self.reset_short_html()
362
363         return ret
364
    @permalink
    def get_absolute_url(self):
        """Return the URL of the book detail view for this book's slug."""
        return ('catalogue.views.book_detail', [self.slug])
368
    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
372
    def book_tag_slug(self):
        """Return the slug of this book's own tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]
375
376     def book_tag(self):
377         slug = self.book_tag_slug()
378         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
379         if created:
380             book_tag.name = self.title[:50]
381             book_tag.sort_key = self.title.lower()
382             book_tag.save()
383         return book_tag
384
385     def has_media(self, type):
386         if type in Book.file_types:
387             return bool(getattr(self, "%s_file" % type))
388         else:
389             return self.media.filter(type=type).exists()
390
391     def get_media(self, type):
392         if self.has_media(type):
393             if type in Book.file_types:
394                 return getattr(self, "%s_file" % type)
395             else:                                             
396                 return self.media.filter(type=type)
397         else:
398             return None
399
    # Shortcuts for get_media() with a fixed media type.
    def get_mp3(self):
        return self.get_media("mp3")
    def get_odt(self):
        return self.get_media("odt")
    def get_ogg(self):
        return self.get_media("ogg")
    def get_daisy(self):
        return self.get_media("daisy")                       
408
409     def reset_short_html(self):
410         if self.id is None:
411             return
412
413         cache_key = "Book.short_html/%d/%s"
414         for lang, langname in settings.LANGUAGES:
415             cache.delete(cache_key % (self.id, lang))
416         # Fragment.short_html relies on book's tags, so reset it here too
417         for fragm in self.fragments.all():
418             fragm.reset_short_html()
419
420     def short_html(self):
421         if self.id:
422             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
423             short_html = cache.get(cache_key)
424         else:
425             short_html = None
426
427         if short_html is not None:
428             return mark_safe(short_html)
429         else:
430             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
431             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
432
433             formats = []
434             # files generated during publication
435             if self.has_media("html"):
436                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
437             if self.has_media("pdf"):
438                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
439             if self.has_media("mobi"):
440                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
441             if self.root_ancestor.has_media("epub"):
442                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
443             if self.has_media("txt"):
444                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
445             # other files
446             for m in self.media.order_by('type'):
447                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
448
449             formats = [mark_safe(format) for format in formats]
450
451             short_html = unicode(render_to_string('catalogue/book_short.html',
452                 {'book': self, 'tags': tags, 'formats': formats}))
453
454             if self.id:
455                 cache.set(cache_key, short_html, CACHE_FOREVER)
456             return mark_safe(short_html)
457
458     @property
459     def root_ancestor(self):
460         """ returns the oldest ancestor """
461
462         if not hasattr(self, '_root_ancestor'):
463             book = self
464             while book.parent:
465                 book = book.parent
466             self._root_ancestor = book
467         return self._root_ancestor
468
469
    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    # attributes set on the function: a column label and a boolean flag
    has_description.short_description = _('description')
    has_description.boolean = True
474
475     # ugly ugly ugly
476     def has_odt_file(self):
477         return bool(self.has_media("odt"))
478     has_odt_file.short_description = 'ODT'
479     has_odt_file.boolean = True
480
481     def has_mp3_file(self):
482         return bool(self.has_media("mp3"))
483     has_mp3_file.short_description = 'MP3'
484     has_mp3_file.boolean = True
485
486     def has_ogg_file(self):
487         return bool(self.has_media("ogg"))
488     has_ogg_file.short_description = 'OGG'
489     has_ogg_file.boolean = True
490
491     def has_daisy_file(self):
492         return bool(self.has_media("daisy"))
493     has_daisy_file.short_description = 'DAISY'
494     has_daisy_file.boolean = True
495
496     def build_pdf(self, customizations=None, file_name=None):
497         """ (Re)builds the pdf file.
498         customizations - customizations which are passed to LaTeX class file.
499         file_name - save the pdf file under a different name and DO NOT save it in db.
500         """
501         from tempfile import NamedTemporaryFile
502         from os import unlink
503         from django.core.files import File
504         from librarian import pdf
505         from catalogue.utils import ORMDocProvider, remove_zip
506
507         try:
508             pdf_file = NamedTemporaryFile(delete=False)
509             pdf.transform(ORMDocProvider(self),
510                       file_path=str(self.xml_file.path),
511                       output_file=pdf_file,
512                       customizations=customizations
513                       )
514
515             if file_name is None:
516                 # we'd like to be sure not to overwrite changes happening while
517                 # (timely) pdf generation is taking place (async celery scenario)
518                 current_self = Book.objects.get(id=self.id)
519                 current_self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
520                 self.pdf_file = current_self.pdf_file
521             else:
522                 print "safing %s" % file_name
523                 print "to: %s" % DefaultStorage().path(file_name)
524                 DefaultStorage().save(file_name, File(open(pdf_file.name)))
525         finally:
526             unlink(pdf_file.name)
527
528         # remove cached downloadables
529         remove_zip(settings.ALL_PDF_ZIP)
530         for customized_pdf in get_existing_customized_pdf(self):
531             unlink(customized_pdf)
532
533     def build_mobi(self):
534         """ (Re)builds the MOBI file.
535
536         """
537         from tempfile import NamedTemporaryFile
538         from os import unlink
539         from django.core.files import File
540         from librarian import mobi
541         from catalogue.utils import ORMDocProvider, remove_zip
542
543         try:
544             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
545             mobi.transform(ORMDocProvider(self), verbose=1,
546                       file_path=str(self.xml_file.path),
547                       output_file=mobi_file.name,
548                       )
549
550             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
551         finally:
552             unlink(mobi_file.name)
553
554         # remove zip with all mobi files
555         remove_zip(settings.ALL_MOBI_ZIP)
556
557     def build_epub(self, remove_descendants=True):
558         """ (Re)builds the epub file.
559             If book has a parent, does nothing.
560             Unless remove_descendants is False, descendants' epubs are removed.
561         """
562         from StringIO import StringIO
563         from hashlib import sha1
564         from django.core.files.base import ContentFile
565         from librarian import epub, NoDublinCore
566         from catalogue.utils import ORMDocProvider, remove_zip
567
568         if self.parent:
569             # don't need an epub
570             return
571
572         epub_file = StringIO()
573         try:
574             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
575             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
576             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
577         except NoDublinCore:
578             pass
579
580         book_descendants = list(self.children.all())
581         while len(book_descendants) > 0:
582             child_book = book_descendants.pop(0)
583             if remove_descendants and child_book.has_epub_file():
584                 child_book.epub_file.delete()
585             # save anyway, to refresh short_html
586             child_book.save()
587             book_descendants += list(child_book.children.all())
588
589         # remove zip package with all epub files
590         remove_zip(settings.ALL_EPUB_ZIP)
591
592     def build_txt(self):
593         from StringIO import StringIO
594         from django.core.files.base import ContentFile
595         from librarian import text
596
597         out = StringIO()
598         text.transform(open(self.xml_file.path), out)
599         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
600
601
602     def build_html(self):
603         from tempfile import NamedTemporaryFile
604         from markupstring import MarkupString
605         from django.core.files import File
606         from slughifi import slughifi
607         from librarian import html
608
609         meta_tags = list(self.tags.filter(
610             category__in=('author', 'epoch', 'genre', 'kind')))
611         book_tag = self.book_tag()
612
613         html_file = NamedTemporaryFile()
614         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
615             self.html_file.save('%s.html' % self.slug, File(html_file))
616
617             # get ancestor l-tags for adding to new fragments
618             ancestor_tags = []
619             p = self.parent
620             while p:
621                 ancestor_tags.append(p.book_tag())
622                 p = p.parent
623
624             # Delete old fragments and create them from scratch
625             self.fragments.all().delete()
626             # Extract fragments
627             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
628             for fragment in closed_fragments.values():
629                 try:
630                     theme_names = [s.strip() for s in fragment.themes.split(',')]
631                 except AttributeError:
632                     continue
633                 themes = []
634                 for theme_name in theme_names:
635                     if not theme_name:
636                         continue
637                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
638                     if created:
639                         tag.name = theme_name
640                         tag.sort_key = theme_name.lower()
641                         tag.save()
642                     themes.append(tag)
643                 if not themes:
644                     continue
645
646                 text = fragment.to_string()
647                 short_text = ''
648                 if (len(MarkupString(text)) > 240):
649                     short_text = unicode(MarkupString(text)[:160])
650                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
651                     text=text, short_text=short_text)
652
653                 new_fragment.save()
654                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
655             self.save()
656             self.html_built.send(sender=self)
657             return True
658         return False
659
660     @staticmethod
661     def zip_format(format_):
662         def pretty_file_name(book):
663             return "%s/%s.%s" % (
664                 b.get_extra_info_value()['author'],
665                 b.slug,
666                 format_)
667
668         field_name = "%s_file" % format_
669         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
670         paths = [(pretty_file_name(b), getattr(b, field_name).path)
671                     for b in books]
672         result = create_zip.delay(paths,
673                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
674         return result.wait()
675
676     def zip_audiobooks(self):
677         bm = BookMedia.objects.filter(book=self, type='mp3')
678         paths = map(lambda bm: (None, bm.file.path), bm)
679         result = create_zip.delay(paths, self.slug)
680         return result.wait()
681
682     @classmethod
683     def from_xml_file(cls, xml_file, **kwargs):
684         from django.core.files import File
685         from librarian import dcparser
686
687         # use librarian to parse meta-data
688         book_info = dcparser.parse(xml_file)
689
690         if not isinstance(xml_file, File):
691             xml_file = File(open(xml_file))
692
693         try:
694             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
695         finally:
696             xml_file.close()
697
698     @classmethod
699     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
700             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
701         import re
702         from slughifi import slughifi
703         from sortify import sortify
704
705         # check for parts before we do anything
706         children = []
707         if hasattr(book_info, 'parts'):
708             for part_url in book_info.parts:
709                 base, slug = part_url.rsplit('/', 1)
710                 try:
711                     children.append(Book.objects.get(slug=slug))
712                 except Book.DoesNotExist, e:
713                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
714
715
716         # Read book metadata
717         book_base, book_slug = book_info.url.rsplit('/', 1)
718         if re.search(r'[^a-zA-Z0-9-]', book_slug):
719             raise ValueError('Invalid characters in slug')
720         book, created = Book.objects.get_or_create(slug=book_slug)
721
722         if created:
723             book_shelves = []
724         else:
725             if not overwrite:
726                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
727             # Save shelves for this book
728             book_shelves = list(book.tags.filter(category='set'))
729
730         book.title = book_info.title
731         book.set_extra_info_value(book_info.to_dict())
732         book.save()
733
734         meta_tags = []
735         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
736         for field_name, category in categories:
737             try:
738                 tag_names = getattr(book_info, field_name)
739             except:
740                 tag_names = [getattr(book_info, category)]
741             for tag_name in tag_names:
742                 tag_sort_key = tag_name
743                 if category == 'author':
744                     tag_sort_key = tag_name.last_name
745                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
746                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
747                 if created:
748                     tag.name = tag_name
749                     tag.sort_key = sortify(tag_sort_key.lower())
750                     tag.save()
751                 meta_tags.append(tag)
752
753         book.tags = set(meta_tags + book_shelves)
754
755         book_tag = book.book_tag()
756
757         for n, child_book in enumerate(children):
758             child_book.parent = book
759             child_book.parent_number = n
760             child_book.save()
761
762         # Save XML and HTML files
763         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
764
765         # delete old fragments when overwriting
766         book.fragments.all().delete()
767
768         if book.build_html():
769             if not settings.NO_BUILD_TXT and build_txt:
770                 book.build_txt()
771
772         if not settings.NO_BUILD_EPUB and build_epub:
773             book.root_ancestor.build_epub()
774
775         if not settings.NO_BUILD_PDF and build_pdf:
776             book.root_ancestor.build_pdf()
777
778         if not settings.NO_BUILD_MOBI and build_mobi:
779             book.build_mobi()
780
781         book_descendants = list(book.children.all())
782         # add l-tag to descendants and their fragments
783         # delete unnecessary EPUB files
784         while len(book_descendants) > 0:
785             child_book = book_descendants.pop(0)
786             child_book.tags = list(child_book.tags) + [book_tag]
787             child_book.save()
788             for fragment in child_book.fragments.all():
789                 fragment.tags = set(list(fragment.tags) + [book_tag])
790             book_descendants += list(child_book.children.all())
791
792         book.save()
793
794         # refresh cache
795         book.reset_tag_counter()
796         book.reset_theme_counter()
797
798         cls.published.send(sender=book)
799         return book
800
801     def reset_tag_counter(self):
802         if self.id is None:
803             return
804
805         cache_key = "Book.tag_counter/%d" % self.id
806         cache.delete(cache_key)
807         if self.parent:
808             self.parent.reset_tag_counter()
809
810     @property
811     def tag_counter(self):
812         if self.id:
813             cache_key = "Book.tag_counter/%d" % self.id
814             tags = cache.get(cache_key)
815         else:
816             tags = None
817
818         if tags is None:
819             tags = {}
820             for child in self.children.all().order_by():
821                 for tag_pk, value in child.tag_counter.iteritems():
822                     tags[tag_pk] = tags.get(tag_pk, 0) + value
823             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
824                 tags[tag.pk] = 1
825
826             if self.id:
827                 cache.set(cache_key, tags, CACHE_FOREVER)
828         return tags
829
830     def reset_theme_counter(self):
831         if self.id is None:
832             return
833
834         cache_key = "Book.theme_counter/%d" % self.id
835         cache.delete(cache_key)
836         if self.parent:
837             self.parent.reset_theme_counter()
838
839     @property
840     def theme_counter(self):
841         if self.id:
842             cache_key = "Book.theme_counter/%d" % self.id
843             tags = cache.get(cache_key)
844         else:
845             tags = None
846
847         if tags is None:
848             tags = {}
849             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
850                 for tag in fragment.tags.filter(category='theme').order_by():
851                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
852
853             if self.id:
854                 cache.set(cache_key, tags, CACHE_FOREVER)
855         return tags
856
857     def pretty_title(self, html_links=False):
858         book = self
859         names = list(book.tags.filter(category='author'))
860
861         books = []
862         while book:
863             books.append(book)
864             book = book.parent
865         names.extend(reversed(books))
866
867         if html_links:
868             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
869         else:
870             names = [tag.name for tag in names]
871
872         return ', '.join(names)
873
874     @classmethod
875     def tagged_top_level(cls, tags):
876         """ Returns top-level books tagged with `tags'.
877
878         It only returns those books which don't have ancestors which are
879         also tagged with those tags.
880
881         """
882         # get relevant books and their tags
883         objects = cls.tagged.with_all(tags)
884         # eliminate descendants
885         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
886         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
887         if descendants_keys:
888             objects = objects.exclude(pk__in=descendants_keys)
889
890         return objects
891
892     @classmethod
893     def book_list(cls, filter=None):
894         """Generates a hierarchical listing of all books.
895
896         Books are optionally filtered with a test function.
897
898         """
899
900         books_by_parent = {}
901         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
902         if filter:
903             books = books.filter(filter).distinct()
904             book_ids = set((book.pk for book in books))
905             for book in books:
906                 parent = book.parent_id
907                 if parent not in book_ids:
908                     parent = None
909                 books_by_parent.setdefault(parent, []).append(book)
910         else:
911             for book in books:
912                 books_by_parent.setdefault(book.parent_id, []).append(book)
913
914         orphans = []
915         books_by_author = SortedDict()
916         for tag in Tag.objects.filter(category='author'):
917             books_by_author[tag] = []
918
919         for book in books_by_parent.get(None,()):
920             authors = list(book.tags.filter(category='author'))
921             if authors:
922                 for author in authors:
923                     books_by_author[author].append(book)
924             else:
925                 orphans.append(book)
926
927         return books_by_author, orphans, books_by_parent
928
929     _audiences_pl = {
930         "SP1": (1, u"szkoła podstawowa"),
931         "SP2": (1, u"szkoła podstawowa"),
932         "P": (1, u"szkoła podstawowa"),
933         "G": (2, u"gimnazjum"),
934         "L": (3, u"liceum"),
935         "LP": (3, u"liceum"),
936     }
937     def audiences_pl(self):
938         audiences = self.get_extra_info_value().get('audiences', [])
939         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
940         return [a[1] for a in audiences]
941
942
943 def _has_factory(ftype):
944     has = lambda self: bool(getattr(self, "%s_file" % ftype))
945     has.short_description = t.upper()
946     has.boolean = True
947     has.__name__ = "has_%s_file" % ftype
948     return has
949
950     
# Add the file fields: for every entry of Book.file_types, attach a
# "<type>_file" FileField to Book (labelled "<TYPE> file", optional, with
# its upload path taken from book_upload_path) and a "has_<type>_file"
# accessor built by _has_factory.
for t in Book.file_types:
    field_name = "%s_file" % t
    # contribute_to_class registers the field on the already-defined model
    models.FileField(_("%s file" % t.upper()),
            upload_to=book_upload_path(t),
            blank=True).contribute_to_class(Book, field_name)

    setattr(Book, "has_%s_file" % t, _has_factory(t))
959
960
class Fragment(models.Model):
    """A piece of a book's text, addressed by an anchor and taggable.

    Rendered short HTML is cached per fragment id and language under
    "Fragment.short_html/<id>/<lang>".
    """
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book text, pointing at this fragment's anchor."""
        book_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every language.

        Does nothing for a fragment without an id.
        """
        if self.id is not None:
            key_template = "Fragment.short_html/%d/%s"
            for lang_code, _lang_name in settings.LANGUAGES:
                cache.delete(key_template % (self.id, lang_code))

    def short_html(self):
        """Return the short HTML of this fragment, marked safe.

        For a fragment with an id the result is cached per active
        language; on a miss (or without an id) the
        'catalogue/fragment_short.html' template is rendered.
        """
        cache_key = None
        html = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            html = cache.get(cache_key)

        if html is None:
            html = unicode(render_to_string('catalogue/fragment_short.html',
                {'fragment': self}))
            if self.id:
                cache.set(cache_key, html, CACHE_FOREVER)

        return mark_safe(html)
1002
1003
class FileRecord(models.Model):
    """A timestamped record of a file's SHA-1 hash, keyed by slug and type.

    Records are ordered by time, slug and type, all descending.
    """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time','-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Return "<sha1> <slug>.<type>"."""
        file_name = "%s.%s" % (self.slug, self.type)
        return "%s %s" % (self.sha1, file_name)
1017
1018 ###########
1019 #
1020 # SIGNALS
1021 #
1022 ###########
1023
1024
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached tag data after an object's tags were changed.

    ``sender`` is the object whose tags changed and ``affected_tags`` the
    tags involved. Every affected tag gets its ``book_count`` cleared and
    its ``changed_at`` set to the current time. In addition:

    * for a Book sender, the book's tag counter is reset when any affected
      tag lies outside the 'book', 'theme' and 'set' categories;
    * for a Fragment sender, the theme counter of the fragment's book is
      reset when any affected tag is a 'theme' tag.
    """
    # Collect the primary keys once: the tags are needed for up to three
    # queries, and a one-shot iterable would be exhausted after the first.
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None, changed_at=datetime.now())

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        counted_tags = Tag.objects.filter(pk__in=affected_pks).\
            exclude(category__in=('book', 'theme', 'set'))
        if counted_tags.exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        theme_tags = Tag.objects.filter(pk__in=affected_pks).\
            filter(category='theme')
        if theme_tags.exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1041
1042
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book when a BookMedia is about to be deleted.

    The handler is connected without a sender restriction, so signals
    from any other model are ignored here.
    """
    if sender != BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
1048
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after a BookMedia has been saved.

    The handler is connected without a sender restriction, so signals
    from any other model are ignored here.
    """
    if sender != BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)