Fix for a bug introduced in refactoring: customized pdfs were instantly deleted,...
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from datetime import datetime
6
7 from django.db import models
8 from django.db.models import permalink, Q
9 import django.dispatch
10 from django.core.cache import cache
11 from django.core.files.storage import DefaultStorage
12 from django.utils.translation import ugettext_lazy as _
13 from django.contrib.auth.models import User
14 from django.template.loader import render_to_string
15 from django.utils.datastructures import SortedDict
16 from django.utils.safestring import mark_safe
17 from django.utils.translation import get_language
18 from django.core.urlresolvers import reverse
19 from django.db.models.signals import post_save, m2m_changed, pre_delete
20
21 from django.conf import settings
22
23 from newtagging.models import TagBase, tags_updated
24 from newtagging import managers
25 from catalogue.fields import JSONField, OverwritingFileField
26 from catalogue.utils import create_zip
27 from shutil import copy
28 from glob import glob
29 import re
30 from os import path
31
32
# (stored value, translatable label) pairs; used as the `choices`
# of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)

# (stored value, translatable label) pairs; used as the `choices`
# of BookMedia.type.
MEDIA_FORMATS = (
    ('odt', _('ODT file')),
    ('mp3', _('MP3 file')),
    ('ogg', _('OGG file')),
    ('daisy', _('DAISY file')), 
)

# not quite, but Django wants you to set a timeout
# (value is in seconds: 28 * 24 * 60 * 60; passed as the timeout to cache.set)
CACHE_FOREVER = 2419200  # 28 days
52
53
class TagSubcategoryManager(models.Manager):
    """Manager whose queryset is limited to a single `category` value."""

    def __init__(self, subcategory):
        super(TagSubcategoryManager, self).__init__()
        # category value every returned row must have
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by the category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
61
62
class Tag(TagBase):
    """A catalogue tag; `category` is one of TAG_CATEGORIES."""
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # cached result of get_count(); None means "not computed yet"
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' from created_at;
    # looks like a copy-paste, confirm before changing the msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without a category."""
        pass

    # URL chunk -> category name
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # category name -> URL chunk (categories_rev inverted)
    categories_dict = dict((category, chunk) for chunk, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.url_chunk])

    def has_description(self):
        """ tells whether the description is non-empty """
        return len(self.description) != 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The number is computed once, stored in `book_count` (the tag is
        saved) and returned from there on subsequent calls.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants
                book_tag_slugs = [book.book_tag_slug() for book in objects]
                l_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)

        self.book_count = objects.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """ turns a '/'-separated tag path into a list of Tag objects

        A chunk found in `categories_rev` sets the category of the chunk
        that follows it; any other chunk is a bare slug looked up among
        non-book tags. Non-string input is passed to TagBase.get_tag_list.

        Raises Tag.DoesNotExist when the path ends with a category chunk
        (lookups may raise it as well), Tag.MultipleObjectsReturned (with
        `tags` and `ambiguous_slugs` attributes) when a bare slug matches
        several tags, and Tag.UrlDeprecationWarning (with a `tags`
        attribute) when any bare slug was resolved.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        deprecated = False
        for name in tags.split('/'):
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            elif name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            else:
                try:
                    unqualified = Tag.objects.exclude(category='book').get(slug=name)
                except Tag.MultipleObjectsReturned:
                    ambiguous_slugs.append(name)
                else:
                    real_tags.append(unqualified)
                    deprecated = True

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            ambiguity = Tag.MultipleObjectsReturned()
            ambiguity.tags = real_tags
            ambiguity.ambiguous_slugs = ambiguous_slugs
            raise ambiguity
        if deprecated:
            deprecation = Tag.UrlDeprecationWarning()
            deprecation.tags = real_tags
            raise deprecation
        return real_tags

    @property
    def url_chunk(self):
        """ '<category chunk>/<slug>' as used in tag URLs """
        category_chunk = Tag.categories_dict[self.category]
        return '/'.join((category_chunk, self.slug))
176
177
def get_dynamic_path(media, filename, ext=None, maxlen=100):
    """
    Builds a 'book/<ext>/<name>.<ext>' path.

    ext defaults to media.type ('daisy.zip' for daisy media). The name is
    the slughified media.name or, when media is None or has no name, the
    slughified part of filename before the first dot. The name is cut so
    that the path stays below maxlen.
    """
    from slughifi import slughifi

    # how to put related book's slug here?
    if not ext:
        ext = 'daisy.zip' if media.type == 'daisy' else media.type

    if media is not None and media.name:
        name = slughifi(media.name)
    else:
        name = slughifi(filename.split(".")[0])

    # room left for the name: maxlen minus the fixed path parts, minus 4
    fixed_parts = 'book/%s/.%s' % (ext, ext)
    name_limit = maxlen - len(fixed_parts) - 4
    return 'book/%s/%s.%s' % (ext, name[:name_limit], ext)
192
193
194 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """
    Returns a callable which forwards its positional arguments to
    get_dynamic_path together with the given ext and maxlen.
    """
    def upload_path(*args):
        return get_dynamic_path(*args, ext=ext, maxlen=maxlen)
    return upload_path
197
198
def get_customized_pdf_path(book, customizations):
    """
    Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.

    The options are hashed in sorted order, so the order in which they
    were given does not influence the name. The passed sequence itself is
    not modified.
    """
    # sorted() builds a new list; list.sort() used to reorder the caller's
    # list as a side effect.
    # NOTE(review): hash() of strings is only stable between processes when
    # hash randomization is off - confirm for the deployment.
    h = hash(tuple(sorted(customizations)))
    pdf_name = '%s-custom-%s' % (book.slug, h)
    pdf_file = get_dynamic_path(None, pdf_name, ext='pdf')
    return pdf_file
208
209
def get_existing_customized_pdf(book):
    """
    Returns a list of paths to generated customized pdf of a book
    (whatever currently matches the book's '<slug>-custom-*' pattern
    under MEDIA_ROOT).
    """
    name_prefix = '%s-custom-' % (book.slug,)
    relative_path = get_dynamic_path(None, name_prefix, ext='pdf')
    # put a wildcard right before the extension
    pattern = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", relative_path)
    return glob(path.join(settings.MEDIA_ROOT, pattern))
218
219
class BookMedia(models.Model):
    """A media file (one of MEDIA_FORMATS) attached to a book."""
    # max_length is an integer: a string here makes the length comparison
    # meaningless on Python 2
    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    # metadata read from the file itself, see read_meta()
    extra_info  = JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    # see read_source_sha1()
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """
            Saves the media, re-saving the file when the name has changed,
            removes the book's zip package and refreshes extra_info and
            source_sha1 from the file.
        """
        from slughifi import slughifi
        from catalogue.utils import ExistingFile, remove_zip

        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            pass
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        # first save: the file has to be stored before it can be read below
        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        remove_zip(self.book.slug)

        extra_info = self.get_extra_info_value()
        extra_info.update(self.read_meta())
        self.set_extra_info_value(extra_info)
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        # NOTE(review): the second save repeats the caller's arguments;
        # presumably force_insert=True would fail here - verify.
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with artist_name, director_name, project and
            funded_by for mp3 and ogg files (empty strings when the file
            cannot be read), and an empty dict for other types.
        """
        import mutagen
        from mutagen import id3

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV') 
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV') 
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # best effort: an unreadable file just yields empty values;
                # SystemExit/KeyboardInterrupt are no longer swallowed
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # best effort, as above
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None for types other than mp3/ogg and whenever the
            value cannot be read.
        """
        import mutagen
        from mutagen import id3

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV') 
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0] 
            except Exception:
                return None
        else:
            return None
317
318
319 class Book(models.Model):
    title         = models.CharField(_('title'), max_length=120)
    # derived from the title on every save()
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label repeats 'creation date' from created_at;
    # looks like a copy-paste - confirm.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # position among the parent's parts (set in from_text_and_meta)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication

    # formats accessed through '<type>_file' attributes by has_media() and
    # get_media(); other types are looked up in the related BookMedia
    file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
    
    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # sent (with the book as sender) at the end of a successful build_html()
    html_built = django.dispatch.Signal()
    # sent (with the book as sender) at the end of from_text_and_meta()
    published = django.dispatch.Signal()

    class AlreadyExists(Exception):
        # raised by from_text_and_meta() for an existing slug when
        # overwrite is not set
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')
349
    def __unicode__(self):
        """ the book is represented by its title """
        return self.title
352
353     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
354         from sortify import sortify
355
356         self.sort_key = sortify(self.title)
357
358         ret = super(Book, self).save(force_insert, force_update)
359
360         if reset_short_html:
361             self.reset_short_html()
362
363         return ret
364
365     @permalink
366     def get_absolute_url(self):
367         return ('catalogue.views.book_detail', [self.slug])
368
    @property
    def name(self):
        """ alias of the title """
        return self.title
372
373     def book_tag_slug(self):
374         return ('l-' + self.slug)[:120]
375
376     def book_tag(self):
377         slug = self.book_tag_slug()
378         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
379         if created:
380             book_tag.name = self.title[:50]
381             book_tag.sort_key = self.title.lower()
382             book_tag.save()
383         return book_tag
384
385     def has_media(self, type):
386         if type in Book.file_types:
387             return bool(getattr(self, "%s_file" % type))
388         else:
389             return self.media.filter(type=type).exists()
390
391     def get_media(self, type):
392         if self.has_media(type):
393             if type in Book.file_types:
394                 return getattr(self, "%s_file" % type)
395             else:                                             
396                 return self.media.filter(type=type)
397         else:
398             return None
399
400     def get_mp3(self):
401         return self.get_media("mp3")
402     def get_odt(self):
403         return self.get_media("odt")
404     def get_ogg(self):
405         return self.get_media("ogg")
406     def get_daisy(self):
407         return self.get_media("daisy")                       
408
409     def reset_short_html(self):
410         if self.id is None:
411             return
412
413         cache_key = "Book.short_html/%d/%s"
414         for lang, langname in settings.LANGUAGES:
415             cache.delete(cache_key % (self.id, lang))
416         # Fragment.short_html relies on book's tags, so reset it here too
417         for fragm in self.fragments.all():
418             fragm.reset_short_html()
419
420     def short_html(self):
421         if self.id:
422             cache_key = "Book.short_html/%d/%s" % (self.id, get_language())
423             short_html = cache.get(cache_key)
424         else:
425             short_html = None
426
427         if short_html is not None:
428             return mark_safe(short_html)
429         else:
430             tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
431             tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
432
433             formats = []
434             # files generated during publication
435             if self.has_media("html"):
436                 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
437             if self.has_media("pdf"):
438                 formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
439             if self.has_media("mobi"):
440                 formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
441             if self.root_ancestor.has_media("epub"):
442                 formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
443             if self.has_media("txt"):
444                 formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
445             # other files
446             for m in self.media.order_by('type'):
447                 formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
448
449             formats = [mark_safe(format) for format in formats]
450
451             short_html = unicode(render_to_string('catalogue/book_short.html',
452                 {'book': self, 'tags': tags, 'formats': formats}))
453
454             if self.id:
455                 cache.set(cache_key, short_html, CACHE_FOREVER)
456             return mark_safe(short_html)
457
458     @property
459     def root_ancestor(self):
460         """ returns the oldest ancestor """
461
462         if not hasattr(self, '_root_ancestor'):
463             book = self
464             while book.parent:
465                 book = book.parent
466             self._root_ancestor = book
467         return self._root_ancestor
468
469
470     def has_description(self):
471         return len(self.description) > 0
472     has_description.short_description = _('description')
473     has_description.boolean = True
474
475     # ugly ugly ugly
476     def has_odt_file(self):
477         return bool(self.has_media("odt"))
478     has_odt_file.short_description = 'ODT'
479     has_odt_file.boolean = True
480
481     def has_mp3_file(self):
482         return bool(self.has_media("mp3"))
483     has_mp3_file.short_description = 'MP3'
484     has_mp3_file.boolean = True
485
486     def has_ogg_file(self):
487         return bool(self.has_media("ogg"))
488     has_ogg_file.short_description = 'OGG'
489     has_ogg_file.boolean = True
490
491     def has_daisy_file(self):
492         return bool(self.has_media("daisy"))
493     has_daisy_file.short_description = 'DAISY'
494     has_daisy_file.boolean = True
495
496     def build_pdf(self, customizations=None, file_name=None):
497         """ (Re)builds the pdf file.
498         customizations - customizations which are passed to LaTeX class file.
499         file_name - save the pdf file under a different name and DO NOT save it in db.
500         """
501         from tempfile import NamedTemporaryFile
502         from os import unlink
503         from django.core.files import File
504         from librarian import pdf
505         from catalogue.utils import ORMDocProvider, remove_zip
506
507         try:
508             pdf_file = NamedTemporaryFile(delete=False)
509             pdf.transform(ORMDocProvider(self),
510                       file_path=str(self.xml_file.path),
511                       output_file=pdf_file,
512                       customizations=customizations
513                       )
514
515             if file_name is None:
516                 # we'd like to be sure not to overwrite changes happening while
517                 # (timely) pdf generation is taking place (async celery scenario)
518                 current_self = Book.objects.get(id=self.id)
519                 current_self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
520                 self.pdf_file = current_self.pdf_file
521
522                 # remove cached downloadables
523                 remove_zip(settings.ALL_PDF_ZIP)
524
525                 for customized_pdf in get_existing_customized_pdf(self):
526                     unlink(customized_pdf)
527             else:
528                 print "save %s to: %s" % (file_name, DefaultStorage().path(file_name))
529
530                 pdf_django_file = File(open(pdf_file.name))
531                 DefaultStorage().save(file_name, pdf_django_file)
532                 pdf_django_file.close()
533         finally:
534             pass
535             unlink(pdf_file.name)
536
537     def build_mobi(self):
538         """ (Re)builds the MOBI file.
539
540         """
541         from tempfile import NamedTemporaryFile
542         from os import unlink
543         from django.core.files import File
544         from librarian import mobi
545         from catalogue.utils import ORMDocProvider, remove_zip
546
547         try:
548             mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
549             mobi.transform(ORMDocProvider(self), verbose=1,
550                       file_path=str(self.xml_file.path),
551                       output_file=mobi_file.name,
552                       )
553
554             self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
555         finally:
556             unlink(mobi_file.name)
557
558         # remove zip with all mobi files
559         remove_zip(settings.ALL_MOBI_ZIP)
560
561     def build_epub(self, remove_descendants=True):
562         """ (Re)builds the epub file.
563             If book has a parent, does nothing.
564             Unless remove_descendants is False, descendants' epubs are removed.
565         """
566         from StringIO import StringIO
567         from hashlib import sha1
568         from django.core.files.base import ContentFile
569         from librarian import epub, NoDublinCore
570         from catalogue.utils import ORMDocProvider, remove_zip
571
572         if self.parent:
573             # don't need an epub
574             return
575
576         epub_file = StringIO()
577         try:
578             epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
579             self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
580             FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
581         except NoDublinCore:
582             pass
583
584         book_descendants = list(self.children.all())
585         while len(book_descendants) > 0:
586             child_book = book_descendants.pop(0)
587             if remove_descendants and child_book.has_epub_file():
588                 child_book.epub_file.delete()
589             # save anyway, to refresh short_html
590             child_book.save()
591             book_descendants += list(child_book.children.all())
592
593         # remove zip package with all epub files
594         remove_zip(settings.ALL_EPUB_ZIP)
595
596     def build_txt(self):
597         from StringIO import StringIO
598         from django.core.files.base import ContentFile
599         from librarian import text
600
601         out = StringIO()
602         text.transform(open(self.xml_file.path), out)
603         self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
604
605
606     def build_html(self):
607         from tempfile import NamedTemporaryFile
608         from markupstring import MarkupString
609         from django.core.files import File
610         from slughifi import slughifi
611         from librarian import html
612
613         meta_tags = list(self.tags.filter(
614             category__in=('author', 'epoch', 'genre', 'kind')))
615         book_tag = self.book_tag()
616
617         html_file = NamedTemporaryFile()
618         if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
619             self.html_file.save('%s.html' % self.slug, File(html_file))
620
621             # get ancestor l-tags for adding to new fragments
622             ancestor_tags = []
623             p = self.parent
624             while p:
625                 ancestor_tags.append(p.book_tag())
626                 p = p.parent
627
628             # Delete old fragments and create them from scratch
629             self.fragments.all().delete()
630             # Extract fragments
631             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
632             for fragment in closed_fragments.values():
633                 try:
634                     theme_names = [s.strip() for s in fragment.themes.split(',')]
635                 except AttributeError:
636                     continue
637                 themes = []
638                 for theme_name in theme_names:
639                     if not theme_name:
640                         continue
641                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
642                     if created:
643                         tag.name = theme_name
644                         tag.sort_key = theme_name.lower()
645                         tag.save()
646                     themes.append(tag)
647                 if not themes:
648                     continue
649
650                 text = fragment.to_string()
651                 short_text = ''
652                 if (len(MarkupString(text)) > 240):
653                     short_text = unicode(MarkupString(text)[:160])
654                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
655                     text=text, short_text=short_text)
656
657                 new_fragment.save()
658                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
659             self.save()
660             self.html_built.send(sender=self)
661             return True
662         return False
663
664     @staticmethod
665     def zip_format(format_):
666         def pretty_file_name(book):
667             return "%s/%s.%s" % (
668                 b.get_extra_info_value()['author'],
669                 b.slug,
670                 format_)
671
672         field_name = "%s_file" % format_
673         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
674         paths = [(pretty_file_name(b), getattr(b, field_name).path)
675                     for b in books]
676         result = create_zip.delay(paths,
677                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
678         return result.wait()
679
680     def zip_audiobooks(self):
681         bm = BookMedia.objects.filter(book=self, type='mp3')
682         paths = map(lambda bm: (None, bm.file.path), bm)
683         result = create_zip.delay(paths, self.slug)
684         return result.wait()
685
686     @classmethod
687     def from_xml_file(cls, xml_file, **kwargs):
688         from django.core.files import File
689         from librarian import dcparser
690
691         # use librarian to parse meta-data
692         book_info = dcparser.parse(xml_file)
693
694         if not isinstance(xml_file, File):
695             xml_file = File(open(xml_file))
696
697         try:
698             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
699         finally:
700             xml_file.close()
701
702     @classmethod
703     def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
704             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
705         import re
706         from slughifi import slughifi
707         from sortify import sortify
708
709         # check for parts before we do anything
710         children = []
711         if hasattr(book_info, 'parts'):
712             for part_url in book_info.parts:
713                 base, slug = part_url.rsplit('/', 1)
714                 try:
715                     children.append(Book.objects.get(slug=slug))
716                 except Book.DoesNotExist, e:
717                     raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
718
719
720         # Read book metadata
721         book_base, book_slug = book_info.url.rsplit('/', 1)
722         if re.search(r'[^a-zA-Z0-9-]', book_slug):
723             raise ValueError('Invalid characters in slug')
724         book, created = Book.objects.get_or_create(slug=book_slug)
725
726         if created:
727             book_shelves = []
728         else:
729             if not overwrite:
730                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
731             # Save shelves for this book
732             book_shelves = list(book.tags.filter(category='set'))
733
734         book.title = book_info.title
735         book.set_extra_info_value(book_info.to_dict())
736         book.save()
737
738         meta_tags = []
739         categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
740         for field_name, category in categories:
741             try:
742                 tag_names = getattr(book_info, field_name)
743             except:
744                 tag_names = [getattr(book_info, category)]
745             for tag_name in tag_names:
746                 tag_sort_key = tag_name
747                 if category == 'author':
748                     tag_sort_key = tag_name.last_name
749                     tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
750                 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
751                 if created:
752                     tag.name = tag_name
753                     tag.sort_key = sortify(tag_sort_key.lower())
754                     tag.save()
755                 meta_tags.append(tag)
756
757         book.tags = set(meta_tags + book_shelves)
758
759         book_tag = book.book_tag()
760
761         for n, child_book in enumerate(children):
762             child_book.parent = book
763             child_book.parent_number = n
764             child_book.save()
765
766         # Save XML and HTML files
767         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
768
769         # delete old fragments when overwriting
770         book.fragments.all().delete()
771
772         if book.build_html():
773             if not settings.NO_BUILD_TXT and build_txt:
774                 book.build_txt()
775
776         if not settings.NO_BUILD_EPUB and build_epub:
777             book.root_ancestor.build_epub()
778
779         if not settings.NO_BUILD_PDF and build_pdf:
780             book.root_ancestor.build_pdf()
781
782         if not settings.NO_BUILD_MOBI and build_mobi:
783             book.build_mobi()
784
785         book_descendants = list(book.children.all())
786         # add l-tag to descendants and their fragments
787         # delete unnecessary EPUB files
788         while len(book_descendants) > 0:
789             child_book = book_descendants.pop(0)
790             child_book.tags = list(child_book.tags) + [book_tag]
791             child_book.save()
792             for fragment in child_book.fragments.all():
793                 fragment.tags = set(list(fragment.tags) + [book_tag])
794             book_descendants += list(child_book.children.all())
795
796         book.save()
797
798         # refresh cache
799         book.reset_tag_counter()
800         book.reset_theme_counter()
801
802         cls.published.send(sender=book)
803         return book
804
805     def reset_tag_counter(self):
806         if self.id is None:
807             return
808
809         cache_key = "Book.tag_counter/%d" % self.id
810         cache.delete(cache_key)
811         if self.parent:
812             self.parent.reset_tag_counter()
813
814     @property
815     def tag_counter(self):
816         if self.id:
817             cache_key = "Book.tag_counter/%d" % self.id
818             tags = cache.get(cache_key)
819         else:
820             tags = None
821
822         if tags is None:
823             tags = {}
824             for child in self.children.all().order_by():
825                 for tag_pk, value in child.tag_counter.iteritems():
826                     tags[tag_pk] = tags.get(tag_pk, 0) + value
827             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
828                 tags[tag.pk] = 1
829
830             if self.id:
831                 cache.set(cache_key, tags, CACHE_FOREVER)
832         return tags
833
834     def reset_theme_counter(self):
835         if self.id is None:
836             return
837
838         cache_key = "Book.theme_counter/%d" % self.id
839         cache.delete(cache_key)
840         if self.parent:
841             self.parent.reset_theme_counter()
842
843     @property
844     def theme_counter(self):
845         if self.id:
846             cache_key = "Book.theme_counter/%d" % self.id
847             tags = cache.get(cache_key)
848         else:
849             tags = None
850
851         if tags is None:
852             tags = {}
853             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
854                 for tag in fragment.tags.filter(category='theme').order_by():
855                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
856
857             if self.id:
858                 cache.set(cache_key, tags, CACHE_FOREVER)
859         return tags
860
861     def pretty_title(self, html_links=False):
862         book = self
863         names = list(book.tags.filter(category='author'))
864
865         books = []
866         while book:
867             books.append(book)
868             book = book.parent
869         names.extend(reversed(books))
870
871         if html_links:
872             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
873         else:
874             names = [tag.name for tag in names]
875
876         return ', '.join(names)
877
878     @classmethod
879     def tagged_top_level(cls, tags):
880         """ Returns top-level books tagged with `tags'.
881
882         It only returns those books which don't have ancestors which are
883         also tagged with those tags.
884
885         """
886         # get relevant books and their tags
887         objects = cls.tagged.with_all(tags)
888         # eliminate descendants
889         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in objects])
890         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags)]
891         if descendants_keys:
892             objects = objects.exclude(pk__in=descendants_keys)
893
894         return objects
895
896     @classmethod
897     def book_list(cls, filter=None):
898         """Generates a hierarchical listing of all books.
899
900         Books are optionally filtered with a test function.
901
902         """
903
904         books_by_parent = {}
905         books = cls.objects.all().order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
906         if filter:
907             books = books.filter(filter).distinct()
908             book_ids = set((book.pk for book in books))
909             for book in books:
910                 parent = book.parent_id
911                 if parent not in book_ids:
912                     parent = None
913                 books_by_parent.setdefault(parent, []).append(book)
914         else:
915             for book in books:
916                 books_by_parent.setdefault(book.parent_id, []).append(book)
917
918         orphans = []
919         books_by_author = SortedDict()
920         for tag in Tag.objects.filter(category='author'):
921             books_by_author[tag] = []
922
923         for book in books_by_parent.get(None,()):
924             authors = list(book.tags.filter(category='author'))
925             if authors:
926                 for author in authors:
927                     books_by_author[author].append(book)
928             else:
929                 orphans.append(book)
930
931         return books_by_author, orphans, books_by_parent
932
    # Lookup table used by audiences_pl(): audience code -> (rank, Polish
    # label).  audiences_pl() de-duplicates and sorts these pairs, so the
    # rank decides the output order and codes sharing a pair collapse into
    # a single entry.
    _audiences_pl = {
        "SP1": (1, u"szkoła podstawowa"),  # primary school
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),  # lower secondary school
        "L": (3, u"liceum"),  # upper secondary school
        "LP": (3, u"liceum"),
    }
941     def audiences_pl(self):
942         audiences = self.get_extra_info_value().get('audiences', [])
943         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
944         return [a[1] for a in audiences]
945
946
947 def _has_factory(ftype):
948     has = lambda self: bool(getattr(self, "%s_file" % ftype))
949     has.short_description = t.upper()
950     has.boolean = True
951     has.__name__ = "has_%s_file" % ftype
952     return has
953
954     
# add the file fields
# For every entry of Book.file_types this attaches two things to Book:
# a "<type>_file" FileField and a "has_<type>_file" predicate.
# NOTE: `t` and `field_name` are plain module-level names and keep their
# last value after the loop finishes.
for t in Book.file_types:
    field_name = "%s_file" % t
    # Build the field and register it on the already-defined Book class;
    # upload_to comes from book_upload_path(t), defined elsewhere in this file.
    models.FileField(_("%s file" % t.upper()),
            upload_to=book_upload_path(t),
            blank=True).contribute_to_class(Book, field_name)

    # has_<type>_file() reports whether the matching file field is set.
    setattr(Book, "has_%s_file" % t, _has_factory(t))
963
964
class Fragment(models.Model):
    """ A piece of a book's text, addressed by an anchor and taggable. """
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """ Return the book text URL with this fragment's anchor appended. """
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def reset_short_html(self):
        """ Delete the cached short HTML of this fragment for every language. """
        if self.id is not None:
            key_template = "Fragment.short_html/%d/%s"
            for lang, langname in settings.LANGUAGES:
                cache.delete(key_template % (self.id, lang))

    def short_html(self):
        """ Return the fragment's short HTML snippet, marked safe.

        Fragments that have an ``id`` are cached per active language; on a
        cache miss the 'catalogue/fragment_short.html' template is rendered
        and the result stored.

        """
        cache_key = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = cache.get(cache_key)
            if cached is not None:
                return mark_safe(cached)

        rendered = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self}))
        if self.id:
            cache.set(cache_key, rendered, CACHE_FOREVER)
        return mark_safe(rendered)
1006
1007
class FileRecord(models.Model):
    """ A file identified by slug and type, with its SHA-1 hash and a
    timestamp set when the record is created. """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """ Render the record as "<sha1> <slug>.<type>". """
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
1021
1022 ###########
1023 #
1024 # SIGNALS
1025 #
1026 ###########
1027
1028
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """ Invalidate counters after the tags of an object have changed.

    Always clears ``book_count`` and bumps ``changed_at`` on the affected
    tags.  Then, depending on the sender:

    * a Book whose change involves any tag outside the 'book', 'theme'
      and 'set' categories gets its tag counter reset;
    * a Fragment whose change involves a 'theme' tag gets its book's
      theme counter reset.

    """
    # Collect the keys once, so `affected_tags` may be any iterable
    # (including a one-shot generator) and is only walked a single time.
    tag_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    # we want Tag.changed_at updated for API to know the tag was touched
    Tag.objects.filter(pk__in=tag_pks).update(book_count=None, changed_at=datetime.now())

    # Only the presence of a matching tag matters below, so ask the
    # database with exists() instead of counting rows.
    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        countable = Tag.objects.filter(pk__in=tag_pks).exclude(
            category__in=('book', 'theme', 'set'))
        if countable.exists():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        themes = Tag.objects.filter(pk__in=tag_pks).filter(category='theme')
        if themes.exists():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1045
1046
def _pre_delete_handler(sender, instance, **kwargs):
    """ Re-save the owning Book when a BookMedia is about to be deleted.

    The handler is connected for every sender, so anything other than
    BookMedia is ignored.

    """
    if sender != BookMedia:
        return
    instance.book.save()
pre_delete.connect(_pre_delete_handler)
1052
def _post_save_handler(sender, instance, **kwargs):
    """ Re-save the owning Book after a BookMedia has been saved.

    Per the original note this is meant to refresh the book's short_html
    data.  The handler is connected for every sender, so anything other
    than BookMedia is ignored.

    """
    if sender != BookMedia:
        return
    instance.book.save()
post_save.connect(_post_save_handler)