Test shouldn't build FB2
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import namedtuple
6
7 from django.db import models
8 from django.db.models import permalink
9 import django.dispatch
10 from django.core.cache import get_cache
11 from django.utils.translation import ugettext_lazy as _
12 from django.contrib.auth.models import User
13 from django.template.loader import render_to_string
14 from django.utils.datastructures import SortedDict
15 from django.utils.safestring import mark_safe
16 from django.utils.translation import get_language
17 from django.core.urlresolvers import reverse
18 from django.db.models.signals import post_save, pre_delete, post_delete
19 import jsonfield
20
21 from django.conf import settings
22
23 from newtagging.models import TagBase, tags_updated
24 from newtagging import managers
25 from catalogue.fields import OverwritingFileField
26 from catalogue.utils import create_zip, split_tags, truncate_html_words
27 from catalogue import tasks
28 import re
29
30
# These are hard-coded here so that makemessages sees them.
# Each entry is a (stored value, translatable label) pair; the tuple is used
# as the ``choices`` of Tag.category.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)


# Cache backend registered under the 'permanent' alias; used in this module
# to store the per-book tag and theme counters.
permanent_cache = get_cache('permanent')
44
45
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are limited to tags of one category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
53
54
class Tag(TagBase):
    """A tag attachable to books and fragments (and possibly anything).

    Used to represent searchable metadata (authors, epochs, genres, kinds),
    fragment themes (motifs) and some book hierarchy related kludges."""
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): the label below still reads 'creation date' although the
    # field uses auto_now; left untouched because it is a translation msgid.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)

    class UrlDeprecationWarning(DeprecationWarning):
        """Raised by get_tag_list() when a slug was given without its category."""
        pass

    # URL chunk (Polish) -> category name.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category name -> URL chunk; the inverse of categories_rev.
    categories_dict = dict((category, chunk) for chunk, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """Returns global book count for book tags, fragment count for themes."""

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants
                # Only 'book' tags are l-tags; restricting the category keeps
                # this lookup consistent with Book.tagged_top_level().
                l_tags = Tag.objects.filter(category='book',
                    slug__in=[book.book_tag_slug() for book in objects.iterator()])
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)
        return objects.count()

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` to a list of Tag objects.

        A string is treated as a '/'-separated path of ``<category chunk>/<slug>``
        pairs (chunks as in ``categories_rev``); a bare slug without a category
        chunk is looked up among all non-book tags. Anything else is delegated
        to ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a lookup failed or a category chunk had no slug.
            Tag.MultipleObjectsReturned: some bare slugs matched several tags;
                the exception carries ``tags`` and ``ambiguous_slugs``.
            Tag.UrlDeprecationWarning: all tags resolved, but at least one was
                given without a category; the exception carries ``tags``.
        """
        if isinstance(tags, basestring):
            real_tags = []
            ambiguous_slugs = []
            category = None
            deprecated = False
            for name in tags.split('/'):
                if category:
                    real_tags.append(Tag.objects.get(slug=name, category=category))
                    category = None
                elif name in Tag.categories_rev:
                    category = Tag.categories_rev[name]
                else:
                    try:
                        real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                        deprecated = True
                    except Tag.MultipleObjectsReturned:
                        ambiguous_slugs.append(name)

            if category:
                # something strange left off
                raise Tag.DoesNotExist()
            if ambiguous_slugs:
                # some tags should be qualified
                e = Tag.MultipleObjectsReturned()
                e.tags = real_tags
                e.ambiguous_slugs = ambiguous_slugs
                raise e
            if deprecated:
                e = Tag.UrlDeprecationWarning()
                e.tags = real_tags
                raise e
            return real_tags
        else:
            return TagBase.get_tag_list(tags)

    @property
    def url_chunk(self):
        """Return ``<category chunk>/<slug>`` as used in tag URLs.

        NOTE(review): categories_dict has no entry for the 'book' category,
        so this raises KeyError for book tags.
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))

    @staticmethod
    def tags_from_info(info):
        """Return kind/genre/author/epoch tags for ``info``, creating missing ones.

        For each category the plural attribute (e.g. ``info.kinds``) is tried
        first, then the singular one (``info.kind``); a category whose
        attributes cannot be read is skipped.
        """
        from slughifi import slughifi
        from sortify import sortify
        meta_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            # Attribute access on ``info`` stays best-effort, but no longer
            # swallows SystemExit / KeyboardInterrupt as a bare except did.
            try:
                tag_names = getattr(info, field_name)
            except Exception:
                try:
                    tag_names = [getattr(info, category)]
                except Exception:
                    # For instance, Pictures do not have 'genre' field.
                    continue
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    tag_sort_key = tag_name.last_name
                    tag_name = tag_name.readable()
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = sortify(tag_sort_key.lower())
                    tag.save()
                meta_tags.append(tag)
        return meta_tags
196
197
198
def get_dynamic_path(media, filename, ext=None, maxlen=100):
    """Build the upload path ``book/<ext>/<slug>.<ext>`` for a file.

    The slug comes from ``media.name`` when available, otherwise from the
    part of ``filename`` before its first dot. When ``ext`` is not given it
    is read from ``media.formats[media.type]`` (the BookMedia case). The slug
    is truncated so the fixed path parts plus a 4-character margin fit in
    ``maxlen``.
    """
    from slughifi import slughifi

    # how to put related book's slug here?
    if not ext:
        # BookMedia case
        ext = media.formats[media.type].ext

    if media is not None and media.name:
        raw_name = media.name
    else:
        raw_name = filename.split(".")[0]
    name = slughifi(raw_name)

    # Length of everything in the path except the slug, plus the margin.
    reserved = len('book/%s/.%s' % (ext, ext)) + 4
    return 'book/%s/%s.%s' % (ext, name[:maxlen - reserved], ext)
211
212
213 # TODO: why is this hard-coded ?
def book_upload_path(ext=None, maxlen=100):
    """Return a callable that forwards to get_dynamic_path with ``ext`` and ``maxlen`` bound."""
    def upload_to(*args):
        return get_dynamic_path(*args, ext=ext, maxlen=maxlen)
    return upload_to
216
217
class BookMedia(models.Model):
    """Represents media attached to a book."""
    FileFormat = namedtuple("FileFormat", "name ext")
    # Supported media types, in display order: key -> (label, file extension).
    formats = SortedDict([
        ('mp3', FileFormat(name='MP3', ext='mp3')),
        ('ogg', FileFormat(name='Ogg Vorbis', ext='ogg')),
        ('daisy', FileFormat(name='DAISY', ext='daisy.zip')),
    ])
    format_choices = [(k, _('%s file') % t.name)
            for k, t in formats.items()]

    # max_length must be an integer: with the string "100" the length
    # validator compared an int against a str and so never fired on Python 2.
    type        = models.CharField(_('type'), choices=format_choices, max_length=100)
    name        = models.CharField(_('name'), max_length=100)
    file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
    uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    extra_info  = jsonfield.JSONField(_('extra information'), default='{}', editable=False)
    book = models.ForeignKey('Book', related_name='media')
    source_sha1 = models.CharField(null=True, blank=True, max_length=40, editable=False)

    def __unicode__(self):
        return "%s (%s)" % (self.name, self.file.name.split("/")[-1])

    class Meta:
        ordering            = ('type', 'name')
        verbose_name        = _('book media')
        verbose_name_plural = _('book media')

    def save(self, *args, **kwargs):
        """Save the media, then refresh file-derived metadata and save again.

        If the name changed in a way that alters its slug, the stored file is
        re-saved so its path follows the new name. Zip packages for the
        affected book/type pairs are removed, and ``extra_info`` and
        ``source_sha1`` are re-read from the file before the second save.
        """
        from slughifi import slughifi
        from catalogue.utils import ExistingFile, remove_zip

        try:
            old = BookMedia.objects.get(pk=self.pk)
        except BookMedia.DoesNotExist:
            old = None
        else:
            # if name changed, change the file name, too
            if slughifi(self.name) != slughifi(old.name):
                self.file.save(None, ExistingFile(self.file.path), save=False, leave=True)

        super(BookMedia, self).save(*args, **kwargs)

        # remove the zip package for book with modified media
        if old:
            remove_zip("%s_%s" % (old.book.slug, old.type))
        remove_zip("%s_%s" % (self.book.slug, self.type))

        extra_info = self.extra_info
        extra_info.update(self.read_meta())
        self.extra_info = extra_info
        self.source_sha1 = self.read_source_sha1(self.file.path, self.type)
        # The row already exists after the first save; repeating a forced
        # INSERT (as objects.create() requests) would collide on the key.
        kwargs.pop('force_insert', None)
        return super(BookMedia, self).save(*args, **kwargs)

    def read_meta(self):
        """
            Reads some metadata from the audiobook.

            Returns a dict with 'artist_name', 'director_name', 'project'
            and 'funded_by' for mp3 and ogg files (empty strings when the
            file cannot be read), and an empty dict for any other type.
        """
        import mutagen
        from mutagen import id3

        artist_name = director_name = project = funded_by = ''
        if self.type == 'mp3':
            try:
                audio = id3.ID3(self.file.path)
                artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
                director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
                project = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?project'])
                funded_by = ", ".join([t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?funded_by'])
            except Exception:
                # Best effort: unreadable files keep the empty defaults.
                pass
        elif self.type == 'ogg':
            try:
                audio = mutagen.File(self.file.path)
                artist_name = ', '.join(audio.get('artist', []))
                director_name = ', '.join(audio.get('conductor', []))
                project = ", ".join(audio.get('project', []))
                funded_by = ", ".join(audio.get('funded_by', []))
            except Exception:
                # Best effort: unreadable files keep the empty defaults.
                pass
        else:
            return {}
        return {'artist_name': artist_name, 'director_name': director_name,
                'project': project, 'funded_by': funded_by}

    @staticmethod
    def read_source_sha1(filepath, filetype):
        """
            Reads source file SHA1 from audiobook metadata.

            Returns None when the type is neither mp3 nor ogg, when the
            file cannot be read, or when it carries no such entry.
        """
        import mutagen
        from mutagen import id3

        if filetype == 'mp3':
            try:
                audio = id3.ID3(filepath)
                return [t.data for t in audio.getall('PRIV')
                        if t.owner=='wolnelektury.pl?flac_sha1'][0]
            except Exception:
                return None
        elif filetype == 'ogg':
            try:
                audio = mutagen.File(filepath)
                return audio.get('flac_sha1', [None])[0]
            except Exception:
                return None
        else:
            return None
327
328
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title         = models.CharField(_('title'), max_length=120)
    # Recomputed from the title on every save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True,
            unique=True)
    # Set in from_text_and_meta(): the slug of the work this book is a variant
    # of, or the book's own slug.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True,
                    default=settings.CATALOGUE_DEFAULT_LANGUAGE)
    description   = models.TextField(_('description'), blank=True)
    created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): label reads 'creation date' although the field uses auto_now.
    changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    # Position among the parent's parts, assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info    = jsonfield.JSONField(_('extra information'), default='{}')
    gazeta_link   = models.CharField(blank=True, max_length=240)
    wiki_link     = models.CharField(blank=True, max_length=240)
    # files generated during publication

    cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
                null=True, blank=True)
    ebook_formats = ['pdf', 'epub', 'mobi', 'fb2', 'txt']
    # has_media()/get_media() read a "<format>_file" attribute for each of
    # these; those attributes are not declared in this part of the class
    # (presumably attached elsewhere -- confirm).
    formats = ebook_formats + ['html', 'xml']

    parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')

    # Cache written by related_info() and cleared by reset_short_html().
    _related_info = jsonfield.JSONField(blank=True, null=True, editable=False)

    objects  = models.Manager()
    tagged   = managers.ModelTaggedItemManager(Tag)
    tags     = managers.TagDescriptor(Tag)

    # Sent at the end of a successful build_html(), with the book as sender.
    html_built = django.dispatch.Signal()
    # Sent at the end of from_text_and_meta(), with the book as sender.
    published = django.dispatch.Signal()

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        """Return the book's title."""
        return self.title
373
374     def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
375         from sortify import sortify
376
377         self.sort_key = sortify(self.title)
378
379         ret = super(Book, self).save(force_insert, force_update)
380
381         if reset_short_html:
382             self.reset_short_html()
383
384         return ret
385
    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair for this book's detail page.

        The ``permalink`` decorator turns the pair into the actual URL.
        """
        return ('catalogue.views.book_detail', [self.slug])
389
    @property
    def name(self):
        """Alias for ``title``, so a book can be read via ``.name`` like a tag.

        pretty_title() and get_dynamic_path() both read ``.name``.
        """
        return self.title
393
    def book_tag_slug(self):
        """Return the slug of this book's 'book' tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]
396
397     def book_tag(self):
398         slug = self.book_tag_slug()
399         book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
400         if created:
401             book_tag.name = self.title[:50]
402             book_tag.sort_key = self.title.lower()
403             book_tag.save()
404         return book_tag
405
406     def has_media(self, type_):
407         if type_ in Book.formats:
408             return bool(getattr(self, "%s_file" % type_))
409         else:
410             return self.media.filter(type=type_).exists()
411
412     def get_media(self, type_):
413         if self.has_media(type_):
414             if type_ in Book.formats:
415                 return getattr(self, "%s_file" % type_)
416             else:                                             
417                 return self.media.filter(type=type_)
418         else:
419             return None
420
    def get_mp3(self):
        """Shortcut for ``get_media("mp3")``."""
        return self.get_media("mp3")
    def get_odt(self):
        """Shortcut for ``get_media("odt")``."""
        return self.get_media("odt")
    def get_ogg(self):
        """Shortcut for ``get_media("ogg")``."""
        return self.get_media("ogg")
    def get_daisy(self):
        """Shortcut for ``get_media("daisy")``."""
        return self.get_media("daisy")
429
430     def reset_short_html(self):
431         if self.id is None:
432             return
433
434         type(self).objects.filter(pk=self.pk).update(_related_info=None)
435         # Fragment.short_html relies on book's tags, so reset it here too
436         for fragm in self.fragments.all().iterator():
437             fragm.reset_short_html()
438
439     def has_description(self):
440         return len(self.description) > 0
441     has_description.short_description = _('description')
442     has_description.boolean = True
443
444     # ugly ugly ugly
    def has_mp3_file(self):
        """Tell whether the book has MP3 media (see has_media)."""
        return bool(self.has_media("mp3"))
    # Display metadata attached to the method (presumably read by the admin
    # change list -- confirm).
    has_mp3_file.short_description = 'MP3'
    has_mp3_file.boolean = True

    def has_ogg_file(self):
        """Tell whether the book has Ogg media (see has_media)."""
        return bool(self.has_media("ogg"))
    has_ogg_file.short_description = 'OGG'
    has_ogg_file.boolean = True

    def has_daisy_file(self):
        """Tell whether the book has DAISY media (see has_media)."""
        return bool(self.has_media("daisy"))
    has_daisy_file.short_description = 'DAISY'
    has_daisy_file.boolean = True
459
460     def wldocument(self, parse_dublincore=True):
461         from catalogue.import_utils import ORMDocProvider
462         from librarian.parser import WLDocument
463
464         return WLDocument.from_file(self.xml_file.path,
465                 provider=ORMDocProvider(self),
466                 parse_dublincore=parse_dublincore)
467
468     def build_cover(self, book_info=None):
469         """(Re)builds the cover image."""
470         from StringIO import StringIO
471         from django.core.files.base import ContentFile
472         from librarian.cover import WLCover
473
474         if book_info is None:
475             book_info = self.wldocument().book_info
476
477         cover = WLCover(book_info).image()
478         imgstr = StringIO()
479         cover.save(imgstr, 'png')
480         self.cover.save(None, ContentFile(imgstr.getvalue()))
481
482     def build_html(self):
483         from django.core.files.base import ContentFile
484         from slughifi import slughifi
485         from librarian import html
486
487         meta_tags = list(self.tags.filter(
488             category__in=('author', 'epoch', 'genre', 'kind')))
489         book_tag = self.book_tag()
490
491         html_output = self.wldocument(parse_dublincore=False).as_html()
492         if html_output:
493             self.html_file.save('%s.html' % self.slug,
494                     ContentFile(html_output.get_string()))
495
496             # get ancestor l-tags for adding to new fragments
497             ancestor_tags = []
498             p = self.parent
499             while p:
500                 ancestor_tags.append(p.book_tag())
501                 p = p.parent
502
503             # Delete old fragments and create them from scratch
504             self.fragments.all().delete()
505             # Extract fragments
506             closed_fragments, open_fragments = html.extract_fragments(self.html_file.path)
507             for fragment in closed_fragments.values():
508                 try:
509                     theme_names = [s.strip() for s in fragment.themes.split(',')]
510                 except AttributeError:
511                     continue
512                 themes = []
513                 for theme_name in theme_names:
514                     if not theme_name:
515                         continue
516                     tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
517                     if created:
518                         tag.name = theme_name
519                         tag.sort_key = theme_name.lower()
520                         tag.save()
521                     themes.append(tag)
522                 if not themes:
523                     continue
524
525                 text = fragment.to_string()
526                 short_text = truncate_html_words(text, 15)
527                 if text == short_text:
528                     short_text = ''
529                 new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
530                     text=text, short_text=short_text)
531
532                 new_fragment.save()
533                 new_fragment.tags = set(meta_tags + themes + [book_tag] + ancestor_tags)
534             self.save()
535             self.html_built.send(sender=self)
536             return True
537         return False
538
539     # Thin wrappers for builder tasks
    def build_pdf(self, *args, **kwargs):
        """(Re)builds PDF.

        Hands the book's primary key and any extra arguments to
        ``tasks.build_pdf.delay`` and returns its result.
        """
        return tasks.build_pdf.delay(self.pk, *args, **kwargs)
    def build_epub(self, *args, **kwargs):
        """(Re)builds EPUB.

        Hands the book's primary key and any extra arguments to
        ``tasks.build_epub.delay`` and returns its result.
        """
        return tasks.build_epub.delay(self.pk, *args, **kwargs)
    def build_mobi(self, *args, **kwargs):
        """(Re)builds MOBI.

        Hands the book's primary key and any extra arguments to
        ``tasks.build_mobi.delay`` and returns its result.
        """
        return tasks.build_mobi.delay(self.pk, *args, **kwargs)
    def build_fb2(self, *args, **kwargs):
        """(Re)builds FB2.

        Hands the book's primary key and any extra arguments to
        ``tasks.build_fb2.delay`` and returns its result.
        """
        return tasks.build_fb2.delay(self.pk, *args, **kwargs)
    def build_txt(self, *args, **kwargs):
        """(Re)builds TXT.

        Hands the book's primary key and any extra arguments to
        ``tasks.build_txt.delay`` and returns its result.
        """
        return tasks.build_txt.delay(self.pk, *args, **kwargs)
555
556     @staticmethod
557     def zip_format(format_):
558         def pretty_file_name(book):
559             return "%s/%s.%s" % (
560                 b.extra_info['author'],
561                 b.slug,
562                 format_)
563
564         field_name = "%s_file" % format_
565         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
566         paths = [(pretty_file_name(b), getattr(b, field_name).path)
567                     for b in books.iterator()]
568         return create_zip(paths,
569                     getattr(settings, "ALL_%s_ZIP" % format_.upper()))
570
571     def zip_audiobooks(self, format_):
572         bm = BookMedia.objects.filter(book=self, type=format_)
573         paths = map(lambda bm: (None, bm.file.path), bm)
574         return create_zip(paths, "%s_%s" % (self.slug, format_))
575
576     def search_index(self, book_info=None, reuse_index=False, index_tags=True):
577         import search
578         if reuse_index:
579             idx = search.ReusableIndex()
580         else:
581             idx = search.Index()
582             
583         idx.open()
584         try:
585             idx.index_book(self, book_info)
586             if index_tags:
587                 idx.index_tags()
588         finally:
589             idx.close()
590
591     @classmethod
592     def from_xml_file(cls, xml_file, **kwargs):
593         from django.core.files import File
594         from librarian import dcparser
595
596         # use librarian to parse meta-data
597         book_info = dcparser.parse(xml_file)
598
599         if not isinstance(xml_file, File):
600             xml_file = File(open(xml_file))
601
602         try:
603             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
604         finally:
605             xml_file.close()
606
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
            build_epub=True, build_txt=True, build_pdf=True, build_mobi=True, build_fb2=True,
            search_index=True, search_index_tags=True, search_index_reuse=False):
        """Create or overwrite a book from its XML file and parsed metadata.

        ``raw_file`` is stored as the book's XML file and ``book_info``
        supplies slug, language, title, variant, parts and tag metadata.
        Each ``build_*`` / ``search_index`` flag enables the matching step
        unless the corresponding ``settings.NO_BUILD_*`` /
        ``settings.NO_SEARCH_INDEX`` switch is set. ``search_index_tags`` and
        ``search_index_reuse`` are passed on to ``search_index()``.

        Returns the saved book, after sending the ``published`` signal.

        Raises:
            Book.DoesNotExist: a part listed in ``book_info.parts`` is missing.
            ValueError: the slug has characters outside ``[a-z0-9-]``.
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
        """

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    # re-raise with a message naming the missing part
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') %
                            part_url.slug)


        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % (
                        book_slug))
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the book's tags with the fresh metadata tags plus the
        # shelves ('set' tags) it had before.
        book.tags = set(meta_tags + book_shelves)

        book_tag = book.book_tag()

        # Attach the parts to this book, numbered in the order listed.
        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        # save=False: the model itself is not saved by this call.
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        # TXT is only built when the HTML build produced output.
        if book.build_html():
            if not settings.NO_BUILD_TXT and build_txt:
                book.build_txt()

        book.build_cover(book_info)

        if not settings.NO_BUILD_EPUB and build_epub:
            book.build_epub()

        if not settings.NO_BUILD_PDF and build_pdf:
            book.build_pdf()

        if not settings.NO_BUILD_MOBI and build_mobi:
            book.build_mobi()

        if not settings.NO_BUILD_FB2 and build_fb2:
            book.build_fb2()

        if not settings.NO_SEARCH_INDEX and search_index:
            book.search_index(index_tags=search_index_tags, reuse_index=search_index_reuse)
            #index_book.delay(book.id, book_info)

        # Walk all descendants breadth-first, starting from direct children.
        book_descendants = list(book.children.all())
        descendants_tags = set()
        # add l-tag to descendants and their fragments
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            descendants_tags.update(child_book.tags)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all().iterator():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Every tag found on a descendant is passed to tasks.touch_tag.
        for tag in descendants_tags:
            tasks.touch_tag(tag)

        book.save()

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        cls.published.send(sender=book)
        return book
709
710     def related_info(self):
711         """Keeps info about related objects (tags, media) in cache field."""
712         if self._related_info is not None:
713             return self._related_info
714         else:
715             rel = {'tags': {}, 'media': {}}
716
717             tags = self.tags.filter(category__in=(
718                     'author', 'kind', 'genre', 'epoch'))
719             tags = split_tags(tags)
720             for category in tags:
721                 rel['tags'][category] = [
722                         (t.name, t.slug) for t in tags[category]]
723
724             for media_format in BookMedia.formats:
725                 rel['media'][media_format] = self.has_media(media_format)
726
727             book = self
728             parents = []
729             while book.parent:
730                 parents.append((book.parent.title, book.parent.slug))
731                 book = book.parent
732             parents = parents[::-1]
733             if parents:
734                 rel['parents'] = parents
735
736             if self.pk:
737                 type(self).objects.filter(pk=self.pk).update(_related_info=rel)
738             return rel
739
740     def related_themes(self):
741         theme_counter = self.theme_counter
742         book_themes = list(Tag.objects.filter(pk__in=theme_counter.keys()))
743         for tag in book_themes:
744             tag.count = theme_counter[tag.pk]
745         return book_themes
746
747     def reset_tag_counter(self):
748         if self.id is None:
749             return
750
751         cache_key = "Book.tag_counter/%d" % self.id
752         permanent_cache.delete(cache_key)
753         if self.parent:
754             self.parent.reset_tag_counter()
755
756     @property
757     def tag_counter(self):
758         if self.id:
759             cache_key = "Book.tag_counter/%d" % self.id
760             tags = permanent_cache.get(cache_key)
761         else:
762             tags = None
763
764         if tags is None:
765             tags = {}
766             for child in self.children.all().order_by().iterator():
767                 for tag_pk, value in child.tag_counter.iteritems():
768                     tags[tag_pk] = tags.get(tag_pk, 0) + value
769             for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by().iterator():
770                 tags[tag.pk] = 1
771
772             if self.id:
773                 permanent_cache.set(cache_key, tags)
774         return tags
775
776     def reset_theme_counter(self):
777         if self.id is None:
778             return
779
780         cache_key = "Book.theme_counter/%d" % self.id
781         permanent_cache.delete(cache_key)
782         if self.parent:
783             self.parent.reset_theme_counter()
784
785     @property
786     def theme_counter(self):
787         if self.id:
788             cache_key = "Book.theme_counter/%d" % self.id
789             tags = permanent_cache.get(cache_key)
790         else:
791             tags = None
792
793         if tags is None:
794             tags = {}
795             for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by().iterator():
796                 for tag in fragment.tags.filter(category='theme').order_by().iterator():
797                     tags[tag.pk] = tags.get(tag.pk, 0) + 1
798
799             if self.id:
800                 permanent_cache.set(cache_key, tags)
801         return tags
802
803     def pretty_title(self, html_links=False):
804         book = self
805         names = list(book.tags.filter(category='author'))
806
807         books = []
808         while book:
809             books.append(book)
810             book = book.parent
811         names.extend(reversed(books))
812
813         if html_links:
814             names = ['<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in names]
815         else:
816             names = [tag.name for tag in names]
817
818         return ', '.join(names)
819
820     @classmethod
821     def tagged_top_level(cls, tags):
822         """ Returns top-level books tagged with `tags`.
823
824         It only returns those books which don't have ancestors which are
825         also tagged with those tags.
826
827         """
828         # get relevant books and their tags
829         objects = cls.tagged.with_all(tags)
830         # eliminate descendants
831         l_tags = Tag.objects.filter(category='book',
832             slug__in=[book.book_tag_slug() for book in objects.iterator()])
833         descendants_keys = [book.pk for book in cls.tagged.with_any(l_tags).iterator()]
834         if descendants_keys:
835             objects = objects.exclude(pk__in=descendants_keys)
836
837         return objects
838
839     @classmethod
840     def book_list(cls, filter=None):
841         """Generates a hierarchical listing of all books.
842
843         Books are optionally filtered with a test function.
844
845         """
846
847         books_by_parent = {}
848         books = cls.objects.all().order_by('parent_number', 'sort_key').only(
849                 'title', 'parent', 'slug')
850         if filter:
851             books = books.filter(filter).distinct()
852             
853             book_ids = set(b['pk'] for b in books.values("pk").iterator())
854             for book in books.iterator():
855                 parent = book.parent_id
856                 if parent not in book_ids:
857                     parent = None
858                 books_by_parent.setdefault(parent, []).append(book)
859         else:
860             for book in books.iterator():
861                 books_by_parent.setdefault(book.parent_id, []).append(book)
862
863         orphans = []
864         books_by_author = SortedDict()
865         for tag in Tag.objects.filter(category='author').iterator():
866             books_by_author[tag] = []
867
868         for book in books_by_parent.get(None,()):
869             authors = list(book.tags.filter(category='author'))
870             if authors:
871                 for author in authors:
872                     books_by_author[author].append(book)
873             else:
874                 orphans.append(book)
875
876         return books_by_author, orphans, books_by_parent
877
    # Maps an audience code (as listed under extra_info['audiences']) to a
    # (sort rank, Polish display name) pair. audiences_pl() sorts by the
    # pair and returns only the names, so several codes that share a pair
    # collapse into a single entry.
    _audiences_pl = {
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
886     def audiences_pl(self):
887         audiences = self.extra_info.get('audiences', [])
888         audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
889         return [a[1] for a in audiences]
890
891     def choose_fragment(self):
892         tag = self.book_tag()
893         fragments = Fragment.tagged.with_any([tag])
894         if fragments.exists():
895             return fragments.order_by('?')[0]
896         elif self.parent:
897             return self.parent.choose_fragment()
898         else:
899             return None
900
901
def _has_factory(ftype):
    """Build the ``has_<ftype>_file`` predicate for a file format.

    The returned callable takes an object and reports, as a bool, whether
    its ``<ftype>_file`` attribute is truthy. It also carries a
    ``short_description`` (the upper-cased format name) and
    ``boolean = True`` -- presumably for display in the Django admin.
    """
    attr_name = "%s_file" % ftype

    def has(self):
        return bool(getattr(self, attr_name))

    has.__name__ = "has_%s_file" % ftype
    has.__doc__ = None
    has.short_description = ftype.upper()
    has.boolean = True
    return has
909
910     
# Add the file fields: for every format in Book.formats, Book gets a
# "<format>_file" FileField and a matching "has_<format>_file" accessor.
for t in Book.formats:
    field_name = "%s_file" % t
    # The field is created outside the class body, so it is attached to
    # Book explicitly under field_name.
    models.FileField(_("%s file" % t.upper()),
            upload_to=book_upload_path(t),
            blank=True).contribute_to_class(Book, field_name)

    # Accessor telling whether the file field above is set.
    setattr(Book, "has_%s_file" % t, _has_factory(t))
919
920
class Fragment(models.Model):
    """A themed fragment of a book.

    The rendered short HTML of a fragment is kept in ``permanent_cache``,
    one entry per fragment id and language code.
    """
    text = models.TextField()
    short_text = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book text, pointing at this fragment's anchor."""
        book_url = reverse('book_text', args=[self.book.slug])
        return '%s#m%s' % (book_url, self.anchor)

    def reset_short_html(self):
        """Delete the cached short HTML of this fragment for every language.

        Does nothing for a fragment that has no ``id`` yet.
        """
        if self.id is not None:
            key_template = "Fragment.short_html/%d/%s"
            for code, label in settings.LANGUAGES:
                permanent_cache.delete(key_template % (self.id, code))

    def get_short_text(self):
        """Return the short text, or the full text when there is no short one."""
        return self.short_text or self.text

    def short_html(self):
        """Return the short HTML of the fragment, marked as safe.

        For saved fragments the rendered template is cached per active
        language; unsaved fragments are rendered on every call.
        """
        cache_key = None
        cached = None
        if self.id:
            cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
            cached = permanent_cache.get(cache_key)

        if cached is not None:
            return mark_safe(cached)

        rendered = unicode(render_to_string(
            'catalogue/fragment_short.html', {'fragment': self}))
        if cache_key is not None:
            permanent_cache.set(cache_key, rendered)
        return mark_safe(rendered)
967
968
class Collection(models.Model):
    """A collection of books, which might be defined before publishing them."""
    title = models.CharField(_('title'), max_length=120, db_index=True)
    # The slug is the primary key of a collection.
    slug = models.SlugField(_('slug'), max_length=120, primary_key=True)
    description = models.TextField(_('description'), null=True, blank=True)

    # A second, unassigned SlugField used to be instantiated here; it was
    # never bound to any attribute, so it had no effect and was dropped.
    # Slugs of the books in the collection, kept as text.
    book_slugs = models.TextField(_('book slugs'))

    class Meta:
        ordering = ('title',)
        verbose_name = _('collection')
        verbose_name_plural = _('collections')

    def __unicode__(self):
        """Return the collection title."""
        return self.title
985
986
987 ###########
988 #
989 # SIGNALS
990 #
991 ###########
992
993
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """React to a change of tags on a tagged object.

    Every affected tag is touched, so that Tag.changed_at is updated and
    the API knows the tag was modified. Then, depending on the sender:

    * a Book whose changed tags include any category other than 'book',
      'theme' or 'set' has its tag counter reset,
    * a Fragment whose changed tags include a 'theme' has the theme
      counter of its book reset.
    """
    # Materialise once: the tags are walked here and again for the pk
    # lookup below, which would come out empty for a one-shot iterable.
    affected_tags = list(affected_tags)

    for tag in affected_tags:
        tasks.touch_tag(tag)

    # Lazy queryset; it only hits the database inside the branch that
    # actually applies to this sender.
    changed = Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags])

    # exists() is enough here: only "is there any such tag" matters, not
    # how many there are.
    if isinstance(sender, Book) and \
            changed.exclude(category__in=('book', 'theme', 'set')).exists():
        # book tags changed, reset book tag counter
        sender.reset_tag_counter()
    elif isinstance(sender, Fragment) and \
            changed.filter(category='theme').exists():
        # fragment theme changed, reset book theme counter
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
1011
1012
def _pre_delete_handler(sender, instance, **kwargs):
    """Re-save the owning Book just before one of its BookMedia is deleted.

    The handler is connected for every sender and ignores all of them
    except BookMedia.
    """
    if sender == BookMedia:
        owner = instance.book
        owner.save()
pre_delete.connect(_pre_delete_handler)
1018
1019
def _post_save_handler(sender, instance, **kwargs):
    """Re-save the owning Book after one of its BookMedia is saved.

    Saving the book is meant to refresh its cached short_html data. The
    handler is connected for every sender and ignores all of them except
    BookMedia.
    """
    if sender == BookMedia:
        owner = instance.book
        owner.save()
post_save.connect(_post_save_handler)
1025
1026
# The handler is only defined (and registered) when search indexing is
# enabled in settings.
if not settings.NO_SEARCH_INDEX:
    @django.dispatch.receiver(post_delete, sender=Book)
    def _remove_book_from_index_handler(sender, instance, **kwargs):
        """Remove a deleted Book from the search index.

        Runs on post_delete of Book: removes the book from the index and
        then calls index_tags() on it. The index is closed even when one
        of those steps raises.
        """
        # Imported here rather than at module level, so the search module
        # is only loaded when the handler actually runs.
        import search
        # NOTE(review): presumably the Java-backed index requires the
        # calling thread to be attached to the JVM first -- confirm.
        search.JVM.attachCurrentThread()
        idx = search.Index()
        idx.open(timeout=10000)  # 10 seconds timeout.
        try:
            idx.remove_book(instance)
            idx.index_tags()
        finally:
            idx.close()