fixes #529: daisy
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase, tags_updated
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import dcparser, html, epub, NoDublinCore
21 from mutagen import id3
22
23
# Available tag categories as (stored value, translatable label) pairs.
# Used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset only contains tags of one category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``; it is the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag in one of the ``TAG_CATEGORIES``.

    ``book_count`` is a cache filled lazily by ``get_count()``; ``None`` means
    "not computed yet".
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), blank=False, null=True)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Keyword used in slash-separated tag strings -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: category value -> keyword (used by ``url_chunk``).
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty (shown as a boolean column)."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Return True when no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes """
        # Serve the cached value when there is one.
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            matching = Book.objects.none()
        elif self.category == 'theme':
            matching = Fragment.tagged.with_all((self,))
        else:
            matching = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants: drop every book that carries the
                # book tag of one of the matching books
                book_tag_slugs = [book.book_tag_slug() for book in matching]
                l_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    matching = matching.exclude(pk__in=descendants_keys)

        # Store the freshly computed value on the model.
        self.book_count = matching.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` to a list of Tag objects.

        A string is split on '/'; a category keyword (see ``categories_rev``)
        qualifies the slug that follows it, an unqualified slug is looked up
        among all non-book tags. Anything that is not a string is delegated
        to ``TagBase.get_tag_list``.

        Raises ``Tag.DoesNotExist`` when a slug is unknown or a category
        keyword is left without a slug, and ``Tag.MultipleObjectsReturned``
        (carrying ``tags`` and ``ambiguous_slugs`` attributes) when an
        unqualified slug matches several tags.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        resolved = []
        ambiguous = []
        pending_category = None
        for chunk in tags.split('/'):
            if chunk in Tag.categories_rev:
                pending_category = Tag.categories_rev[chunk]
            elif pending_category:
                resolved.append(Tag.objects.get(slug=chunk, category=pending_category))
                pending_category = None
            else:
                try:
                    resolved.append(Tag.objects.exclude(category='book').get(slug=chunk))
                except Tag.MultipleObjectsReturned:
                    ambiguous.append(chunk)

        if pending_category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous:
            # some tags should be qualified
            error = Tag.MultipleObjectsReturned()
            error.tags = resolved
            error.ambiguous_slugs = ambiguous
            raise error
        return resolved

    @property
    def url_chunk(self):
        """The '<category keyword>/<slug>' path fragment for this tag."""
        keyword = Tag.categories_dict[self.category]
        return '/'.join((keyword, self.slug))
159
160
161 # TODO: why is this hard-coded ?
def book_upload_path(ext):
    """Build an ``upload_to`` callable storing files as 'lektura/<slug>.<ext>'.

    The returned callable takes the book instance and the uploaded file name;
    the file name is ignored, only ``book.slug`` and ``ext`` are used.
    """
    def _path_for(book, filename):
        slug = book.slug
        return 'lektura/%s.%s' % (slug, ext)

    return _path_for
166
167
class Book(models.Model):
    """A book with its downloadable formats, tags and cached counters.

    Books form a tree through ``parent`` / ``children``. ``_tag_counter`` and
    ``_theme_counter`` are lazily filled JSON caches; ``None`` means "not
    computed yet" (see ``tag_counter`` / ``theme_counter``).
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): labelled 'creation date', but auto_now=True refreshes the
    # value on every save -- confirm whether auto_now_add was intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)


    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
    daisy_file = models.FileField(_('DAISY file'), upload_to=book_upload_path('daisy.zip'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
        """Save the book, optionally invalidating cached HTML and refreshing MP3 info.

        reset_short_html -- blank every ``_short_html*`` attribute of the book
            and the same columns of its fragments before saving.
        refresh_mp3 -- when an MP3 file is attached, merge the result of
            ``get_mp3_info()`` into ``extra_info`` and save once more.

        Remaining keyword arguments are forwarded to ``Model.save``.
        Returns whatever ``Model.save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            update = {}
            # Snapshot the matching attribute names before assigning to them.
            for key in [name for name in self.__dict__ if name.startswith('_short_html')]:
                update[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update, **kwargs)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The first save already wrote the row; forcing an INSERT again
            # would try to insert the same primary key a second time.
            book = super(Book, self).save(force_insert=False, force_update=force_update, **kwargs)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of this book's own tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """Return the 'book'-category tag of this book, creating it if needed."""
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            # name is cut to the Tag.name max_length
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML box for the current language, rendering it on a miss.

        The rendered markup is stored in the ``_short_html_<language>``
        attribute and the book is saved, so later calls return the stored copy.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)

        if short_html:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        # NOTE(review): tag names are interpolated unescaped and marked safe;
        # confirm they can never contain markup.
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        if self.pdf_file:
            formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
        # the EPUB is taken from the topmost ancestor
        if self.root_ancestor.epub_file:
            formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.epub_file.url)
        if self.odt_file:
            formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
        if self.txt_file:
            formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
        if self.mp3_file:
            formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
        if self.ogg_file:
            formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
        if self.daisy_file:
            formats.append(u'<a href="%s">DAISY</a>' % self.daisy_file.url)

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # keep the value that was just rendered
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))


    @property
    def root_ancestor(self):
        """ returns the oldest ancestor """

        # memoised on the instance after the first walk up the parent chain
        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor


    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.

            A FileRecord with the SHA-1 of the generated file is stored.
            NoDublinCore raised during the transformation is ignored.
            Every descendant is saved, which resets its cached short HTML.
        """

        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, epub_file)
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()), save=False)
            self.save()
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
        except NoDublinCore:
            pass

        # walk the whole subtree, breadth first
        book_descendants = list(self.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            book_descendants += list(child_book.children.all())


    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Import a book from ``xml_file`` (a path or a Django ``File``).

        The metadata is parsed with ``dcparser.parse`` and the import itself is
        done by ``from_text_and_meta``. The file is closed afterwards, also
        when it was passed in by the caller.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        raw_file -- file object with the book XML; stored as ``xml_file``.
        book_info -- parsed metadata; the slug is the last path segment of
            ``book_info.url``.
        overwrite -- when false, an already existing book raises
            ``Book.AlreadyExists``.

        Sets the title, extra info and tags, attaches the children listed in
        ``book_info.parts``, generates the HTML file and its themed fragments,
        builds the EPUB and propagates this book's tag to all descendants.
        Raises ``Book.DoesNotExist`` when a listed part is missing.
        Returns the saved book.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except AttributeError:
                # no plural attribute: fall back to the single-valued one
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # authors sort by last name and display as "first last"
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if tag_created:
                    tag.name = tag_name
                    tag.sort_key = slughifi(tag_sort_key)
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags + book_shelves

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                    child_book.parent = book
                    child_book.parent_number = n
                    child_book.save()
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            has_html = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if has_html:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # the temporary file is only needed until it is copied to storage
            html_file.close()

        if has_html:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if tag_created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                short_text = ''
                markup = MarkupString(text)
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, fragment_created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})
                if not fragment_created:
                    # ``defaults`` only apply to new rows; refresh the text of
                    # a fragment kept from a previous import
                    new_fragment.text = text
                    new_fragment.short_text = short_text

                new_fragment.save()
                new_fragment.tags = set(book_tags + themes + [book_tag])

        book.build_epub(remove_descendants=False)

        book_descendants = list(book.children.all())
        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        while book_descendants:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            if child_book.has_epub_file():
                child_book.epub_file.delete()
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # refresh cache (the properties recompute and store missing counters)
        book.tag_counter
        book.theme_counter

        book.save()
        return book


    def refresh_tag_counter(self):
        """Recompute, store and return the {tag pk: count} mapping.

        Children's counters are summed; each of the book's own tags outside
        the 'book', 'theme' and 'set' categories is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_tag_counter(self):
        """Clear the stored tag counter of this book and of all its ancestors."""
        self._tag_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """The {tag pk: count} mapping, recomputed when nothing is stored."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # keys of the stored value are converted back to ints
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the {theme tag pk: fragment count} mapping.

        Counts the 'theme' tags of every fragment carrying this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_theme_counter(self):
        """Clear the stored theme counter of this book and of all its ancestors."""
        self._theme_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """The {theme tag pk: count} mapping, recomputed when nothing is stored."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # keys of the stored value are converted back to ints
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
552
553
554
class Fragment(models.Model):
    """A passage of a book, identified by its anchor in the book's HTML text."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL followed by '#m<anchor>'."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the short HTML for the current language, rendering it on a miss.

        A freshly rendered value is stored in the ``_short_html_<language>``
        attribute and the fragment is saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        rendered = render_to_string('catalogue/fragment_short.html',
            {'fragment': self})
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
584
585
class BookStub(models.Model):
    """Placeholder entry for a book: title, author and public-domain year."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        return ('catalogue.views.book_detail', [self.slug])

    def in_pd(self):
        """Tell whether the ``pd`` year is set and not later than the current year."""
        if self.pd is None:
            return False
        current_year = datetime.now().year
        return self.pd <= current_year

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title
612
613
class FileRecord(models.Model):
    """Record of a file: its slug, type, SHA-1 hash and creation time."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time','-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Format as '<sha1> <slug>.<type>'."""
        file_name = "%s.%s" % (self.slug, self.type)
        return "%s %s" % (self.sha1, file_name)
627
628
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters when an object's tags change.

    Connected to the ``tags_updated`` signal.

    sender -- the object whose tags changed.
    affected_tags -- iterable of the tags involved in the change.

    The global ``book_count`` of every affected tag is cleared. In addition,
    a Book sender gets its tag counter reset when any affected tag is outside
    the 'book', 'theme' and 'set' categories, and a Fragment sender gets its
    book's theme counter reset when any affected tag is a theme.
    """
    # Collect the pks once: they feed several queries, and a one-shot
    # iterable would be exhausted after the first of them.
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        counted_tags = Tag.objects.filter(pk__in=affected_pks).exclude(
            category__in=('book', 'theme', 'set'))
        if counted_tags.count():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        theme_tags = Tag.objects.filter(pk__in=affected_pks).filter(category='theme')
        if theme_tags.count():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
644