b1ffdb63db4300540dc99101fa5e6a3f5c66a0a9
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase, tags_updated
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import dcparser, html, epub, NoDublinCore
21 from mutagen import id3
22
23
# Allowed values of ``Tag.category`` as (stored value, translatable label)
# pairs; passed as ``choices`` to that field.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is limited to a single tag category."""

    def __init__(self, subcategory):
        """Remembers ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Returns the default queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A tag attached to books and fragments.

    Every tag belongs to one of TAG_CATEGORIES; (slug, category) is unique.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # cached result of get_count(); None means "not computed yet"
    book_count = models.IntegerField(_('book count'), blank=False, null=True)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL segment name -> category, as recognised by get_tag_list()
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # inverse mapping (category -> URL segment name), used by url_chunk
    categories_dict = dict((category, segment) for segment, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % (self.slug,)

    @permalink
    def get_absolute_url(self):
        view_args = [self.url_chunk]
        return ('catalogue.views.tagged_object_list', view_args)

    def has_description(self):
        """ tells whether the description is non-empty """
        description_length = len(self.description)
        return description_length > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """ true when no year of death is recorded """
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author

        Returns None when the year of death is unknown.
        """
        if self.death is None:
            return None
        return self.death + 71

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed on first use, stored in ``book_count`` and
        saved with the tag.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            matching = Book.objects.none()
        elif self.category == 'theme':
            matching = Fragment.tagged.with_all((self,))
        else:
            matching = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants: drop every book carrying the l-tag
                # of one of the matched books
                l_slugs = [book.book_tag_slug() for book in matching]
                l_tags = Tag.objects.filter(slug__in=l_slugs)
                descendant_pks = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendant_pks:
                    matching = matching.exclude(pk__in=descendant_pks)

        self.book_count = matching.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """ resolves a '/'-separated string of slugs into a list of Tag objects

        A segment listed in ``categories_rev`` qualifies the slug that follows
        it. Unqualified slugs are looked up among all non-book tags; if any of
        them matches more than one tag, Tag.MultipleObjectsReturned is raised
        with ``tags`` (the ones resolved so far) and ``ambiguous_slugs`` set.
        Tag.DoesNotExist is raised for an unknown slug or when a category
        segment is not followed by a slug.

        Anything that is not a string is passed on to TagBase.get_tag_list.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
                continue
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
                continue
            try:
                real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
            except Tag.MultipleObjectsReturned:
                ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if not ambiguous_slugs:
            return real_tags

        # some tags should be qualified
        e = Tag.MultipleObjectsReturned()
        e.tags = real_tags
        e.ambiguous_slugs = ambiguous_slugs
        raise e

    @property
    def url_chunk(self):
        """ '<category segment>/<slug>' path fragment for this tag """
        # NOTE(review): categories_dict has no entry for 'book', so this
        # raises KeyError for book tags -- confirm that is intended.
        category_segment = Tag.categories_dict[self.category]
        return '/'.join((category_segment, self.slug))
159
160
# TODO: why is the 'lektura/' upload directory hard-coded?
def book_upload_path(ext):
    """Returns an ``upload_to`` callable for files with extension ``ext``.

    The callable ignores the uploaded file's name and always answers
    'lektura/<book slug>.<ext>'.
    """
    def get_dynamic_path(instance, original_name):
        path_parts = (instance.slug, ext)
        return 'lektura/%s.%s' % path_parts
    return get_dynamic_path
166
167
class Book(models.Model):
    """A book in the catalogue; may be a part (child) of another book.

    Besides the metadata it keeps one file per available format, a cached
    short HTML snippet and JSON-cached tag and theme counters.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    # cache filled by short_html() and emptied by save()
    _short_html = models.TextField(_('short HTML'), editable=False)
    # position among the parts of the parent book (set in from_text_and_meta)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)


    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
    daisy_file = models.FileField(_('DAISY file'), upload_to=book_upload_path('daisy.zip'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # caches behind the tag_counter / theme_counter properties;
    # None means "needs recomputing"
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """ raised on import when the book exists and overwrite is off """
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
        """ Saves the book.

        With ``reset_short_html`` every ``_short_html*`` attribute of the book
        is emptied, and the same columns are emptied on all its fragments.
        With ``refresh_mp3`` and an MP3 file present, artist and director
        names are read from the file, merged into ``extra_info`` and the book
        is saved once more.
        Any further keyword arguments are handed over to Model.save().
        """
        if reset_short_html:
            # Reset _short_html during save
            cache_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            emptied = {}
            for key in cache_keys:
                emptied[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**emptied)

        book = super(Book, self).save(force_insert, force_update, **kwargs)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # the row exists by now, so an insert must not be forced again
            book = super(Book, self).save(False, force_update, **kwargs)

        return book

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        return self.title

    def book_tag_slug(self):
        """ slug of the book's own tag: 'l-' + slug, cut to 120 characters """
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """ returns the 'book' category tag of this book, creating it if needed """
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = self.title.lower()
            book_tag.save()
        return book_tag

    def short_html(self):
        """ returns the short HTML snippet of the book for the active language

        The snippet is rendered from catalogue/book_short.html on first use,
        stored in the ``_short_html_<language>`` attribute and saved.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)
        if short_html:
            return mark_safe(short_html)

        # NOTE(review): tag names are interpolated unescaped and marked safe;
        # confirm they can never contain HTML special characters.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        # the EPUB comes from the root ancestor, all the others from the book itself
        downloads = (
            ('PDF', self.pdf_file),
            ('EPUB', self.root_ancestor.epub_file),
            ('ODT', self.odt_file),
            ('TXT', self.txt_file),
            ('MP3', self.mp3_file),
            ('OGG', self.ogg_file),
            ('DAISY', self.daisy_file),
        )
        for label, file_field in downloads:
            if file_field:
                formats.append(u'<a href="%s">%s</a>' % (file_field.url, label))

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # only the cache changed: keep it and skip re-reading the MP3 tags
        self.save(reset_short_html=False, refresh_mp3=False)
        return mark_safe(getattr(self, key))


    @property
    def root_ancestor(self):
        """ returns the oldest ancestor (memoized on the instance) """

        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor


    def _iter_descendants(self):
        """ yields children, grandchildren and so on, breadth-first

        Children of a book are fetched only after that book has been yielded
        and the caller asks for the next one.
        """
        from collections import deque

        queue = deque(self.children.all())
        while queue:
            descendant = queue.popleft()
            yield descendant
            queue.extend(descendant.children.all())


    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.
            Every descendant is saved, whether its epub was removed or not.
        """

        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, epub_file)
        except NoDublinCore:
            # no EPUB is stored in this case; descendants are still refreshed below
            pass
        else:
            epub_data = epub_file.getvalue()
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_data), save=False)
            self.save()
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_data).hexdigest()).save()

        for child_book in self._iter_descendants():
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()


    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """ imports a book from an XML file given as a path or a File object

        The file is closed afterwards, also when it was passed in as a File.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """ creates or overwrites a book from its XML source and parsed metadata

        ``raw_file`` is stored as the book's XML file; ``book_info`` supplies
        the title, the URL (its last segment becomes the slug), the tag names
        and, optionally, the ``parts`` of the book.

        Raises Book.DoesNotExist when a listed part is not in the database and
        Book.AlreadyExists when the book exists and ``overwrite`` is false.
        Returns the saved book.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                base, slug = part_url.rsplit('/', 1)
                try:
                    children.append(Book.objects.get(slug=slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)


        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except AttributeError:
                # no plural attribute: fall back to the singular one
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # authors are objects: sort by last name, show the full name
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if tag_created:
                    tag.name = tag_name
                    tag.sort_key = tag_sort_key.lower()
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags + book_shelves

        book_tag = book.book_tag()

        for n, child_book in enumerate(children):
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        html_file = NamedTemporaryFile()
        try:
            has_html = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if has_html:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # the temporary file is not needed once its content has been stored
            html_file.close()

        if has_html:
            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = book.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if tag_created:
                        tag.name = theme_name
                        tag.sort_key = theme_name.lower()
                        tag.save()
                    themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, fragment_created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                new_fragment.save()
                new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)

        if not book.parent:
            book.build_epub(remove_descendants=False)

        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        for child_book in book._iter_descendants():
            child_book.tags = list(child_book.tags) + [book_tag]
            if child_book.has_epub_file():
                child_book.epub_file.delete()
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        book.save()
        return book


    def refresh_tag_counter(self):
        """ recomputes, stores and returns the {tag pk: count} dictionary

        Counts of all children are summed; each of the book's own tags outside
        the 'book', 'theme' and 'set' categories is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_tag_counter(self):
        """ invalidates the tag counter of the book and of all its ancestors """
        self._tag_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """ {tag pk: count} dictionary, recomputed when the cache is empty """
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # stored keys are converted back to integers
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """ recomputes, stores and returns the {theme tag pk: fragment count} dictionary

        Counts the fragments tagged with the book's own tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_theme_counter(self):
        """ invalidates the theme counter of the book and of all its ancestors """
        self._theme_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """ {theme tag pk: count} dictionary, recomputed when the cache is empty """
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # stored keys are converted back to integers
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
568
569
570
class Fragment(models.Model):
    """A passage of a book, identified within it by ``anchor``."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    # cache filled by short_html()
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """ URL of the book's text with '#m<anchor>' appended """
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """ returns the short HTML snippet of the fragment for the active language

        When the ``_short_html_<language>`` attribute is empty, the snippet is
        rendered from catalogue/fragment_short.html, stored there and the
        fragment is saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if not (cached and len(cached)):
            rendered = render_to_string('catalogue/fragment_short.html',
                {'fragment': self})
            setattr(self, key, unicode(rendered))
            self.save()
            cached = getattr(self, key)
        return mark_safe(cached)
600
601
class BookStub(models.Model):
    """Basic information about a book, including the year it goes to public domain."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    # year of entering the public domain; empty when unknown
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        view_args = [self.slug]
        return ('catalogue.views.book_detail', view_args)

    def in_pd(self):
        """ tests whether the ``pd`` year is set and not later than the current year """
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """ alias of ``title`` """
        return self.title
628
629
class FileRecord(models.Model):
    """A timestamped record of a file: slug, type and SHA-1 hash."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time','-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        # "<sha1> <slug>.<type>"
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
643
644
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidates cached counters after the tags of an object have changed.

    Connected to the ``tags_updated`` signal.

    * ``book_count`` of every affected tag is reset to None.
    * When ``sender`` is a Book and any affected tag is outside the 'book',
      'theme' and 'set' categories, the book's tag counter is reset.
    * When ``sender`` is a Fragment and any affected tag is a theme, the
      theme counter of the fragment's book is reset.
    """
    # collect the keys once: affected_tags is needed for up to three queries
    # and may be an iterable that can be consumed only once
    affected_pks = [tag.pk for tag in affected_tags]
    affected = Tag.objects.filter(pk__in=affected_pks)

    # reset tag global counter
    affected.update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        if affected.exclude(category__in=('book', 'theme', 'set')).count():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        if affected.filter(category='theme').count():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
660