librarian bump
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase, tags_updated
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import dcparser, html, epub, NoDublinCore
21 from mutagen import id3
22
23
# Tag categories as (stored value, translatable label) pairs.
# Used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset holds only tags of a single category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """Catalogue tag; its ``category`` is one of ``TAG_CATEGORIES``."""
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=False, null=True)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category value -> URL path segment (the inverse of categories_rev).
    categories_dict = dict((category, url_name) for url_name, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        view_args = [self.url_chunk]
        return ('catalogue.views.tagged_object_list', view_args)

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed once, stored in ``book_count`` and saved.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            objects = Book.objects.none()
        elif self.category == 'theme':
            objects = Fragment.tagged.with_all((self,))
        else:
            objects = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants
                book_tag_slugs = [book.book_tag_slug() for book in objects]
                l_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                if descendants_keys:
                    objects = objects.exclude(pk__in=descendants_keys)

        self.book_count = objects.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Turn a '/'-separated path of slugs into a list of Tag objects.

        A path segment found in ``categories_rev`` sets the category of the
        slug that follows it. Anything that is not a string is handed to
        ``TagBase.get_tag_list``.

        Raises ``Tag.DoesNotExist`` when the path ends with a category
        segment, and ``Tag.MultipleObjectsReturned`` (carrying ``tags`` and
        ``ambiguous_slugs`` attributes) when an unqualified slug matches
        several tags.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
                continue
            if category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
                continue
            try:
                real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
            except Tag.MultipleObjectsReturned:
                ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """URL fragment for this tag: ``<category segment>/<slug>``."""
        category_segment = Tag.categories_dict[self.category]
        return '/'.join((category_segment, self.slug))
159
160
161 # TODO: why is this hard-coded ?
def book_upload_path(ext):
    """Build an ``upload_to`` callable yielding ``lektura/<book slug>.<ext>``."""
    def path_for_book(book, filename):
        # The uploaded file's own name is ignored; the book's slug names it.
        name_parts = (book.slug, ext)
        return 'lektura/%s.%s' % name_parts
    return path_for_book
166
167
class Book(models.Model):
    """A catalogue book: metadata, downloadable files and cached renderings.

    Books form a tree through ``parent`` / ``children``. Each book has a
    "book tag" (category ``'book'``, slug ``'l-' + slug``) which is also
    attached to its descendants and to fragments.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True presumably refreshes this on every save, so
    # it would track the last modification, not creation -- confirm intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
    daisy_file = models.FileField(_('DAISY file'), upload_to=book_upload_path('daisy.zip'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; None means "needs recomputing" (see tag_counter and
    # theme_counter below).
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the book exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
        """Save the book, optionally clearing cached HTML and re-reading MP3 tags.

        reset_short_html -- blank every ``_short_html*`` attribute of this
            book and the same-named columns of its fragments before saving.
        refresh_mp3 -- when an MP3 file is attached, merge the result of
            get_mp3_info() into ``extra_info`` and save a second time.

        Additional keyword arguments are accepted but not forwarded.
        Returns whatever ``Model.save`` returned.
        """
        if reset_short_html:
            # Reset _short_html during save
            update = {}
            for key in [attr for attr in self.__dict__ if attr.startswith('_short_html')]:
                update[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row was written by the save above, so never force a second
            # INSERT here -- it would collide with the row just created.
            book = super(Book, self).save(False, force_update)

        return book

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of this book's tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """Fetch this book's 'book' category tag, creating it when missing."""
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML snippet for the active language.

        The snippet is read from the ``_short_html_<language>`` attribute;
        when that is empty it is rendered from
        ``catalogue/book_short.html``, stored on the book and saved.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)

        if short_html and len(short_html):
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        if self.pdf_file:
            formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
        # The EPUB link comes from the root ancestor, not from this book.
        if self.root_ancestor.epub_file:
            formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.epub_file.url)
        if self.odt_file:
            formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
        if self.txt_file:
            formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
        if self.mp3_file:
            formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
        if self.ogg_file:
            formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
        if self.daisy_file:
            formats.append(u'<a href="%s">DAISY</a>' % self.daisy_file.url)

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # Keep the freshly rendered snippet: do not reset it while saving.
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))

    @property
    def root_ancestor(self):
        """ returns the oldest ancestor (memoised in ``_root_ancestor``) """

        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.
        """

        from collections import deque
        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, epub_file)
            epub_data = epub_file.getvalue()
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_data), save=False)
            self.save()
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_data).hexdigest()).save()
        except NoDublinCore:
            # Deliberately best-effort: no EPUB is stored in this case.
            pass

        # Breadth-first walk over all descendants.
        book_descendants = deque(self.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            book_descendants.extend(child_book.children.all())

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Create or update a book from an XML file.

        ``xml_file`` is either a Django ``File`` or something ``open()``
        accepts. The file is closed before returning, including a ``File``
        supplied by the caller.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        raw_file -- file object holding the book's XML.
        book_info -- metadata object; the last segment of ``book_info.url``
            is used as the slug.
        overwrite -- allow replacing an already existing book.

        Sets tags, links child books listed in ``book_info.parts``, stores
        the XML, regenerates the HTML and its themed fragments, rebuilds
        the EPUB for parentless books and resets the cached counters.

        Raises ``Book.AlreadyExists`` when the book exists and ``overwrite``
        is false, and ``Book.DoesNotExist`` when a listed part is missing.
        Returns the saved book.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString
        from django.core.files.storage import default_storage

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except AttributeError:
                # No plural attribute: fall back to the singular one.
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = slughifi(tag_sort_key)
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags + book_shelves

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                    child_book.parent = book
                    child_book.parent_number = n
                    child_book.save()
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        # The temporary file is only needed until its content has been copied
        # into html_file, so close it right after instead of leaking it.
        html_file = NamedTemporaryFile()
        try:
            html_built = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if html_built:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            html_file.close()

        if html_built:
            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = book.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                short_text = ''
                markup = MarkupString(text)
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                new_fragment.save()
                new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)

        if not book.parent:
            book.build_epub(remove_descendants=False)

        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        book_descendants = deque(book.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            if child_book.has_epub_file():
                child_book.epub_file.delete()
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants.extend(child_book.children.all())

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        book.save()
        return book

    def refresh_tag_counter(self):
        """Recompute, store and return the ``{tag pk: count}`` mapping.

        Children's counters are summed; each of this book's own tags outside
        the 'book', 'theme' and 'set' categories is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_tag_counter(self):
        """Clear the cached tag counter of this book and of every ancestor."""
        self._tag_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """``{tag pk: count}`` mapping, recomputed when the cache is empty."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the ``{theme tag pk: count}`` mapping.

        Counts theme tags over every fragment carrying this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_theme_counter(self):
        """Clear the cached theme counter of this book and of every ancestor."""
        self._theme_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """``{theme tag pk: count}`` mapping, recomputed when the cache is empty."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
563
564
565
class Fragment(models.Model):
    """A piece of a book's text, located in the book by ``anchor``."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the URL of the book's text with ``#m<anchor>`` appended."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def short_html(self):
        """Return the short HTML snippet for the active language.

        An empty ``_short_html_<language>`` attribute is filled by rendering
        ``catalogue/fragment_short.html``, after which the fragment is saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached and len(cached):
            return mark_safe(cached)

        rendered = render_to_string('catalogue/fragment_short.html',
            {'fragment': self})
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
595
596
class BookStub(models.Model):
    """Record with a title, author, slug and public-domain year (``pd``)."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by ``permalink``."""
        view_args = [self.slug]
        return ('catalogue.views.book_detail', view_args)

    def in_pd(self):
        """Tell whether the ``pd`` year is set and not later than this year."""
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title
623
624
class FileRecord(models.Model):
    """Timestamped SHA-1 record for a file identified by slug and type."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time','-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        # Rendered as "<sha1> <slug>.<type>".
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
638
639
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    sender -- the object whose tags changed.
    affected_tags -- iterable of the tags involved.

    Every affected tag gets its ``book_count`` cleared. A ``Book`` sender
    also has its tag counter reset when any affected tag lies outside the
    'book', 'theme' and 'set' categories. For a ``Fragment`` sender, its
    book's theme counter is reset when any affected tag is a theme.
    """
    # Collect the primary keys once: the same keys drive every query below,
    # and ``affected_tags`` may be an iterable that can only be consumed once.
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        counted_tags = Tag.objects.filter(pk__in=affected_pks).\
            exclude(category__in=('book', 'theme', 'set'))
        if counted_tags.count():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        theme_tags = Tag.objects.filter(pk__in=affected_pks).\
            filter(category='theme')
        if theme_tags.count():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
655