librarian update
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase, tags_updated
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import dcparser, html, epub, NoDublinCore
21 from mutagen import id3
22
23
# Tag categories as (stored value, translatable label) pairs.
# Used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset only contains tags of one category."""

    def __init__(self, subcategory):
        """Remember the category value that every query is restricted to."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag belonging to one of the ``TAG_CATEGORIES``."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); None means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=False, null=True)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category value -> URL path segment (inverse of categories_rev).
    categories_dict = dict((value, key) for key, value in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments of the tag's listing page."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the description is non-empty."""
        description_length = len(self.description)
        return description_length > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    def get_count(self):
        """ returns global book count for book tags, fragment count for themes

        The value is computed once, stored in ``book_count`` and saved.
        """
        if self.book_count is not None:
            return self.book_count

        if self.category == 'book':
            # never used
            matching = Book.objects.none()
        elif self.category == 'theme':
            matching = Fragment.tagged.with_all((self,))
        else:
            matching = Book.tagged.with_all((self,)).order_by()
            if self.category != 'set':
                # eliminate descendants
                book_tag_slugs = [book.book_tag_slug() for book in matching]
                book_tags = Tag.objects.filter(slug__in=book_tag_slugs)
                descendant_pks = [book.pk for book in Book.tagged.with_any(book_tags)]
                if descendant_pks:
                    matching = matching.exclude(pk__in=descendant_pks)

        self.book_count = matching.count()
        self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve a '/'-separated string of slugs into a list of Tag objects.

        A chunk found in ``categories_rev`` sets the category of the slug that
        follows it. Non-string input is delegated to ``TagBase.get_tag_list``.

        Raises ``Tag.DoesNotExist`` when a category chunk is not followed by a
        slug (or a lookup finds nothing), and ``Tag.MultipleObjectsReturned``
        (carrying ``tags`` and ``ambiguous_slugs``) when an unqualified slug
        matches several tags.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        resolved = []
        ambiguous = []
        pending_category = None
        for chunk in tags.split('/'):
            if chunk in Tag.categories_rev:
                pending_category = Tag.categories_rev[chunk]
                continue

            if pending_category:
                resolved.append(Tag.objects.get(slug=chunk, category=pending_category))
                pending_category = None
                continue

            try:
                resolved.append(Tag.objects.exclude(category='book').get(slug=chunk))
            except Tag.MultipleObjectsReturned:
                ambiguous.append(chunk)

        if pending_category:
            # something strange left off
            raise Tag.DoesNotExist()

        if ambiguous:
            # some tags should be qualified
            error = Tag.MultipleObjectsReturned()
            error.tags = resolved
            error.ambiguous_slugs = ambiguous
            raise error

        return resolved

    @property
    def url_chunk(self):
        """URL fragment of the form ``<category segment>/<slug>``."""
        category_segment = Tag.categories_dict[self.category]
        return '/'.join((category_segment, self.slug))
159
160
# The target directory used to be hard-coded; it is now a parameter that
# defaults to the historical 'lektura' location.
def book_upload_path(ext, directory='lektura'):
    """Build an ``upload_to`` callable for book files with extension ``ext``.

    The returned function ignores the uploaded file's own name and always
    yields ``<directory>/<book.slug>.<ext>``.
    """
    def get_dynamic_path(book, filename):
        return '%s/%s.%s' % (directory, book.slug, ext)
    return get_dynamic_path
166
167
class Book(models.Model):
    """A catalogue book: metadata, downloadable formats, tags and cached data.

    Rendered snippets are cached in attributes whose names start with
    ``_short_html``; per-book tag and theme counts are cached in
    ``_tag_counter`` and ``_theme_counter``.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): labelled 'creation date', but auto_now=True refreshes the
    # value on every save; auto_now_add may have been intended -- confirm.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised when importing a book whose slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
        """Save the book, optionally clearing cached HTML and refreshing MP3 info.

        reset_short_html -- blank every ``_short_html*`` attribute of the book
            and the same columns of its fragments before saving.
        refresh_mp3 -- when an MP3 file is attached, merge its ID3 data into
            ``extra_info`` and save once more.
        Remaining keyword arguments are forwarded to ``Model.save``.
        """
        if reset_short_html:
            # Reset _short_html during save
            cached_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            update = {}
            for key in cached_keys:
                update[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update, **kwargs)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row exists after the first save, so the force_* flags must
            # not be repeated: forcing a second INSERT would fail.
            book = super(Book, self).save(**kwargs)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments of the book's detail page."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of the book's own tag: 'l-' + slug, cut to 120 chars."""
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """Return the 'book' category tag for this book, creating it if needed."""
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the book's short HTML snippet for the active language.

        The snippet is read from the ``_short_html_<language>`` attribute; when
        that is empty it is rendered from ``catalogue/book_short.html``, stored
        on the book and saved.
        """
        from django.utils.html import escape

        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        # Tag names are plain text, so escape them before marking the link safe.
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), escape(tag.name))) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        # The EPUB link comes from the root ancestor, not from this book.
        downloads = (
            (u'PDF', self.pdf_file),
            (u'EPUB', self.root_ancestor.epub_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloads:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))

    @property
    def root_ancestor(self):
        """ returns the oldest ancestor """
        # Computed once per instance and kept in _root_ancestor.
        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)

        def joined_text(frame_id):
            # Each frame may hold several strings; flatten them all into one.
            return ', '.join(', '.join(frame.text) for frame in audio.getall(frame_id))

        return {'artist_name': joined_text('TPE1'), 'director_name': joined_text('TPE3')}

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        """Tell whether an EPUB file is attached."""
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.
        """
        from collections import deque
        from StringIO import StringIO
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        if self.parent:
            # don't need an epub
            return

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, epub_file)
            epub_data = epub_file.getvalue()
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_data), save=False)
            self.save()
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_data).hexdigest()).save()
        except NoDublinCore:
            # Deliberately skipped: no EPUB is built for such a book.
            pass

        # Breadth-first walk over all descendants.
        pending = deque(self.children.all())
        while pending:
            child_book = pending.popleft()
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            pending.extend(child_book.children.all())

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Create or update a book from an XML file (a path or a ``File``).

        The file object is closed before returning, including when the caller
        passed an already opened ``File``.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        raw_file -- file object holding the book's XML.
        book_info -- parsed metadata; its ``url`` supplies the slug.
        overwrite -- allow replacing an already existing book.

        Raises ``Book.AlreadyExists`` when the slug exists and ``overwrite`` is
        false, and ``Book.DoesNotExist`` when a listed part is not in the
        catalogue. Returns the saved book.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        _base_url, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except Exception:
                # Fall back to the singular attribute. Exception (rather than a
                # bare except) keeps KeyboardInterrupt/SystemExit propagating.
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = slughifi(tag_sort_key)
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags + book_shelves

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                _part_base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            has_html = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if has_html:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The temporary file is no longer needed once it has been stored.
            html_file.close()

        if has_html:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                short_text = ''
                markup = MarkupString(text)
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})
                if not created:
                    # get_or_create applies defaults only on creation, so an
                    # overwritten book must refresh existing fragments here.
                    new_fragment.text = text
                    new_fragment.short_text = short_text

                new_fragment.save()
                new_fragment.tags = set(book_tags + themes + [book_tag])

        book.build_epub(remove_descendants=False)

        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        pending = deque(book.children.all())
        while pending:
            child_book = pending.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            if child_book.has_epub_file():
                child_book.epub_file.delete()
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            pending.extend(child_book.children.all())

        # refresh cache: reading the properties computes and stores any
        # counter that is currently unset
        book.tag_counter
        book.theme_counter

        book.save()
        return book

    def refresh_tag_counter(self):
        """Recompute, store and return the ``{tag pk: count}`` mapping.

        Children's counters are summed; each of the book's own tags outside
        the 'book', 'theme' and 'set' categories is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_tag_counter(self):
        """Clear the cached tag counter of this book and of every ancestor."""
        self._tag_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """Tag counter as ``{int pk: count}``, recomputed when not cached."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the ``{theme tag pk: fragment count}`` mapping.

        Counts the theme tags of every fragment carrying this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_theme_counter(self):
        """Clear the cached theme counter of this book and of every ancestor."""
        self._theme_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """Theme counter as ``{int pk: count}``, recomputed when not cached."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
549
550
551
class Fragment(models.Model):
    """A tagged fragment of a book's text, located by an anchor."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL followed by ``#m<anchor>``."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the fragment's short HTML snippet for the active language.

        An empty cached value is rendered from
        ``catalogue/fragment_short.html``, stored on the fragment and saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached and len(cached):
            return mark_safe(cached)

        rendered = render_to_string('catalogue/fragment_short.html',
            {'fragment': self})
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
581
582
class BookStub(models.Model):
    """A stub entry holding a book's title, author and public domain year."""

    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments of the book detail page."""
        return ('catalogue.views.book_detail', [self.slug])

    def in_pd(self):
        """Tell whether the recorded public domain year has been reached."""
        if self.pd is None:
            return False
        current_year = datetime.now().year
        return self.pd <= current_year

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
609
610
class FileRecord(models.Model):
    """A timestamped SHA-1 record for a file identified by slug and type."""

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        # Rendered as "<sha1> <slug>.<type>".
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
624
625
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Clears ``book_count`` on every affected tag. When the sender is a Book and
    any affected tag lies outside the 'book', 'theme' and 'set' categories,
    the book's tag counter is reset. When the sender is a Fragment and any
    affected tag is a theme, the theme counter of its book is reset.
    """
    # Collect the keys once: affected_tags is then iterated a single time,
    # which also works when it is a one-shot iterable.
    tag_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=tag_pks).update(book_count=None)

    if isinstance(sender, Book):
        # if book tags changed, reset book tag counter
        countable = Tag.objects.filter(pk__in=tag_pks).exclude(
            category__in=('book', 'theme', 'set'))
        if countable.count():
            sender.reset_tag_counter()
    elif isinstance(sender, Fragment):
        # if fragment theme changed, reset book theme counter
        themes = Tag.objects.filter(pk__in=tag_pks).filter(category='theme')
        if themes.count():
            sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
641