tests, epub, tag counters, l-tags
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# (stored value, translatable label) pairs; used below as the ``choices``
# of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base query set only contains rows of one category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``; it is matched against ``category`` later."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's query set filtered by the stored category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag; ``category`` is one of the TAG_CATEGORIES values."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'), default=False, db_index=True,
        help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        """Use the tag's name as its text representation."""
        return self.name

    def __repr__(self):
        """Debug representation showing the slug."""
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments that ``permalink`` resolves."""
        return ('catalogue.views.tagged_object_list', [self.slug])

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of Tag objects.

        A string is split on '/' and every piece is fetched by slug with
        ``Tag.objects.get``; anything else is handed to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)
        return [Tag.objects.get(slug=tag_slug) for tag_slug in tags.split('/')]
97
98
99 # TODO: why is this hard-coded ? 
def book_upload_path(ext):
    """Build an ``upload_to`` callable for files with extension ``ext``.

    The returned function takes ``(book, filename)``, ignores ``filename``
    and answers ``'lektura/<book.slug>.<ext>'``.
    """
    def path_for(book, filename):
        # The uploaded file's own name is deliberately not used.
        parts = (book.slug, ext)
        return 'lektura/%s.%s' % parts
    return path_for
104
105
class Book(models.Model):
    """A catalogue book with its downloadable formats, tags and cached counters."""

    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True makes Django refresh this value on every
    # save, although the label says "creation date" -- confirm this is intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; the empty string means "not computed yet"
    # (see the tag_counter / theme_counter properties).
    _tag_counter = JSONField(editable=False, default='')
    _theme_counter = JSONField(editable=False, default='')

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta when the book exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        """Use the title as the text representation."""
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
        """Save the book, optionally clearing cached HTML and refreshing MP3 info.

        reset_short_html -- when true, every instance attribute whose name
            starts with '_short_html' is blanked before saving.
        refresh_mp3 -- when true and an MP3 file is attached, the values from
            get_mp3_info() are merged into extra_info and the book is saved
            a second time.

        Returns whatever the parent ``save`` returned last.
        """
        if reset_short_html:
            # Build the key list first; the attributes are reassigned below.
            cached_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            for key in cached_keys:
                setattr(self, key, '')

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row already exists after the first save, so an INSERT must
            # not be forced a second time.
            book = super(Book, self).save(force_insert=False, force_update=force_update)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def book_tag(self):
        """Return the tag with slug 'l-<book slug>', creating it if missing.

        A newly created tag gets the (truncated) title as its name, the slug
        as its sort key and the 'book' category.
        """
        slug = ('l-' + self.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(slug=slug)
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.category = 'book'
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML for the active language, rendering it if empty.

        The rendered text is stored on the '_short_html_<language>' attribute
        and the book is saved (without resetting the cached HTML) so later
        calls can reuse it.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)

        if short_html:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        # The remaining formats only differ in the field read and the label shown.
        download_formats = (
            ('pdf_file', 'PDF'),
            ('epub_file', 'EPUB'),
            ('odt_file', 'ODT'),
            ('txt_file', 'TXT'),
            ('mp3_file', 'MP3'),
            ('ogg_file', 'OGG'),
        )
        for field_name, label in download_formats:
            file_field = getattr(self, field_name)
            if file_field:
                formats.append(u'<a href="%s">%s</a>' % (file_field.url, label))

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        """Tell whether an EPUB file is attached."""
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Parse metadata from ``xml_file`` and build the book from it.

        The file is wrapped in a Django ``File`` if it is not one already and
        is closed once from_text_and_meta() finishes, whether or not it raised.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML text and parsed metadata.

        raw_file -- file object stored as the book's XML file.
        book_info -- parsed metadata; this method reads its ``url``, ``title``,
            ``kind``, ``genre``, ``author``, ``epoch`` and optional ``parts``
            attributes and calls ``to_dict()``.
        overwrite -- allow updating a book whose slug already exists.

        Raises Book.AlreadyExists when the book exists and ``overwrite`` is
        false, and Book.DoesNotExist when a listed part is not in the
        database. Returns the saved book.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but displayed with first names.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
            if created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.category = category
                tag.save()
            book_tags.append(tag)

        book.tags = book_tags

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                # Only the lookup is guarded; the message names the missing slug.
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Walk all descendants breadth-first and tag them (and their
        # fragments) with this book's tag.
        pending = deque(book.children.all())
        while pending:
            child_book = pending.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            pending.extend(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                book_themes = []
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    markup = MarkupString(text)
                    short_text = ''
                    if len(markup) > 240:
                        short_text = unicode(markup[:160])
                    new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # ``themes`` has no split(): leave this fragment untagged.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
                        if created:
                            tag.name = theme_name
                            tag.sort_key = slughifi(theme_name)
                            tag.category = 'theme'
                            tag.save()
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                    book_themes += themes

                book_themes = set(book_themes)
                book.tags = list(book.tags) + list(book_themes) + book_shelves
        finally:
            # Release the temporary file even if the transform or extraction fails.
            html_file.close()

        book.save()
        return book

    def refresh_tag_counter(self):
        """Recompute, store and return the {tag pk: count} mapping.

        Counts from all children's tag counters are summed; each of this
        book's own tags outside the 'book', 'theme' and 'set' categories is
        then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.items():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def tag_counter(self):
        """Return the tag counter with integer keys, computing it if unset."""
        if self._tag_counter == '':
            return self.refresh_tag_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__tag_counter_value().items())

    def refresh_theme_counter(self):
        """Recompute, store and return the {theme tag pk: fragment count} mapping.

        Every fragment carrying this book's tag contributes one to each of
        its 'theme' tags.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def theme_counter(self):
        """Return the theme counter with integer keys, computing it if unset."""
        if self._theme_counter == '':
            return self.refresh_theme_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__theme_counter_value().items())
399     
400
401
class Fragment(models.Model):
    """A piece of a book's text, identified by an anchor and carrying tags."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book's text URL followed by '#m<anchor>'."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the short HTML for the active language, rendering it if empty.

        A freshly rendered value is stored on the '_short_html_<language>'
        attribute and the fragment is saved before the value is returned.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        # Link every author tag of the owning book.
        book_authors = []
        for tag in self.book.tags:
            if tag.category == 'author':
                link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
                book_authors.append(mark_safe(link))

        context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
        rendered = render_to_string('catalogue/fragment_short.html', context)
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
434
435
class BookStub(models.Model):
    """Title, author and public-domain year for a book, stored without files."""

    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        """Use the title as the text representation."""
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    def in_pd(self):
        """Tell whether ``pd`` is set and not later than the current year."""
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
462
463