changed Fragment.short_html (#309)
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# (stored value, translatable label) pairs; used as the ``choices`` of
# ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets contain only rows of one ``category``."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default query set narrowed to ``self.subcategory``."""
        base_query_set = super(TagSubcategoryManager, self).get_query_set()
        return base_query_set.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag belonging to one of the ``TAG_CATEGORIES``.

    A tag is identified by the pair (``slug``, ``category``); the same slug
    may therefore exist in several categories.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Path segment used in tag URLs -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Category value -> path segment (the inverse of ``categories_rev``).
    # NOTE: there is no entry for the 'book' category.
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the URL of the tagged-object list for this tag."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        return self.death is not None and self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author

        Returns ``death + 71``, or ``None`` when ``death`` is not set.
        """
        return self.death + 71 if self.death is not None else None

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of ``Tag`` objects.

        When ``tags`` is a string it is split on ``'/'``.  A segment that is
        a key of ``categories_rev`` sets the category for the next segment;
        any other segment is looked up as a slug, either in that category or,
        when no category precedes it, among all tags except 'book' tags.
        Any other input is delegated to ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a slug was not found, or the string ends with
                a category segment that has no slug after it.
            Tag.MultipleObjectsReturned: at least one unqualified slug
                matched several tags.  The exception carries ``tags`` (the
                tags resolved so far) and ``ambiguous_slugs``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            elif category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                except Tag.MultipleObjectsReturned:
                    # Keep going so that every ambiguous slug gets reported.
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """Return ``'<category segment>/<slug>'`` for use in tag URLs.

        Raises ``KeyError`` for categories missing from ``categories_dict``
        (currently 'book').
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
138
139
140 # TODO: why is this hard-coded ?
def book_upload_path(ext):
    """Build an ``upload_to`` callable that yields ``lektura/<slug>.<ext>``.

    The returned function takes ``(book, filename)``; only ``book.slug`` is
    used and ``filename`` is ignored.
    """
    def upload_to(instance, filename):
        return 'lektura/%(slug)s.%(ext)s' % {'slug': instance.slug, 'ext': ext}
    return upload_to
145
146
class Book(models.Model):
    """A book in the catalogue: metadata, downloadable files, tags and
    cached HTML snippets.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save, not
    # only on creation -- confirm that this is intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)


    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; None means "not computed yet" (see tag_counter and
    # theme_counter below).
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
        """Save the book, optionally clearing cached HTML and reading MP3 tags.

        Args:
            force_insert, force_update: passed on to ``Model.save``.
            reset_short_html: when true, blank every instance attribute
                whose name starts with ``_short_html`` and apply the same
                reset to all fragments of this book.
            refresh_mp3: when true and an MP3 file is attached, merge the
                result of ``get_mp3_info()`` into ``extra_info`` and save
                a second time.

        Returns:
            The result of the last ``Model.save`` call.
        """
        if reset_short_html:
            # Reset _short_html during save
            update = {}
            for key in [k for k in self.__dict__ if k.startswith('_short_html')]:
                update[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row exists after the first save, so a forced INSERT must
            # not be repeated here.
            book = super(Book, self).save(False, force_update)

        return book

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag(self):
        """Return the 'book'-category tag for this book, creating it if needed.

        The tag slug is ``'l-' + slug`` cut to 120 characters; a new tag gets
        the title (cut to 50 characters) as its name and the slug as sort key.
        """
        slug = ('l-' + self.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML snippet for the active language.

        The snippet is read from the ``_short_html_<language>`` attribute.
        When that is empty it is rendered from ``catalogue/book_short.html``,
        stored on the instance and saved, then returned.
        """
        from django.utils.html import escape

        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)
        if short_html:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        # Tag names are plain text, so escape them before marking the
        # finished link as safe.
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), escape(tag.name)))
                for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        for ext in ('pdf', 'epub', 'odt', 'txt', 'mp3', 'ogg'):
            book_file = getattr(self, '%s_file' % ext)
            if book_file:
                formats.append(u'<a href="%s">%s</a>' % (book_file.url, ext.upper()))
        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # Only the cached snippet changed: keep it and skip the MP3 re-read.
        self.save(reset_short_html=False, refresh_mp3=False)
        return mark_safe(getattr(self, key))


    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Import a book from ``xml_file``.

        Metadata is parsed with ``dcparser.parse``; the file is wrapped in a
        Django ``File`` when it is not one already and is closed once
        ``from_text_and_meta`` finishes, whether it succeeds or raises.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        Args:
            raw_file: file object with the book XML; saved as ``xml_file``.
            book_info: parsed metadata; ``url``, ``title``, ``to_dict()``,
                ``kind``, ``genre``, ``author``, ``epoch`` and, when present,
                ``parts`` are read from it.
            overwrite: allow updating a book whose slug already exists.

        Returns:
            The saved ``Book``.

        Raises:
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
            Book.DoesNotExist: a book listed in ``book_info.parts`` is missing.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but shown as "First Last".
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
            if created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.save()
            book_tags.append(tag)

        book.tags = book_tags

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Walk all descendants breadth-first and tag them (and their
        # fragments) with this book's tag.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                book_themes = []
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    markup = MarkupString(text)
                    short_text = ''
                    if len(markup) > 240:
                        short_text = unicode(markup[:160])
                    # NOTE(review): ``defaults`` only apply to newly created
                    # rows, so an existing fragment keeps its old text on
                    # re-import -- confirm whether that is intended.
                    new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # ``fragment.themes`` has no ``split``: skip tagging.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                        if created:
                            tag.name = theme_name
                            tag.sort_key = slughifi(theme_name)
                            tag.save()
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                    book_themes += themes

                book_themes = set(book_themes)
                book.tags = list(book.tags) + list(book_themes) + book_shelves
        finally:
            # Release the temporary HTML file even when the transform fails.
            html_file.close()

        book.save()
        return book


    def refresh_tag_counter(self):
        """Recompute, store and return the ``{tag pk: count}`` mapping.

        Sums the counters of all children and sets the count to 1 for each
        of the book's own tags outside the 'book', 'theme' and 'set'
        categories.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def tag_counter(self):
        """Stored tag counter with integer keys; computed when still unset."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the ``{theme tag pk: count}`` mapping.

        Counts 'theme' tags over all fragments tagged with this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def theme_counter(self):
        """Stored theme counter with integer keys; computed when still unset."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
439
440
441
class Fragment(models.Model):
    """A fragment of a book's text, identified by an anchor within the book."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL followed by ``#m<anchor>``."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the short HTML snippet for the active language.

        An empty ``_short_html_<language>`` attribute is first filled by
        rendering ``catalogue/fragment_short.html`` and saving the fragment.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if not (cached and len(cached)):
            rendered = render_to_string('catalogue/fragment_short.html',
                {'fragment': self})
            setattr(self, key, unicode(rendered))
            self.save()
            cached = getattr(self, key)
        return mark_safe(cached)
471
472
class BookStub(models.Model):
    """Stub record holding a title, author, slug, translator details and the
    year in which the work goes to the public domain.
    """
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the URL of the book detail view for this slug."""
        return ('catalogue.views.book_detail', [self.slug])

    def in_pd(self):
        """Tell whether ``pd`` is set and not later than the current year."""
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title
499
500