Merge branch 'master' of http://github.com/fnp/wolnelektury
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# Closed set of tag categories, as (stored value, translatable label) pairs.
# Each label stays a literal ``_()`` call so that it can be picked up for
# translation.
TAG_CATEGORIES = (
    ("author", _("author")),
    ("epoch", _("epoch")),
    ("kind", _("kind")),
    ("genre", _("genre")),
    ("theme", _("theme")),
    ("set", _("set")),
    ("book", _("book")),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is restricted to a single tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag belonging to one of the ``TAG_CATEGORIES``.

    A tag is identified by the pair (``slug``, ``category``); the same slug
    may therefore appear in several categories.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    # Year of death; only consulted by alive(), in_pd() and goes_to_pd().
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment -> category value it selects.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: category value -> URL path segment.
    # NOTE(review): there is no entry for the 'book' category, so url_chunk
    # raises KeyError for such tags -- confirm that this is intended.
    categories_dict = dict((category, chunk) for chunk, category in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the data ``permalink`` needs to reverse the tagged-object list URL."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        return self.death is not None and self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author

        Returns ``death + 71``, or ``None`` when no year of death is recorded.
        """
        return self.death + 71 if self.death is not None else None

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of Tag objects.

        A string is read as '/'-separated chunks. A chunk that is a key of
        ``categories_rev`` selects the category of the slug that follows it;
        any other chunk is a slug. A slug without a preceding category chunk
        is looked up among all tags outside the 'book' category.

        Anything that is not a string is handed to ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a slug matches no tag, or the string ends with
                a category chunk that has no slug after it.
            Tag.MultipleObjectsReturned: at least one unqualified slug matches
                tags in several categories. The exception carries ``tags``
                (the tags resolved so far) and ``ambiguous_slugs``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            elif category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                except Tag.MultipleObjectsReturned:
                    # Keep going so that every ambiguous slug gets reported at once.
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            error = Tag.MultipleObjectsReturned()
            error.tags = real_tags
            error.ambiguous_slugs = ambiguous_slugs
            raise error
        return real_tags

    @property
    def url_chunk(self):
        """URL fragment for this tag: '<category path segment>/<slug>'."""
        return '/'.join((Tag.categories_dict[self.category], self.slug))
138
139
def book_upload_path(ext, directory='lektura'):
    """Build an ``upload_to`` callable for a book file in the given format.

    Args:
        ext: file extension (without the dot) appended to the book slug.
        directory: directory prefix of the generated path; defaults to
            'lektura', the value that used to be hard-coded here.

    Returns:
        A function ``(book, filename)`` returning
        '<directory>/<book.slug>.<ext>'. The ``filename`` argument is ignored.
    """
    def get_dynamic_path(book, filename):
        return '%s/%s.%s' % (directory, book.slug, ext)
    return get_dynamic_path
145
146
class Book(models.Model):
    """A catalogue book with its metadata, format files and tags.

    Books form a tree through ``parent`` (reverse name ``children``);
    ``parent_number`` holds the position of a book among its parent's parts.
    Rendered HTML snippets are cached in the ``_short_html*`` attributes and
    tag statistics in the ``_tag_counter`` / ``_theme_counter`` JSON fields.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save although
    # the label says 'creation date'; auto_now_add=True may have been
    # intended -- confirm before changing.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached results of refresh_tag_counter() / refresh_theme_counter();
    # None means "not computed yet".
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting was not requested."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
        """Save the book, optionally resetting cached HTML and MP3 metadata.

        Args:
            force_insert, force_update: passed on to ``Model.save``.
            reset_short_html: when true, every instance attribute whose name
                starts with '_short_html' is emptied before saving.
            refresh_mp3: when true and an MP3 file is attached, the values
                from ``get_mp3_info()`` are merged into ``extra_info`` and
                the book is saved a second time.

        Returns:
            Whatever ``Model.save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            cached_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            for key in cached_keys:
                setattr(self, key, '')

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            # Done after the first save, once the file field has been handled
            # by the model save, because get_mp3_info() opens mp3_file.path.
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row has just been written, so force_insert must not be
            # repeated: it would try to INSERT the same row a second time.
            book = super(Book, self).save(False, force_update)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the data ``permalink`` needs to reverse the book detail URL."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag(self):
        """Return the 'book'-category tag of this book, creating it if needed.

        The tag slug is 'l-' followed by the book slug, cut to 120 characters.
        A newly created tag gets the title (cut to 50 characters) as its name
        and the slug as its sort key.
        """
        slug = ('l-' + self.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML box of the book for the active language.

        The snippet is read from the ``_short_html_<language>`` attribute.
        When that is empty, 'catalogue/book_short.html' is rendered, the
        result is stored in the attribute and the book is saved.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)
        if short_html:
            return mark_safe(short_html)

        # NOTE(review): tag names go into the markup unescaped and are marked
        # safe -- confirm that they can never contain HTML.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        downloadable = (
            (u'PDF', self.pdf_file),
            (u'EPUB', self.epub_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloadable:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # Only the cached snippet changed: keep it, and skip re-reading the
        # MP3 tags, just like the counter refreshers do.
        self.save(reset_short_html=False, refresh_mp3=False)
        return mark_safe(getattr(self, key))

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags.

        Returns a dict with 'artist_name' (TPE1 frames) and 'director_name'
        (TPE3 frames); multiple values are joined with ', '.
        """
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    # The has_* predicates below carry ``short_description`` / ``boolean``
    # attributes -- presumably so that they can serve as admin list columns.
    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        """Tell whether an EPUB file is attached."""
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Import a book from ``xml_file``.

        The metadata is parsed with ``dcparser.parse``; the file is wrapped
        in a Django ``File`` when it is not one already and is always closed
        before returning. See ``from_text_and_meta`` for the import itself.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        Args:
            raw_file: file object with the XML source; stored as ``xml_file``.
            book_info: metadata object. Read here: ``url``, ``title``,
                ``to_dict()``, ``kind``, ``genre``, ``epoch``, ``author``
                (with ``first_names`` and ``last_name``) and, when present,
                ``parts`` (URLs of the child books).
            overwrite: allow updating a book whose slug already exists.

        Returns:
            The saved Book.

        Raises:
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
            Book.DoesNotExist: a book listed in ``book_info.parts`` is missing.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but displayed as "first last".
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
            if tag_created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.save()
            book_tags.append(tag)

        book.tags = book_tags

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Walk the whole subtree breadth-first, adding this book's tag to
        # every descendant and to the descendants' fragments.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            html_built = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if html_built:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The temporary file is only a staging area for the field save.
            html_file.close()

        if html_built:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            book_themes = []
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, fragment_created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # Nothing to split: the fragment is left without tags.
                    continue
                themes = []
                for theme_name in theme_names:
                    tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if tag_created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes += themes

            book_themes = set(book_themes)
            book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book

    def refresh_tag_counter(self):
        """Recompute, store and return the tag counter of this book.

        The result maps tag primary keys to counts: the children's counters
        summed up, with every tag of this book outside the 'book', 'theme'
        and 'set' categories then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.items():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def tag_counter(self):
        """Tag counter as ``{tag pk (int): count}``, computed on first use."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # Keys are converted back to int after being read from the JSON field.
        return dict((int(k), v) for k, v in self.get__tag_counter_value().items())

    def refresh_theme_counter(self):
        """Recompute, store and return the theme counter of this book.

        The result maps theme tag primary keys to the number of fragments
        tagged with both this book's tag and that theme.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def theme_counter(self):
        """Theme counter as ``{tag pk (int): count}``, computed on first use."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # Keys are converted back to int after being read from the JSON field.
        return dict((int(k), v) for k, v in self.get__theme_counter_value().items())
435     
436
437
class Fragment(models.Model):
    """A fragment of a book's text, addressed by an anchor within that book."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """URL of the book text with '#m<anchor>' appended."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the short HTML box of the fragment for the active language.

        The snippet is read from the ``_short_html_<language>`` attribute;
        when that is empty, 'catalogue/fragment_short.html' is rendered, the
        result is stored in the attribute and the fragment is saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached and len(cached):
            return mark_safe(cached)

        # Links to the author tags of the book this fragment comes from.
        book_authors = []
        for tag in self.book.tags:
            if tag.category != 'author':
                continue
            link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
            book_authors.append(mark_safe(link))

        context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
        rendered = unicode(render_to_string('catalogue/fragment_short.html', context))
        setattr(self, key, rendered)
        self.save()
        return mark_safe(getattr(self, key))
470
471
class BookStub(models.Model):
    """Stub entry for a book: title, author, translator and public-domain year."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the data ``permalink`` needs to reverse the book detail URL."""
        view_name = 'catalogue.views.book_detail'
        return (view_name, [self.slug])

    def in_pd(self):
        """Tell whether the recorded public-domain year is the current year or earlier.

        Returns False when no year is recorded.
        """
        if self.pd is None:
            return False
        current_year = datetime.now().year
        return self.pd <= current_year

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title
498
499