d63581f477b5d4859d35cf22ee3f3a1b78b18a12
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# Allowed tag categories as (stored value, translatable label) pairs.
# Passed as ``choices`` to the ``category`` field of ``Tag``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets are limited to a single tag category."""

    def __init__(self, subcategory):
        """Store the ``category`` value that every queryset is filtered by."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag; ``category`` is one of the ``TAG_CATEGORIES`` values.

    A tag is identified by the pair (``slug``, ``category``), see
    ``Meta.unique_together``.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment (Polish) -> internal category name.
    # Note there is no entry for the 'book' category.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: internal category name -> URL path segment.
    categories_dict = dict((category, url_name) for url_name, category in categories_rev.iteritems())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the URL of the tagged-object list for this tag."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description (admin column)."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Return True when no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        return self.death is not None and self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author

        Returns ``death + 71``, or None when the year of death is unknown.
        """
        return self.death + 71 if self.death is not None else None

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of ``Tag`` objects.

        A string is treated as a '/'-separated path in which a segment found
        in ``categories_rev`` selects the category of the slug that follows
        it, e.g. ``'autor/<slug>/epoka/<slug>'``.  A slug without a preceding
        category segment is looked up by slug alone.  Any other input is
        delegated to ``TagBase.get_tag_list``.

        Raises:
            Http404: the path ends with a category segment that has no slug.
            Tag.DoesNotExist (and other ``Tag.objects.get`` errors): a slug
                cannot be resolved to exactly one tag.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        # Imported here because the module does not import Http404 at the
        # top; without this the ``raise`` below failed with NameError.
        from django.http import Http404

        real_tags = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            elif category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            else:
                real_tags.append(Tag.objects.get(slug=name))
        if category:
            # Trailing category segment with no slug after it.
            raise Http404
        return real_tags

    @property
    def url_chunk(self):
        """Return the ``'<url category>/<slug>'`` path fragment for this tag.

        Raises KeyError for a category missing from ``categories_dict``
        (currently 'book').
        """
        return '/'.join((Tag.categories_dict[self.category], self.slug))
124
125
# TODO: why is the 'lektura/' upload prefix hard-coded? Consider making it configurable.
def book_upload_path(ext):
    """Return an ``upload_to`` callable storing files as ``lektura/<slug>.<ext>``."""
    def get_dynamic_path(book, filename):
        # ``filename`` is not used: the stored name is derived solely from
        # the book's slug and the extension bound in the enclosing call.
        return 'lektura/%(slug)s.%(ext)s' % {'slug': book.slug, 'ext': ext}
    return get_dynamic_path
131
132
class Book(models.Model):
    """A book in the catalogue together with its downloadable files.

    Books form a tree through ``parent`` / ``children``.  Rendered HTML
    snippets and tag/theme counters are cached on the row itself
    (``_short_html*``, ``_tag_counter``, ``_theme_counter``).
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this on every save, so it behaves
    # as a "last modified" timestamp despite its name - confirm intent.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)


    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; None means "not computed yet" (see tag_counter /
    # theme_counter below).
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting is disabled."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
        """Save the book, optionally clearing HTML caches and refreshing MP3 info.

        Args:
            force_insert, force_update: passed to ``models.Model.save``.
            reset_short_html: when true, every instance attribute whose name
                starts with ``_short_html`` is blanked before saving.
            refresh_mp3: when true and an MP3 file is attached, the values
                from ``get_mp3_info()`` are merged into ``extra_info`` and
                the row is saved a second time.

        Returns:
            Whatever ``models.Model.save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            cached_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            for key in cached_keys:
                setattr(self, key, '')

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            # Done after the first save, as in the original code - presumably
            # so the uploaded file is already in storage when its path is read.
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row exists now, so force_insert must not be repeated: a
            # second forced INSERT would collide with the row just written.
            book = super(Book, self).save(force_update=force_update)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the URL of the book detail view."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag(self):
        """Return the 'book'-category tag for this book, creating it if needed.

        The slug is ``'l-' + self.slug`` cut to 120 characters; a newly
        created tag is named after the title (cut to 50 characters).
        """
        slug = ('l-' + self.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the short HTML box for the book in the active language.

        The snippet is cached in the ``_short_html_<language>`` attribute
        (presumably declared by translation machinery outside this file -
        TODO confirm); on a cache miss it is rendered from
        ``catalogue/book_short.html`` and saved with the book.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(mark_safe(u'<a href="%s">%s</a>' % (
                reverse('book_text', kwargs={'slug': self.slug}), _('Read online'))))
        # Downloadable formats, in display order.
        download_formats = (
            ('pdf_file', u'PDF'),
            ('epub_file', u'EPUB'),
            ('odt_file', u'ODT'),
            ('txt_file', u'TXT'),
            ('mp3_file', u'MP3'),
            ('ogg_file', u'OGG'),
        )
        for field_name, label in download_formats:
            book_file = getattr(self, field_name)
            if book_file:
                formats.append(mark_safe(u'<a href="%s">%s</a>' % (book_file.url, label)))

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # Only the cache changed: keep the fresh snippet and skip re-reading
        # the MP3 tags, as the counter-refresh methods do.
        self.save(reset_short_html=False, refresh_mp3=False)
        return mark_safe(getattr(self, key))


    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        # TPE1 frames feed artist_name, TPE3 frames feed director_name.
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the book has a non-empty description (admin column)."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Import a book from an XML file.

        Metadata is parsed with ``dcparser.parse`` and the work is delegated
        to ``from_text_and_meta``.  The file is closed afterwards, whether or
        not the import succeeds.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        Args:
            raw_file: Django ``File`` with the book's XML.
            book_info: parsed metadata; this method reads ``url``, ``title``,
                ``kind``, ``genre``, ``author``, ``epoch``, ``to_dict()`` and
                the optional ``parts``.
            overwrite: allow updating a book whose slug already exists.

        Returns:
            The saved ``Book``.

        Raises:
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
            Book.DoesNotExist: a book listed in ``book_info.parts`` is missing.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        unused_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        # Tags taken straight from the metadata.
        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are displayed as "First Last" but sorted by last name.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
            if tag_created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.save()
            book_tags.append(tag)

        book.tags = book_tags

        book_tag = book.book_tag()

        # Attach the listed parts as ordered children of this book.
        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                unused_base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Breadth-first walk over all descendants: each one, and each of its
        # fragments, also gets this book's tag.
        book_descendants = deque(book.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants.extend(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            html_generated = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if html_generated:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # Always release the temporary file; the extraction below reads
            # from book.html_file.path, not from this temporary file.
            html_file.close()

        if html_generated:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            book_themes = []
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                short_text = ''
                markup = MarkupString(text)
                # Texts longer than 240 characters get a 160-character excerpt.
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                # NOTE(review): for an already existing (book, anchor) pair the
                # defaults are not applied, so its text stays as it was - confirm.
                new_fragment, fragment_created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # ``themes`` has no ``split``: leave this fragment untagged.
                    continue
                themes = []
                for theme_name in theme_names:
                    tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if tag_created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes += themes

            book_themes = set(book_themes)
            book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book


    def refresh_tag_counter(self):
        """Recompute, store and return the ``{tag pk: count}`` mapping.

        Counts are summed over the children's ``tag_counter`` values; each of
        the book's own tags outside the 'book', 'theme' and 'set' categories
        is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def tag_counter(self):
        """Return ``{tag pk: count}``, computing it first if not cached."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # Stored keys are converted back to ints (presumably they come back
        # from the JSON field as strings - TODO confirm).
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the ``{theme tag pk: fragment count}`` mapping.

        Counts theme tags over all fragments carrying this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def theme_counter(self):
        """Return ``{theme tag pk: count}``, computing it first if not cached."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
421     
422
423
class Fragment(models.Model):
    """A tagged excerpt of a book, addressed by an anchor in the book's text."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book text URL with an ``#m<anchor>`` suffix."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def short_html(self):
        """Return the fragment's short HTML box for the active language.

        The snippet is cached in the ``_short_html_<language>`` attribute; on
        a cache miss it is rendered from ``catalogue/fragment_short.html``
        and the fragment is saved.
        """
        cache_attr = '_short_html_%s' % get_language()
        cached = getattr(self, cache_attr)
        if cached:
            return mark_safe(cached)

        # Links to the author tags of the fragment's book.
        book_authors = []
        for tag in self.book.tags:
            if tag.category != 'author':
                continue
            link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
            book_authors.append(mark_safe(link))

        context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
        rendered = render_to_string('catalogue/fragment_short.html', context)
        setattr(self, cache_attr, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, cache_attr))
456
457
class BookStub(models.Model):
    """Title, author and public-domain year for a book listed as a stub."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the URL of the book detail view for this slug."""
        return ('catalogue.views.book_detail', [self.slug])

    def in_pd(self):
        """Tell whether the public-domain year is set and not in the future."""
        if self.pd is None:
            return False
        current_year = datetime.now().year
        return self.pd <= current_year

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title
484
485