allow multiple tags (#303)
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# Allowed tag categories as (stored value, translatable label) pairs.
# Used as the `choices` of Tag.category. The labels are kept as literal
# _('...') calls (rather than generated in a loop) so that each string
# appears verbatim in the source.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose default queryset is limited to one tag category."""

    def __init__(self, subcategory):
        """Remember the category value that every returned row must have."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by `category`."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag: a named, sluggable label belonging to one category.

    The (slug, category) pair is unique, so the same slug may exist in
    several categories; `get_tag_list` deals with the resulting ambiguity.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment -> internal category value. There is no entry for
    # the 'book' category.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: internal category value -> URL path segment.
    categories_dict = dict(
        (internal, url_name) for url_name, internal in categories_rev.items())

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by `permalink`."""
        url_args = [self.url_chunk]
        return ('catalogue.views.tagged_object_list', url_args)

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) != 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def alive(self):
        """Return True when no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    @staticmethod
    def get_tag_list(tags):
        """Resolve `tags` into a list of Tag objects.

        A string is treated as a '/'-separated path. A segment that is a
        key of `categories_rev` qualifies the segment that follows it;
        an unqualified segment is looked up by slug among all tags
        except those of category 'book'. Any non-string input is handed
        to `TagBase.get_tag_list` unchanged.

        Raises Tag.DoesNotExist when a lookup finds nothing or when the
        path ends with a category segment that qualifies nothing.
        Raises Tag.MultipleObjectsReturned when some unqualified slug
        matches several tags; the exception then carries `tags` (the
        tags resolved so far) and `ambiguous_slugs`.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        resolved = []
        ambiguous = []
        pending_category = None
        for segment in tags.split('/'):
            if segment in Tag.categories_rev:
                # A category prefix: remember it for the next segment.
                pending_category = Tag.categories_rev[segment]
                continue
            if pending_category:
                resolved.append(Tag.objects.get(slug=segment, category=pending_category))
                pending_category = None
                continue
            try:
                resolved.append(Tag.objects.exclude(category='book').get(slug=segment))
            except Tag.MultipleObjectsReturned:
                ambiguous.append(segment)

        if pending_category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous:
            # some tags should be qualified
            error = Tag.MultipleObjectsReturned()
            error.tags = resolved
            error.ambiguous_slugs = ambiguous
            raise error
        return resolved

    @property
    def url_chunk(self):
        """The '<category segment>/<slug>' part of this tag's URL."""
        category_segment = Tag.categories_dict[self.category]
        return '/'.join([category_segment, self.slug])
138
139
# TODO: why is the 'lektura/' directory hard-coded?
def book_upload_path(ext):
    """Build an `upload_to` callable that stores a book's file under its slug.

    The returned callable ignores the uploaded file's own name and yields
    'lektura/<book.slug>.<ext>'.
    """
    def path_for_book(book, filename):
        stem = book.slug
        return 'lektura/%s.%s' % (stem, ext)
    return path_for_book
145
146
class Book(models.Model):
    """A catalogue book with its downloadable files, tags and sub-books.

    Books form a tree through `parent` / `children`; `parent_number` is
    the position of a book among its parent's parts. Rendered HTML
    snippets and tag/theme counters are cached on the row and rebuilt
    lazily.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this timestamp on every save,
    # which does not match the 'creation date' label -- confirm whether
    # auto_now_add was intended before changing it.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; None means "not computed yet".
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised on import when the book exists and overwriting is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
        """Save the book, optionally clearing cached HTML and re-reading ID3 data.

        reset_short_html -- blank every instance attribute whose name
            starts with '_short_html' and blank the same fields on all
            fragments of this book.
        refresh_mp3 -- when an MP3 file is attached, merge the result of
            `get_mp3_info` into `extra_info` and save once more.

        Returns whatever the parent `save` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            cache_fields = [name for name in self.__dict__ if name.startswith('_short_html')]
            update = dict((name, '') for name in cache_fields)
            for name in cache_fields:
                setattr(self, name, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row already exists after the save above, so force_insert
            # must not be repeated here.
            book = super(Book, self).save(False, force_update)

        return book

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of `title`."""
        return self.title

    def book_tag(self):
        """Return the 'book'-category tag for this book, creating it if needed.

        The slug is 'l-' + the book slug, cut to 120 characters; a newly
        created tag gets the book title (cut to 50 characters) as name.
        """
        slug = ('l-' + self.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the cached short HTML for the active language, rendering it if empty.

        A freshly rendered snippet is stored on the per-language
        '_short_html_<lang>' attribute and saved without resetting the
        caches again.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        # Remaining formats link straight to the stored file, in this order.
        downloads = (
            (u'PDF', self.pdf_file),
            (u'EPUB', self.epub_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloads:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        formats = [mark_safe(link) for link in formats]

        rendered = render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})
        setattr(self, key, unicode(rendered))
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Parse metadata from `xml_file` and import it via `from_text_and_meta`.

        `xml_file` is wrapped in a Django `File` when it is not one
        already, and is closed once the import finishes or fails.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        raw_file -- file object holding the book's XML.
        book_info -- parsed metadata; the last path segment of its `url`
            is used as the book slug.
        overwrite -- allow updating a book that already exists.

        Raises Book.AlreadyExists when the book exists and `overwrite`
        is false, and Book.DoesNotExist when a part listed in
        `book_info.parts` has not been imported yet.

        Returns the saved book.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        unused_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        elif not overwrite:
            raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
        else:
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            # Prefer the plural attribute; fall back to the singular one.
            # The metadata class is not visible here, so the fallback stays
            # broad, but no longer swallows KeyboardInterrupt / SystemExit.
            try:
                tag_names = getattr(book_info, field_name)
            except Exception:
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # Authors are objects: sort by last name, display full name.
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if tag_created:
                    tag.name = tag_name
                    tag.sort_key = slughifi(tag_sort_key)
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                unused_base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Walk all descendants breadth-first and attach this book's tag to
        # each of them and to their fragments.
        book_descendants = deque(book.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants.extend(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        html_file = NamedTemporaryFile()
        try:
            html_generated = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if html_generated:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # Always release the temporary file, also when the transform fails.
            html_file.close()

        if html_generated:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            book_themes = []
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, fragment_created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # No usable `themes` on this fragment: leave it untagged.
                    continue
                themes = []
                for theme_name in theme_names:
                    tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if tag_created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes += themes

            book_themes = set(book_themes)
            book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book

    def refresh_tag_counter(self):
        """Recompute, store and return the {tag pk: count} mapping.

        Counts are summed over the children's counters; every tag of
        this book outside the 'book', 'theme' and 'set' categories is
        then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.items():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def tag_counter(self):
        """The {tag pk: count} mapping, computed on first access."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__tag_counter_value().items())

    def refresh_theme_counter(self):
        """Recompute, store and return the {theme tag pk: fragment count} mapping.

        Counts 'theme' tags over all fragments tagged with this book's
        own book tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    @property
    def theme_counter(self):
        """The {theme tag pk: count} mapping, computed on first access."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # Stored keys are converted back to integers.
        return dict((int(k), v) for k, v in self.get__theme_counter_value().items())
444
445
446
class Fragment(models.Model):
    """A piece of a book's text, identified by an anchor, with its own tags."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book's text URL with a '#m<anchor>' suffix."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the cached short HTML for the active language.

        When the per-language '_short_html_<lang>' attribute is empty,
        the snippet is rendered, stored on that attribute and saved.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached:
            return mark_safe(cached)

        rendered = render_to_string('catalogue/fragment_short.html',
            {'fragment': self})
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
476
477
class BookStub(models.Model):
    """A title/author record with an optional public-domain entry year."""
    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair resolved by `permalink`."""
        url_args = [self.slug]
        return ('catalogue.views.book_detail', url_args)

    def in_pd(self):
        """Tell whether the recorded `pd` year is the current year or earlier."""
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """Alias of `title`."""
        return self.title
504
505