Added some tests for book imports.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
15
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
19
20 from librarian import html, dcparser
21 from mutagen import id3
22
23
# Allowed values for Tag.category, passed as that field's ``choices``.
# Each entry is a (stored value, translatable label) pair; the labels are kept
# as literal _() calls rather than being generated.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
33
34
class TagSubcategoryManager(models.Manager):
    """Manager whose base query set is limited to a single tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``; it is the ``category`` value filtered on."""
        self.subcategory = subcategory
        super(TagSubcategoryManager, self).__init__()

    def get_query_set(self):
        """Return the parent manager's query set filtered by the stored category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
42
43
class Tag(TagBase):
    """A catalogue tag; its ``category`` is one of the TAG_CATEGORIES values."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'), default=False, db_index=True,
        help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    # Year of death; read by alive(), in_pd() and goes_to_pd().
    death = models.IntegerField(_(u'year of death'), blank=True, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves to a URL."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) != 0
    has_description.boolean = True
    has_description.short_description = _('description')

    def alive(self):
        """Tell whether no year of death is recorded."""
        return self.death is None

    def in_pd(self):
        """ tests whether an author is in public domain """
        if self.death is None:
            return False
        return self.goes_to_pd() <= datetime.now().year

    def goes_to_pd(self):
        """ calculates the year of public domain entry for an author """
        if self.death is None:
            return None
        return self.death + 71

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` to a list of Tag objects.

        A string is treated as '/'-separated slugs and each slug is looked up
        with ``Tag.objects.get``; any other value is delegated to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)
        return [Tag.objects.get(slug=slug) for slug in tags.split('/')]
97
98
def book_upload_path(ext, prefix='lektura'):
    """Build an ``upload_to`` callable that stores a file as ``<prefix>/<slug>.<ext>``.

    ``ext`` is the file extension without the leading dot.  ``prefix`` is the
    directory part of the path; it defaults to the previously hard-coded
    'lektura', so existing callers get the same paths as before.

    The returned callable takes ``(book, filename)``; ``filename`` is ignored
    and the name is derived from ``book.slug``.
    """
    def get_dynamic_path(book, filename):
        return '%s/%s.%s' % (prefix, book.slug, ext)
    return get_dynamic_path
104
105
class Book(models.Model):
    """A catalogue book: metadata, per-format files, tags and an optional parent book."""

    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save, so despite
    # the label it holds the last-save time.  Left unchanged because switching
    # to auto_now_add would alter stored data -- confirm the intent.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta when the book exists and overwrite is false."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True):
        """Save the book, optionally clearing cached short HTML first.

        When ``reset_short_html`` is true, every instance attribute whose name
        starts with ``_short_html`` is set to ''.  When an MP3 file is attached,
        its ID3 info is merged into ``extra_info`` and the book is saved again.

        Returns whatever the parent ``save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            cached_keys = [key for key in self.__dict__ if key.startswith('_short_html')]
            for key in cached_keys:
                setattr(self, key, '')

        book = super(Book, self).save(force_insert, force_update)

        if self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row already exists after the first save, so the force flags
            # are not repeated: re-passing force_insert=True would attempt a
            # second INSERT of the same primary key.
            book = super(Book, self).save()

        return book

    @permalink
    def get_absolute_url(self):
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        return self.title

    def short_html(self):
        """Return the short HTML for the active language, rendering it if empty.

        The value is read from and written to the attribute
        ``_short_html_<language code>``.  A freshly rendered value is stored via
        ``save(reset_short_html=False)`` so the save does not wipe it again.
        """
        # NOTE(review): presumably the per-language _short_html_<lang>
        # attributes are added to the model elsewhere -- confirm.
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)

        if short_html:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        downloadable = (
            (self.pdf_file, u'PDF'),
            (self.odt_file, u'ODT'),
            (self.txt_file, u'TXT'),
            (self.mp3_file, u'MP3'),
            (self.ogg_file, u'OGG'),
        )
        for file_field, label in downloadable:
            if file_field:
                formats.append(u'<a href="%s">%s</a>' % (file_field.url, label))

        formats = [mark_safe(link) for link in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Parse metadata from ``xml_file`` and import it via from_text_and_meta.

        ``xml_file`` is wrapped in a Django ``File`` unless it already is one,
        and is closed when the import finishes or fails.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(xml_file)

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        The slug is the last path segment of ``book_info.url``.  Tags for kind,
        genre, author and epoch plus a per-book ``l-<slug>`` tag are created as
        needed, child books listed in ``book_info.parts`` are attached, the XML
        is stored and transformed to HTML, and themed fragments are extracted.

        Raises ``Book.AlreadyExists`` when the book exists and ``overwrite`` is
        false, and ``Book.DoesNotExist`` when a listed part is missing.
        Returns the saved book.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors sort by last name and display as "first names last name".
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, tag_created = Tag.objects.get_or_create(slug=slughifi(tag_name))
            if tag_created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.category = category
                tag.save()
            book_tags.append(tag)

        # Slices keep the values within the Tag field lengths (slug 120, name 50).
        book_tag, tag_created = Tag.objects.get_or_create(slug=('l-' + book.slug)[:120])
        if tag_created:
            book_tag.name = book.title[:50]
            book_tag.sort_key = ('l-' + book.slug)[:120]
            book_tag.category = 'book'
            book_tag.save()
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Add this book's tag to the fragments of every descendant book.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book_themes = set()
        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    markup = MarkupString(text)
                    short_text = ''
                    if len(markup) > 240:
                        short_text = unicode(markup[:160])
                    new_fragment, fragment_created = Fragment.objects.get_or_create(
                        anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})
                    if not fragment_created:
                        # ``defaults`` only apply on creation; refresh an
                        # existing fragment so a re-import does not keep stale
                        # text or stale cached short HTML.
                        new_fragment.text = text
                        new_fragment.short_text = short_text
                        new_fragment._short_html = ''
                        new_fragment.save()

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # ``themes`` is not a string here; nothing to tag.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, tag_created = Tag.objects.get_or_create(slug=slughifi(theme_name))
                        if tag_created:
                            tag.name = theme_name
                            tag.sort_key = slughifi(theme_name)
                            tag.category = 'theme'
                            tag.save()
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                    book_themes.update(themes)
        finally:
            # Release the temporary HTML file even if transformation or
            # fragment extraction fails.
            html_file.close()

        # Restore shelves (and add collected themes) whether or not the HTML
        # transform succeeded, so an overwrite never drops the saved shelves.
        book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book
347
348
class Fragment(models.Model):
    """A passage of a book, identified within that book by ``anchor``."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book's text URL followed by ``#m<anchor>``."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    def short_html(self):
        """Return the short HTML for the active language, rendering it if empty.

        A freshly rendered value is stored on the ``_short_html_<language code>``
        attribute and the fragment is saved before the value is returned.
        """
        key = '_short_html_%s' % get_language()
        cached = getattr(self, key)
        if cached and len(cached):
            return mark_safe(cached)

        # Link each of the book's author tags.
        book_authors = []
        for tag in self.book.tags:
            if tag.category == 'author':
                link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
                book_authors.append(mark_safe(link))

        context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
        rendered = render_to_string('catalogue/fragment_short.html', context)
        setattr(self, key, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, key))
381
382
class BookStub(models.Model):
    """A title/author record with an optional public-domain entry year."""

    title = models.CharField(_('title'), max_length=120)
    author = models.CharField(_('author'), max_length=120)
    # Year compared against the current year by in_pd().
    pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    translator = models.TextField(_('translator'), blank=True)
    translator_death = models.TextField(_('year of translator\'s death'), blank=True)

    class Meta:
        ordering = ('title',)
        verbose_name = _('book stub')
        verbose_name_plural = _('book stubs')

    def __unicode__(self):
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves to a URL."""
        view_name = 'catalogue.views.book_detail'
        return (view_name, [self.slug])

    def in_pd(self):
        """Tell whether ``pd`` is set and not later than the current year."""
        if self.pd is None:
            return False
        return self.pd <= datetime.now().year

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title
409
410