1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14 from datetime import datetime
16 from newtagging.models import TagBase
17 from newtagging import managers
18 from catalogue.fields import JSONField
20 from librarian import html, dcparser
21 from mutagen import id3
25 ('author', _('author')),
26 ('epoch', _('epoch')),
28 ('genre', _('genre')),
29 ('theme', _('theme')),
class TagSubcategoryManager(models.Manager):
    """Manager whose queryset is limited to a single category.

    The category is given at construction time and applied as a
    ``category=<subcategory>`` filter in ``get_query_set``.
    """

    def __init__(self, subcategory):
        """Run the stock manager setup, then remember ``subcategory``."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to the category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
45 name = models.CharField(_('name'), max_length=50, db_index=True)
46 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
47 sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
48 category = models.CharField(_('category'), max_length=50, blank=False, null=False,
49 db_index=True, choices=TAG_CATEGORIES)
50 description = models.TextField(_('description'), blank=True)
51 main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
53 user = models.ForeignKey(User, blank=True, null=True)
54 book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
55 death = models.IntegerField(_(u'year of death'), blank=True, null=True)
56 gazeta_link = models.CharField(blank=True, max_length=240)
57 wiki_link = models.CharField(blank=True, max_length=240)
60 ordering = ('sort_key',)
61 verbose_name = _('tag')
62 verbose_name_plural = _('tags')
64 def __unicode__(self):
68 return "Tag(slug=%r)" % self.slug
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this tag.

    NOTE(review): presumably consumed by the ``permalink`` decorator
    imported at the top of the file -- the decorator line is not visible
    in this excerpt, confirm before relying on it.
    """
    view_name = 'catalogue.views.tagged_object_list'
    view_args = [self.slug]
    return (view_name, view_args)
def has_description(self):
    """Tell whether this object carries a non-empty description.

    Truthiness is used instead of ``len(...) > 0`` so that a ``None``
    description is reported as ``False`` rather than raising
    ``TypeError``; empty and non-empty strings behave as before.
    """
    return bool(self.description)
# Function attributes: a translated label and a boolean marker.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_description.short_description = _('description')
has_description.boolean = True
80 return self.death is None
83 """ tests whether an author is in public domain """
84 return self.death is not None and self.goes_to_pd() <= datetime.now().year
87 """ calculates the year of public domain entry for an author """
88 return self.death + 71 if self.death is not None else None
def get_tag_list(tags):
    """Resolve ``tags`` into a list of tag objects.

    A string is treated as a ``/``-separated sequence of slugs, each of
    which is looked up with ``Tag.objects.get``; any other value is handed
    to ``TagBase.get_tag_list`` unchanged.
    """
    if not isinstance(tags, basestring):
        return TagBase.get_tag_list(tags)
    slug_parts = tags.split('/')
    return [Tag.objects.get(slug=part) for part in slug_parts]
def book_upload_path(ext, prefix='lektura'):
    """Build an upload-path callable for a book file of one format.

    ``ext`` is the file extension (without the dot) and ``prefix`` is the
    directory the path starts with.  ``prefix`` defaults to the value that
    used to be hard-coded here, so existing single-argument calls produce
    exactly the same paths as before.

    The returned callable takes ``(book, filename)`` and returns
    ``'<prefix>/<book.slug>.<ext>'``.  The ``filename`` argument is not
    used: the resulting name depends only on the book's slug.
    """
    def get_dynamic_path(book, filename):
        return '%s/%s.%s' % (prefix, book.slug, ext)
    return get_dynamic_path
106 class Book(models.Model):
107 title = models.CharField(_('title'), max_length=120)
108 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
109 description = models.TextField(_('description'), blank=True)
110 created_at = models.DateTimeField(_('creation date'), auto_now=True)
111 _short_html = models.TextField(_('short HTML'), editable=False)
112 parent_number = models.IntegerField(_('parent number'), default=0)
113 extra_info = JSONField(_('extra information'))
114 gazeta_link = models.CharField(blank=True, max_length=240)
115 wiki_link = models.CharField(blank=True, max_length=240)
119 xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
120 html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
121 pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
122 epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
123 odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
124 txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
125 mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
126 ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
128 parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
130 objects = models.Manager()
131 tagged = managers.ModelTaggedItemManager(Tag)
132 tags = managers.TagDescriptor(Tag)
134 _tag_counter = JSONField(editable=False, default='')
135 _theme_counter = JSONField(editable=False, default='')
137 class AlreadyExists(Exception):
141 ordering = ('title',)
142 verbose_name = _('book')
143 verbose_name_plural = _('books')
145 def __unicode__(self):
148 def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
150 # Reset _short_html during save
151 for key in filter(lambda x: x.startswith('_short_html'), self.__dict__):
152 self.__setattr__(key, '')
154 book = super(Book, self).save(force_insert, force_update)
156 if refresh_mp3 and self.mp3_file:
157 print self.mp3_file, self.mp3_file.path
158 extra_info = self.get_extra_info_value()
159 extra_info.update(self.get_mp3_info())
160 self.set_extra_info_value(extra_info)
161 book = super(Book, self).save(force_insert, force_update)
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this book.

    NOTE(review): presumably consumed by the ``permalink`` decorator
    imported at the top of the file -- the decorator line is not visible
    in this excerpt, confirm before relying on it.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
174 slug = ('l-' + self.slug)[:120]
175 book_tag, created = Tag.objects.get_or_create(slug=slug)
177 book_tag.name = self.title[:50]
178 book_tag.sort_key = slug
179 book_tag.category = 'book'
183 def short_html(self):
184 key = '_short_html_%s' % get_language()
185 short_html = getattr(self, key)
187 if short_html and len(short_html):
188 return mark_safe(short_html)
190 tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
191 tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
195 formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
197 formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
199 formats.append(u'<a href="%s">EPUB</a>' % self.epub_file.url)
201 formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
203 formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
205 formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
207 formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
209 formats = [mark_safe(format) for format in formats]
211 setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
212 {'book': self, 'tags': tags, 'formats': formats})))
213 self.save(reset_short_html=False)
214 return mark_safe(getattr(self, key))
def get_mp3_info(self):
    """Read artist and director names from the MP3 file's ID3 tags.

    Returns a dict with the keys ``'artist_name'`` (built from the
    ``TPE1`` frames) and ``'director_name'`` (built from the ``TPE3``
    frames).  Each value is a single comma-separated string; it is empty
    when the file has no frame of that kind.
    """
    audio = id3.ID3(self.mp3_file.path)

    def joined_text(frame_id):
        # A file may hold several frames of one kind and each frame
        # several strings; flatten all of them into one string.
        frames = audio.getall(frame_id)
        return ', '.join(', '.join(frame.text) for frame in frames)

    performers = joined_text('TPE1')
    directors = joined_text('TPE3')
    return {'artist_name': performers, 'director_name': directors}
def has_description(self):
    """Tell whether this book carries a non-empty description.

    Truthiness is used instead of ``len(...) > 0`` so that a ``None``
    description is reported as ``False`` rather than raising
    ``TypeError``; empty and non-empty strings behave as before.
    """
    return bool(self.description)
# Function attributes: a translated label and a boolean marker.
# NOTE(review): presumably read by the Django admin change list -- confirm.
has_description.short_description = _('description')
has_description.boolean = True
def has_pdf_file(self):
    """Report whether ``pdf_file`` is set (i.e. evaluates as true)."""
    if self.pdf_file:
        return True
    return False
# Function attributes: boolean marker and the fixed label 'PDF'.
has_pdf_file.boolean = True
has_pdf_file.short_description = 'PDF'
def has_epub_file(self):
    """Report whether ``epub_file`` is set (i.e. evaluates as true)."""
    if self.epub_file:
        return True
    return False
# Function attributes: boolean marker and the fixed label 'EPUB'.
has_epub_file.boolean = True
has_epub_file.short_description = 'EPUB'
def has_odt_file(self):
    """Report whether ``odt_file`` is set (i.e. evaluates as true)."""
    if self.odt_file:
        return True
    return False
# Function attributes: boolean marker and the fixed label 'ODT'.
has_odt_file.boolean = True
has_odt_file.short_description = 'ODT'
def has_html_file(self):
    """Report whether ``html_file`` is set (i.e. evaluates as true)."""
    if self.html_file:
        return True
    return False
# Function attributes: boolean marker and the fixed label 'HTML'.
has_html_file.boolean = True
has_html_file.short_description = 'HTML'
250 def from_xml_file(cls, xml_file, overwrite=False):
251 # use librarian to parse meta-data
252 book_info = dcparser.parse(xml_file)
254 if not isinstance(xml_file, File):
255 xml_file = File(xml_file)
258 return cls.from_text_and_meta(xml_file, book_info, overwrite)
263 def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
264 from tempfile import NamedTemporaryFile
265 from slughifi import slughifi
266 from markupstring import MarkupString
269 book_base, book_slug = book_info.url.rsplit('/', 1)
270 book, created = Book.objects.get_or_create(slug=book_slug)
276 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
277 # Save shelves for this book
278 book_shelves = list(book.tags.filter(category='set'))
280 book.title = book_info.title
281 book.set_extra_info_value(book_info.to_dict())
282 book._short_html = ''
286 for category in ('kind', 'genre', 'author', 'epoch'):
287 tag_name = getattr(book_info, category)
288 tag_sort_key = tag_name
289 if category == 'author':
290 tag_sort_key = tag_name.last_name
291 tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
292 tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
295 tag.sort_key = slughifi(tag_sort_key)
296 tag.category = category
298 book_tags.append(tag)
300 book.tags = book_tags
302 book_tag = book.book_tag()
304 if hasattr(book_info, 'parts'):
305 for n, part_url in enumerate(book_info.parts):
306 base, slug = part_url.rsplit('/', 1)
308 child_book = Book.objects.get(slug=slug)
309 child_book.parent = book
310 child_book.parent_number = n
312 except Book.DoesNotExist, e:
313 raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
315 book_descendants = list(book.children.all())
316 while len(book_descendants) > 0:
317 child_book = book_descendants.pop(0)
318 child_book.tags = list(child_book.tags) + [book_tag]
320 for fragment in child_book.fragments.all():
321 fragment.tags = set(list(fragment.tags) + [book_tag])
322 book_descendants += list(child_book.children.all())
324 # Save XML and HTML files
325 book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
327 html_file = NamedTemporaryFile()
328 if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
329 book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
332 closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
334 for fragment in closed_fragments.values():
335 text = fragment.to_string()
337 if (len(MarkupString(text)) > 240):
338 short_text = unicode(MarkupString(text)[:160])
339 new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
340 defaults={'text': text, 'short_text': short_text})
343 theme_names = [s.strip() for s in fragment.themes.split(',')]
344 except AttributeError:
347 for theme_name in theme_names:
348 tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
350 tag.name = theme_name
351 tag.sort_key = slughifi(theme_name)
352 tag.category = 'theme'
356 new_fragment.tags = set(list(book.tags) + themes + [book_tag])
357 book_themes += themes
359 book_themes = set(book_themes)
360 book.tags = list(book.tags) + list(book_themes) + book_shelves
366 def refresh_tag_counter(self):
368 for child in self.children.all().order_by():
369 for tag_pk, value in child.tag_counter.iteritems():
370 tags[tag_pk] = tags.get(tag_pk, 0) + value
371 for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
373 self.set__tag_counter_value(tags)
374 self.save(reset_short_html=False, refresh_mp3=False)
def tag_counter(self):
    """Return the stored tag counter as a dict with integer keys.

    An empty-string ``_tag_counter`` is treated as "not computed yet", in
    which case the result of ``refresh_tag_counter()`` is returned.
    Otherwise the mapping from ``get__tag_counter_value()`` is returned
    with every key converted to ``int``.
    NOTE(review): the keys presumably become strings when the value is
    stored by the JSON field -- confirm against ``JSONField``.
    """
    if self._tag_counter == '':
        return self.refresh_tag_counter()
    stored = self.get__tag_counter_value()
    # items() yields the same pairs as the Python 2-only iteritems() and
    # keeps this working on interpreters where iteritems() does not exist.
    return dict((int(pk), count) for pk, count in stored.items())
384 def refresh_theme_counter(self):
386 for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
387 for tag in fragment.tags.filter(category='theme').order_by():
388 tags[tag.pk] = tags.get(tag.pk, 0) + 1
389 self.set__theme_counter_value(tags)
390 self.save(reset_short_html=False, refresh_mp3=False)
def theme_counter(self):
    """Return the stored theme counter as a dict with integer keys.

    An empty-string ``_theme_counter`` is treated as "not computed yet",
    in which case the result of ``refresh_theme_counter()`` is returned.
    Otherwise the mapping from ``get__theme_counter_value()`` is returned
    with every key converted to ``int``.
    NOTE(review): the keys presumably become strings when the value is
    stored by the JSON field -- confirm against ``JSONField``.
    """
    if self._theme_counter == '':
        return self.refresh_theme_counter()
    stored = self.get__theme_counter_value()
    # items() yields the same pairs as the Python 2-only iteritems().
    # The second, unreachable ``return`` that used to follow was dropped.
    return dict((int(pk), count) for pk, count in stored.items())
402 class Fragment(models.Model):
403 text = models.TextField()
404 short_text = models.TextField(editable=False)
405 _short_html = models.TextField(editable=False)
406 anchor = models.CharField(max_length=120)
407 book = models.ForeignKey(Book, related_name='fragments')
409 objects = models.Manager()
410 tagged = managers.ModelTaggedItemManager(Tag)
411 tags = managers.TagDescriptor(Tag)
414 ordering = ('book', 'anchor',)
415 verbose_name = _('fragment')
416 verbose_name_plural = _('fragments')
def get_absolute_url(self):
    """Return the URL of the book text with this fragment's anchor.

    The URL is the reversed ``book_text`` route for the owning book's
    slug, followed by ``#m`` and the fragment's ``anchor``.
    """
    text_url = reverse('book_text', kwargs={'slug': self.book.slug})
    return '%s#m%s' % (text_url, self.anchor)
421 def short_html(self):
422 key = '_short_html_%s' % get_language()
423 short_html = getattr(self, key)
424 if short_html and len(short_html):
425 return mark_safe(short_html)
427 book_authors = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))
428 for tag in self.book.tags if tag.category == 'author']
430 setattr(self, key, unicode(render_to_string('catalogue/fragment_short.html',
431 {'fragment': self, 'book': self.book, 'book_authors': book_authors})))
433 return mark_safe(getattr(self, key))
436 class BookStub(models.Model):
437 title = models.CharField(_('title'), max_length=120)
438 author = models.CharField(_('author'), max_length=120)
439 pd = models.IntegerField(_('goes to public domain'), null=True, blank=True)
440 slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
441 translator = models.TextField(_('translator'), blank=True)
442 translator_death = models.TextField(_('year of translator\'s death'), blank=True)
445 ordering = ('title',)
446 verbose_name = _('book stub')
447 verbose_name_plural = _('book stubs')
449 def __unicode__(self):
def get_absolute_url(self):
    """Return the ``(view name, positional args)`` pair for this stub.

    It names the same ``book_detail`` view that ``Book`` uses.
    NOTE(review): presumably consumed by the ``permalink`` decorator
    imported at the top of the file -- the decorator line is not visible
    in this excerpt, confirm before relying on it.
    """
    view_name = 'catalogue.views.book_detail'
    view_args = [self.slug]
    return (view_name, view_args)
457 return self.pd is not None and self.pd <= datetime.now().year