ac9a14cb1470dd6126f1b6f4d09ab33cd8bf4933
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.core.urlresolvers import reverse
13
14 from newtagging.models import TagBase
15 from newtagging import managers
16 from catalogue.fields import JSONField
17
18 from librarian import html, dcparser
19 from mutagen import id3
20
21
# Allowed values for ``Tag.category`` (passed as ``choices`` on that field),
# given as (stored value, lazily translated label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
31
32
class TagSubcategoryManager(models.Manager):
    """Manager whose default queryset is restricted to one tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the stock queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
40
41
class Tag(TagBase):
    """A catalogue tag; its ``category`` is one of ``TAG_CATEGORIES``."""
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    def has_description(self):
        """Tell whether the description holds any text."""
        description_length = len(self.description)
        return description_length > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as '/'-separated slugs and each slug is looked up
        with ``Tag.objects.get``; any other value is handed to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        found = []
        for slug in tags.split('/'):
            found.append(Tag.objects.get(slug=slug))
        return found
80
81
def book_upload_path(ext):
    """Build an ``upload_to`` callable for files with the extension ``ext``.

    The returned function takes a book and the uploaded file name; the file
    name is ignored and the path is derived from the book's ``slug``.
    """
    def get_dynamic_path(book, filename):
        path_parts = (book.slug, ext)
        return 'lektura/%s.%s' % path_parts

    return get_dynamic_path
86
87
class Book(models.Model):
    """A book in the catalogue together with its downloadable files.

    Books can be nested: ``parent`` points at the containing book and
    ``parent_number`` is the position among the parent's parts, both set by
    ``from_xml_file``.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save, so
    # despite its label it holds the last save time -- confirm whether
    # auto_now_add was intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    # Cached output of short_html(); emptied by save() unless told otherwise.
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def short_html(self):
        """Return the short HTML box for this book, rendering it if needed.

        The rendered markup is cached in ``_short_html`` and persisted with
        ``save(reset_short_html=False)`` so the fresh cache is not wiped.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
        # Downloadable formats, in the order they are displayed.
        downloads = (
            (u'PDF', self.pdf_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloads:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        formats = [mark_safe(link) for link in formats]

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save(reset_short_html=False)
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False, reset_short_html=True):
        """Save the book, refreshing derived data.

        ``reset_short_html`` empties the cached short HTML before saving.
        When an MP3 file is attached, its ID3 data is merged into
        ``extra_info`` after the first save and the book is saved again.
        Returns whatever the parent ``save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            self._short_html = ''

        book = super(Book, self).save(force_insert, force_update)

        if self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row exists after the first save; forcing an INSERT again
            # would fail, so force_insert is not passed on.
            book = super(Book, self).save(force_insert=False, force_update=force_update)

        return book

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags.

        Reads the TPE1 and TPE3 frames of ``mp3_file`` and returns a dict
        with the keys ``artist_name`` and ``director_name``; multiple values
        are joined with ', '.
        """
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the description holds any text."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by ``from_xml_file`` when the book exists and overwriting is off."""
        pass

    @staticmethod
    def _get_or_create_tag(slug, name, sort_key, category):
        """Return the tag with ``slug``, creating it with the given values if missing.

        An existing tag is returned unchanged, whatever its current fields.
        """
        tag, created = Tag.objects.get_or_create(slug=slug, defaults={
            'name': name,
            'sort_key': sort_key,
            'category': category,
        })
        return tag

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book from its XML source.

        ``xml_file`` is either a Django ``File`` or a value passed to
        ``open()``. The book's slug is the last path segment of the URL in the
        parsed metadata. Tags for kind, genre, author and epoch plus a
        per-book tag are assigned, listed parts are attached as children, the
        XML is stored, HTML is generated, and themed fragments are extracted
        from the HTML.

        Raises ``Book.AlreadyExists`` if the book exists and ``overwrite`` is
        false, and ``Book.DoesNotExist`` if a listed part is missing.
        Returns the saved book.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but displayed in full.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            book_tags.append(Book._get_or_create_tag(
                slughifi(tag_name), tag_name, slughifi(tag_sort_key), category))

        # Tag identifying the book itself; truncated to fit the field sizes.
        book_tag_slug = ('l-' + book.slug)[:120]
        book_tag = Book._get_or_create_tag(
            book_tag_slug, book.title[:50], book_tag_slug, 'book')
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(u'Book with slug = "%s" does not exist.' % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Add this book's tag to the fragments of every descendant.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        opened_here = not isinstance(xml_file, File)
        if opened_here:
            xml_file = File(open(xml_file))
        try:
            book.xml_file.save('%s.xml' % book.slug, xml_file, save=False)
        finally:
            # Only close what was opened here; a caller's File stays theirs.
            if opened_here:
                xml_file.close()

        html_file = NamedTemporaryFile()
        try:
            transformed = html.transform(book.xml_file.path, html_file)
            if transformed:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # Closing the temporary file also removes it.
            html_file.close()

        book_themes = []
        if transformed:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                short_text = ''
                markup = MarkupString(text)
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                # NOTE(review): an existing fragment keeps its stored text on
                # re-import -- confirm whether it should be refreshed.
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # ``themes`` is not a string; leave the fragment untagged.
                    continue
                themes = []
                for theme_name in theme_names:
                    theme_slug = slughifi(theme_name)
                    themes.append(Book._get_or_create_tag(
                        theme_slug, theme_name, theme_slug, 'theme'))
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes += themes

        # Assigning book.tags above dropped the shelves, so put them back
        # along with the themes, even when no HTML could be generated.
        book.tags = list(book.tags) + list(set(book_themes)) + book_shelves

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
314
315
class Fragment(models.Model):
    """A piece of a book's text, located in the book by ``anchor``."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    def short_html(self):
        """Return the short HTML box for this fragment.

        On first use the box is rendered from
        ``catalogue/fragment_short.html``, stored in ``_short_html`` and
        saved; later calls reuse the stored markup.
        """
        if not len(self._short_html):
            # Links to every author tag of the owning book.
            book_authors = []
            for tag in self.book.tags:
                if tag.category == 'author':
                    link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
                    book_authors.append(mark_safe(link))

            context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
            rendered = render_to_string('catalogue/fragment_short.html', context)
            self._short_html = unicode(rendered)
            self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the book's text URL with this fragment's anchor appended."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')
346