Added extraction of MP3 info when saving Book models.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16 from mutagen import id3
17
18
# (stored value, translatable label) pairs used as the ``choices`` of
# ``Tag.category``.  Each label is written as a literal ``_()`` call so
# that it stays visible to translation-string extraction.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
28
29
class TagSubcategoryManager(models.Manager):
    """Manager whose default query set is limited to one tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's query set filtered by category."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
37
38
class Tag(TagBase):
    """A label that can be attached to books and fragments.

    The ``category`` field (one of ``TAG_CATEGORIES``) says what the tag
    stands for; ``slug`` is unique across all categories.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    # Default ordering key (see Meta.ordering).
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    # Optional owner of the tag.
    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    def has_description(self):
        """Return True when the description is not empty."""
        return len(self.description) != 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` reverses."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as a '/'-separated sequence of slugs and each
        slug is looked up with ``Tag.objects.get``; any other value is
        handed over to ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        found = []
        for slug in tags.split('/'):
            found.append(Tag.objects.get(slug=slug))
        return found
75
76
def book_upload_path(ext):
    """Build an ``upload_to`` callable for book files with extension ``ext``.

    The returned function maps a book to 'lektura/<book slug>.<ext>'.
    """
    def get_dynamic_path(book, filename):
        # The uploaded file's own name is ignored on purpose: the stored
        # name is derived from the book slug only.
        name_parts = (book.slug, ext)
        return 'lektura/%s.%s' % name_parts

    return get_dynamic_path
81
82
class Book(models.Model):
    """A catalogue entry together with the files published for it.

    Books form a tree through ``parent``/``children``; ``parent_number``
    is the position of a book among its parent's parts.  Metadata that has
    no column of its own is kept in the JSON ``extra_info`` field.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # auto_now_add stamps the row once, on insert.  auto_now would overwrite
    # the "creation date" on every save(), including the save() that
    # short_html() performs just to cache its output.
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def short_html(self):
        """Return the short HTML box for this book, rendering it on first use.

        The rendered markup is stored in ``_short_html`` and the book is
        saved, so later calls return the cached value.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        # Shelves, themes and the book's own tag are not listed in the box.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
        if self.pdf_file:
            formats.append(u'<a href="%s">Plik PDF</a>' % self.pdf_file.url)
        if self.odt_file:
            formats.append(u'<a href="%s">Plik ODT</a>' % self.odt_file.url)
        if self.txt_file:
            formats.append(u'<a href="%s">Plik TXT</a>' % self.txt_file.url)

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save()
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False):
        """Save the book and refresh MP3-derived entries of ``extra_info``.

        The row (and with it any pending file) is saved first, because the
        ID3 tags are read from ``mp3_file.path`` and a freshly assigned
        file may not exist at that path before the model is saved.  If the
        tags change ``extra_info``, the book is saved a second time.

        Returns whatever the parent ``save()`` returned for the first save.
        """
        result = super(Book, self).save(force_insert, force_update)

        if self.mp3_file:
            current_info = self.get_extra_info_value()
            refreshed_info = dict(current_info)
            refreshed_info.update(self.get_mp3_info())
            if refreshed_info != current_info:
                self.set_extra_info_value(refreshed_info)
                # The row exists by now, so the caller's force_insert must
                # not be repeated here.
                super(Book, self).save()

        return result

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags.

        The texts of all TPE1 frames are joined into ``artist_name`` and
        those of all TPE3 frames into ``director_name``.  A file without an
        ID3 header yields empty strings for both, the same result as a tag
        that has neither frame.
        """
        try:
            audio = id3.ID3(self.mp3_file.path)
        except id3.ID3NoHeaderError:
            # An untagged MP3 is valid input and must not block saving.
            return {'artist_name': '', 'director_name': ''}
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Return True when the description is not empty."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Return True when a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Return True when an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Return True when an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by from_xml_file() for an existing book without overwrite."""
        pass

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book from the XML file at path ``xml_file``.

        Metadata parsed by ``dcparser`` sets the title, ``extra_info`` and
        the kind/genre/author/epoch tags; the book also gets its own
        'l-<slug>' tag of category 'book'.  The XML is stored, converted to
        HTML, and the closed fragments found in the HTML become
        ``Fragment`` rows tagged with their themes.  Shelf ('set') tags the
        book had before the import are kept.

        Raises ``Book.AlreadyExists`` when a book with the same slug exists
        and ``overwrite`` is false.  Returns the saved book.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book; assigning book.tags below would
            # otherwise drop them.
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are displayed as "first names last name" but
                # sorted by last name.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
            if created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.category = category
                tag.save()
            book_tags.append(tag)

        # Slices keep the values within the Tag field lengths (120 / 50).
        book_tag, created = Tag.objects.get_or_create(slug=('l-' + book.slug)[:120])
        if created:
            book_tag.name = book.title[:50]
            book_tag.sort_key = ('l-' + book.slug)[:120]
            book_tag.category = 'book'
            book_tag.save()
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                child_book = Book.objects.get(slug=slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Walk all descendants breadth-first and add this book's tag to
        # each of their fragments.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        xml_source = open(xml_file)
        try:
            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
        finally:
            xml_source.close()

        book_themes = set()
        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                # The book's tags do not change inside the loop.
                base_tags = list(book.tags)
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    markup = MarkupString(text)
                    short_text = ''
                    if len(markup) > 240:
                        short_text = unicode(markup[:160])
                    new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # No usable ``themes`` on this fragment: leave it
                        # untagged.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
                        if created:
                            tag.name = theme_name
                            tag.sort_key = slughifi(theme_name)
                            tag.category = 'theme'
                            tag.save()
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(base_tags + themes + [book_tag])
                    book_themes.update(themes)
        finally:
            # Closing also removes the temporary file.
            html_file.close()

        # Runs even when the HTML transform failed, so that the shelves
        # saved above are always restored.
        book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` reverses."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
287
288
class Fragment(models.Model):
    """A passage of a book, addressed by an anchor in the book's text."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def short_html(self):
        """Return the short HTML box for this fragment, rendering it once.

        The rendered markup is stored in ``_short_html`` and the fragment
        is saved, so later calls return the cached value.
        """
        if len(self._short_html):
            return mark_safe(self._short_html)

        # Links to the tags of the parent book whose category is 'author'.
        author_links = []
        for tag in self.book.tags:
            if tag.category == 'author':
                author_links.append(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))

        context = {'fragment': self, 'book': self.book, 'book_authors': author_links}
        rendered = render_to_string('catalogue/fragment_short.html', context)
        self._short_html = unicode(rendered)
        self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the URL of the book text followed by '#m<anchor>'."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)
319