a9e39de01a5f91cba2fd11ff5a806db933a711c1
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16 from mutagen import id3
17
18
# Choices for Tag.category, as (stored value, translatable label) pairs.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
28
29
class TagSubcategoryManager(models.Manager):
    """Manager whose base queryset is limited to a single tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        base_queryset = super(TagSubcategoryManager, self).get_query_set()
        return base_queryset.filter(category=self.subcategory)
37
38
class Tag(TagBase):
    """A catalogue tag; ``category`` holds one of the TAG_CATEGORIES values."""
    name = models.CharField(
        _('name'),
        max_length=50,
        db_index=True,
    )
    slug = models.SlugField(
        _('slug'),
        max_length=120,
        unique=True,
        db_index=True,
    )
    sort_key = models.SlugField(
        _('sort key'),
        max_length=120,
        db_index=True,
    )
    category = models.CharField(
        _('category'),
        max_length=50,
        blank=False,
        null=False,
        db_index=True,
        choices=TAG_CATEGORIES,
    )
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'),
        default=False,
        db_index=True,
        help_text=_('Show tag on main page'),
    )

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(
        _('book count'),
        default=0,
        blank=False,
        null=False,
    )
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        description_length = len(self.description)
        return description_length > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves to a URL."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of Tag objects.

        A string is treated as '/'-separated slugs, each looked up with
        ``Tag.objects.get``; any other value is delegated to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        found = []
        for slug in tags.split('/'):
            found.append(Tag.objects.get(slug=slug))
        return found
77
78
def book_upload_path(ext):
    """Build an ``upload_to`` callable for a book file with extension ``ext``.

    The returned callable ignores the uploaded file's own name and always
    answers ``lektura/<book.slug>.<ext>``.
    """
    def get_dynamic_path(book, filename):
        name_parts = (book.slug, ext)
        return 'lektura/%s.%s' % name_parts

    return get_dynamic_path
83
84
class Book(models.Model):
    """Catalogue entry for a book, its downloadable formats and its tags.

    A book may be a part of another book (``parent`` / ``children``);
    ``parent_number`` stores its position among the parent's parts as
    assigned by ``from_xml_file``.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save, so
    # despite its label it holds the time of the last save; auto_now_add may
    # have been intended -- confirm before changing.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def short_html(self):
        """Return the book's short HTML box, rendering and caching it if needed.

        The rendered markup is stored in ``_short_html`` and persisted with
        ``save(reset_short_html=False)`` so the save does not wipe the cache
        it has just filled.
        """
        if len(self._short_html):
            return mark_safe(self._short_html)

        # Shelves, themes and per-book marker tags are left out of the box.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
        downloads = (
            (u'PDF', self.pdf_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloads:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        formats = [mark_safe(link) for link in formats]

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save(reset_short_html=False)
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False, reset_short_html=True):
        """Save the book, refreshing MP3-derived extra information.

        ``reset_short_html`` clears the cached short HTML so that it is
        rendered again on the next ``short_html()`` call.  When an MP3 file is
        attached, its ID3 data is merged into ``extra_info`` and the book is
        saved a second time.

        Returns whatever the parent ``save`` returns.
        """
        if reset_short_html:
            # Reset _short_html during save
            self._short_html = ''

        book = super(Book, self).save(force_insert, force_update)

        if self.mp3_file:
            # get_/set_extra_info_value are not defined in this file;
            # presumably they are provided by JSONField -- verify there.
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row already exists after the first save, so the force
            # flags must not be repeated: a second forced INSERT of the same
            # row would fail.
            book = super(Book, self).save()

        return book

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by ``from_xml_file`` when the book exists and ``overwrite`` is off."""
        pass

    @staticmethod
    def _get_or_create_tag(slug, name, sort_key, category):
        """Fetch the tag with ``slug``; fill in its fields only if it was just created."""
        tag, created = Tag.objects.get_or_create(slug=slug)
        if created:
            tag.name = name
            tag.sort_key = sort_key
            tag.category = category
            tag.save()
        return tag

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book from its XML source.

        ``xml_file`` is either a Django ``File`` or a value accepted by the
        ``file()`` builtin (a path).  Metadata is read with
        ``dcparser.parse``; the book's tags, child books, stored XML/HTML
        files and fragments are then (re)built.

        Raises ``Book.AlreadyExists`` when the book is already present and
        ``overwrite`` is false, and ``Book.DoesNotExist`` when a listed part
        has not been imported yet.

        Returns the saved ``Book``.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are displayed "first names last name" but sorted
                # by last name.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            book_tags.append(Book._get_or_create_tag(
                slughifi(tag_name), tag_name, slughifi(tag_sort_key), category))

        # Marker tag identifying this very book; truncated to the field sizes
        # declared on Tag.
        book_tag_slug = ('l-' + book.slug)[:120]
        book_tag = Book._get_or_create_tag(
            book_tag_slug, book.title[:50], book_tag_slug, 'book')
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(u'Book with slug = "%s" does not exist.' % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Tag the fragments of every descendant with this book's marker tag.
        book_descendants = list(book.children.all())
        while len(book_descendants) > 0:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        opened_xml = None
        if not isinstance(xml_file, File):
            opened_xml = file(xml_file)
            xml_file = File(opened_xml)
        try:
            book.xml_file.save('%s.xml' % book.slug, xml_file, save=False)
        finally:
            # Close only the handle opened here; a File passed in by the
            # caller stays under the caller's control.
            if opened_xml is not None:
                opened_xml.close()

        html_file = NamedTemporaryFile()
        try:
            has_html = bool(html.transform(book.xml_file.path, html_file))
            if has_html:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The fragments below are read from the stored HTML file, so the
            # temporary copy is no longer needed.
            html_file.close()

        book_themes = []
        if has_html:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # The fragment's themes value has no split(); skip tagging it.
                    continue
                themes = []
                for theme_name in theme_names:
                    theme_slug = slughifi(theme_name)
                    themes.append(Book._get_or_create_tag(
                        theme_slug, theme_name, theme_slug, 'theme'))
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes += themes

        # Done regardless of whether HTML was generated, so the shelves
        # remembered above are restored even when the transform fails.
        book.tags = list(book.tags) + list(set(book_themes)) + book_shelves

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves to a URL."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
311
312
class Fragment(models.Model):
    """A taggable piece of a book's text, identified within the book by ``anchor``."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    def short_html(self):
        """Return the fragment's short HTML box, rendering and caching it if needed."""
        if len(self._short_html):
            return mark_safe(self._short_html)

        # Links to the authors of the book this fragment comes from.
        book_authors = []
        for tag in self.book.tags:
            if tag.category != 'author':
                continue
            author_link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
            book_authors.append(mark_safe(author_link))

        context = {'fragment': self, 'book': self.book, 'book_authors': book_authors}
        rendered = render_to_string('catalogue/fragment_short.html', context)
        self._short_html = unicode(rendered)
        self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the URL of the book's text view with this fragment's anchor appended."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')
343