Pozamykanie tagów <div> i <span>, które nie mogą być standalone wg WebKit.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16 from mutagen import id3
17
18
# Tag categories as (stored value, translatable label) pairs; used as the
# ``choices`` of ``Tag.category``. The labels are spelled out as literal
# ``_('...')`` calls rather than generated in a loop.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
28
29
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets are restricted to a single tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the category value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default query set narrowed to ``self.subcategory``."""
        base_query_set = super(TagSubcategoryManager, self).get_query_set()
        return base_query_set.filter(category=self.subcategory)
37
38
class Tag(TagBase):
    """A catalogue tag belonging to one of ``TAG_CATEGORIES``."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'), default=False, db_index=True,
        help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(
        _('book count'), default=0, blank=False, null=False)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments for this tag's object list."""
        return ('catalogue.views.tagged_object_list', [self.slug])

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) != 0
    # Display options for this method (label and boolean rendering).
    has_description.short_description = _('description')
    has_description.boolean = True

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as '/'-separated slugs, each looked up with
        ``Tag.objects.get``; any other value is handed to
        ``TagBase.get_tag_list`` unchanged.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)
        return [Tag.objects.get(slug=slug) for slug in tags.split('/')]
77
78
def book_upload_path(ext):
    """Build an ``upload_to`` callable for book files with extension ``ext``.

    The returned callable takes the book and the uploaded file name and
    returns ``lektura/<book.slug>.<ext>``; the uploaded file name itself is
    ignored.
    """
    def get_dynamic_path(book, filename):
        path_parts = (book.slug, ext)
        return 'lektura/%s.%s' % path_parts
    return get_dynamic_path
83
84
class Book(models.Model):
    """A book in the catalogue, with its metadata and downloadable formats.

    Books can be nested: ``parent`` points at the containing book and
    ``parent_number`` is the position assigned to a part when its parent is
    imported by ``from_xml_file``.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): ``auto_now=True`` refreshes this value on every save(), so
    # it acts as a last-modified stamp despite the "creation date" label --
    # confirm whether ``auto_now_add=True`` was intended.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    # Cached output of short_html(); an empty value means "render again".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def short_html(self):
        """Return the book's short HTML snippet, rendering it on first use.

        The snippet is rendered from ``catalogue/book_short.html``, stored in
        ``_short_html`` and persisted with ``save()``; later calls return the
        stored value.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        # Sets, themes and per-book marker tags are left out of the listing.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))
                for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>'
                           % reverse('book_text', kwargs={'slug': self.slug}))
        downloads = (
            (self.pdf_file, u'PDF'),
            (self.odt_file, u'ODT'),
            (self.txt_file, u'TXT'),
            (self.mp3_file, u'MP3'),
            (self.ogg_file, u'OGG'),
        )
        for file_field, label in downloads:
            # ``.url`` is only read for files that are actually attached.
            if file_field:
                formats.append(u'<a href="%s">%s</a>' % (file_field.url, label))
        formats = [mark_safe(link) for link in formats]

        self._short_html = unicode(render_to_string(
            'catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save()
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False):
        """Save the book, first merging MP3 tag data into ``extra_info``.

        When an MP3 file is attached, the values returned by
        ``get_mp3_info`` are merged into the extra information on every
        save.
        """
        if self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
        return super(Book, self).save(force_insert, force_update)

    def get_mp3_info(self):
        """Retrieve artist and director names from the audio ID3 tags.

        Returns a dict with ``artist_name`` (from TPE1 frames) and
        ``director_name`` (from TPE3 frames), each a comma-separated string.
        """
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the description is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by ``from_xml_file`` when the book exists and ``overwrite`` is false."""
        pass

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book from its XML source and return it.

        ``xml_file`` is either a Django ``File`` or a value accepted by
        ``open()``. The book's slug is the last path segment of the URL found
        in the parsed metadata. The import sets the title and extra
        information, (re)assigns kind/genre/author/epoch tags and a per-book
        ``l-<slug>`` tag, attaches the parts listed in the metadata as
        children, stores the XML, generates the HTML and extracts themed
        fragments from it.

        Raises:
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
            Book.DoesNotExist: a part listed in the metadata is not in the
                database.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        _base_url, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Remember the shelves ('set' tags); the tag list is replaced
            # below and the shelves are put back at the end.
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''  # force short_html() to render again
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors sort by last name but display as "first last".
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            # ``defaults`` only applies when the tag is created, so existing
            # tags are left untouched and new ones are saved fully populated.
            tag, created = Tag.objects.get_or_create(
                slug=slughifi(tag_name),
                defaults={
                    'name': tag_name,
                    'sort_key': slughifi(tag_sort_key),
                    'category': category,
                })
            book_tags.append(tag)

        # Per-book marker tag; values are cut to the Tag field lengths.
        book_tag_slug = ('l-' + book.slug)[:120]
        book_tag, created = Tag.objects.get_or_create(
            slug=book_tag_slug,
            defaults={
                'name': book.title[:50],
                'sort_key': book_tag_slug,
                'category': 'book',
            })
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                _base_url, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                except Book.DoesNotExist:
                    # Re-raise with a message naming the missing part.
                    raise Book.DoesNotExist(u'Book with slug = "%s" does not exist.' % slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Tag the fragments of every descendant with this book's marker tag.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        opened_xml = None
        if not isinstance(xml_file, File):
            opened_xml = open(xml_file)
            xml_file = File(opened_xml)
        try:
            book.xml_file.save('%s.xml' % book.slug, xml_file, save=False)
        finally:
            # Only close the handle opened here; a File passed in by the
            # caller stays under the caller's control.
            if opened_xml is not None:
                opened_xml.close()

        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                book_themes = []
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    short_text = ''
                    if len(MarkupString(text)) > 240:
                        short_text = unicode(MarkupString(text)[:160])
                    # NOTE(review): an already existing fragment keeps its
                    # stored text here, since ``defaults`` is used only on
                    # creation -- confirm this is intended for re-imports.
                    new_fragment, created = Fragment.objects.get_or_create(
                        anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # ``themes`` has no split(): leave the fragment untagged.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, created = Tag.objects.get_or_create(
                            slug=slughifi(theme_name),
                            defaults={
                                'name': theme_name,
                                'sort_key': slughifi(theme_name),
                                'category': 'theme',
                            })
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                    book_themes += themes

                book_themes = set(book_themes)
                book.tags = list(book.tags) + list(book_themes) + book_shelves
            elif book_shelves:
                # HTML generation failed: still put back the shelves that
                # were removed when the tag list was replaced above.
                book.tags = list(book.tags) + book_shelves
        finally:
            # Closing the temporary file also deletes it.
            html_file.close()

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the view name and arguments for the book detail page."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
303
304
class Fragment(models.Model):
    """A passage of a book's text, addressed by ``anchor`` within the book."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cached output of short_html(); an empty value means "render again".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    def short_html(self):
        """Return the fragment's short HTML snippet, rendering it on first use.

        The snippet is rendered from ``catalogue/fragment_short.html`` with
        links to the book's author tags, stored in ``_short_html`` and
        persisted with ``save()``.
        """
        if not len(self._short_html):
            author_links = []
            for tag in self.book.tags:
                if tag.category == 'author':
                    link = u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
                    author_links.append(mark_safe(link))

            context = {'fragment': self, 'book': self.book, 'book_authors': author_links}
            rendered = render_to_string('catalogue/fragment_short.html', context)
            self._short_html = unicode(rendered)
            self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the book text URL followed by ``#m<anchor>``."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')
335