Added scripts and data for importing links from gazeta and wikipedia.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16 from mutagen import id3
17
18
# Categories a tag may belong to, as (stored value, translatable label) pairs;
# used as the ``choices`` of ``Tag.category``. Each label is kept as a literal
# ``_()`` call so that it stays visible to message extraction.
TAG_CATEGORIES = (
    ("author", _("author")),
    ("epoch", _("epoch")),
    ("kind", _("kind")),
    ("genre", _("genre")),
    ("theme", _("theme")),
    ("set", _("set")),
    ("book", _("book")),
)
28
29
class TagSubcategoryManager(models.Manager):
    """Manager whose querysets only contain rows of one given category."""

    def __init__(self, subcategory):
        """Store ``subcategory``, the value ``category`` is filtered on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
37
38
class Tag(TagBase):
    """A named, categorised label; ``category`` is one of ``TAG_CATEGORIES``."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'),
        max_length=50,
        blank=False,
        null=False,
        db_index=True,
        choices=TAG_CATEGORIES,
    )
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'),
        default=False,
        db_index=True,
        help_text=_('Show tag on main page'),
    )

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    gazeta_link = models.CharField(blank=True, max_length=240)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        description_length = len(self.description)
        return description_length > 0
    # Presentation attributes attached to the method (label and boolean flag).
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, positional args) pair resolved by ``permalink``."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is read as '/'-separated slugs and every slug is looked up
        with ``Tag.objects.get``. Any other value is handed over unchanged to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        found_tags = []
        for slug in tags.split('/'):
            found_tags.append(Tag.objects.get(slug=slug))
        return found_tags
76
77
def book_upload_path(ext):
    """Return a callable that builds the path ``lektura/<book slug>.<ext>``.

    The returned function takes ``(book, filename)``; it is passed as
    ``upload_to`` to the file fields of ``Book``.
    """
    def get_dynamic_path(book, filename):
        # ``filename`` is ignored: the path depends only on the book's slug
        # and on the extension captured from the enclosing call.
        path_parts = (book.slug, ext)
        return 'lektura/%s.%s' % path_parts

    return get_dynamic_path
82
83
class Book(models.Model):
    """A catalogue book: metadata, downloadable format files and tags.

    Books can be nested: ``parent`` points at the containing book and
    ``parent_number`` is the position of this book among the parent's parts
    (both are set by ``from_xml_file``).
    """

    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # auto_now_add, not auto_now: a creation date is set once on insert.
    # auto_now rewrote it on every save, including the save() that
    # short_html() performs just to store its cache.
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class AlreadyExists(Exception):
        """Raised by ``from_xml_file`` when the book exists and ``overwrite`` is false."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    @permalink
    def get_absolute_url(self):
        """Return the (view name, positional args) pair resolved by ``permalink``."""
        return ('catalogue.views.book_detail', [self.slug])

    def short_html(self):
        """Return the book's short HTML box, rendering and caching it if needed.

        The rendered template is stored in ``_short_html`` and the book is
        saved, so later calls return the cached markup.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        # Shelves, themes and the per-book tag are not shown in the box.
        visible_tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
                for tag in visible_tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>'
                           % reverse('book_text', kwargs={'slug': self.slug}))
        downloads = (
            (u'PDF', self.pdf_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, field_file in downloads:
            if field_file:
                formats.append(u'<a href="%s">%s</a>' % (field_file.url, label))

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save()
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False):
        """Save the book, first merging MP3 tag data into ``extra_info``."""
        if self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
        return super(Book, self).save(force_insert, force_update)

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags.

        Returns a dict with the keys ``artist_name`` (TPE1 frames) and
        ``director_name`` (TPE3 frames). Both values are empty strings when
        the file has no ID3 header at all.
        """
        try:
            audio = id3.ID3(self.mp3_file.path)
        except id3.ID3NoHeaderError:
            # An untagged MP3 must not make save() fail; report no names,
            # which is also what a tagged file without these frames yields.
            return {'artist_name': '', 'director_name': ''}
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    @staticmethod
    def _get_or_create_tag(slug, name, sort_key, category):
        """Return the tag with ``slug``; a newly created one gets the other values."""
        tag, tag_created = Tag.objects.get_or_create(slug=slug)
        if tag_created:
            tag.name = name
            tag.sort_key = sort_key
            tag.category = category
            tag.save()
        return tag

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or re-import a book from its XML source.

        Parses the metadata, tags the book (kind, genre, author, epoch and a
        per-book ``l-<slug>`` tag), links the listed parts as children, stores
        the XML, generates the HTML and creates theme-tagged fragments from it.

        Args:
            xml_file: a path or an already opened django ``File``.
            overwrite: when false, importing an existing book is an error.

        Returns:
            The saved ``Book``.

        Raises:
            Book.AlreadyExists: the book exists and ``overwrite`` is false.
            The lookup of a part listed in the metadata (``Book.objects.get``)
            is not guarded, so a part that was not imported yet makes the
            import fail.
        """
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        unused_base, book_slug = book_info.url.rsplit('/', 1)
        book, book_created = Book.objects.get_or_create(slug=book_slug)

        if book_created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book; they are re-attached at the end
            # because the tag list is replaced below.
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''  # drop the cached box, it is stale now
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are person objects: sort by last name, show the full name.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            book_tags.append(Book._get_or_create_tag(
                slughifi(tag_name), tag_name, slughifi(tag_sort_key), category))

        # Per-book tag; slug and name are cut to the Tag field lengths.
        book_tag_slug = ('l-' + book.slug)[:120]
        book_tag = Book._get_or_create_tag(
            book_tag_slug, book.title[:50], book_tag_slug, 'book')
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                unused_part_base, part_slug = part_url.rsplit('/', 1)
                child_book = Book.objects.get(slug=part_slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Breadth-first walk over all descendants: their fragments also get
        # this book's tag.
        book_descendants = list(book.children.all())
        while book_descendants:
            child_book = book_descendants.pop(0)
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants += list(child_book.children.all())

        # Save XML and HTML files
        opened_here = not isinstance(xml_file, File)
        if opened_here:
            xml_file = File(open(xml_file))
        try:
            book.xml_file.save('%s.xml' % book.slug, xml_file, save=False)
        finally:
            # Only close what this function opened; a caller's File stays open.
            if opened_here:
                xml_file.close()

        html_file = NamedTemporaryFile()
        try:
            html_generated = bool(html.transform(book.xml_file.path, html_file))
            if html_generated:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The stored copy is used from here on; release the temporary file.
            html_file.close()

        book_themes = set()
        if html_generated:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                short_text = ''
                if len(MarkupString(text)) > 240:
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment, fragment_created = Fragment.objects.get_or_create(
                    anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})
                if not fragment_created:
                    # Re-import: ``defaults`` only apply on creation, so an
                    # existing fragment would keep its old text and cache.
                    new_fragment.text = text
                    new_fragment.short_text = short_text
                    new_fragment._short_html = ''
                    new_fragment.save()

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # ``themes`` cannot be split (no ``split`` attribute):
                    # the fragment is left without tags.
                    continue
                themes = []
                for theme_name in theme_names:
                    theme_slug = slughifi(theme_name)
                    themes.append(Book._get_or_create_tag(
                        theme_slug, theme_name, theme_slug, 'theme'))
                new_fragment.save()
                new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                book_themes.update(themes)

        # Re-attach the shelves even when no HTML could be generated;
        # otherwise a failed transform would silently drop them.
        book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book
295
296
class Fragment(models.Model):
    """A tagged piece of a book's text, located by ``anchor`` in the book's HTML."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cached output of short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def short_html(self):
        """Return the fragment's short HTML box, rendering and caching it if needed."""
        if not len(self._short_html):
            # Links to the authors of the owning book.
            author_links = []
            for tag in self.book.tags:
                if tag.category == 'author':
                    author_links.append(
                        u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))

            context = {'fragment': self, 'book': self.book, 'book_authors': author_links}
            rendered = render_to_string('catalogue/fragment_short.html', context)
            self._short_html = unicode(rendered)
            self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the URL of the book text with the ``#m<anchor>`` suffix."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)
327