Added importing links from wikipedia to import_links.py script.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16 from mutagen import id3
17
18
# Closed set of tag categories; passed as ``choices`` to ``Tag.category``.
# Each entry pairs the stored value with its lazily translated label.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
28
29
class TagSubcategoryManager(models.Manager):
    """Manager whose queryset is limited to a single ``category`` value."""

    def __init__(self, subcategory):
        """Remember ``subcategory``; it is used as the ``category`` filter."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset filtered by ``category``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
37
38
class Tag(TagBase):
    """A catalogue tag; ``category`` is one of ``TAG_CATEGORIES``."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    def has_description(self):
        """Tell whether ``description`` is non-empty."""
        return len(self.description) != 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as slugs separated by ``/`` and every slug is
        fetched with ``Tag.objects.get``; any other value is handed to
        ``TagBase.get_tag_list`` unchanged.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        found = []
        for slug in tags.split('/'):
            found.append(Tag.objects.get(slug=slug))
        return found
77
78
def book_upload_path(ext):
    """Build an ``upload_to`` callable that stores files as ``lektura/<slug>.<ext>``.

    The returned function takes ``(book, filename)``; the supplied
    ``filename`` is ignored and the path is derived from ``book.slug``.
    """
    def get_dynamic_path(book, filename):
        slug = book.slug
        return 'lektura/%s.%s' % (slug, ext)

    return get_dynamic_path
83
84
class Book(models.Model):
    """A book in the catalogue, optionally a part (child) of another book.

    Stores the source XML plus derived/downloadable formats, a cached HTML
    snippet (``_short_html``) and metadata in ``extra_info``.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): labelled "creation date" but ``auto_now=True`` refreshes
    # the value on every save; ``auto_now_add=True`` may have been intended.
    # Left unchanged because other code may rely on the current behaviour.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True,  max_length=240)
    wiki_link = models.CharField(blank=True,  max_length=240)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def short_html(self):
        """Return the book's short HTML snippet, rendering and caching it if empty.

        When ``_short_html`` is empty the snippet is rendered from
        ``catalogue/book_short.html``, stored on the instance and saved.
        """
        if len(self._short_html):
            return mark_safe(self._short_html)

        # Shelf, theme and per-book tags are not shown in the snippet.
        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
        # Downloadable formats, in display order; only attached files are linked.
        downloads = (
            (u'PDF', self.pdf_file),
            (u'ODT', self.odt_file),
            (u'TXT', self.txt_file),
            (u'MP3', self.mp3_file),
            (u'OGG', self.ogg_file),
        )
        for label, book_file in downloads:
            if book_file:
                formats.append(u'<a href="%s">%s</a>' % (book_file.url, label))

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save()
        return mark_safe(self._short_html)

    def save(self, force_insert=False, force_update=False):
        """Save the book, first merging MP3 tag data into ``extra_info`` if an MP3 is attached."""
        if self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
        return super(Book, self).save(force_insert, force_update)

    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags."""
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether ``description`` is non-empty."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by ``from_xml_file`` when the book exists and ``overwrite`` is false."""
        pass

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book, its tags and its fragments from an XML file.

        ``xml_file`` is either a Django ``File`` or a value accepted by the
        ``file()`` builtin (it is wrapped in ``File`` in that case).
        ``overwrite`` allows updating a book whose slug already exists.

        Returns the saved ``Book``.  Raises ``Book.AlreadyExists`` when the
        slug is already present and ``overwrite`` is false.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book; they are re-attached at the end
            # because the tag assignments below replace the whole tag set.
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''  # force the cached snippet to be re-rendered
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but displayed as "First Last".
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name))
            if created:
                tag.name = tag_name
                tag.sort_key = slughifi(tag_sort_key)
                tag.category = category
                tag.save()
            book_tags.append(tag)

        # Per-book tag ('l-<slug>'), truncated to the field lengths of Tag.
        book_tag, created = Tag.objects.get_or_create(slug=('l-' + book.slug)[:120])
        if created:
            book_tag.name = book.title[:50]
            book_tag.sort_key = ('l-' + book.slug)[:120]
            book_tag.category = 'book'
            book_tag.save()
        book_tags.append(book_tag)

        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                child_book = Book.objects.get(slug=slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Breadth-first walk over all descendants: every fragment of a child
        # book also gets this book's per-book tag.
        pending = deque(book.children.all())
        while pending:
            child_book = pending.popleft()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            pending.extend(child_book.children.all())

        # Save XML and HTML files
        opened_here = not isinstance(xml_file, File)
        if opened_here:
            xml_file = File(file(xml_file))
        try:
            book.xml_file.save('%s.xml' % book.slug, xml_file, save=False)
        finally:
            # Only close what this function opened; a caller-supplied File
            # stays under the caller's control.
            if opened_here:
                xml_file.close()

        book_themes = set()
        html_file = NamedTemporaryFile()
        try:
            if html.transform(book.xml_file.path, html_file):
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)

                # Extract fragments
                closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
                for fragment in closed_fragments.values():
                    text = fragment.to_string()
                    short_text = ''
                    if (len(MarkupString(text)) > 240):
                        short_text = unicode(MarkupString(text)[:160])
                    # NOTE(review): ``defaults`` presumably only apply when the
                    # row is created, so an existing fragment would keep its
                    # old text on overwrite -- confirm whether that is intended.
                    new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                        defaults={'text': text, 'short_text': short_text})

                    try:
                        theme_names = [s.strip() for s in fragment.themes.split(',')]
                    except AttributeError:
                        # ``themes`` is not a string here; leave the fragment untagged.
                        continue
                    themes = []
                    for theme_name in theme_names:
                        tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name))
                        if created:
                            tag.name = theme_name
                            tag.sort_key = slughifi(theme_name)
                            tag.category = 'theme'
                            tag.save()
                        themes.append(tag)
                    new_fragment.save()
                    new_fragment.tags = set(list(book.tags) + themes + [book_tag])
                    book_themes.update(themes)
        finally:
            # Closing the temporary file also removes it from disk.
            html_file.close()

        # Re-attach themes and saved shelves.  This runs even when the HTML
        # transform produced nothing, so overwriting a book never drops the
        # shelves it was on.
        book.tags = list(book.tags) + list(book_themes) + book_shelves

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
298
299
class Fragment(models.Model):
    """A tagged passage of a book, addressed by ``anchor`` within the book text."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def short_html(self):
        """Return the fragment's short HTML snippet, rendering and caching it if empty."""
        if not len(self._short_html):
            # Links to the authors of the owning book.
            author_links = []
            for tag in self.book.tags:
                if tag.category == 'author':
                    author_links.append(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))

            context = {'fragment': self, 'book': self.book, 'book_authors': author_links}
            rendered = render_to_string('catalogue/fragment_short.html', context)
            self._short_html = unicode(rendered)
            self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the book text URL with this fragment's ``#m<anchor>`` suffix."""
        text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (text_url, self.anchor)
330