pdcounter fix, librarian bump
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models
6 from django.db.models import permalink, Q
7 from django.utils.translation import ugettext_lazy as _
8 from django.contrib.auth.models import User
9 from django.core.files import File
10 from django.template.loader import render_to_string
11 from django.utils.safestring import mark_safe
12 from django.utils.translation import get_language
13 from django.core.urlresolvers import reverse
14
15 from newtagging.models import TagBase, tags_updated
16 from newtagging import managers
17 from catalogue.fields import JSONField
18
19 from librarian import dcparser, html, epub, NoDublinCore
20 from mutagen import id3
21
22
# (value, translatable label) pairs used as the ``choices`` of ``Tag.category``.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
    ('book', _('book')),
)
32
33
class TagSubcategoryManager(models.Manager):
    """Manager whose default queryset is restricted to a single tag category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the parent manager's queryset narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
41
42
class Tag(TagBase):
    """A catalogue tag; its ``category`` is one of ``TAG_CATEGORIES``.

    A tag is unique per ``(slug, category)`` pair, so the same slug may exist
    in several categories.
    """
    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(_('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)
    # Cached result of get_count(); NULL means "not computed yet".
    book_count = models.IntegerField(_('book count'), blank=False, null=True)
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # URL path segment (Polish) -> category value.
    categories_rev = {
        'autor': 'author',
        'epoka': 'epoch',
        'rodzaj': 'kind',
        'gatunek': 'genre',
        'motyw': 'theme',
        'polka': 'set',
    }
    # Inverse mapping: category value -> URL path segment.
    categories_dict = dict((item[::-1] for item in categories_rev.iteritems()))

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')
        unique_together = (("slug", "category"),)

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return "Tag(slug=%r)" % self.slug

    @permalink
    def get_absolute_url(self):
        """Return the URL of the object list tagged with this tag."""
        return ('catalogue.views.tagged_object_list', [self.url_chunk])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        # bool() also copes with a None description, unlike len().
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def get_count(self):
        """Return the global book count for book tags, fragment count for themes.

        The value is computed only when ``book_count`` is ``None``; it is then
        stored in ``book_count`` and the tag is saved, so later calls return
        the cached number.
        """
        if self.book_count is None:
            if self.category == 'book':
                # never used
                objects = Book.objects.none()
            elif self.category == 'theme':
                objects = Fragment.tagged.with_all((self,))
            else:
                objects = Book.tagged.with_all((self,)).order_by()
                if self.category != 'set':
                    # eliminate descendants: books carrying the l-tag of another
                    # matching book are excluded from the count
                    l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
                    if descendants_keys:
                        objects = objects.exclude(pk__in=descendants_keys)
            self.book_count = objects.count()
            self.save()
        return self.book_count

    @staticmethod
    def get_tag_list(tags):
        """Resolve ``tags`` into a list of ``Tag`` objects.

        When ``tags`` is a string it is split on ``'/'``.  A chunk that is a key
        of ``categories_rev`` sets the category of the slug that follows it;
        any other chunk is looked up as a slug (among non-'book' tags when no
        category precedes it).  Non-string input is delegated to
        ``TagBase.get_tag_list``.

        Raises:
            Tag.DoesNotExist: a slug does not match any tag, or the string
                ends with a category name that has no slug after it.
            Tag.MultipleObjectsReturned: an unqualified slug matches several
                tags; the exception carries ``tags`` (the tags resolved so
                far) and ``ambiguous_slugs``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)

        real_tags = []
        ambiguous_slugs = []
        category = None
        for name in tags.split('/'):
            if name in Tag.categories_rev:
                category = Tag.categories_rev[name]
            elif category:
                real_tags.append(Tag.objects.get(slug=name, category=category))
                category = None
            else:
                try:
                    real_tags.append(Tag.objects.exclude(category='book').get(slug=name))
                except Tag.MultipleObjectsReturned:
                    # Keep going so that every ambiguous slug gets reported.
                    ambiguous_slugs.append(name)

        if category:
            # something strange left off
            raise Tag.DoesNotExist()
        if ambiguous_slugs:
            # some tags should be qualified
            e = Tag.MultipleObjectsReturned()
            e.tags = real_tags
            e.ambiguous_slugs = ambiguous_slugs
            raise e
        return real_tags

    @property
    def url_chunk(self):
        """Return the ``<category segment>/<slug>`` part of this tag's URL."""
        # Note: 'book' has no entry in categories_dict, so this raises
        # KeyError for book tags.
        return '/'.join((Tag.categories_dict[self.category], self.slug))
146
147
148 # TODO: why is this hard-coded ?
def book_upload_path(ext):
    """Return an ``upload_to`` callable that stores files as ``lektura/<book slug>.<ext>``."""
    def get_dynamic_path(book, filename):
        # The uploaded file's own name is ignored; only the book slug matters.
        basename = '%s.%s' % (book.slug, ext)
        return 'lektura/%s' % basename
    return get_dynamic_path
153
154
class Book(models.Model):
    """A catalogue book together with its file formats, tags and cached counters.

    Books form a tree through ``parent``/``children``.  Rendered short HTML and
    the per-book tag/theme counters are cached on the model and invalidated by
    ``save`` and the ``reset_*`` methods.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this on every save; for a creation
    # date auto_now_add=True was probably intended -- confirm before changing.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)


    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)
    mp3_file = models.FileField(_('MP3 file'), upload_to=book_upload_path('mp3'), blank=True)
    ogg_file = models.FileField(_('OGG file'), upload_to=book_upload_path('ogg'), blank=True)
    daisy_file = models.FileField(_('DAISY file'), upload_to=book_upload_path('daisy.zip'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    # Cached counters; None means "needs recomputing" (see tag_counter / theme_counter).
    _tag_counter = JSONField(null=True, editable=False)
    _theme_counter = JSONField(null=True, editable=False)

    class AlreadyExists(Exception):
        """Raised when importing a book whose slug exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title

    def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
        """Save the book, optionally clearing cached HTML and refreshing MP3 info.

        Args:
            force_insert, force_update: passed to ``Model.save``.
            reset_short_html: when true, blank every ``_short_html*`` attribute
                of the book and the same columns of all its fragments.
            refresh_mp3: when true and an MP3 file is attached, merge its ID3
                artist/director names into ``extra_info`` and save again.
            **kwargs: accepted for call compatibility; not forwarded.

        Returns:
            The result of the last underlying ``Model.save`` call.
        """
        if reset_short_html:
            # Reset _short_html during save
            update = {}
            for key in [name for name in self.__dict__ if name.startswith('_short_html')]:
                update[key] = ''
                setattr(self, key, '')
            # Fragment.short_html relies on book's tags, so reset it here too
            self.fragments.all().update(**update)

        book = super(Book, self).save(force_insert, force_update)

        if refresh_mp3 and self.mp3_file:
            extra_info = self.get_extra_info_value()
            extra_info.update(self.get_mp3_info())
            self.set_extra_info_value(extra_info)
            # The row already exists after the first save, so an INSERT must
            # not be forced a second time.
            book = super(Book, self).save(force_insert=False, force_update=force_update)

        return book

    @permalink
    def get_absolute_url(self):
        """Return the URL of the book detail view."""
        return ('catalogue.views.book_detail', [self.slug])

    @property
    def name(self):
        """Alias of ``title``."""
        return self.title

    def book_tag_slug(self):
        """Return the slug of this book's own tag: ``'l-' + slug``, cut to 120 chars."""
        return ('l-' + self.slug)[:120]

    def book_tag(self):
        """Return this book's tag (category 'book'), creating it if missing."""
        slug = self.book_tag_slug()
        book_tag, created = Tag.objects.get_or_create(slug=slug, category='book')
        if created:
            book_tag.name = self.title[:50]
            book_tag.sort_key = slug
            book_tag.save()
        return book_tag

    def short_html(self):
        """Return the book's short HTML for the active language, rendering it on a miss.

        The rendered snippet is stored in the ``_short_html_<language>``
        attribute and the book is saved, so later calls reuse it.
        """
        key = '_short_html_%s' % get_language()
        short_html = getattr(self, key)

        if short_html:
            return mark_safe(short_html)

        tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
        tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
        if self.pdf_file:
            formats.append(u'<a href="%s">PDF</a>' % self.pdf_file.url)
        # The EPUB link always points at the root ancestor's file.
        if self.root_ancestor.epub_file:
            formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.epub_file.url)
        if self.odt_file:
            formats.append(u'<a href="%s">ODT</a>' % self.odt_file.url)
        if self.txt_file:
            formats.append(u'<a href="%s">TXT</a>' % self.txt_file.url)
        if self.mp3_file:
            formats.append(u'<a href="%s">MP3</a>' % self.mp3_file.url)
        if self.ogg_file:
            formats.append(u'<a href="%s">OGG</a>' % self.ogg_file.url)
        if self.daisy_file:
            formats.append(u'<a href="%s">DAISY</a>' % self.daisy_file.url)

        formats = [mark_safe(format) for format in formats]

        setattr(self, key, unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats})))
        # Keep the freshly rendered value: do not reset it while saving.
        self.save(reset_short_html=False)
        return mark_safe(getattr(self, key))


    @property
    def root_ancestor(self):
        """Return the oldest ancestor (the book itself if it has no parent).

        The result is memoised on the instance in ``_root_ancestor``.
        """
        if not hasattr(self, '_root_ancestor'):
            book = self
            while book.parent:
                book = book.parent
            self._root_ancestor = book
        return self._root_ancestor


    def get_mp3_info(self):
        """Retrieves artist and director names from audio ID3 tags.

        Reads the TPE1 (artist) and TPE3 (director) frames of ``mp3_file`` and
        returns ``{'artist_name': ..., 'director_name': ...}`` with multiple
        values joined by ``', '``.
        """
        audio = id3.ID3(self.mp3_file.path)
        artist_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE1'))
        director_name = ', '.join(', '.join(tag.text) for tag in audio.getall('TPE3'))
        return {'artist_name': artist_name, 'director_name': director_name}

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        # bool() also copes with a None description, unlike len().
        return bool(self.description)
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_epub_file(self):
        return bool(self.epub_file)
    has_epub_file.short_description = 'EPUB'
    has_epub_file.boolean = True

    def has_odt_file(self):
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    def build_epub(self, remove_descendants=True):
        """ (Re)builds the epub file.
            If book has a parent, does nothing.
            Unless remove_descendants is False, descendants' epubs are removed.

            On success a FileRecord with the SHA-1 of the new EPUB is stored.
            A NoDublinCore error from the transform is ignored (no EPUB is
            built).  Every descendant is saved afterwards.
        """

        from StringIO import StringIO
        from collections import deque
        from hashlib import sha1
        from django.core.files.base import ContentFile
        from librarian import DocProvider

        class BookImportDocProvider(DocProvider):
            """ used for joined EPUBs """

            def __init__(self, book):
                self.book = book

            def by_slug(self, slug):
                # Serve the in-memory book itself; fetch any other slug from the DB.
                if slug == self.book.slug:
                    return self.book.xml_file
                else:
                    return Book.objects.get(slug=slug).xml_file

        if self.parent:
            # don't need an epub
            return

        epub_file = StringIO()
        try:
            epub.transform(BookImportDocProvider(self), self.slug, epub_file)
            epub_data = epub_file.getvalue()
            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_data), save=False)
            self.save()
            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_data).hexdigest()).save()
        except NoDublinCore:
            pass

        # Breadth-first walk over all descendants.
        book_descendants = deque(self.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            if remove_descendants and child_book.has_epub_file():
                child_book.epub_file.delete()
            # save anyway, to refresh short_html
            child_book.save()
            book_descendants.extend(child_book.children.all())


    @classmethod
    def from_xml_file(cls, xml_file, overwrite=False):
        """Import a book from ``xml_file`` (a path or a Django ``File``).

        Metadata is parsed with ``dcparser``; the rest is done by
        ``from_text_and_meta``.  The file is closed afterwards, also when a
        ``File`` object was passed in.
        """
        # use librarian to parse meta-data
        book_info = dcparser.parse(xml_file)

        if not isinstance(xml_file, File):
            xml_file = File(open(xml_file))

        try:
            return cls.from_text_and_meta(xml_file, book_info, overwrite)
        finally:
            xml_file.close()

    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False):
        """Create or update a book from its XML source and parsed metadata.

        Args:
            raw_file: file object with the book's XML; saved as ``xml_file``.
            book_info: parsed metadata; the last segment of its ``url`` is the
                book slug.
            overwrite: allow updating a book whose slug already exists.

        Returns:
            The saved ``Book``.

        Raises:
            Book.AlreadyExists: the slug exists and ``overwrite`` is false.
            Book.DoesNotExist: a part listed in ``book_info.parts`` is not in
                the catalogue.
        """
        from collections import deque
        from tempfile import NamedTemporaryFile
        from slughifi import slughifi
        from markupstring import MarkupString
        from django.core.files.storage import default_storage

        # Read book metadata
        _book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book.set_extra_info_value(book_info.to_dict())
        book._short_html = ''
        book.save()

        book_tags = []
        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
        for field_name, category in categories:
            try:
                tag_names = getattr(book_info, field_name)
            except AttributeError:
                # No plural attribute: fall back to the singular one.
                tag_names = [getattr(book_info, category)]
            for tag_name in tag_names:
                tag_sort_key = tag_name
                if category == 'author':
                    # Authors are objects: sort by last name, display full name.
                    tag_sort_key = tag_name.last_name
                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
                if created:
                    tag.name = tag_name
                    tag.sort_key = slughifi(tag_sort_key)
                    tag.save()
                book_tags.append(tag)

        book.tags = book_tags + book_shelves

        book_tag = book.book_tag()

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                _part_base, slug = part_url.rsplit('/', 1)
                try:
                    child_book = Book.objects.get(slug=slug)
                    child_book.parent = book
                    child_book.parent_number = n
                    child_book.save()
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)

        # Save XML and HTML files
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        # delete old fragments when overwriting
        book.fragments.all().delete()

        html_file = NamedTemporaryFile()
        try:
            html_built = html.transform(book.xml_file.path, html_file, parse_dublincore=False)
            if html_built:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The stored copy is used from here on; release the temp file.
            html_file.close()

        if html_built:
            # get ancestor l-tags for adding to new fragments
            ancestor_tags = []
            p = book.parent
            while p:
                ancestor_tags.append(p.book_tag())
                p = p.parent

            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            for fragment in closed_fragments.values():
                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # Fragment without usable themes: skip it.
                    continue
                themes = []
                for theme_name in theme_names:
                    if not theme_name:
                        continue
                    tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
                    if created:
                        tag.name = theme_name
                        tag.sort_key = slughifi(theme_name)
                        tag.save()
                    themes.append(tag)
                if not themes:
                    continue

                text = fragment.to_string()
                short_text = ''
                # Only texts longer than 240 get a 160-long short version.
                if len(MarkupString(text)) > 240:
                    short_text = unicode(MarkupString(text)[:160])
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})

                new_fragment.save()
                new_fragment.tags = set(book_tags + themes + [book_tag] + ancestor_tags)

        if not book.parent:
            book.build_epub(remove_descendants=False)

        # add l-tag to descendants and their fragments
        # delete unnecessary EPUB files
        book_descendants = deque(book.children.all())
        while book_descendants:
            child_book = book_descendants.popleft()
            child_book.tags = list(child_book.tags) + [book_tag]
            if child_book.has_epub_file():
                child_book.epub_file.delete()
            child_book.save()
            for fragment in child_book.fragments.all():
                fragment.tags = set(list(fragment.tags) + [book_tag])
            book_descendants.extend(child_book.children.all())

        # refresh cache
        book.reset_tag_counter()
        book.reset_theme_counter()

        book.save()
        return book


    def refresh_tag_counter(self):
        """Recompute, store and return the ``{tag pk: count}`` mapping.

        Children's counters are summed; each of the book's own tags outside
        the 'book', 'theme' and 'set' categories is then set to 1.
        """
        tags = {}
        for child in self.children.all().order_by():
            for tag_pk, value in child.tag_counter.iteritems():
                tags[tag_pk] = tags.get(tag_pk, 0) + value
        for tag in self.tags.exclude(category__in=('book', 'theme', 'set')).order_by():
            tags[tag.pk] = 1
        self.set__tag_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_tag_counter(self):
        """Invalidate the cached tag counter of this book and all its ancestors."""
        self._tag_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_tag_counter()

    @property
    def tag_counter(self):
        """Return ``{tag pk (int): count}``, recomputing it when not cached."""
        if self._tag_counter is None:
            return self.refresh_tag_counter()
        # Stored keys are converted back to ints.
        return dict((int(k), v) for k, v in self.get__tag_counter_value().iteritems())

    def refresh_theme_counter(self):
        """Recompute, store and return the ``{theme tag pk: fragment count}`` mapping.

        Counts theme tags over all fragments carrying this book's tag.
        """
        tags = {}
        for fragment in Fragment.tagged.with_any([self.book_tag()]).order_by():
            for tag in fragment.tags.filter(category='theme').order_by():
                tags[tag.pk] = tags.get(tag.pk, 0) + 1
        self.set__theme_counter_value(tags)
        self.save(reset_short_html=False, refresh_mp3=False)
        return tags

    def reset_theme_counter(self):
        """Invalidate the cached theme counter of this book and all its ancestors."""
        self._theme_counter = None
        self.save(reset_short_html=False, refresh_mp3=False)
        if self.parent:
            self.parent.reset_theme_counter()

    @property
    def theme_counter(self):
        """Return ``{theme tag pk (int): count}``, recomputing it when not cached."""
        if self._theme_counter is None:
            return self.refresh_theme_counter()
        # Stored keys are converted back to ints.
        return dict((int(k), v) for k, v in self.get__theme_counter_value().iteritems())
550
551
552
class Fragment(models.Model):
    """A tagged piece of a book's text, located by an anchor."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def get_absolute_url(self):
        """Return the book's online-text URL with ``#m<anchor>`` appended."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)

    def short_html(self):
        """Return the fragment's short HTML for the active language.

        A non-empty cached value is returned as is; otherwise the template is
        rendered, stored on the fragment, and the fragment is saved.
        """
        cache_attr = '_short_html_%s' % get_language()
        cached = getattr(self, cache_attr)
        if cached:
            return mark_safe(cached)

        rendered = render_to_string('catalogue/fragment_short.html',
            {'fragment': self})
        setattr(self, cache_attr, unicode(rendered))
        self.save()
        return mark_safe(getattr(self, cache_attr))
582
583
class FileRecord(models.Model):
    """Timestamped record of a file's SHA-1 hash, identified by slug and type."""
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    type = models.CharField(_('type'), max_length=20, db_index=True)
    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
    time = models.DateTimeField(_('time'), auto_now_add=True)

    class Meta:
        # Newest first.
        ordering = ('-time', '-slug', '-type')
        verbose_name = _('file record')
        verbose_name_plural = _('file records')

    def __unicode__(self):
        """Render as ``<sha1> <slug>.<type>``."""
        parts = (self.sha1, self.slug, self.type)
        return "%s %s.%s" % parts
597
598
def _tags_updated_handler(sender, affected_tags, **kwargs):
    """Invalidate cached counters after an object's tags have changed.

    Clears ``book_count`` on every affected tag.  For a ``Book`` sender whose
    changed tags include a category other than 'book', 'theme' or 'set', the
    book's tag counter is reset; for a ``Fragment`` sender whose changed tags
    include a theme, the theme counter of the fragment's book is reset.
    """
    affected_pks = [tag.pk for tag in affected_tags]

    # reset tag global counter
    Tag.objects.filter(pk__in=affected_pks).update(book_count=None)

    # Querysets are lazy: each one only hits the database if its branch
    # below actually calls count().
    counted_by_book = Tag.objects.filter(pk__in=affected_pks).exclude(
        category__in=('book', 'theme', 'set'))
    changed_themes = Tag.objects.filter(pk__in=affected_pks).filter(category='theme')

    if isinstance(sender, Book) and counted_by_book.count():
        # if book tags changed, reset book tag counter
        sender.reset_tag_counter()
    elif isinstance(sender, Fragment) and changed_themes.count():
        # if fragment theme changed, reset book theme counter
        sender.book.reset_theme_counter()
tags_updated.connect(_tags_updated_handler)
614