Added importing metabooks (books with relation.hasPart in Dublin Core metadata).

[wolnelektury.git] / catalogue / models.py
diff --git a/catalogue/models.py b/catalogue/models.py

index 71c89a2..114b34b 100644 (file)
--- a/catalogue/models.py
+++ b/catalogue/models.py
@@ -1,13 +1,17 @@
  # -*- coding: utf-8 -*-
  from django.db import models
-from django.db.models import permalink
+from django.db.models import permalink, Q
  from django.utils.translation import ugettext_lazy as _
  from django.contrib.auth.models import User
  from django.core.files import File
+from django.template.loader import render_to_string
+from django.utils.safestring import mark_safe
  
  from newtagging.models import TagBase
  from newtagging import managers
  
+from librarian import html, dcparser
+
  
  TAG_CATEGORIES = (
      ('author', _('author')),
@@ -40,12 +44,12 @@ class Tag(TagBase):
      
      def has_description(self):
          return len(self.description) > 0
-    has_description.short_description = _('Has description')
+    has_description.short_description = _('description')
      has_description.boolean = True
  
      @permalink
      def get_absolute_url(self):
-        return ('catalogue.views.tagged_book_list', [self.slug])
+        return ('catalogue.views.tagged_object_list', [self.slug])
      
      class Meta:
          ordering = ('sort_key',)
@@ -69,6 +73,7 @@ class Book(models.Model):
      slug = models.SlugField(_('slug'), unique=True, db_index=True)
      description = models.TextField(_('description'), blank=True)
      created_at = models.DateTimeField(_('creation date'), auto_now=True)
+    _short_html = models.TextField(_('short HTML'), editable=False)
      
      # Formats
      xml_file = models.FileField(_('XML file'), upload_to='books/xml', blank=True)
@@ -76,12 +81,34 @@ class Book(models.Model):
      odt_file = models.FileField(_('ODT file'), upload_to='books/odt', blank=True)
      html_file = models.FileField(_('HTML file'), upload_to='books/html', blank=True)
      
+    parent = models.ForeignKey('self', blank=True, null=True)
+    
      objects = managers.ModelTaggedItemManager(Tag)
      tags = managers.TagDescriptor(Tag)
      
+    def short_html(self):
+        """Return the book's short HTML snippet, rendering and caching it on first use.
+
+        When ``_short_html`` is empty, the snippet is rendered from the
+        ``catalogue/book_short.html`` template with:
+
+        * ``book``    -- this book,
+        * ``tags``    -- HTML links to the book's tags, excluding the 'set'
+          and 'theme' categories,
+        * ``formats`` -- HTML links to the HTML, PDF and ODT files that are set.
+
+        The rendered markup is stored in ``_short_html`` and the book is saved,
+        so later calls return the stored value without rendering again.
+        """
+        from django.utils.html import escape
+
+        if self._short_html:
+            return mark_safe(self._short_html)
+
+        # Tag names are free text, and the snippet is returned through
+        # mark_safe(), so they have to be escaped here.
+        visible_tags = self.tags.filter(~Q(category__in=('set', 'theme')))
+        tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), escape(tag.name))
+                for tag in visible_tags]
+
+        # One link per file format that has actually been uploaded.
+        available_formats = (
+            (self.html_file, u'Czytaj online'),
+            (self.pdf_file, u'Plik PDF'),
+            (self.odt_file, u'Plik ODT'),
+        )
+        formats = [u'<a href="%s">%s</a>' % (book_file.url, label)
+                   for book_file, label in available_formats if book_file]
+
+        self._short_html = render_to_string('catalogue/book_short.html',
+            {'book': self, 'tags': tags, 'formats': formats})
+        # Persist the rendered snippet so it is only computed once.
+        self.save()
+        return mark_safe(self._short_html)
+    
      def has_description(self):
          return len(self.description) > 0
-    has_description.short_description = _('Has description')
+    has_description.short_description = _('description')
      has_description.boolean = True
      
      def has_pdf_file(self):
@@ -98,22 +125,69 @@ class Book(models.Model):
          return bool(self.html_file)
      has_html_file.short_description = 'HTML'
      has_html_file.boolean = True
-    
-    def save(self, **kwargs):
-        try:
-            from bin import book2html
-            from os.path import splitext, basename
-            from tempfile import NamedTemporaryFile
-            
-            html_file = NamedTemporaryFile()
-            book2html.transform(self.xml_file.path, html_file)
-            
-            html_filename = '%s.html' % splitext(basename(self.xml_file.path))[0]
-            self.html_file.save(html_filename, File(html_file), save=False)
-        except ValueError:
-            pass
  
-        book = super(Book, self).save(**kwargs)
+    @staticmethod
+    def from_xml_file(xml_file):
+        """Create a Book, with its tags, files and fragments, from an XML file.
+
+        The import runs in these steps:
+
+        1. Parse the metadata with ``dcparser.parse`` and save a new Book whose
+           slug is derived from the title.
+        2. Tag the book with its kind, genre, author and epoch, creating the
+           tags when they do not exist yet.
+        3. If the metadata lists ``parts``, look up each part by the last path
+           segment of its URL and make this book its parent.
+        4. Store the XML file and the HTML generated from it by
+           ``html.transform``.
+        5. Create a Fragment for every closed fragment returned by
+           ``html.extract_fragments``, tag it with the book's tags and its own
+           themes, and add all themes to the book's tags.
+
+        Args:
+            xml_file: path of the XML file to import.
+
+        Returns:
+            The saved Book instance.
+
+        Raises:
+            Book.DoesNotExist: presumably raised by ``Book.objects.get`` when a
+                listed part has not been imported yet -- confirm against the
+                manager in use.
+        """
+        from tempfile import NamedTemporaryFile
+        from slughifi import slughifi
+        from markupstring import MarkupString
+
+        # Read book metadata
+        book_info = dcparser.parse(xml_file)
+        book = Book(title=book_info.title, slug=slughifi(book_info.title))
+        book.save()
+
+        book_tags = []
+        for category in ('kind', 'genre', 'author', 'epoch'):
+            tag_name = getattr(book_info, category)
+            tag_sort_key = tag_name
+            if category == 'author':
+                # The author is an object with first_names and last_name:
+                # sort by last name, display as "First Names Last".
+                tag_sort_key = tag_name.last_name
+                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
+            tag = Tag.objects.get_or_create(name=tag_name,
+                slug=slughifi(tag_name), sort_key=slughifi(tag_sort_key), category=category)[0]
+            tag.save()
+            book_tags.append(tag)
+        book.tags = book_tags
+
+        # Attach the child books listed as parts of this book.
+        for part_url in getattr(book_info, 'parts', ()):
+            # The child's slug is the last path segment of the part URL.
+            slug = part_url.rsplit('/', 1)[-1]
+            child_book = Book.objects.get(slug=slug)
+            child_book.parent = book
+            child_book.save()
+
+        # Save XML and HTML files
+        xml_source = open(xml_file)
+        try:
+            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
+        finally:
+            # Do not leak the file handle, whatever happens during the save.
+            xml_source.close()
+
+        html_file = NamedTemporaryFile()
+        try:
+            html.transform(book.xml_file.path, html_file)
+            book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
+        finally:
+            # Closing a NamedTemporaryFile also removes it from disk.
+            html_file.close()
+
+        # Extract fragments
+        closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
+        # The book's own tags do not change inside the loop; read them once.
+        base_tags = list(book.tags)
+        book_themes = set()
+        for fragment in closed_fragments.values():
+            text = fragment.to_string()
+            markup = MarkupString(text)
+            short_text = ''
+            # Only long fragments get a shortened version.
+            if len(markup) > 240:
+                short_text = markup[:160]
+            new_fragment = Fragment(text=text, short_text=short_text, anchor=fragment.id, book=book)
+
+            # Skip blank names (e.g. from a trailing comma) so that no
+            # empty theme tag is created.
+            theme_names = [name.strip() for name in fragment.themes.split(',') if name.strip()]
+            themes = []
+            for theme_name in theme_names:
+                tag = Tag.objects.get_or_create(name=theme_name,
+                    slug=slughifi(theme_name), sort_key=slughifi(theme_name), category='theme')[0]
+                tag.save()
+                themes.append(tag)
+            new_fragment.save()
+            new_fragment.tags = base_tags + themes
+            book_themes.update(themes)
+
+        book.tags = base_tags + list(book_themes)
+        # Model.save() returns None, so save first and return the book itself.
+        book.save()
+        return book
      
      @permalink
      def get_absolute_url(self):
@@ -130,15 +204,28 @@ class Book(models.Model):
  
  class Fragment(models.Model):
      text = models.TextField()
-    short_text = models.TextField()
+    short_text = models.TextField(editable=False)
+    _short_html = models.TextField(editable=False)
      anchor = models.IntegerField()
-    book = models.ForeignKey(Book)
-    
+    book = models.ForeignKey(Book, related_name='fragments')
+
      objects = managers.ModelTaggedItemManager(Tag)
      tags = managers.TagDescriptor(Tag)
      
+    def short_html(self):
+        """Return the fragment's short HTML snippet, rendering and caching it on first use.
+
+        When ``_short_html`` is empty, the snippet is rendered from the
+        ``catalogue/fragment_short.html`` template with:
+
+        * ``fragment``     -- this fragment,
+        * ``book``         -- the book it belongs to,
+        * ``book_authors`` -- HTML links to that book's 'author' tags.
+
+        The rendered markup is stored in ``_short_html`` and the fragment is
+        saved, so later calls return the stored value without rendering again.
+        """
+        from django.utils.html import escape
+
+        if self._short_html:
+            return mark_safe(self._short_html)
+
+        # Author names are free text, and the snippet is returned through
+        # mark_safe(), so they have to be escaped here.
+        book_authors = [
+            u'<a href="%s">%s</a>' % (tag.get_absolute_url(), escape(tag.name))
+            for tag in self.book.tags if tag.category == 'author']
+
+        self._short_html = render_to_string('catalogue/fragment_short.html',
+            {'fragment': self, 'book': self.book, 'book_authors': book_authors})
+        # Persist the rendered snippet so it is only computed once.
+        self.save()
+        return mark_safe(self._short_html)
+        
      class Meta:
          ordering = ('book', 'anchor',)
          verbose_name = _('fragment')
-        verbose_name_plural = _('fragment')
+        verbose_name_plural = _('fragments')