Added tagging books and fragments with themes.
[wolnelektury.git] / catalogue / models.py
index 71c89a2..1b5a32f 100644 (file)
@@ -8,6 +8,8 @@ from django.core.files import File
 from newtagging.models import TagBase
 from newtagging import managers
 
 from newtagging.models import TagBase
 from newtagging import managers
 
+from librarian import html
+
 
 TAG_CATEGORIES = (
     ('author', _('author')),
 
 TAG_CATEGORIES = (
     ('author', _('author')),
@@ -98,22 +100,58 @@ class Book(models.Model):
         return bool(self.html_file)
     has_html_file.short_description = 'HTML'
     has_html_file.boolean = True
         return bool(self.html_file)
     has_html_file.short_description = 'HTML'
     has_html_file.boolean = True
-    
-    def save(self, **kwargs):
-        try:
-            from bin import book2html
-            from os.path import splitext, basename
-            from tempfile import NamedTemporaryFile
-            
-            html_file = NamedTemporaryFile()
-            book2html.transform(self.xml_file.path, html_file)
-            
-            html_filename = '%s.html' % splitext(basename(self.xml_file.path))[0]
-            self.html_file.save(html_filename, File(html_file), save=False)
-        except ValueError:
-            pass
-
-        book = super(Book, self).save(**kwargs)
+
+    @staticmethod
+    def from_xml_file(xml_file):
+        """Import the book stored at path `xml_file` and return the saved Book.
+
+        Saves its XML and generated HTML, its tags and its themed fragments."""
+        from tempfile import NamedTemporaryFile
+        from slughifi import slughifi
+        import dcparser
+
+        book_info = dcparser.parse(xml_file)
+        book = Book(title=book_info.title, slug=slughifi(book_info.title))
+        book.save()
+        book_tags = []
+        for category in ('kind', 'genre', 'author', 'epoch'):
+            tag_name = getattr(book_info, category)
+            tag_sort_key = tag_name
+            if category == 'author':  # an object with name parts, not a plain string
+                tag_sort_key = tag_name.last_name
+                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
+            tag, created = Tag.objects.get_or_create(name=tag_name,
+                slug=slughifi(tag_name), sort_key=slughifi(tag_sort_key), category=category)
+            book_tags.append(tag)  # get_or_create() already saved it if it was new
+        book.tags = book_tags
+
+        # Save XML and HTML files
+        xml_source = file(xml_file)
+        try:  # close the source handle even if storing the copy fails
+            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
+        finally:
+            xml_source.close()
+        html_file = NamedTemporaryFile()
+        html.transform(book.xml_file.path, html_file)
+        book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
+        # Extract fragments
+        closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
+        book_themes = []
+        for fragment in closed_fragments.values():
+            new_fragment = Fragment(html=fragment.to_string(), short_html=fragment.to_string(),
+                anchor=fragment.id, book=book)
+            theme_names = [s.strip() for s in fragment.themes.split(',')]
+            themes = []
+            for theme_name in theme_names:
+                tag, created = Tag.objects.get_or_create(name=theme_name,
+                    slug=slughifi(theme_name), sort_key=slughifi(theme_name), category='theme')
+                themes.append(tag)
+            new_fragment.save()
+            new_fragment.tags = list(book.tags) + themes
+            book_themes += themes
+        book.tags = list(book.tags) + list(set(book_themes))  # each theme once
+        book.save()  # persists the file fields stored above with save=False
+        return book  # Model.save() returns None, so hand back the book itself
     
     @permalink
     def get_absolute_url(self):
     
     @permalink
     def get_absolute_url(self):
@@ -129,10 +167,10 @@ class Book(models.Model):
 
 
 class Fragment(models.Model):
 
 
 class Fragment(models.Model):
-    text = models.TextField()
-    short_text = models.TextField()
+    html = models.TextField()
+    short_html = models.TextField()
     anchor = models.IntegerField()
     anchor = models.IntegerField()
-    book = models.ForeignKey(Book)
+    book = models.ForeignKey(Book, related_name='fragments')
     
     objects = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)
     
     objects = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)