from newtagging.models import TagBase
from newtagging import managers
+from librarian import html
+
TAG_CATEGORIES = (
('author', _('author')),
return bool(self.html_file)
has_html_file.short_description = 'HTML'
has_html_file.boolean = True
-
- def save(self, **kwargs):
- try:
- from bin import book2html
- from os.path import splitext, basename
- from tempfile import NamedTemporaryFile
-
- html_file = NamedTemporaryFile()
- book2html.transform(self.xml_file.path, html_file)
-
- html_filename = '%s.html' % splitext(basename(self.xml_file.path))[0]
- self.html_file.save(html_filename, File(html_file), save=False)
- except ValueError:
- pass
-
- book = super(Book, self).save(**kwargs)
+
+    @staticmethod
+    def from_xml_file(xml_file):
+        """Build a Book, its tags and its fragments from an XML file.
+
+        ``xml_file`` is a filesystem path: it is passed to
+        ``dcparser.parse`` and opened for reading so that its content can be
+        stored in ``book.xml_file``.
+
+        Steps performed:
+
+        1. Parse the metadata and create a Book (title + slug).
+        2. Attach one tag per 'kind', 'genre', 'author' and 'epoch'.
+        3. Store the XML, transform it to HTML and store the HTML.
+        4. Create a Fragment for every closed fragment found in the HTML,
+           tagged with the book's tags plus the fragment's own themes.
+        5. Add all collected themes to the book's tags and save it.
+
+        Returns the saved Book instance.
+        """
+        from tempfile import NamedTemporaryFile
+        from slughifi import slughifi
+        import dcparser
+
+        book_info = dcparser.parse(xml_file)
+        book = Book(title=book_info.title, slug=slughifi(book_info.title))
+        # Saved before any tags or files are attached to it.
+        book.save()
+
+        book_tags = []
+        for category in ('kind', 'genre', 'author', 'epoch'):
+            tag_name = getattr(book_info, category)
+            tag_sort_key = tag_name
+            if category == 'author':
+                # The author value exposes first_names / last_name: sort by
+                # last name, display as "First Names Last".
+                tag_sort_key = tag_name.last_name
+                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
+            tag, created = Tag.objects.get_or_create(name=tag_name,
+                slug=slughifi(tag_name), sort_key=slughifi(tag_sort_key), category=category)
+            # NOTE(review): get_or_create already persists a newly created
+            # tag; this extra save looks redundant - confirm before removing.
+            tag.save()
+            book_tags.append(tag)
+        book.tags = book_tags
+
+        # Save XML and HTML files. Source handles are closed explicitly once
+        # their content has been handed to the file fields.
+        xml_source = open(xml_file)
+        try:
+            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
+        finally:
+            xml_source.close()
+
+        html_file = NamedTemporaryFile()
+        try:
+            html.transform(book.xml_file.path, html_file)
+            book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
+        finally:
+            # Closing a NamedTemporaryFile also removes it from disk.
+            html_file.close()
+
+        # Extract fragments. Only closed fragments are stored;
+        # open_fragments is returned by the extractor but not used here.
+        closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
+        book_themes = set()
+        for fragment in closed_fragments.values():
+            fragment_html = fragment.to_string()
+            new_fragment = Fragment(html=fragment_html, short_html=fragment_html,
+                anchor=fragment.id, book=book)
+
+            # Skip blank names (e.g. from a trailing comma) so that no theme
+            # tag with an empty name and slug gets created.
+            theme_names = [name for name in
+                           (s.strip() for s in fragment.themes.split(','))
+                           if name]
+            themes = []
+            for theme_name in theme_names:
+                tag, created = Tag.objects.get_or_create(name=theme_name,
+                    slug=slughifi(theme_name), sort_key=slughifi(theme_name), category='theme')
+                tag.save()
+                themes.append(tag)
+            # The fragment is saved before its tags are assigned.
+            new_fragment.save()
+            new_fragment.tags = list(book.tags) + themes
+            book_themes.update(themes)
+
+        book.tags = list(book.tags) + list(book_themes)
+        # Model.save() returns None, so return the instance itself.
+        book.save()
+        return book
@permalink
def get_absolute_url(self):
class Fragment(models.Model):
- text = models.TextField()
- short_text = models.TextField()
+ html = models.TextField()
+ short_html = models.TextField()
anchor = models.IntegerField()
- book = models.ForeignKey(Book)
+ book = models.ForeignKey(Book, related_name='fragments')
objects = managers.ModelTaggedItemManager(Tag)
tags = managers.TagDescriptor(Tag)