X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/113d217a051b5644061876c64b27da1281e49041..9ba3ccd2ad2748ed4a1b67187b8e1ac273dc8ee8:/catalogue/models.py?ds=sidebyside diff --git a/catalogue/models.py b/catalogue/models.py index 71c89a234..1b5a32ff9 100644 --- a/catalogue/models.py +++ b/catalogue/models.py @@ -8,6 +8,8 @@ from django.core.files import File from newtagging.models import TagBase from newtagging import managers +from librarian import html + TAG_CATEGORIES = ( ('author', _('author')), @@ -98,22 +100,58 @@ class Book(models.Model): return bool(self.html_file) has_html_file.short_description = 'HTML' has_html_file.boolean = True - - def save(self, **kwargs): - try: - from bin import book2html - from os.path import splitext, basename - from tempfile import NamedTemporaryFile - - html_file = NamedTemporaryFile() - book2html.transform(self.xml_file.path, html_file) - - html_filename = '%s.html' % splitext(basename(self.xml_file.path))[0] - self.html_file.save(html_filename, File(html_file), save=False) - except ValueError: - pass - - book = super(Book, self).save(**kwargs) + + @staticmethod + def from_xml_file(xml_file): + from tempfile import NamedTemporaryFile + from slughifi import slughifi + import dcparser + + book_info = dcparser.parse(xml_file) + book = Book(title=book_info.title, slug=slughifi(book_info.title)) + book.save() + + book_tags = [] + for category in ('kind', 'genre', 'author', 'epoch'): + tag_name = getattr(book_info, category) + tag_sort_key = tag_name + if category == 'author': + tag_sort_key = tag_name.last_name + tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name + tag, created = Tag.objects.get_or_create(name=tag_name, + slug=slughifi(tag_name), sort_key=slughifi(tag_sort_key), category=category) + tag.save() + book_tags.append(tag) + book.tags = book_tags + + # Save XML and HTML files + book.xml_file.save('%s.xml' % book.slug, File(file(xml_file)), save=False) + + html_file = NamedTemporaryFile() + html.transform(book.xml_file.path, html_file) + book.html_file.save('%s.html' % book.slug, File(html_file), save=False) + + # Extract fragments + closed_fragments, open_fragments = html.extract_fragments(book.html_file.path) + book_themes = [] + for fragment in closed_fragments.values(): + new_fragment = Fragment(html=fragment.to_string(), short_html=fragment.to_string(), + anchor=fragment.id, book=book) + + theme_names = [s.strip() for s in fragment.themes.split(',')] + themes = [] + for theme_name in theme_names: + tag, created = Tag.objects.get_or_create(name=theme_name, + slug=slughifi(theme_name), sort_key=slughifi(theme_name), category='theme') + tag.save() + themes.append(tag) + new_fragment.save() + new_fragment.tags = list(book.tags) + themes + book_themes += themes + + book_themes = set(book_themes) + book.tags = list(book.tags) + list(book_themes) + return book.save() @permalink def get_absolute_url(self): @@ -129,10 +167,10 @@ class Book(models.Model): class Fragment(models.Model): - text = models.TextField() - short_text = models.TextField() + html = models.TextField() + short_html = models.TextField() anchor = models.IntegerField() - book = models.ForeignKey(Book) + book = models.ForeignKey(Book, related_name='fragments') objects = managers.ModelTaggedItemManager(Tag) tags = managers.TagDescriptor(Tag)