@staticmethod
def from_xml_file(xml_file):
    """Build and save a Book, its tags and its fragments from an XML file.

    This is a static method of ``Book``.

    ``xml_file`` is a filesystem path: it is passed to ``dcparser.parse``
    and opened for reading to fill ``Book.xml_file``.

    The method:
      1. creates the Book from the parsed title,
      2. attaches one tag per 'kind', 'genre', 'author' and 'epoch',
      3. stores the XML and the HTML produced by ``html.transform``,
      4. creates a Fragment for every closed fragment found in the HTML and
         tags it with the book's tags plus the fragment's themes,
      5. adds all themes found to the book's own tags.

    Returns the saved Book instance.
    """
    from tempfile import NamedTemporaryFile
    from slughifi import slughifi
    import dcparser
    from librarian import html

    book_info = dcparser.parse(xml_file)
    book = Book(title=book_info.title, slug=slughifi(book_info.title))
    # Saved up front so that tags, files and fragments can refer to it.
    book.save()

    book_tags = []
    for category in ('kind', 'genre', 'author', 'epoch'):
        tag_name = getattr(book_info, category)
        tag_sort_key = tag_name
        if category == 'author':
            # The author value exposes first_names/last_name: sort by the
            # last name, display the full name.
            tag_sort_key = tag_name.last_name
            tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
        tag, created = Tag.objects.get_or_create(
            name=tag_name, slug=slughifi(tag_name),
            sort_key=slughifi(tag_sort_key), category=category)
        # NOTE(review): get_or_create() already persists a new tag, so this
        # save() looks redundant -- confirm Tag.save() has no side effects
        # before removing it.
        tag.save()
        book_tags.append(tag)
    book.tags = book_tags

    # Save XML and HTML files; both handles are closed even on failure.
    xml_source = open(xml_file)
    try:
        book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
    finally:
        xml_source.close()

    html_file = NamedTemporaryFile()
    try:
        html.transform(book.xml_file.path, html_file)
        book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
    finally:
        # Closing a NamedTemporaryFile also removes it from disk.
        html_file.close()

    # Extract fragments (only the closed ones are stored; open_fragments
    # is not used).
    closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
    # The book's tags do not change inside the loop, so read them once.
    current_book_tags = list(book.tags)
    book_themes = set()
    for fragment in closed_fragments.values():
        fragment_html = fragment.to_string()
        new_fragment = Fragment(html=fragment_html, short_html=fragment_html,
                                anchor=fragment.id, book=book)

        # Drop blank entries so an empty theme list or a stray comma does
        # not create a theme tag with an empty name.
        theme_names = [s.strip() for s in fragment.themes.split(',')]
        theme_names = [name for name in theme_names if name]

        themes = []
        for theme_name in theme_names:
            tag, created = Tag.objects.get_or_create(
                name=theme_name, slug=slughifi(theme_name),
                sort_key=slughifi(theme_name), category='theme')
            tag.save()
            themes.append(tag)

        # The fragment must be saved before tags can be assigned to it.
        new_fragment.save()
        new_fragment.tags = current_book_tags + themes
        book_themes.update(themes)

    book.tags = current_book_tags + list(book_themes)
    book.save()
    # Return the instance itself: Model.save() returns None.
    return book
class Fragment(models.Model):
    """A piece of a book's HTML, linked to its Book and taggable with Tag."""
    # Full and shortened HTML of the fragment.
    html = models.TextField()
    short_html = models.TextField()
    # Integer anchor of the fragment; together with the book it defines the
    # default ordering (see Meta).
    anchor = models.IntegerField()
    # Reverse accessor on Book is ``book.fragments``.
    book = models.ForeignKey(Book, related_name='fragments')

    objects = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        # Plural label must differ from the singular one.
        verbose_name_plural = _('fragments')