@staticmethod
def from_xml_file(xml_file):
    """Create and save a Book, with its tags and fragments, from an XML file.

    The file is parsed with ``dcparser`` to obtain the title and the
    'kind', 'genre', 'author' and 'epoch' values, each of which becomes a
    Tag on the book.  The XML is then stored in ``book.xml_file``, rendered
    to HTML with ``librarian.html.transform`` and stored in
    ``book.html_file``.  Every closed fragment found in the HTML becomes a
    Fragment tagged with the book's tags plus its own theme tags; all theme
    tags are finally added to the book itself.

    xml_file -- filesystem path of the XML document (it is handed to
                ``dcparser.parse`` and opened for reading).

    Returns the saved Book instance.
    """
    from tempfile import NamedTemporaryFile
    from slughifi import slughifi
    import dcparser

    def get_tag(name, sort_key, category):
        # Fetch the matching Tag, creating it when it does not exist yet.
        tag, created = Tag.objects.get_or_create(name=name,
            slug=slughifi(name), sort_key=slughifi(sort_key), category=category)
        tag.save()
        return tag

    book_info = dcparser.parse(xml_file)
    book = Book(title=book_info.title, slug=slughifi(book_info.title))
    # The book needs a primary key before tags and files can be attached.
    book.save()

    book_tags = []
    for category in ('kind', 'genre', 'author', 'epoch'):
        tag_name = getattr(book_info, category)
        tag_sort_key = tag_name
        if category == 'author':
            # Authors are objects: sort by last name, display the full name.
            tag_sort_key = tag_name.last_name
            tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
        book_tags.append(get_tag(tag_name, tag_sort_key, category))
    book.tags = book_tags

    # Save XML and HTML files
    # Binary mode copies the document byte-for-byte; the handle is closed
    # explicitly instead of being left to the garbage collector.
    source = open(xml_file, 'rb')
    try:
        book.xml_file.save('%s.xml' % book.slug, File(source), save=False)
    finally:
        source.close()

    html_file = NamedTemporaryFile()
    try:
        html.transform(book.xml_file.path, html_file)
        # NOTE(review): assumes transform() writes through this handle and
        # leaves it open.  Flush and rewind so that everything it wrote is
        # on disk and is read from the start when the file is copied.
        html_file.flush()
        html_file.seek(0)
        book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
    finally:
        # Closing a NamedTemporaryFile also removes it from disk.
        html_file.close()

    # Extract fragments (open_fragments is returned by the extractor but
    # not used here).
    closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
    # The book's tags do not change inside the loop, so read them once.
    base_tags = list(book.tags)
    book_themes = set()
    for fragment in closed_fragments.values():
        fragment_html = fragment.to_string()
        new_fragment = Fragment(html=fragment_html, short_html=fragment_html,
            anchor=fragment.id, book=book)

        # Skip empty names (e.g. from a trailing comma) so that no Tag with
        # an empty name and slug is created.
        theme_names = [s.strip() for s in fragment.themes.split(',')]
        themes = [get_tag(name, name, 'theme') for name in theme_names if name]

        new_fragment.save()
        new_fragment.tags = base_tags + themes
        book_themes.update(themes)

    book.tags = base_tags + list(book_themes)
    book.save()
    # Model.save() returns None, so hand back the instance itself.
    return book
class Fragment(models.Model):
    """A piece of a book's HTML, identified by an integer anchor.

    Each fragment belongs to one Book (reachable from the book through the
    ``fragments`` reverse accessor) and can be tagged with Tag objects.
    Fragments are ordered by book, then by anchor.
    """
    # Full and shortened HTML of the fragment.
    html = models.TextField()
    short_html = models.TextField()
    # Integer anchor of the fragment.
    anchor = models.IntegerField()
    book = models.ForeignKey(Book, related_name='fragments')

    # Tagging support provided by newtagging.
    objects = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor')
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')