X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/9c31d87a67dca4861c1e609001a6807eba9b6967..3199cbfa76c763b4082b5a8d8f971d74f67e27c0:/catalogue/models.py diff --git a/catalogue/models.py b/catalogue/models.py index 483741be8..f3f6bc2d5 100644 --- a/catalogue/models.py +++ b/catalogue/models.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- from django.db import models -from django.db.models import permalink +from django.db.models import permalink, Q from django.utils.translation import ugettext_lazy as _ from django.contrib.auth.models import User +from django.core.files import File +from django.template.loader import render_to_string +from django.utils.safestring import mark_safe from newtagging.models import TagBase from newtagging import managers +from librarian import html, dcparser + TAG_CATEGORIES = ( ('author', _('author')), @@ -39,12 +44,12 @@ class Tag(TagBase): def has_description(self): return len(self.description) > 0 - has_description.short_description = _('Has description') + has_description.short_description = _('description') has_description.boolean = True @permalink def get_absolute_url(self): - return ('catalogue.views.tagged_book_list', [self.slug]) + return ('catalogue.views.tagged_object_list', [self.slug]) class Meta: ordering = ('sort_key',) @@ -68,18 +73,43 @@ class Book(models.Model): slug = models.SlugField(_('slug'), unique=True, db_index=True) description = models.TextField(_('description'), blank=True) created_at = models.DateTimeField(_('creation date'), auto_now=True) + _short_html = models.TextField(_('short HTML'), editable=False) # Formats + xml_file = models.FileField(_('XML file'), upload_to='books/xml', blank=True) pdf_file = models.FileField(_('PDF file'), upload_to='books/pdf', blank=True) odt_file = models.FileField(_('ODT file'), upload_to='books/odt', blank=True) html_file = models.FileField(_('HTML file'), upload_to='books/html', blank=True) - objects = managers.ModelTaggedItemManager(Tag) + parent = 
models.ForeignKey('self', blank=True, null=True, related_name='children') + + objects = models.Manager() + tagged = managers.ModelTaggedItemManager(Tag) tags = managers.TagDescriptor(Tag) + def short_html(self): + if len(self._short_html): + return mark_safe(self._short_html) + else: + tags = self.tags.filter(~Q(category__in=('set', 'theme'))) + tags = [u'%s' % (tag.get_absolute_url(), tag.name) for tag in tags] + + formats = [] + if self.html_file: + formats.append(u'Czytaj online' % self.html_file.url) + if self.pdf_file: + formats.append(u'Plik PDF' % self.pdf_file.url) + if self.odt_file: + formats.append(u'Plik ODT' % self.odt_file.url) + + self._short_html = render_to_string('catalogue/book_short.html', + {'book': self, 'tags': tags, 'formats': formats}) + self.save() + return mark_safe(self._short_html) + def has_description(self): return len(self.description) > 0 - has_description.short_description = _('Has description') + has_description.short_description = _('description') has_description.boolean = True def has_pdf_file(self): @@ -96,6 +126,69 @@ class Book(models.Model): return bool(self.html_file) has_html_file.short_description = 'HTML' has_html_file.boolean = True + + @staticmethod + def from_xml_file(xml_file): + from tempfile import NamedTemporaryFile + from slughifi import slughifi + from markupstring import MarkupString + + # Read book metadata + book_info = dcparser.parse(xml_file) + book = Book(title=book_info.title, slug=slughifi(book_info.title)) + book.save() + + book_tags = [] + for category in ('kind', 'genre', 'author', 'epoch'): + tag_name = getattr(book_info, category) + tag_sort_key = tag_name + if category == 'author': + tag_sort_key = tag_name.last_name + tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name + tag, created = Tag.objects.get_or_create(name=tag_name, + slug=slughifi(tag_name), sort_key=slughifi(tag_sort_key), category=category) + tag.save() + book_tags.append(tag) + book.tags = book_tags + + if 
hasattr(book_info, 'parts'): + for part_url in book_info.parts: + base, slug = part_url.rsplit('/', 1) + child_book = Book.objects.get(slug=slug) + child_book.parent = book + child_book.save() + + # Save XML and HTML files + book.xml_file.save('%s.xml' % book.slug, File(file(xml_file)), save=False) + + html_file = NamedTemporaryFile() + html.transform(book.xml_file.path, html_file) + book.html_file.save('%s.html' % book.slug, File(html_file), save=False) + + # Extract fragments + closed_fragments, open_fragments = html.extract_fragments(book.html_file.path) + book_themes = [] + for fragment in closed_fragments.values(): + text = fragment.to_string() + short_text = '' + if (len(MarkupString(text)) > 240): + short_text = MarkupString(text)[:160] + new_fragment = Fragment(text=text, short_text=short_text, anchor=fragment.id, book=book) + + theme_names = [s.strip() for s in fragment.themes.split(',')] + themes = [] + for theme_name in theme_names: + tag, created = Tag.objects.get_or_create(name=theme_name, + slug=slughifi(theme_name), sort_key=slughifi(theme_name), category='theme') + tag.save() + themes.append(tag) + new_fragment.save() + new_fragment.tags = list(book.tags) + themes + book_themes += themes + + book_themes = set(book_themes) + book.tags = list(book.tags) + list(book_themes) + return book.save() @permalink def get_absolute_url(self): @@ -110,27 +203,31 @@ class Book(models.Model): return self.title -# class Fragment(models.Model): -# id = models.IntegerField(primary_key=True) -# text = models.TextField(blank=True) -# start_paragraph = models.IntegerField(null=True, blank=True) -# book_id = models.IntegerField(null=True, blank=True) -# class Meta: -# db_table = u'fragment' - - -# class Inflections(models.Model): -# word = models.CharField(max_length=120, primary_key=True) -# cases = models.TextField() # This field type is a guess. 
class Fragment(models.Model):
    """An excerpt of a book, addressed by an anchor and carrying its own tags.

    Fragments are ordered by book and then by anchor (see ``Meta``).
    """
    text = models.TextField()
    # Abbreviated form of ``text``; not editable through forms/admin.
    short_text = models.TextField(editable=False)
    # Cached output of short_html(); empty until the first render.
    _short_html = models.TextField(editable=False)
    anchor = models.IntegerField()
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    def short_html(self):
        """Return the short HTML snippet for this fragment, marked safe.

        The snippet is rendered from ``catalogue/fragment_short.html`` on
        first use, stored in ``_short_html`` and persisted with ``save()``;
        later calls return the stored value without rendering again.
        """
        # Guard clause: a non-empty cache short-circuits rendering. The
        # truthiness test also tolerates a None value, unlike len().
        if self._short_html:
            return mark_safe(self._short_html)

        # Each author link needs two placeholders (URL and name). A
        # single-placeholder format given this two-item tuple raises
        # "TypeError: not all arguments converted during string formatting".
        # NOTE(review): tag.name is interpolated unescaped and the rendered
        # result is marked safe -- confirm tag names are trusted input.
        book_authors = [
            u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
            for tag in self.book.tags
            if tag.category == 'author'
        ]

        self._short_html = render_to_string(
            'catalogue/fragment_short.html',
            {'fragment': self, 'book': self.book, 'book_authors': book_authors})
        # Persist the cache so the template is rendered only once per fragment.
        self.save()
        return mark_safe(self._short_html)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')