X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/ea8925059c81175c3a432a006779764d65308ad8..3af72840ecad9b3122fdcfa6a4c2946f6113eac6:/apps/catalogue/models.py diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index a7e04f1a2..4ef0a14e6 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -17,7 +17,7 @@ from newtagging.models import TagBase, tags_updated from newtagging import managers from catalogue.fields import JSONField -from librarian import html, dcparser +from librarian import dcparser, html, epub, NoDublinCore from mutagen import id3 @@ -207,7 +207,7 @@ class Book(models.Model): def __unicode__(self): return self.title - def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True): + def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs): if reset_short_html: # Reset _short_html during save update = {} @@ -332,6 +332,10 @@ class Book(models.Model): from tempfile import NamedTemporaryFile from slughifi import slughifi from markupstring import MarkupString + from hashlib import sha1 + from django.core.files.base import ContentFile + from django.core.files.storage import default_storage + from StringIO import StringIO # Read book metadata book_base, book_slug = book_info.url.rsplit('/', 1) @@ -400,28 +404,42 @@ class Book(models.Model): if html.transform(book.xml_file.path, html_file, parse_dublincore=False): book.html_file.save('%s.html' % book.slug, File(html_file), save=False) + # Create EPUB + epub_file = StringIO() + try: + epub.transform(book.xml_file, epub_file) + book.epub_file.save('%s.epub' % book.slug, ContentFile(epub_file.getvalue()), save=False) + FileRecord(slug=book.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save() + except NoDublinCore: + pass + # Extract fragments closed_fragments, open_fragments = html.extract_fragments(book.html_file.path) for fragment in closed_fragments.values(): - text = fragment.to_string() - short_text = '' - if (len(MarkupString(text)) > 240): - short_text = unicode(MarkupString(text)[:160]) - new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book, - defaults={'text': text, 'short_text': short_text}) - try: theme_names = [s.strip() for s in fragment.themes.split(',')] except AttributeError: continue themes = [] for theme_name in theme_names: + if not theme_name: + continue tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme') if created: tag.name = theme_name tag.sort_key = slughifi(theme_name) tag.save() themes.append(tag) + if not themes: + continue + + text = fragment.to_string() + short_text = '' + if (len(MarkupString(text)) > 240): + short_text = unicode(MarkupString(text)[:160]) + new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book, + defaults={'text': text, 'short_text': short_text}) + new_fragment.save() new_fragment.tags = set(book_tags + themes + [book_tag]) @@ -538,6 +556,21 @@ class BookStub(models.Model): return self.title +class FileRecord(models.Model): + slug = models.SlugField(_('slug'), max_length=120, db_index=True) + type = models.CharField(_('type'), max_length=20, db_index=True) + sha1 = models.CharField(_('sha-1 hash'), max_length=40) + time = models.DateTimeField(_('time'), auto_now_add=True) + + class Meta: + ordering = ('-time','-slug', '-type') + verbose_name = _('file record') + verbose_name_plural = _('file records') + + def __unicode__(self): + return "%s %s.%s" % (self.sha1, self.slug, self.type) + + def _tags_updated_handler(sender, affected_tags, **kwargs): # reset tag global counter Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None)