from newtagging import managers
from catalogue.fields import JSONField
-from librarian import html, dcparser
+from librarian import dcparser, html, epub, NoDublinCore
from mutagen import id3
# Unicode representation of the object: returns its ``title`` attribute.
def __unicode__(self):
return self.title
- def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True):
+ def save(self, force_insert=False, force_update=False, reset_short_html=True, refresh_mp3=True, **kwargs):
if reset_short_html:
# Reset _short_html during save
update = {}
from tempfile import NamedTemporaryFile
from slughifi import slughifi
from markupstring import MarkupString
+ from hashlib import sha1
+ from django.core.files.base import ContentFile
+ from django.core.files.storage import default_storage
+ from StringIO import StringIO
# Read book metadata
book_base, book_slug = book_info.url.rsplit('/', 1)
if html.transform(book.xml_file.path, html_file, parse_dublincore=False):
book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
+ # Create EPUB
+ epub_file = StringIO()
+ try:
+ epub.transform(book.xml_file, epub_file)
+ book.epub_file.save('%s.epub' % book.slug, ContentFile(epub_file.getvalue()), save=False)
+ FileRecord(slug=book.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
+ except NoDublinCore:
+ pass
+
# Extract fragments
closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
for fragment in closed_fragments.values():
- text = fragment.to_string()
- short_text = ''
- if (len(MarkupString(text)) > 240):
- short_text = unicode(MarkupString(text)[:160])
- new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
- defaults={'text': text, 'short_text': short_text})
-
try:
theme_names = [s.strip() for s in fragment.themes.split(',')]
except AttributeError:
continue
themes = []
for theme_name in theme_names:
+ if not theme_name:
+ continue
tag, created = Tag.objects.get_or_create(slug=slughifi(theme_name), category='theme')
if created:
tag.name = theme_name
tag.sort_key = slughifi(theme_name)
tag.save()
themes.append(tag)
+ if not themes:
+ continue
+
+ text = fragment.to_string()
+ short_text = ''
+ if (len(MarkupString(text)) > 240):
+ short_text = unicode(MarkupString(text)[:160])
+ new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
+ defaults={'text': text, 'short_text': short_text})
+
new_fragment.save()
new_fragment.tags = set(book_tags + themes + [book_tag])
return self.title
+class FileRecord(models.Model):
+ # Log entry describing one generated file: which slug it belongs to, what
+ # type of file it is, its SHA-1 hash and when the entry was created.
+ # The book import code in this module saves one such entry (type='epub')
+ # with the hex SHA-1 of the freshly generated EPUB content.
+
+ # Slug of the item the file was generated for (the import code passes book.slug).
+ slug = models.SlugField(_('slug'), max_length=120, db_index=True)
+ # Short file-type label; the import code uses 'epub'.
+ type = models.CharField(_('type'), max_length=20, db_index=True)
+ # SHA-1 hash as a hex string; 40 characters is the length of a SHA-1 hexdigest.
+ sha1 = models.CharField(_('sha-1 hash'), max_length=40)
+ # Creation timestamp, filled in automatically on first save (auto_now_add).
+ time = models.DateTimeField(_('time'), auto_now_add=True)
+
+ class Meta:
+ # Default ordering: newest records first, ties broken by slug and then
+ # type, both descending.
+ ordering = ('-time','-slug', '-type')
+ verbose_name = _('file record')
+ verbose_name_plural = _('file records')
+
+ def __unicode__(self):
+ # Formatted as "<sha1> <slug>.<type>".
+ return "%s %s.%s" % (self.sha1, self.slug, self.type)
+
def _tags_updated_handler(sender, affected_tags, **kwargs):
# reset tag global counter
Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None)