X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/ee1f5f9be7d4e2bb843c01a118cc090ab97e6cc6..ae0e673a17c3edcdca910fafb84eeff9dfe7b588:/librarian/epub.py diff --git a/librarian/epub.py b/librarian/epub.py index a0d9742..c1e3f4e 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -7,41 +7,15 @@ from __future__ import with_statement import os import os.path -import shutil -import sys from copy import deepcopy from lxml import etree import zipfile -from librarian import XMLNamespace, RDFNS, DCNS, WLNS, XHTMLNS, NoDublinCore -from librarian.parser import WLDocument - -#TODO: shouldn't be repeated here -NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/") -OPFNS = XMLNamespace("http://www.idpf.org/2007/opf") - - -class DocProvider(object): - class DoesNotExist(Exception): - pass - - def by_slug(self, slug): - raise NotImplemented - - def __getitem__(self, slug): - return self.by_slug(slug) - - def by_uri(self, uri): - return self.by_slug(uri.rsplit('/', 1)[1]) - - -class DirDocProvider(DocProvider): - def __init__(self, dir): - self.dir = dir - self.files = {} +import sys +sys.path.append('..') # for running from working copy - def by_slug(self, slug): - return open(os.path.join(self.dir, '%s.xml' % slug)) +from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, NoDublinCore +from librarian.dcparser import BookInfo def inner_xml(node): @@ -63,7 +37,6 @@ def set_inner_xml(node, text): xyz """ - p = etree.fromstring('%s' % text) node.text = p.text node[:] = p[:] @@ -103,12 +76,11 @@ def replace_characters(node): def replace_chars(text): if text is None: return None - return text.replace("&", "&")\ - .replace("---", "—")\ - .replace("--", "–")\ - .replace(",,", "„")\ - .replace('"', "”")\ - .replace("'", "’") + return text.replace("---", u"\u2014")\ + .replace("--", u"\u2013")\ + .replace(",,", u"\u201E")\ + .replace('"', u"\u201D")\ + .replace("'", u"\u2019") if node.tag == 'extra': node.clear() else: @@ -274,11 +246,12 @@ def transform_chunk(chunk_xml, chunk_no, annotations): return output_html, toc -def transform(provider, slug, output_file): +def transform(provider, slug, output_file=None, output_dir=None): """ produces an epub provider is a DocProvider - output_file should be filelike object + either output_file (a file-like object) or output_dir (path to file/dir) should be specified + if output_dir is specified, file will be written to //.epub """ def transform_file(input_xml, chunk_counter=1, first=True): @@ -329,6 +302,24 @@ def transform(provider, slug, output_file): return toc, chunk_counter + # read metadata from the first file + input_xml = etree.parse(provider[slug]) + metadata = input_xml.find('.//'+RDFNS('Description')) + if metadata is None: + raise NoDublinCore('Document has no DublinCore - which is required.') + book_info = BookInfo.from_element(input_xml) + metadata = etree.ElementTree(metadata) + + # if output to dir, create the file + if output_dir is not None: + author = unicode(book_info.author) + author_dir = os.path.join(output_dir, author) + try: + os.makedirs(author_dir) + except OSError: + pass + output_file = open(os.path.join(author_dir, '%s.epub' % slug), 'w') + zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) @@ -346,12 +337,6 @@ def transform(provider, slug, output_file): for fname in 'style.css', 'logo_wolnelektury.png': zip.write(res(fname), os.path.join('OPS', fname)) - # metadata from first file - input_xml = etree.parse(provider[slug]) - metadata = input_xml.find('.//'+RDFNS('Description')) - if metadata is None: - raise NoDublinCore('Document has no DublinCore - which is required.') - metadata = etree.ElementTree(metadata) opf = xslt(metadata, res('xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) spine = opf.find('.//' + OPFNS('spine')) @@ -402,6 +387,8 @@ def transform(provider, slug, output_file): if __name__ == '__main__': + from librarian import DirDocProvider + if len(sys.argv) < 2: print >> sys.stderr, 'Usage: python epub.py ' sys.exit(1) @@ -409,7 +396,6 @@ if __name__ == '__main__': main_input = sys.argv[1] basepath, ext = os.path.splitext(main_input) path, slug = os.path.realpath(basepath).rsplit('/', 1) - output = basepath + '.epub' provider = DirDocProvider(path) - transform(provider, slug, open(output, 'w')) + transform(provider, slug, output_dir=path)