* lxml <http://codespeak.net/lxml/>, version 2.2 or later
* additional PDF converter dependencies:
- * XeTeX
+ * XeTeX with support for the Polish language
* TeXML <http://getfo.org/texml/>
+ * recommended: morefloats LaTeX package, version 1.0c or later,
+ for compiling documents with many motifs in one paragraph.
+ <http://www.ctan.org/tex-archive/help/Catalogue/entries/morefloats.html>
Installation
return output_html, toc, chars
-def transform(provider, slug, output_file=None, output_dir=None, make_dir=False):
+def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False):
""" produces a EPUB file
provider: a DocProvider
return toc, chunk_counter, chars
# read metadata from the first file
- input_xml = etree.parse(provider[slug])
+ if file_path:
+ if slug:
+ raise ValueError('slug or file_path should be specified, not both')
+ f = open(file_path, 'r')
+ input_xml = etree.parse(f)
+ f.close()
+ else:
+ if not slug:
+ raise ValueError('either slug or file_path should be specified')
+ input_xml = etree.parse(provider[slug])
+
metadata = input_xml.find('.//'+RDFNS('Description'))
if metadata is None:
raise NoDublinCore('Document has no DublinCore - which is required.')
os.makedirs(output_dir)
except OSError:
pass
- output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w')
+ if slug:
+ output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w')
+ else:
+ output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w')
zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
- subprocess.check_call(['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)])
+ optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)]
+ if verbose:
+ print "Running font-optimizer"
+ subprocess.check_call(optimizer_call)
+ else:
+ subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
rmtree(tmpdir)
os.chdir(cwd)
return p == 0
-def transform(provider, slug, output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None):
+def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None):
""" produces a PDF file with XeLaTeX
provider: a DocProvider
slug: slug of file to process, available by provider
+ file_path can be provided instead of a slug
output_file: file-like object or path to output file
output_dir: path to directory to save output file to; either this or output_file must be present
make_dir: writes output to <output_dir>/<author>/<slug>.pdf istead of <output_dir>/<slug>.pdf
# Parse XSLT
try:
- document = load_including_children(provider, slug)
+ if file_path:
+ if slug:
+ raise ValueError('slug or file_path should be specified, not both')
+ document = load_including_children(provider, file_path=file_path)
+ else:
+ if not slug:
+ raise ValueError('either slug or file_path should be specified')
+ document = load_including_children(provider, slug=slug)
- # check for latex packages
- if not package_available('morefloats', 'maxfloats=19', verbose=verbose):
+ # check for LaTeX packages
+ if not package_available('morefloats', 'maxfloats=19'):
+ # using old morefloats or none at all
document.edoc.getroot().set('old-morefloats', 'yes')
- print >> sys.stderr, """
-==============================================================================
-LaTeX `morefloats' package is older than v.1.0c or not available at all.
-Some documents with many motifs in long stanzas or paragraphs may not compile.
-=============================================================================="""
# hack the tree
move_motifs_inside(document.edoc)
os.makedirs(output_dir)
except OSError:
pass
- output_path = os.path.join(output_dir, '%s.pdf' % slug)
+ if slug:
+ output_path = os.path.join(output_dir, '%s.pdf' % slug)
+ else:
+ output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf')
shutil.move(pdf_path, output_path)
else:
if hasattr(output_file, 'write'):
raise ParseError(e)
-def load_including_children(provider, slug=None, uri=None):
+def load_including_children(provider, slug=None, uri=None, file_path=None):
""" makes one big xml file with children inserted at end
either slug or uri must be provided
"""
f = provider.by_uri(uri)
elif slug:
f = provider[slug]
+ elif file_path:
+ f = open(file_path, 'r')
else:
- raise ValueError('Neither slug nor URI provided for a book.')
+ raise ValueError('Neither slug, URI nor file path provided for a book.')
document = WLDocument.from_file(f, True,
parse_dublincore=True,
preserve_lines=False)
+ f.close()
+
for child_uri in document.book_info.parts:
child = load_including_children(provider, uri=child_uri)
document.edoc.getroot().append(child.edoc.getroot())
import os.path
import optparse
-from librarian import epub, DirDocProvider
+from librarian import epub, DirDocProvider, ParseError
if __name__ == '__main__':
# Parse commandline arguments
- usage = """Usage: %prog [options] SOURCE
- Convert SOURCE file to EPUB format."""
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to EPUB format."""
parser = optparse.OptionParser(usage=usage)
help='print status messages to stdout')
parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
help='create a directory for author and put the PDF in it')
+ parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+ help='specifies the directory for output')
options, input_filenames = parser.parse_args()
- if len(input_filenames) != 1:
+ if len(input_filenames) < 1:
parser.print_help()
exit(1)
# Do some real work
- main_input = input_filenames[0]
- if options.verbose:
- print main_input
- basepath, ext = os.path.splitext(main_input)
- path, slug = os.path.realpath(basepath).rsplit('/', 1)
- provider = DirDocProvider(path)
- epub.transform(provider, slug, output_dir=path, make_dir=options.make_dir)
+ try:
+ for main_input in input_filenames:
+ if options.verbose:
+ print main_input
+ path, fname = os.path.realpath(main_input).rsplit('/', 1)
+ provider = DirDocProvider(path)
+
+ output_dir = output_file = None
+ if options.output_dir:
+ output_dir = options.output_dir
+ elif options.output_file:
+ output_file = options.output_file
+ else:
+ output_dir = path
+
+ epub.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir)
+ except ParseError, e:
+ print '%(file)s:%(name)s:%(message)s' % {
+ 'file': main_input,
+ 'name': e.__class__.__name__,
+ 'message': e.message
+ }
+
\ No newline at end of file
from librarian import pdf, DirDocProvider, ParseError
if __name__ == '__main__':
- usage = "usage: %prog [options] <input_file> [output_file]"
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to PDF format."""
+
parser = OptionParser(usage)
parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
help='make lots of noise and revert to default interaction in LaTeX')
help='create a directory for author and put the PDF in it')
parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE',
help='path to save the intermediary LaTeX file to')
+ parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+ help='specifies the directory for output')
(options, args) = parser.parse_args()
- if not 1 <= len(args) <= 2:
+ if len(args) < 1:
parser.print_help()
exit(1)
- main_input = args[0]
- basepath, ext = os.path.splitext(main_input)
- path, slug = os.path.realpath(basepath).rsplit('/', 1)
- provider = DirDocProvider(path)
try:
- if len(args) > 1:
- pdf.transform(provider, slug, output_file=args[1], verbose=options.verbose, save_tex=options.save_tex)
- else:
- pdf.transform(provider, slug, output_dir=path, make_dir=options.make_dir, verbose=options.verbose, save_tex=options.save_tex)
+ for main_input in args:
+ if options.verbose:
+ print main_input
+ path, fname = os.path.realpath(main_input).rsplit('/', 1)
+ provider = DirDocProvider(path)
+
+ output_dir = output_file = None
+ if options.output_dir:
+ output_dir = options.output_dir
+ elif options.output_file:
+ output_file = options.output_file
+ else:
+ output_dir = path
+
+ pdf.transform(provider, file_path=main_input, output_file=output_file, output_dir=output_dir, make_dir=options.make_dir, verbose=options.verbose, save_tex=options.save_tex)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
'file': main_input,