From ccd5654568bb362a15185bcf0c1dfa5cb9f3ace7 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Wed, 10 Nov 2010 12:42:18 +0100 Subject: [PATCH] removed the annoying morefloats notice, updated README, added more options in book2{pdf,epub} some minor fixes --- README.md | 5 ++++- librarian/epub.py | 26 ++++++++++++++++++++++---- librarian/pdf.py | 35 +++++++++++++++++++++++------------ scripts/book2epub | 42 +++++++++++++++++++++++++++++++----------- scripts/book2pdf | 33 +++++++++++++++++++++++---------- 5 files changed, 103 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 0867419..7c27d2d 100755 --- a/README.md +++ b/README.md @@ -45,8 +45,11 @@ Dependencies * lxml , version 2.2 or later * additional PDF converter dependencies: - * XeTeX + * XeTeX with support for Polish language * TeXML + * recommended: morefloats LaTeX package, version >=1.0c + for dealing with documents with many motifs in one paragraph. + Installation diff --git a/librarian/epub.py b/librarian/epub.py index 3ee487a..ef07c3f 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -262,7 +262,7 @@ def transform_chunk(chunk_xml, chunk_no, annotations): return output_html, toc, chars -def transform(provider, slug, output_file=None, output_dir=None, make_dir=False): +def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False): """ produces a EPUB file provider: a DocProvider @@ -328,7 +328,17 @@ def transform(provider, slug, output_file=None, output_dir=None, make_dir=False) return toc, chunk_counter, chars # read metadata from the first file - input_xml = etree.parse(provider[slug]) + if file_path: + if slug: + raise ValueError('slug or file_path should be specified, not both') + f = open(file_path, 'r') + input_xml = etree.parse(f) + f.close() + else: + if not slug: + raise ValueError('either slug or file_path should be specified') + input_xml = etree.parse(provider[slug]) + metadata = input_xml.find('.//'+RDFNS('Description')) if metadata is None: raise NoDublinCore('Document has no DublinCore - which is required.') @@ -344,7 +354,10 @@ def transform(provider, slug, output_file=None, output_dir=None, make_dir=False) os.makedirs(output_dir) except OSError: pass - output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w') + if slug: + output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w') + else: + output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w') zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) @@ -404,7 +417,12 @@ def transform(provider, slug, output_file=None, output_dir=None, make_dir=False) os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': - subprocess.check_call(['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)]) + optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)] + if verbose: + print "Running font-optimizer" + subprocess.check_call(optimizer_call) + else: + subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) rmtree(tmpdir) os.chdir(cwd) diff --git a/librarian/pdf.py b/librarian/pdf.py index 0b80a2d..a868a2b 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -153,11 +153,12 @@ def package_available(package, args='', verbose=False): return p == 0 -def transform(provider, slug, output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None): +def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None): """ produces a PDF file with XeLaTeX provider: a DocProvider slug: slug of file to process, available by provider + file_path can be provided instead of a slug output_file: file-like object or path to output file output_dir: path to directory to save output file to; either this or output_file must be present make_dir: writes output to //.pdf istead of /.pdf @@ -167,16 +168,19 @@ def transform(provider, slug, output_file=None, output_dir=None, make_dir=False, # Parse XSLT try: - document = load_including_children(provider, slug) + if file_path: + if slug: + raise ValueError('slug or file_path should be specified, not both') + document = load_including_children(provider, file_path=file_path) + else: + if not slug: + raise ValueError('either slug or file_path should be specified') + document = load_including_children(provider, slug=slug) - # check for latex packages - if not package_available('morefloats', 'maxfloats=19', verbose=verbose): + # check for LaTeX packages + if not package_available('morefloats', 'maxfloats=19'): + # using old morefloats or none at all document.edoc.getroot().set('old-morefloats', 'yes') - print >> sys.stderr, """ -============================================================================== -LaTeX `morefloats' package is older than v.1.0c or not available at all. -Some documents with many motifs in long stanzas or paragraphs may not compile. -==============================================================================""" # hack the tree move_motifs_inside(document.edoc) @@ -229,7 +233,10 @@ Some documents with many motifs in long stanzas or paragraphs may not compile. os.makedirs(output_dir) except OSError: pass - output_path = os.path.join(output_dir, '%s.pdf' % slug) + if slug: + output_path = os.path.join(output_dir, '%s.pdf' % slug) + else: + output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf') shutil.move(pdf_path, output_path) else: if hasattr(output_file, 'write'): @@ -246,7 +253,7 @@ Some documents with many motifs in long stanzas or paragraphs may not compile. raise ParseError(e) -def load_including_children(provider, slug=None, uri=None): +def load_including_children(provider, slug=None, uri=None, file_path=None): """ makes one big xml file with children inserted at end either slug or uri must be provided """ @@ -255,13 +262,17 @@ def load_including_children(provider, slug=None, uri=None): f = provider.by_uri(uri) elif slug: f = provider[slug] + elif file_path: + f = open(file_path, 'r') else: - raise ValueError('Neither slug nor URI provided for a book.') + raise ValueError('Neither slug, URI nor file path provided for a book.') document = WLDocument.from_file(f, True, parse_dublincore=True, preserve_lines=False) + f.close() + for child_uri in document.book_info.parts: child = load_including_children(provider, uri=child_uri) document.edoc.getroot().append(child.edoc.getroot()) diff --git a/scripts/book2epub b/scripts/book2epub index 8e713b9..ea87483 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -7,13 +7,13 @@ import os.path import optparse -from librarian import epub, DirDocProvider +from librarian import epub, DirDocProvider, ParseError if __name__ == '__main__': # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE - Convert SOURCE file to EPUB format.""" + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Convert SOURCE files to EPUB format.""" parser = optparse.OptionParser(usage=usage) @@ -21,18 +21,38 @@ if __name__ == '__main__': help='print status messages to stdout') parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, help='create a directory for author and put the PDF in it') + parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', + help='specifies the directory for output') options, input_filenames = parser.parse_args() - if len(input_filenames) != 1: + if len(input_filenames) < 1: parser.print_help() exit(1) # Do some real work - main_input = input_filenames[0] - if options.verbose: - print main_input - basepath, ext = os.path.splitext(main_input) - path, slug = os.path.realpath(basepath).rsplit('/', 1) - provider = DirDocProvider(path) - epub.transform(provider, slug, output_dir=path, make_dir=options.make_dir) + try: + for main_input in input_filenames: + if options.verbose: + print main_input + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + + output_dir = output_file = None + if options.output_dir: + output_dir = options.output_dir + elif options.output_file: + output_file = options.output_file + else: + output_dir = path + + epub.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir) + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e.message + } + \ No newline at end of file diff --git a/scripts/book2pdf b/scripts/book2pdf index 6589746..a1b84f4 100755 --- a/scripts/book2pdf +++ b/scripts/book2pdf @@ -9,7 +9,9 @@ from optparse import OptionParser from librarian import pdf, DirDocProvider, ParseError if __name__ == '__main__': - usage = "usage: %prog [options] [output_file]" + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Convert SOURCE files to EPUB format.""" + parser = OptionParser(usage) parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='make lots of noise and revert to default interaction in LaTeX') @@ -17,21 +19,32 @@ if __name__ == '__main__': help='create a directory for author and put the PDF in it') parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE', help='path to save the intermediary LaTeX file to') + parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', + help='specifies the directory for output') (options, args) = parser.parse_args() - if not 1 <= len(args) <= 2: + if len(args) < 1: parser.print_help() exit(1) - main_input = args[0] - basepath, ext = os.path.splitext(main_input) - path, slug = os.path.realpath(basepath).rsplit('/', 1) - provider = DirDocProvider(path) try: - if len(args) > 1: - pdf.transform(provider, slug, output_file=args[1], verbose=options.verbose, save_tex=options.save_tex) - else: - pdf.transform(provider, slug, output_dir=path, make_dir=options.make_dir, verbose=options.verbose, save_tex=options.save_tex) + for main_input in args: + if options.verbose: + print main_input + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + + output_dir = output_file = None + if options.output_dir: + output_dir = options.output_dir + elif options.output_file: + output_file = options.output_file + else: + output_dir = path + + pdf.transform(provider, file_path=main_input, output_file=output_file, output_dir=output_dir, make_dir=options.make_dir, verbose=options.verbose, save_tex=options.save_tex) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, -- 2.20.1