From 5c55be51e46b296d368b86375f2f4b90ea46c00b Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Wed, 13 Jun 2012 14:09:33 +0200 Subject: [PATCH 1/1] Allow using remote cache for image downloading. Also, DRY in book2* scripts --- librarian/book2anything.py | 141 +++++++++++++++++++++++++++++++++++++ librarian/cover.py | 13 +++- librarian/epub.py | 25 +++---- librarian/mobi.py | 8 +-- librarian/pdf.py | 12 ++-- scripts/book2cover | 43 ++++------- scripts/book2epub | 71 ++++--------------- scripts/book2fb2 | 66 +++-------------- scripts/book2html | 75 +++++--------------- scripts/book2ihtml | 62 ---------------- scripts/book2mobi | 55 +++------------ scripts/book2pdf | 68 ++++-------------- scripts/book2txt | 70 +++++------------- setup.py | 4 +- 14 files changed, 272 insertions(+), 441 deletions(-) create mode 100755 librarian/book2anything.py delete mode 100755 scripts/book2ihtml diff --git a/librarian/book2anything.py b/librarian/book2anything.py new file mode 100755 index 0000000..7660ec7 --- /dev/null +++ b/librarian/book2anything.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from collections import namedtuple +import os.path +import optparse + +from librarian import DirDocProvider, ParseError +from librarian.parser import WLDocument +from librarian.cover import WLCover + + +class Option(object): + """Option for optparse. Use it like `optparse.OptionParser.add_option`.""" + def __init__(self, *names, **options): + self.names = names + self.options = options + + def add(self, parser): + parser.add_option(*self.names, **self.options) + + def name(self): + return self.options['dest'] + + def value(self, options): + return getattr(options, self.name()) + + +class Book2Anything(object): + """A class for creating book2... scripts. + + Subclass it for any format you want to convert to. + """ + format_name = None # Set format name, like "PDF". + ext = None # Set file extension, like "pdf". + uses_cover = False # Can it add a cover? + cover_optional = True # Only relevant if uses_cover + uses_provider = False # Does it need a DocProvider? + transform = None # Transform method. Uses WLDocument.as_{ext} by default. + parser_options = [] # List of Option objects for additional parser args. + transform_options = [] # List of Option objects for additional transform args. + transform_flags = [] # List of Option objects for supported transform flags. + + + @classmethod + def run(cls): + # Parse commandline arguments + usage = """Usage: %%prog [options] SOURCE [SOURCE...] + Convert SOURCE files to %s format.""" % cls.format_name + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', + action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-d', '--make-dir', + action='store_true', dest='make_dir', default=False, + help='create a directory for author and put the output file in it') + parser.add_option('-o', '--output-file', + dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', + dest='output_dir', metavar='DIR', + help='specifies the directory for output') + if cls.uses_cover: + if cls.cover_optional: + parser.add_option('-c', '--with-cover', + action='store_true', dest='with_cover', default=False, + help='create default cover') + parser.add_option('-C', '--image-cache', + dest='image_cache', metavar='URL', + help='prefix for image download cache' + + (' (implies --with-cover)' if cls.cover_optional else '')) + for option in cls.parser_options + cls.transform_options + cls.transform_flags: + option.add(parser) + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + return(1) + + # Prepare additional args for parser. + parser_args = {} + for option in cls.parser_options: + parser_args[option.name()] = option.value(options) + # Prepare additional args for transform method. + transform_args = {} + for option in cls.transform_options: + transform_args[option.name()] = option.value(options) + # Add flags to transform_args, if any. + transform_flags = [flag.name() for flag in cls.transform_flags + if flag.value(options)] + if transform_flags: + transform_args['flags'] = transform_flags + # Add cover support, if any. + if cls.uses_cover: + if options.image_cache: + transform_args['cover'] = lambda x: WLCover(x, image_cache = options.image_cache) + elif not cls.cover_optional or options.with_cover: + transform_args['cover'] = WLCover + + + # Do some real work + try: + for main_input in input_filenames: + if options.verbose: + print main_input + + # Where to find input? + if cls.uses_provider: + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + else: + provider = None + + # Where to write output? + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.' + cls.ext + else: + output_file = None + + # Do the transformation. + doc = WLDocument.from_file(main_input, provider=provider, **parser_args) + transform = cls.transform + if transform is None: + transform = getattr(WLDocument, 'as_%s' % cls.ext) + output = transform(doc, **transform_args) + + doc.save_output_file(output, + output_file, options.output_dir, options.make_dir, cls.ext) + + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e + } diff --git a/librarian/cover.py b/librarian/cover.py index 46990fd..02d76f9 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -212,7 +212,7 @@ class WLCover(Cover): u'Współczesność': '#06393d', } - def __init__(self, book_info): + def __init__(self, book_info, image_cache=None): super(WLCover, self).__init__(book_info) self.kind = book_info.kind self.epoch = book_info.epoch @@ -220,7 +220,16 @@ class WLCover(Cover): from urllib2 import urlopen from StringIO import StringIO - bg_src = urlopen(book_info.cover_url) + url = book_info.cover_url + bg_src = None + if image_cache: + from urllib import quote + try: + bg_src = urlopen(image_cache + quote(url, safe="")) + except: + pass + if bg_src is None: + bg_src = urlopen(url) self.background_img = StringIO(bg_src.read()) bg_src.close() else: diff --git a/librarian/epub.py b/librarian/epub.py index 5481516..f34bb86 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -291,7 +291,7 @@ def transform(wldoc, verbose=False, """ produces a EPUB file sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object or True for default + cover: a cover.Cover factory or True for default flags: less-advertising, without-fonts, working-copy """ @@ -396,28 +396,29 @@ def transform(wldoc, verbose=False, if cover: if cover is True: cover = WLCover - if cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) cover_file = StringIO() - c = cover(document.book_info) - c.save(cover_file) - c_name = 'cover.%s' % c.ext() - zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue()) + bound_cover = cover(document.book_info) + bound_cover.save(cover_file) + cover_name = 'cover.%s' % bound_cover.ext() + zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue()) del cover_file cover_tree = etree.parse(get_resource('epub/cover.html')) - cover_tree.find('//' + XHTMLNS('img')).set('src', c_name) + cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name) zip.writestr('OPS/cover.html', etree.tostring( cover_tree, method="html", pretty_print=True)) + if bound_cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + manifest.append(etree.fromstring( '')) manifest.append(etree.fromstring( - '' % (c_name, c.mime_type()))) + '' % (cover_name, bound_cover.mime_type()))) spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring('')) diff --git a/librarian/mobi.py b/librarian/mobi.py index 1e7569b..d98b838 100644 --- a/librarian/mobi.py +++ b/librarian/mobi.py @@ -19,7 +19,7 @@ def transform(wldoc, verbose=False, wldoc: a WLDocument sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object + cover: a cover.Cover factory overriding default flags: less-advertising, """ @@ -31,10 +31,10 @@ def transform(wldoc, verbose=False, if not cover: cover = WLCover cover_file = NamedTemporaryFile(suffix='.png', delete=False) - c = cover(book_info) - c.save(cover_file) + bound_cover = cover(book_info) + bound_cover.save(cover_file) - if cover.uses_dc_cover: + if bound_cover.uses_dc_cover: if document.book_info.cover_by: document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) if document.book_info.cover_source: diff --git a/librarian/pdf.py b/librarian/pdf.py index cb65311..7bba9d2 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -181,7 +181,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, verbose: prints all output from LaTeX save_tex: path to save the intermediary LaTeX file to morefloats (old/new/none): force specific morefloats - cover: a cover.Cover object + cover: a cover.Cover factory or True for default flags: less-advertising, customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) """ @@ -193,9 +193,10 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, if cover: if cover is True: cover = WLCover - document.edoc.getroot().set('data-cover-width', str(cover.width)) - document.edoc.getroot().set('data-cover-height', str(cover.height)) - if cover.uses_dc_cover: + bound_cover = cover(document.book_info) + document.edoc.getroot().set('data-cover-width', str(bound_cover.width)) + document.edoc.getroot().set('data-cover-height', str(bound_cover.height)) + if bound_cover.uses_dc_cover: if document.book_info.cover_by: document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) if document.book_info.cover_source: @@ -231,9 +232,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, temp = mkdtemp('-wl2pdf') if cover: - c = cover(document.book_info) with open(os.path.join(temp, 'cover.png'), 'w') as f: - c.save(f) + bound_cover.save(f) del document # no longer needed large object :) diff --git a/scripts/book2cover b/scripts/book2cover index 49cd539..ae11e60 100755 --- a/scripts/book2cover +++ b/scripts/book2cover @@ -4,36 +4,23 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse +from StringIO import StringIO +from librarian import OutputFile +from librarian.book2anything import Book2Anything, Option -from librarian import ParseError -from librarian.parser import WLDocument -from librarian.cover import WLCover +class Book2Cover(Book2Anything): + format_name = "JPEG" + ext = "jpg" + uses_cover = True + cover_optional = False -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Create cover images for SOURCE files.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - - options, input_filenames = parser.parse_args() + @staticmethod + def transform(wldoc, cover): + output = StringIO() + cover(wldoc.book_info).save(output) + return OutputFile.from_string(output.getvalue()) - if len(input_filenames) < 1: - parser.print_help() - exit(1) - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.jpg' - - doc = WLDocument.from_file(input_filename) - WLCover(doc.book_info).save(output_filename) +if __name__ == '__main__': + Book2Cover.run() diff --git a/scripts/book2epub b/scripts/book2epub index 04e4515..01ca79a 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -4,65 +4,20 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os.path -import optparse +from librarian.book2anything import Book2Anything, Option -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Epub(Book2Anything): + format_name = "EPUB" + ext = "epub" + uses_cover = True + uses_provider = True + transform_flags = [ + Option('-w', '--working-copy', dest='working-copy', + action='store_true', default=False, + help='mark the output as a working copy') + ] -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to EPUB format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False, - help='mark the output as a working copy') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - flags = [] - if options.working_copy: - flags.append('working-copy') - # Do some real work - try: - for main_input in input_filenames: - if options.verbose: - print main_input - - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.epub' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - epub = doc.as_epub(cover=options.with_cover, flags=flags) - - doc.save_output_file(epub, - output_file, options.output_dir, options.make_dir, 'epub') - - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Epub.run() diff --git a/scripts/book2fb2 b/scripts/book2fb2 index 81a002b..584ae99 100755 --- a/scripts/book2fb2 +++ b/scripts/book2fb2 @@ -4,65 +4,15 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os.path -import optparse +from librarian.book2anything import Book2Anything -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Fb2(Book2Anything): + format_name = "FB2" + ext = "fb2" + uses_cover = False + uses_provider = True -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to FB2 format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False, - help='mark the output as a working copy') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - flags = [] - if options.working_copy: - flags.append('working-copy') - # Do some real work - try: - for main_input in input_filenames: - if options.verbose: - print main_input - - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.fb2' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - fb2 = doc.as_fb2(cover=options.with_cover, flags=flags) - - doc.save_output_file(fb2, - output_file, options.output_dir, options.make_dir, 'fb2') - - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Fb2.run() diff --git a/scripts/book2html b/scripts/book2html index 8adeb38..5d48eec 100755 --- a/scripts/book2html +++ b/scripts/book2html @@ -4,62 +4,25 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument +from librarian.book2anything import Book2Anything, Option + + +class Book2Html(Book2Anything): + format_name = "HTML" + ext = "html" + uses_cover = False + uses_provider = False + transform_flags = [ + Option('-r', '--raw', dest='full-page', + action='store_false', default=True, + help='output raw text for use in templates') + ] + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - parser.add_option('-r', '--raw', action='store_false', dest='full_page', default=True, - help='outpu raw text for use in templates') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - flags = ('full-page',) if options.full_page else None - html = doc.as_html(flags=flags) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e, - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise - + Book2Html.run() diff --git a/scripts/book2ihtml b/scripts/book2ihtml deleted file mode 100755 index 779f245..0000000 --- a/scripts/book2ihtml +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_html(flags=('full-page',), stylesheet='partial') - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.message.encode('utf-8') - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e.message.encode('utf-8'), - } - raise - diff --git a/scripts/book2mobi b/scripts/book2mobi index 665dcfa..f477a83 100755 --- a/scripts/book2mobi +++ b/scripts/book2mobi @@ -4,53 +4,16 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os.path -import optparse +from librarian.book2anything import Book2Anything -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Mobi(Book2Anything): + format_name = "MOBI" + ext = "mobi" + uses_cover = True + cover_optional = False + uses_provider = True -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to MOBI format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - try: - for main_input in input_filenames: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.mobi' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - mobi = doc.as_mobi() - - doc.save_output_file(mobi, - output_file, options.output_dir, options.make_dir, 'mobi') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Mobi.run() diff --git a/scripts/book2pdf b/scripts/book2pdf index 258c20d..68e2d08 100755 --- a/scripts/book2pdf +++ b/scripts/book2pdf @@ -4,61 +4,21 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os.path -from optparse import OptionParser +from librarian.book2anything import Book2Anything, Option -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Pdf(Book2Anything): + format_name = "PDF" + ext = "pdf" + uses_cover = True + uses_provider = True + transform_args = [ + Option('-t', '--save-tex', dest='save_tex', metavar='FILE', + help='path to save the intermediary LaTeX file to'), + Option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', + help='force morefloats in old (<1.0c), new (>=1.0c) or none') + ] -if __name__ == '__main__': - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to PDF format.""" - - parser = OptionParser(usage) - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='make lots of noise and revert to default interaction in LaTeX') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE', - help='path to save the intermediary LaTeX file to') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - parser.add_option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', - help='force morefloats in old (<1.0c), new (>=1.0c) or none') - (options, args) = parser.parse_args() - - if len(args) < 1: - parser.print_help() - exit(1) - - if options.output_dir and options.output_file: - raise ValueError("Either --output-dir or --output file should be specified") - try: - for main_input in args: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - output_file, output_dir = options.output_file, options.output_dir - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.pdf' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - pdf = doc.as_pdf(save_tex=options.save_tex, - cover=options.with_cover, - morefloats=options.morefloats) - - doc.save_output_file(pdf, - output_file, options.output_dir, options.make_dir, 'pdf') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s; use -v to see more output' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Pdf.run() diff --git a/scripts/book2txt b/scripts/book2txt index 9cfdef2..e584579 100755 --- a/scripts/book2txt +++ b/scripts/book2txt @@ -4,60 +4,24 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse +from librarian.book2anything import Book2Anything, Option -from librarian import ParseError -from librarian.parser import WLDocument +class Book2Txt(Book2Anything): + format_name = "TXT" + ext = "txt" + uses_cover = False + uses_provider = False + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] + transform_args = [ + Option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, + help='set line wrap column') + ] -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to TXT format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, - help='set line wrap column') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.txt' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_text(wrapping=str(options.wrapping)) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise +if __name__ == '__main__': + Book2Txt.run() diff --git a/setup.py b/setup.py index 7ea9ed0..b6dbcb4 100755 --- a/setup.py +++ b/setup.py @@ -21,12 +21,12 @@ def whole_tree(prefix, path): setup( name='librarian', - version='1.4.2a', + version='1.5', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', maintainer='Radek Czajka', - maintainer_email='radek.czajka@gmail.com', + maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl', url='http://github.com/fnp/librarian', packages=['librarian'], package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fonts/*', 'res/*'] + -- 2.20.1