X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/6642c1c71c5c6ce6ef3401c8c9da84cf076b018b..51bcb92d5e5a879118d83f72d588d909d69dd3ab:/scripts/book2txt?ds=sidebyside diff --git a/scripts/book2txt b/scripts/book2txt index 41a3978..9cfdef2 100755 --- a/scripts/book2txt +++ b/scripts/book2txt @@ -1,9 +1,14 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# import os import optparse -from librarian import text -from librarian import dcparser, ParseError +from librarian import ParseError +from librarian.parser import WLDocument if __name__ == '__main__': @@ -17,9 +22,11 @@ if __name__ == '__main__': help='print status messages to stdout') parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, help='set line wrap column') - + parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, + help='don\'t try to parse dublin core metadata') + options, input_filenames = parser.parse_args() - + if len(input_filenames) < 1: parser.print_help() exit(1) @@ -28,15 +35,18 @@ if __name__ == '__main__': for input_filename in input_filenames: if options.verbose: print input_filename - + output_filename = os.path.splitext(input_filename)[0] + '.txt' try: - text.transform(input_filename, output_filename, wrapping=str(options.wrapping)) + doc = WLDocument.from_file(input_filename, + parse_dublincore=options.parse_dublincore) + html = doc.as_text(wrapping=str(options.wrapping)) + doc.save_output_file(html, output_path=output_filename) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': input_filename, 'name': e.__class__.__name__, - 'message': e.message + 'message': e } except IOError, e: print '%(file)s:%(name)s:%(message)s' % { @@ -48,6 +58,6 @@ if __name__ == '__main__': print '%(file)s:%(etype)s:%(message)s' % { 'file': input_filename, 'etype': e.__class__.__name__, - 'message': e.message, + 'message': e, } - raise e + raise