X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/07fdba2c7fe8e11b6867712d47bdd608e88c29fb..6642c1c71c5c6ce6ef3401c8c9da84cf076b018b:/librarian/html.py diff --git a/librarian/html.py b/librarian/html.py index b279e5d..4edbf33 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -3,9 +3,9 @@ import os import cStringIO import re import copy -import pkgutil from lxml import etree +from librarian.parser import WLDocument ENTITY_SUBSTITUTIONS = [ @@ -31,32 +31,28 @@ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') ns['substitute_entities'] = substitute_entities -def transform(input_filename, output_filename): +def transform(input, output_filename=None, is_file=True): """Transforms file input_filename in XML to output_filename in XHTML.""" # Parse XSLT style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt') style = etree.parse(style_filename) - doc_file = cStringIO.StringIO() - expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE); - - f = open(input_filename, 'r') - for line in f: - line = line.decode('utf-8') - line = expr.sub(u'
\n', line) - doc_file.write(line.encode('utf-8')) - f.close() - - doc_file.seek(0); + if is_file: + document = WLDocument.from_file(input, True) + else: + document = WLDocument.from_string(input, True) - parser = etree.XMLParser(remove_blank_text=True) - doc = etree.parse(doc_file, parser) + result = document.transform(style) + del document # no longer needed large object :) - result = doc.xslt(style) if result.find('//p') is not None: add_anchors(result.getroot()) add_table_of_contents(result.getroot()) - result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') + + if output_filename is not None: + result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') + else: + return result return True else: return False