def replace_chars(text):
if text is None:
return None
- return text.replace("---", u"\u2014")\
+ return text.replace(u"\ufeff", u"")\
+ .replace("---", u"\u2014")\
.replace("--", u"\u2013")\
.replace(",,", u"\u201E")\
.replace('"', u"\u201D")\
replace_by_verse(chunk_xml)
html_tree = xslt(chunk_xml, res('xsltScheme.xsl'))
chars = used_chars(html_tree.getroot())
- output_html = etree.tostring(html_tree, pretty_print=True)
+ output_html = etree.tostring(html_tree, method="html", pretty_print=True)
return output_html, toc, chars
def transform_file(input_xml, chunk_counter=1, first=True):
""" processes one input file and proceeds to its children """
+ replace_characters(input_xml.getroot())
+
children = [child.text for child in input_xml.findall('.//'+DCNS('relation.hasPart'))]
# every input file will have a TOC entry,
html_tree = xslt(input_xml, res('xsltTitle.xsl'))
chars = used_chars(html_tree.getroot())
zip.writestr('OPS/title.html',
- etree.tostring(html_tree, pretty_print=True))
+ etree.tostring(html_tree, method="html", pretty_print=True))
elif children:
# write title page for every parent
html_tree = xslt(input_xml, res('xsltChunkTitle.xsl'))
chars = used_chars(html_tree.getroot())
zip.writestr('OPS/part%d.html' % chunk_counter,
- etree.tostring(html_tree, pretty_print=True))
+ etree.tostring(html_tree, method="html", pretty_print=True))
add_to_manifest(manifest, chunk_counter)
add_to_spine(spine, chunk_counter)
chunk_counter += 1
main_text = None
if main_text is not None:
- replace_characters(main_text)
-
for chunk_xml in chop(main_text):
chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations)
toc.extend(chunk_toc)
html_tree = xslt(annotations, res("xsltAnnotations.xsl"))
chars = chars.union(used_chars(html_tree.getroot()))
zip.writestr('OPS/annotations.html', etree.tostring(
- html_tree, pretty_print=True))
+ html_tree, method="html", pretty_print=True))
# strip fonts
tmpdir = mkdtemp('-librarian-epub')