X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/88cabfe51907b7d68ec059a4070ebfac7192fc38..dfab836a2bcb5a8e4787e960c14f3790f7773e8d:/librarian/epub.py diff --git a/librarian/epub.py b/librarian/epub.py index 4855de8..b17ca0e 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -78,6 +78,7 @@ def replace_characters(node): def replace_chars(text): if text is None: return None + #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use. return text.replace(u"\ufeff", u"")\ .replace("---", u"\u2014")\ .replace("--", u"\u2013")\ @@ -248,7 +249,7 @@ class TOC(object): nav_label = nav_map.makeelement(NCXNS('navLabel')) text = nav_map.makeelement(NCXNS('text')) - text.text = child.name + text.text = re.sub(r'\n', ' ', child.name) nav_label.append(text) nav_point.append(nav_label) @@ -292,25 +293,19 @@ def chop(main_text): last_node_part = False - # the below loops are workaround for a problem with epubs in drama ebooks without acts + # the below loop are workaround for a problem with epubs in drama ebooks without acts is_scene = False + is_act = False for one_part in main_text: - name = one_part.tag - if name in ('naglowek_scena'): + name = one_part.tag + if name == 'naglowek_scena': is_scene = True - break - if is_scene is True: - is_scene_with_acts = False - for one_part in main_text: - if one_part.tag == 'naglowek_akt': - is_scene_with_acts = True - break - else: - is_scene_with_acts = False + elif name == 'naglowek_akt': + is_act = True for one_part in main_text: name = one_part.tag - if is_scene_with_acts is False and is_scene is True: + if is_act is False and is_scene is True: if name == 'naglowek_czesc': yield part_xml last_node_part = True @@ -504,7 +499,7 @@ def transform(wldoc, verbose=False, '')) manifest.append(etree.fromstring( '' % (cover_name, bound_cover.mime_type()))) - spine.insert(0, etree.fromstring('')) + spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring(''))