X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/88cabfe51907b7d68ec059a4070ebfac7192fc38..ccc0df182c479eb554148e5bbc3fd9f6bec95e20:/librarian/epub.py?ds=sidebyside diff --git a/librarian/epub.py b/librarian/epub.py index 4855de8..294843b 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -78,6 +78,7 @@ def replace_characters(node): def replace_chars(text): if text is None: return None + #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use. return text.replace(u"\ufeff", u"")\ .replace("---", u"\u2014")\ .replace("--", u"\u2013")\ @@ -292,30 +293,24 @@ def chop(main_text): last_node_part = False - # the below loops are workaround for a problem with epubs in drama ebooks without acts + # the below loop are workaround for a problem with epubs in drama ebooks without acts is_scene = False + is_act = False for one_part in main_text: - name = one_part.tag - if name in ('naglowek_scena'): + name = one_part.tag + if name == 'naglowek_scena': is_scene = True - break - if is_scene is True: - is_scene_with_acts = False - for one_part in main_text: - if one_part.tag == 'naglowek_akt': - is_scene_with_acts = True - break - else: - is_scene_with_acts = False + elif name == 'naglowek_akt': + is_act = True for one_part in main_text: name = one_part.tag - if is_scene_with_acts is False and is_scene is True: + if is_act is False and is_scene is True: if name == 'naglowek_czesc': yield part_xml last_node_part = True main_xml_part[:] = [deepcopy(one_part)] - elif not last_node_part and name in ("naglowek_scena"): + elif not last_node_part and name == "naglowek_scena": yield part_xml main_xml_part[:] = [deepcopy(one_part)] else: @@ -340,7 +335,7 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s toc = TOC() for element in chunk_xml[0]: - if element.tag in ("naglowek_czesc"): + if element.tag == "naglowek_czesc": toc.add(node_name(element), "part%d.html#book-text" % chunk_no) elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): toc.add(node_name(element), "part%d.html" % chunk_no)