X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/ea4b7dbd086b690c7ce754a561ef4ee9a68b9f35..3615220e78b96f6c29a1d4e7c580904ecab204f7:/librarian/epub.py diff --git a/librarian/epub.py b/librarian/epub.py index 2f7bbec..81dcb6e 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -78,6 +78,7 @@ def replace_characters(node): def replace_chars(text): if text is None: return None + #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use. return text.replace(u"\ufeff", u"")\ .replace("---", u"\u2014")\ .replace("--", u"\u2013")\ @@ -291,18 +292,41 @@ def chop(main_text): main_xml_part = part_xml[0] # master last_node_part = False + + # the below loop are workaround for a problem with epubs in drama ebooks without acts + is_scene = False + is_act = False + for one_part in main_text: + name = one_part.tag + if name == 'naglowek_scena': + is_scene = True + elif name == 'naglowek_akt': + is_act = True + for one_part in main_text: name = one_part.tag - if name == 'naglowek_czesc': - yield part_xml - last_node_part = True - main_xml_part[:] = [deepcopy(one_part)] - elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): - yield part_xml - main_xml_part[:] = [deepcopy(one_part)] + if is_act is False and is_scene is True: + if name == 'naglowek_czesc': + yield part_xml + last_node_part = True + main_xml_part[:] = [deepcopy(one_part)] + elif not last_node_part and name in ("naglowek_scena"): + yield part_xml + main_xml_part[:] = [deepcopy(one_part)] + else: + main_xml_part.append(deepcopy(one_part)) + last_node_part = False else: - main_xml_part.append(deepcopy(one_part)) - last_node_part = False + if name == 'naglowek_czesc': + yield part_xml + last_node_part = True + main_xml_part[:] = [deepcopy(one_part)] + elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): + yield part_xml + main_xml_part[:] = [deepcopy(one_part)] + else: + main_xml_part.append(deepcopy(one_part)) + last_node_part = False yield part_xml