X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/0081ccb8e51026b5af824b3c641648d23155f1e1..9c32ad2925b2d8359eb888c966a2faac8d94307d:/librarian/epub.py?ds=sidebyside diff --git a/librarian/epub.py b/librarian/epub.py index 4855de8..81dcb6e 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -78,6 +78,7 @@ def replace_characters(node): def replace_chars(text): if text is None: return None + #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use. return text.replace(u"\ufeff", u"")\ .replace("---", u"\u2014")\ .replace("--", u"\u2013")\ @@ -292,25 +293,19 @@ def chop(main_text): last_node_part = False - # the below loops are workaround for a problem with epubs in drama ebooks without acts + # the below loop are workaround for a problem with epubs in drama ebooks without acts is_scene = False + is_act = False for one_part in main_text: - name = one_part.tag - if name in ('naglowek_scena'): + name = one_part.tag + if name == 'naglowek_scena': is_scene = True - break - if is_scene is True: - is_scene_with_acts = False - for one_part in main_text: - if one_part.tag == 'naglowek_akt': - is_scene_with_acts = True - break - else: - is_scene_with_acts = False + elif name == 'naglowek_akt': + is_act = True for one_part in main_text: name = one_part.tag - if is_scene_with_acts is False and is_scene is True: + if is_act is False and is_scene is True: if name == 'naglowek_czesc': yield part_xml last_node_part = True