def replace_chars(text):
if text is None:
return None
+ #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use.
return text.replace(u"\ufeff", u"")\
.replace("---", u"\u2014")\
.replace("--", u"\u2013")\
last_node_part = False
- # the below loops are workaround for a problem with epubs in drama ebooks without acts
+ # the loop below is a workaround for a problem with epubs in drama ebooks without acts
is_scene = False
+ is_act = False
for one_part in main_text:
- name = one_part.tag
- if name in ('naglowek_scena'):
+ name = one_part.tag
+ if name == 'naglowek_scena':
is_scene = True
- break
- if is_scene is True:
- is_scene_with_acts = False
- for one_part in main_text:
- if one_part.tag == 'naglowek_akt':
- is_scene_with_acts = True
- break
- else:
- is_scene_with_acts = False
+ elif name == 'naglowek_akt':
+ is_act = True
for one_part in main_text:
name = one_part.tag
- if is_scene_with_acts is False and is_scene is True:
+ if is_act is False and is_scene is True:
if name == 'naglowek_czesc':
yield part_xml
last_node_part = True
main_xml_part[:] = [deepcopy(one_part)]
- elif not last_node_part and name in ("naglowek_scena"):
+ elif not last_node_part and name == "naglowek_scena":
yield part_xml
main_xml_part[:] = [deepcopy(one_part)]
else:
toc = TOC()
for element in chunk_xml[0]:
- if element.tag in ("naglowek_czesc"):
+ if element.tag == "naglowek_czesc":
toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
toc.add(node_name(element), "part%d.html" % chunk_no)