fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[TXT] Newline after last line, update test.
[librarian.git]
/
librarian
/
epub.py
diff --git a/librarian/epub.py b/librarian/epub.py
index 4855de8..81dcb6e 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -78,6 +78,7 @@ def replace_characters(node):
def replace_chars(text):
if text is None:
return None
def replace_chars(text):
if text is None:
return None
+ #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use.
return text.replace(u"\ufeff", u"")\
.replace("---", u"\u2014")\
.replace("--", u"\u2013")\
return text.replace(u"\ufeff", u"")\
.replace("---", u"\u2014")\
.replace("--", u"\u2013")\
@@ -292,25 +293,19 @@ def chop(main_text):
last_node_part = False
last_node_part = False
- # the below loops are workaround for a problem with epubs in drama ebooks without acts
+ # the below loop are workaround for a problem with epubs in drama ebooks without acts
is_scene = False
is_scene = False
+ is_act = False
for one_part in main_text:
for one_part in main_text:
- name = one_part.tag
- if name in ('naglowek_scena'):
+ name = one_part.tag
+ if name == 'naglowek_scena':
is_scene = True
is_scene = True
- break
- if is_scene is True:
- is_scene_with_acts = False
- for one_part in main_text:
- if one_part.tag == 'naglowek_akt':
- is_scene_with_acts = True
- break
- else:
- is_scene_with_acts = False
+ elif name == 'naglowek_akt':
+ is_act = True
for one_part in main_text:
name = one_part.tag
for one_part in main_text:
name = one_part.tag
- if is_scene_with_acts is False and is_scene is True:
+ if is_act is False and is_scene is True:
if name == 'naglowek_czesc':
yield part_xml
last_node_part = True
if name == 'naglowek_czesc':
yield part_xml
last_node_part = True