X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/dd3e5f7c77134d034eb8592479288e1bb9fe28fd..dfab836a2bcb5a8e4787e960c14f3790f7773e8d:/librarian/epub.py
diff --git a/librarian/epub.py b/librarian/epub.py
index c8a8668..b17ca0e 100644
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -22,6 +22,7 @@ from librarian.cover import DefaultEbookCover
from librarian import functions, get_resource
functions.reg_person_name()
+functions.reg_lang_code_3to2()
def inner_xml(node):
@@ -77,6 +78,7 @@ def replace_characters(node):
def replace_chars(text):
if text is None:
return None
+ #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single-letter conjunctions — for future use.
return text.replace(u"\ufeff", u"")\
.replace("---", u"\u2014")\
.replace("--", u"\u2013")\
@@ -247,7 +249,7 @@ class TOC(object):
nav_label = nav_map.makeelement(NCXNS('navLabel'))
text = nav_map.makeelement(NCXNS('text'))
- text.text = child.name
+ text.text = re.sub(r'\n', ' ', child.name)
nav_label.append(text)
nav_point.append(nav_label)
@@ -290,18 +292,41 @@ def chop(main_text):
main_xml_part = part_xml[0] # master
last_node_part = False
+
+ # The loop below is a workaround for a problem with EPUBs of dramas that have scene headers but no act headers.
+ is_scene = False
+ is_act = False
+ for one_part in main_text:
+ name = one_part.tag
+ if name == 'naglowek_scena':
+ is_scene = True
+ elif name == 'naglowek_akt':
+ is_act = True
+
for one_part in main_text:
name = one_part.tag
- if name == 'naglowek_czesc':
- yield part_xml
- last_node_part = True
- main_xml_part[:] = [deepcopy(one_part)]
- elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
- yield part_xml
- main_xml_part[:] = [deepcopy(one_part)]
+ if is_act is False and is_scene is True:
+ if name == 'naglowek_czesc':
+ yield part_xml
+ last_node_part = True
+ main_xml_part[:] = [deepcopy(one_part)]
+ elif not last_node_part and name in ("naglowek_scena"):
+ yield part_xml
+ main_xml_part[:] = [deepcopy(one_part)]
+ else:
+ main_xml_part.append(deepcopy(one_part))
+ last_node_part = False
else:
- main_xml_part.append(deepcopy(one_part))
- last_node_part = False
+ if name == 'naglowek_czesc':
+ yield part_xml
+ last_node_part = True
+ main_xml_part[:] = [deepcopy(one_part)]
+ elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+ yield part_xml
+ main_xml_part[:] = [deepcopy(one_part)]
+ else:
+ main_xml_part.append(deepcopy(one_part))
+ last_node_part = False
yield part_xml
@@ -310,7 +335,9 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
toc = TOC()
for element in chunk_xml[0]:
- if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+ if element.tag in ("naglowek_czesc"):
+ toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
+ elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
toc.add(node_name(element), "part%d.html" % chunk_no)
elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
@@ -472,7 +499,7 @@ def transform(wldoc, verbose=False,
' '))
manifest.append(etree.fromstring(
' ' % (cover_name, bound_cover.mime_type())))
- spine.insert(0, etree.fromstring(''))
+ spine.insert(0, etree.fromstring(''))
opf.getroot()[0].append(etree.fromstring(''))
guide.append(etree.fromstring(''))
@@ -556,8 +583,7 @@ def transform(wldoc, verbose=False,
rmtree(tmpdir)
if cwd is not None:
os.chdir(cwd)
-
- zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
+ zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True, xml_declaration = True, encoding='UTF-8'))
title = document.book_info.title
attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
for st in attributes:
@@ -574,7 +600,7 @@ def transform(wldoc, verbose=False,
toc.add(u"Spis treÅci", "toc.html", index=1)
zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
toc.write_to_xml(nav_map)
- zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
+ zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True, xml_declaration = True, encoding='UTF-8'))
zip.close()
return OutputFile.from_filename(output_file.name)