String comparison bug.
[librarian.git] / librarian / epub.py
index 9e30134..294843b 100644 (file)
@@ -22,6 +22,7 @@ from librarian.cover import DefaultEbookCover
 from librarian import functions, get_resource
 
 functions.reg_person_name()
+functions.reg_lang_code_3to2()
 
 
 def inner_xml(node):
@@ -77,6 +78,7 @@ def replace_characters(node):
     def replace_chars(text):
         if text is None:
             return None
+        #text = re.sub(r"(?<=\s\w)\s+", u"\u00a0", text) #fix for hanging single letter conjunctions – for future use.
         return text.replace(u"\ufeff", u"")\
                    .replace("---", u"\u2014")\
                    .replace("--", u"\u2013")\
@@ -290,18 +292,41 @@ def chop(main_text):
     main_xml_part = part_xml[0] # master
 
     last_node_part = False
+    
+    # the loop below is a workaround for a problem with epubs of drama ebooks that have scenes but no acts
+    is_scene = False
+    is_act = False
+    for one_part in main_text:
+        name = one_part.tag
+        if name == 'naglowek_scena':
+            is_scene = True
+        elif name == 'naglowek_akt':
+            is_act = True
+    
     for one_part in main_text:
         name = one_part.tag
-        if name == 'naglowek_czesc':
-            yield part_xml
-            last_node_part = True
-            main_xml_part[:] = [deepcopy(one_part)]
-        elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
-            yield part_xml
-            main_xml_part[:] = [deepcopy(one_part)]
+        if is_act is False and is_scene is True:
+            if name == 'naglowek_czesc':
+                yield part_xml
+                last_node_part = True
+                main_xml_part[:] = [deepcopy(one_part)]
+            elif not last_node_part and name == "naglowek_scena":
+                yield part_xml
+                main_xml_part[:] = [deepcopy(one_part)]
+            else:
+                main_xml_part.append(deepcopy(one_part))
+                last_node_part = False
         else:
-            main_xml_part.append(deepcopy(one_part))
-            last_node_part = False
+            if name == 'naglowek_czesc':
+                yield part_xml
+                last_node_part = True
+                main_xml_part[:] = [deepcopy(one_part)]
+            elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+                yield part_xml
+                main_xml_part[:] = [deepcopy(one_part)]
+            else:
+                main_xml_part.append(deepcopy(one_part))
+                last_node_part = False            
     yield part_xml
 
 
@@ -310,7 +335,9 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
 
     toc = TOC()
     for element in chunk_xml[0]:
-        if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+        if element.tag == "naglowek_czesc":
+            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
+        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
             toc.add(node_name(element), "part%d.html" % chunk_no)
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
@@ -556,8 +583,7 @@ def transform(wldoc, verbose=False,
         rmtree(tmpdir)
         if cwd is not None:
             os.chdir(cwd)
-
-    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
+    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True, xml_declaration = True, encoding='UTF-8'))
     title = document.book_info.title
     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
     for st in attributes:
@@ -565,7 +591,7 @@ def transform(wldoc, verbose=False,
         meta.set('name', st)
         meta.set('content', '0')
         toc_file[0].append(meta)
-    toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl')))
+    toc_file[0][0].set('content', str(document.book_info.url))
     toc_file[0][1].set('content', str(toc.depth()))
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 
@@ -574,7 +600,7 @@ def transform(wldoc, verbose=False,
         toc.add(u"Spis treści", "toc.html", index=1)
         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
     toc.write_to_xml(nav_map)
-    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
+    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True, xml_declaration = True, encoding='UTF-8'))
     zip.close()
 
     return OutputFile.from_filename(output_file.name)