fix for subscripts
[librarian.git] / librarian / epub.py
index bf58a9f..397dc51 100644 (file)
@@ -11,13 +11,15 @@ import re
 import subprocess
 from StringIO import StringIO
 from copy import deepcopy
 import subprocess
 from StringIO import StringIO
 from copy import deepcopy
+from mimetypes import guess_type
+
 from lxml import etree
 import zipfile
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
 from lxml import etree
 import zipfile
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
 
 from librarian import functions, get_resource
 
 
 from librarian import functions, get_resource
 
@@ -51,19 +53,21 @@ def set_hyph_language(source_tree):
 
 
 def hyphenate_and_fix_conjunctions(source_tree, hyph):
 
 
 def hyphenate_and_fix_conjunctions(source_tree, hyph):
-    if hyph is not None:
-        texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
-        for t in texts:
-            parent = t.getparent()
+    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
+    for t in texts:
+        parent = t.getparent()
+        if hyph is not None:
             newt = ''
             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
             for w in wlist:
                 newt += hyph.inserted(w, u'\u00AD')
             newt = ''
             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
             for w in wlist:
                 newt += hyph.inserted(w, u'\u00AD')
-            newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
-            if t.is_text:
-                parent.text = newt
-            elif t.is_tail:
-                parent.tail = newt
+        else:
+            newt = t
+        newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
+        if t.is_text:
+            parent.text = newt
+        elif t.is_tail:
+            parent.tail = newt
 
 
 def inner_xml(node):
 
 
 def inner_xml(node):
@@ -109,11 +113,13 @@ def node_name(node):
     return tempnode.text
 
 
     return tempnode.text
 
 
-def xslt(xml, sheet):
+def xslt(xml, sheet, **kwargs):
     if isinstance(xml, etree._Element):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
     if isinstance(xml, etree._Element):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
-        return xml.xslt(etree.parse(xsltf))
+        transform = etree.XSLT(etree.parse(xsltf))
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        return transform(xml, **params)
 
 
 def replace_characters(node):
 
 
 def replace_characters(node):
@@ -195,6 +201,8 @@ class Stanza(object):
         if not text:
             return
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
         if not text:
             return
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
+            if not verse_text.strip():
+                continue
             if i:
                 self.open_normal_verse()
             verse = self.get_open_verse()
             if i:
                 self.open_normal_verse()
             verse = self.get_open_verse()
@@ -404,9 +412,8 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
     return output_html, toc, chars
 
 
     return output_html, toc, chars
 
 
-def transform(wldoc, verbose=False,
-              style=None, html_toc=False,
-              sample=None, cover=None, flags=None):
+def transform(wldoc, verbose=False, style=None, html_toc=False,
+              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
@@ -419,7 +426,7 @@ def transform(wldoc, verbose=False,
 
         replace_characters(wldoc.edoc.getroot())
 
 
         replace_characters(wldoc.edoc.getroot())
 
-        hyphenator = set_hyph_language(wldoc.edoc.getroot())
+        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 
         # every input file will have a TOC entry,
         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 
         # every input file will have a TOC entry,
@@ -428,7 +435,7 @@ def transform(wldoc, verbose=False,
         chars = set()
         if first:
             # write book title page
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
             chars = used_chars(html_tree.getroot())
             zip.writestr(
                 'OPS/title.html',
             chars = used_chars(html_tree.getroot())
             zip.writestr(
                 'OPS/title.html',
@@ -501,6 +508,9 @@ def transform(wldoc, verbose=False,
         for flag in flags:
             document.edoc.getroot().set(flag, 'yes')
 
         for flag in flags:
             document.edoc.getroot().set(flag, 'yes')
 
+    document.clean_ed_note()
+    document.clean_ed_note('abstrakt')
+
     # add editors info
     editors = document.editors()
     if editors:
     # add editors info
     editors = document.editors()
     if editors:
@@ -522,6 +532,14 @@ def transform(wldoc, verbose=False,
 
     functions.reg_mathml_epub(zip)
 
 
     functions.reg_mathml_epub(zip)
 
+    if os.path.isdir(ilustr_path):
+        for i, filename in enumerate(os.listdir(ilustr_path)):
+            file_path = os.path.join(ilustr_path, filename)
+            zip.write(file_path, os.path.join('OPS', filename))
+            image_id = 'image%s' % i
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+
     # write static elements
     mime = zipfile.ZipInfo()
     mime.filename = 'mimetype'
     # write static elements
     mime = zipfile.ZipInfo()
     mime.filename = 'mimetype'
@@ -547,7 +565,7 @@ def transform(wldoc, verbose=False,
 
     if cover:
         if cover is True:
 
     if cover:
         if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
@@ -634,7 +652,7 @@ def transform(wldoc, verbose=False,
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
     zip.writestr('OPS/last.html', etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
     chars.update(used_chars(html_tree.getroot()))
     zip.writestr('OPS/last.html', etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
@@ -643,7 +661,7 @@ def transform(wldoc, verbose=False,
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
     ))
 
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
     ))
 
-    if not flags or not 'without-fonts' in flags:
+    if not flags or 'without-fonts' not in flags:
         # strip fonts
         tmpdir = mkdtemp('-librarian-epub')
         try:
         # strip fonts
         tmpdir = mkdtemp('-librarian-epub')
         try:
@@ -661,7 +679,8 @@ def transform(wldoc, verbose=False,
                 print "Running font-optimizer"
                 subprocess.check_call(optimizer_call)
             else:
                 print "Running font-optimizer"
                 subprocess.check_call(optimizer_call)
             else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                dev_null = open(os.devnull, 'w')
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))