fix Ó in <naglowekosoba> (hack)

[librarian.git] / librarian / epub.py
diff --git a/librarian/epub.py b/librarian/epub.py

index 1ea2688..397dc51 100644 (file)
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -11,13 +11,15 @@ import re
  import subprocess
  from StringIO import StringIO
  from copy import deepcopy
  import subprocess
  from StringIO import StringIO
  from copy import deepcopy
+from mimetypes import guess_type
+
  from lxml import etree
  import zipfile
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  from lxml import etree
  import zipfile
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
  
  from librarian import functions, get_resource
  
  
  from librarian import functions, get_resource
  
@@ -51,19 +53,21 @@ def set_hyph_language(source_tree):
  
  
  def hyphenate_and_fix_conjunctions(source_tree, hyph):
  
  
  def hyphenate_and_fix_conjunctions(source_tree, hyph):
-    if hyph is not None:
-        texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
-        for t in texts:
-            parent = t.getparent()
+    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
+    for t in texts:
+        parent = t.getparent()
+        if hyph is not None:
              newt = ''
              wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
              for w in wlist:
                  newt += hyph.inserted(w, u'\u00AD')
              newt = ''
              wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
              for w in wlist:
                  newt += hyph.inserted(w, u'\u00AD')
-            newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
-            if t.is_text:
-                parent.text = newt
-            elif t.is_tail:
-                parent.tail = newt
+        else:
+            newt = t
+        newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
+        if t.is_text:
+            parent.text = newt
+        elif t.is_tail:
+            parent.tail = newt
  
  
  def inner_xml(node):
  
  
  def inner_xml(node):
@@ -109,11 +113,13 @@ def node_name(node):
      return tempnode.text
  
  
      return tempnode.text
  
  
-def xslt(xml, sheet):
+def xslt(xml, sheet, **kwargs):
      if isinstance(xml, etree._Element):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
      if isinstance(xml, etree._Element):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
-        return xml.xslt(etree.parse(xsltf))
+        transform = etree.XSLT(etree.parse(xsltf))
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        return transform(xml, **params)
  
  
  def replace_characters(node):
  
  
  def replace_characters(node):
@@ -195,6 +201,8 @@ class Stanza(object):
          if not text:
              return
          for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
          if not text:
              return
          for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
+            if not verse_text.strip():
+                continue
              if i:
                  self.open_normal_verse()
              verse = self.get_open_verse()
              if i:
                  self.open_normal_verse()
              verse = self.get_open_verse()
@@ -404,9 +412,8 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
      return output_html, toc, chars
  
  
      return output_html, toc, chars
  
  
-def transform(wldoc, verbose=False,
-              style=None, html_toc=False,
-              sample=None, cover=None, flags=None):
+def transform(wldoc, verbose=False, style=None, html_toc=False,
+              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
      """ produces a EPUB file
  
      sample=n: generate sample e-book (with at least n paragraphs)
      """ produces a EPUB file
  
      sample=n: generate sample e-book (with at least n paragraphs)
@@ -419,7 +426,7 @@ def transform(wldoc, verbose=False,
  
          replace_characters(wldoc.edoc.getroot())
  
  
          replace_characters(wldoc.edoc.getroot())
  
-        hyphenator = set_hyph_language(wldoc.edoc.getroot())
+        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
  
          # every input file will have a TOC entry,
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
  
          # every input file will have a TOC entry,
@@ -428,7 +435,7 @@ def transform(wldoc, verbose=False,
          chars = set()
          if first:
              # write book title page
          chars = set()
          if first:
              # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
              chars = used_chars(html_tree.getroot())
              zip.writestr(
                  'OPS/title.html',
              chars = used_chars(html_tree.getroot())
              zip.writestr(
                  'OPS/title.html',
@@ -501,6 +508,9 @@ def transform(wldoc, verbose=False,
          for flag in flags:
              document.edoc.getroot().set(flag, 'yes')
  
          for flag in flags:
              document.edoc.getroot().set(flag, 'yes')
  
+    document.clean_ed_note()
+    document.clean_ed_note('abstrakt')
+
      # add editors info
      editors = document.editors()
      if editors:
      # add editors info
      editors = document.editors()
      if editors:
@@ -520,6 +530,16 @@ def transform(wldoc, verbose=False,
      output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
      output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
+    functions.reg_mathml_epub(zip)
+
+    if os.path.isdir(ilustr_path):
+        for i, filename in enumerate(os.listdir(ilustr_path)):
+            file_path = os.path.join(ilustr_path, filename)
+            zip.write(file_path, os.path.join('OPS', filename))
+            image_id = 'image%s' % i
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+
      # write static elements
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      # write static elements
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
@@ -545,7 +565,7 @@ def transform(wldoc, verbose=False,
  
      if cover:
          if cover is True:
  
      if cover:
          if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
  
          cover_file = StringIO()
          bound_cover = cover(document.book_info)
  
          cover_file = StringIO()
          bound_cover = cover(document.book_info)
@@ -632,7 +652,7 @@ def transform(wldoc, verbose=False,
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
      chars.update(used_chars(html_tree.getroot()))
      zip.writestr('OPS/last.html', etree.tostring(
          html_tree, pretty_print=True, xml_declaration=True,
      chars.update(used_chars(html_tree.getroot()))
      zip.writestr('OPS/last.html', etree.tostring(
          html_tree, pretty_print=True, xml_declaration=True,
@@ -641,7 +661,7 @@ def transform(wldoc, verbose=False,
                  '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
      ))
  
                  '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
      ))
  
-    if not flags or not 'without-fonts' in flags:
+    if not flags or 'without-fonts' not in flags:
          # strip fonts
          tmpdir = mkdtemp('-librarian-epub')
          try:
          # strip fonts
          tmpdir = mkdtemp('-librarian-epub')
          try:
@@ -659,7 +679,8 @@ def transform(wldoc, verbose=False,
                  print "Running font-optimizer"
                  subprocess.check_call(optimizer_call)
              else:
                  print "Running font-optimizer"
                  subprocess.check_call(optimizer_call)
              else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                dev_null = open(os.devnull, 'w')
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
              zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
              manifest.append(etree.fromstring(
                  '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
              zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
              manifest.append(etree.fromstring(
                  '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))