Fix for Epubcheck Version 3.0.1 validation warning
[librarian.git] / librarian / epub.py
index f34bb86..c8a8668 100644 (file)
@@ -7,6 +7,7 @@ from __future__ import with_statement
 
 import os
 import os.path
 
 import os
 import os.path
+import re
 import subprocess
 from StringIO import StringIO
 from copy import deepcopy
 import subprocess
 from StringIO import StringIO
 from copy import deepcopy
@@ -16,7 +17,7 @@ from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
-from librarian.cover import WLCover
+from librarian.cover import DefaultEbookCover
 
 from librarian import functions, get_resource
 
 
 from librarian import functions, get_resource
 
@@ -109,31 +110,74 @@ def find_annotations(annotations, source, part_no):
             find_annotations(annotations, child, part_no)
 
 
             find_annotations(annotations, child, part_no)
 
 
+class Stanza(object):
+    """
+    Converts / verse endings into verse elements in a stanza.
+
+    Slashes may only occur directly in the stanza. Any slashes in subelements
+    will be ignored, and the subelements will be put inside verse elements.
+
+    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
+    >>> Stanza(s).versify()
+    >>> print etree.tostring(s)
+    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
+    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
+    
+    """
+    def __init__(self, stanza_elem):
+        self.stanza = stanza_elem
+        self.verses = []
+        self.open_verse = None
+
+    def versify(self):
+        self.push_text(self.stanza.text)
+        for elem in self.stanza:
+            self.push_elem(elem)
+            self.push_text(elem.tail)
+        tail = self.stanza.tail
+        self.stanza.clear()
+        self.stanza.tail = tail
+        self.stanza.extend(self.verses)
+
+    def open_normal_verse(self):
+        self.open_verse = self.stanza.makeelement("wers_normalny")
+        self.verses.append(self.open_verse)
+
+    def get_open_verse(self):
+        if self.open_verse is None:
+            self.open_normal_verse()
+        return self.open_verse
+
+    def push_text(self, text):
+        if not text:
+            return
+        for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
+            if i:
+                self.open_normal_verse()
+            verse = self.get_open_verse()
+            if len(verse):
+                verse[-1].tail = (verse[-1].tail or "") + verse_text
+            else:
+                verse.text = (verse.text or "") + verse_text
+
+    def push_elem(self, elem):
+        if elem.tag.startswith("wers"):
+            verse = deepcopy(elem)
+            verse.tail = None
+            self.verses.append(verse)
+            self.open_verse = verse
+        else:
+            appended = deepcopy(elem)
+            appended.tail = None
+            self.get_open_verse().append(appended)
+
+
 def replace_by_verse(tree):
     """ Find stanzas and create new verses in place of a '/' character """
 
     stanzas = tree.findall('.//' + WLNS('strofa'))
 def replace_by_verse(tree):
     """ Find stanzas and create new verses in place of a '/' character """
 
     stanzas = tree.findall('.//' + WLNS('strofa'))
-    for node in stanzas:
-        for child_node in node:
-            if child_node.tag in ('slowo_obce', 'wyroznienie'):
-                foreign_verses = inner_xml(child_node).split('/\n')
-                if len(foreign_verses) > 1:
-                    new_foreign = ''
-                    for foreign_verse in foreign_verses:
-                        if foreign_verse.startswith('<wers'):
-                            new_foreign += foreign_verse
-                        else:
-                            new_foreign += ''.join(('<wers_normalny>', foreign_verse, '</wers_normalny>'))
-                    set_inner_xml(child_node, new_foreign)
-        verses = inner_xml(node).split('/\n')
-        if len(verses) > 1:
-            modified_inner_xml = ''
-            for verse in verses:
-                if verse.startswith('<wers') or verse.startswith('<extra'):
-                    modified_inner_xml += verse
-                else:
-                    modified_inner_xml += ''.join(('<wers_normalny>', verse, '</wers_normalny>'))
-            set_inner_xml(node, modified_inner_xml)
+    for stanza in stanzas:
+        Stanza(stanza).versify()
 
 
 def add_to_manifest(manifest, partno):
 
 
 def add_to_manifest(manifest, partno):
@@ -292,7 +336,7 @@ def transform(wldoc, verbose=False,
 
     sample=n: generate sample e-book (with at least n paragraphs)
     cover: a cover.Cover factory or True for default
 
     sample=n: generate sample e-book (with at least n paragraphs)
     cover: a cover.Cover factory or True for default
-    flags: less-advertising, without-fonts, working-copy
+    flags: less-advertising, without-fonts, working-copy, with-full-fonts
     """
 
     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
     """
 
     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
@@ -368,6 +412,15 @@ def transform(wldoc, verbose=False,
         for flag in flags:
             document.edoc.getroot().set(flag, 'yes')
 
         for flag in flags:
             document.edoc.getroot().set(flag, 'yes')
 
+    # add editors info
+    document.edoc.getroot().set('editors', u', '.join(sorted(
+        editor.readable() for editor in document.editors())))
+    if document.book_info.funders:
+        document.edoc.getroot().set('funders', u', '.join(
+            document.book_info.funders))
+    if document.book_info.thanks:
+        document.edoc.getroot().set('thanks', document.book_info.thanks)
+
     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
     manifest = opf.find('.//' + OPFNS('manifest'))
     guide = opf.find('.//' + OPFNS('guide'))
     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
     manifest = opf.find('.//' + OPFNS('manifest'))
     guide = opf.find('.//' + OPFNS('guide'))
@@ -395,7 +448,7 @@ def transform(wldoc, verbose=False,
 
     if cover:
         if cover is True:
 
     if cover:
         if cover is True:
-            cover = WLCover
+            cover = DefaultEbookCover
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
@@ -458,6 +511,15 @@ def transform(wldoc, verbose=False,
         zip.writestr('OPS/annotations.html', etree.tostring(
                             html_tree, method="html", pretty_print=True))
 
         zip.writestr('OPS/annotations.html', etree.tostring(
                             html_tree, method="html", pretty_print=True))
 
+    toc.add("Wesprzyj Wolne Lektury", "support.html")
+    manifest.append(etree.fromstring(
+        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
+    spine.append(etree.fromstring(
+        '<itemref idref="support" />'))
+    html_string = open(get_resource('epub/support.html')).read()
+    chars.update(used_chars(etree.fromstring(html_string)))
+    zip.writestr('OPS/support.html', html_string)
+
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
@@ -471,22 +533,29 @@ def transform(wldoc, verbose=False,
     if not flags or not 'without-fonts' in flags:
         # strip fonts
         tmpdir = mkdtemp('-librarian-epub')
     if not flags or not 'without-fonts' in flags:
         # strip fonts
         tmpdir = mkdtemp('-librarian-epub')
-        cwd = os.getcwd()
+        try:
+            cwd = os.getcwd()
+        except OSError:
+            cwd = None
 
         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 
         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
-            optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
-                              get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
-            if verbose:
-                print "Running font-optimizer"
-                subprocess.check_call(optimizer_call)
+            if not flags or not 'with-full-fonts' in flags:
+                optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
+                              get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]              
+                if verbose:
+                    print "Running font-optimizer"
+                    subprocess.check_call(optimizer_call)
+                else:
+                    subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                    zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             else:
             else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
+                zip.write(get_resource('fonts/' + fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
             manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="font/ttf" />' % (fname, fname)))
+                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
         rmtree(tmpdir)
         rmtree(tmpdir)
-        os.chdir(cwd)
+        if cwd is not None:
+            os.chdir(cwd)
 
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
     title = document.book_info.title
 
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
     title = document.book_info.title
@@ -496,7 +565,7 @@ def transform(wldoc, verbose=False,
         meta.set('name', st)
         meta.set('content', '0')
         toc_file[0].append(meta)
         meta.set('name', st)
         meta.set('content', '0')
         toc_file[0].append(meta)
-    toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl')))
+    toc_file[0][0].set('content', str(document.book_info.url))
     toc_file[0][1].set('content', str(toc.depth()))
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 
     toc_file[0][1].set('content', str(toc.depth()))
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))