description (sponsor info) in lesson pdf
[librarian.git] / librarian / epub.py
index 5481516..01f5c92 100644 (file)
@@ -7,6 +7,7 @@ from __future__ import with_statement
 
 import os
 import os.path
+import re
 import subprocess
 from StringIO import StringIO
 from copy import deepcopy
@@ -15,8 +16,7 @@ import zipfile
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
-from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
-from librarian.cover import WLCover
+from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, IOFile
 
 from librarian import functions, get_resource
 
@@ -33,6 +33,7 @@ def inner_xml(node):
     nt = node.text if node.text is not None else ''
     return ''.join([nt] + [etree.tostring(child) for child in node])
 
+
 def set_inner_xml(node, text):
     """ sets node's text and children from a string
 
@@ -109,31 +110,74 @@ def find_annotations(annotations, source, part_no):
             find_annotations(annotations, child, part_no)
 
 
+class Stanza(object):
+    """
+    Converts / verse endings into verse elements in a stanza.
+
+    Slashes may only occur directly in the stanza. Any slashes in subelements
+    will be ignored, and the subelements will be put inside verse elements.
+
+    >>> s = etree.fromstring("<strofa>a/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
+    >>> Stanza(s).versify()
+    >>> print etree.tostring(s)
+    <strofa><wers_normalny>a</wers_normalny><wers_normalny>b<x>x/
+    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
+
+    """
+    def __init__(self, stanza_elem):
+        self.stanza = stanza_elem
+        self.verses = []
+        self.open_verse = None
+
+    def versify(self):
+        self.push_text(self.stanza.text)
+        for elem in self.stanza:
+            self.push_elem(elem)
+            self.push_text(elem.tail)
+        tail = self.stanza.tail
+        self.stanza.clear()
+        self.stanza.tail = tail
+        self.stanza.extend(self.verses)
+
+    def open_normal_verse(self):
+        self.open_verse = self.stanza.makeelement("wers_normalny")
+        self.verses.append(self.open_verse)
+
+    def get_open_verse(self):
+        if self.open_verse is None:
+            self.open_normal_verse()
+        return self.open_verse
+
+    def push_text(self, text):
+        if not text or not text.strip():
+            return
+        for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
+            if i:
+                self.open_normal_verse()
+            verse = self.get_open_verse()
+            if len(verse):
+                verse[-1].tail = (verse[-1].tail or "") + verse_text.strip()
+            else:
+                verse.text = (verse.text or "") + verse_text.strip()
+
+    def push_elem(self, elem):
+        if elem.tag.startswith("wers"):
+            verse = deepcopy(elem)
+            verse.tail = None
+            self.verses.append(verse)
+            self.open_verse = verse
+        else:
+            appended = deepcopy(elem)
+            appended.tail = None
+            self.get_open_verse().append(appended)
+
+
 def replace_by_verse(tree):
     """ Find stanzas and create new verses in place of a '/' character """
 
     stanzas = tree.findall('.//' + WLNS('strofa'))
-    for node in stanzas:
-        for child_node in node:
-            if child_node.tag in ('slowo_obce', 'wyroznienie'):
-                foreign_verses = inner_xml(child_node).split('/\n')
-                if len(foreign_verses) > 1:
-                    new_foreign = ''
-                    for foreign_verse in foreign_verses:
-                        if foreign_verse.startswith('<wers'):
-                            new_foreign += foreign_verse
-                        else:
-                            new_foreign += ''.join(('<wers_normalny>', foreign_verse, '</wers_normalny>'))
-                    set_inner_xml(child_node, new_foreign)
-        verses = inner_xml(node).split('/\n')
-        if len(verses) > 1:
-            modified_inner_xml = ''
-            for verse in verses:
-                if verse.startswith('<wers') or verse.startswith('<extra'):
-                    modified_inner_xml += verse
-                else:
-                    modified_inner_xml += ''.join(('<wers_normalny>', verse, '</wers_normalny>'))
-            set_inner_xml(node, modified_inner_xml)
+    for stanza in stanzas:
+        Stanza(stanza).versify()
 
 
 def add_to_manifest(manifest, partno):
@@ -151,7 +195,7 @@ def add_to_manifest(manifest, partno):
 def add_to_spine(spine, partno):
     """ Adds a node to the spine section in content.opf file """
 
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno});
+    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
     spine.append(e)
 
 
@@ -243,7 +287,7 @@ def chop(main_text):
     # prepare a container for each chunk
     part_xml = etree.Element('utwor')
     etree.SubElement(part_xml, 'master')
-    main_xml_part = part_xml[0] # master
+    main_xml_part = part_xml[0]  # master
 
     last_node_part = False
     for one_part in main_text:
@@ -261,8 +305,10 @@ def chop(main_text):
     yield part_xml
 
 
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=None):
     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+    if _empty_html_static is None:
+        _empty_html_static = []
 
     toc = TOC()
     for element in chunk_xml[0]:
@@ -291,7 +337,7 @@ def transform(wldoc, verbose=False,
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
-    cover: a cover.Cover object or True for default
+    cover: a cover.Cover factory or True for default
     flags: less-advertising, without-fonts, working-copy
     """
 
@@ -308,8 +354,7 @@ def transform(wldoc, verbose=False,
             # write book title page
             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
             chars = used_chars(html_tree.getroot())
-            zip.writestr('OPS/title.html',
-                 etree.tostring(html_tree, method="html", pretty_print=True))
+            zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True))
             # add a title page TOC entry
             toc.add(u"Strona tytułowa", "title.html")
         elif wldoc.book_info.parts:
@@ -352,15 +397,14 @@ def transform(wldoc, verbose=False,
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
 
-        for child in wldoc.parts():
-            child_toc, chunk_counter, chunk_chars, sample = transform_file(
-                child, chunk_counter, first=False, sample=sample)
-            toc.append(child_toc)
-            chars = chars.union(chunk_chars)
+        for child in wldoc.parts():
+            child_toc, chunk_counter, chunk_chars, sample = transform_file(
+                child, chunk_counter, first=False, sample=sample)
+            toc.append(child_toc)
+            chars = chars.union(chunk_chars)
 
         return toc, chunk_counter, chars, sample
 
-
     document = deepcopy(wldoc)
     del wldoc
 
@@ -368,6 +412,10 @@ def transform(wldoc, verbose=False,
         for flag in flags:
             document.edoc.getroot().set(flag, 'yes')
 
+    # add editors info
+    # document.edoc.getroot().set('editors', u', '.join(sorted(
+    #     editor.readable() for editor in document.editors())))
+
     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
     manifest = opf.find('.//' + OPFNS('manifest'))
     guide = opf.find('.//' + OPFNS('guide'))
@@ -382,11 +430,12 @@ def transform(wldoc, verbose=False,
     mime.compress_type = zipfile.ZIP_STORED
     mime.extra = ''
     zip.writestr(mime, 'application/epub+zip')
-    zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
-                       'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
-                       '<rootfiles><rootfile full-path="OPS/content.opf" ' \
-                       'media-type="application/oebps-package+xml" />' \
-                       '</rootfiles></container>')
+    zip.writestr(
+        'META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
+        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+        '<rootfiles><rootfile full-path="OPS/content.opf" '
+        'media-type="application/oebps-package+xml" />'
+        '</rootfiles></container>')
     zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
     zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
     if not style:
@@ -394,42 +443,40 @@ def transform(wldoc, verbose=False,
     zip.write(style, os.path.join('OPS', 'style.css'))
 
     if cover:
-        if cover is True:
-            cover = WLCover
-        if cover.uses_dc_cover:
-            if document.book_info.cover_by:
-                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
-            if document.book_info.cover_source:
-                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
-
         cover_file = StringIO()
-        c = cover(document.book_info)
-        c.save(cover_file)
-        c_name = 'cover.%s' % c.ext()
-        zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue())
+        bound_cover = cover(document.book_info)
+        bound_cover.save(cover_file)
+        cover_name = 'cover.%s' % bound_cover.ext()
+        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
         del cover_file
 
         cover_tree = etree.parse(get_resource('epub/cover.html'))
-        cover_tree.find('//' + XHTMLNS('img')).set('src', c_name)
+        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
         zip.writestr('OPS/cover.html', etree.tostring(
                         cover_tree, method="html", pretty_print=True))
 
+        if bound_cover.uses_dc_cover:
+            if document.book_info.cover_by:
+                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+            if document.book_info.cover_source:
+                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
+
         manifest.append(etree.fromstring(
             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
         manifest.append(etree.fromstring(
-            '<item id="cover-image" href="%s" media-type="%s" />' % (c_name, c.mime_type())))
+            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
         spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 
-
     annotations = etree.Element('annotations')
 
-    toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
-                               '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
-                               '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
-                               'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
-                               '</navMap></ncx>')
+    toc_file = etree.fromstring(
+        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+        '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+        '</navMap></ncx>')
     nav_map = toc_file[-1]
 
     if html_toc:
@@ -467,10 +514,13 @@ def transform(wldoc, verbose=False,
     zip.writestr('OPS/last.html', etree.tostring(
                         html_tree, method="html", pretty_print=True))
 
-    if not flags or not 'without-fonts' in flags:
+    if not flags or 'without-fonts' not in flags:
         # strip fonts
         tmpdir = mkdtemp('-librarian-epub')
-        cwd = os.getcwd()
+        try:
+            cwd = os.getcwd()
+        except OSError:
+            cwd = None
 
         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
@@ -485,7 +535,8 @@ def transform(wldoc, verbose=False,
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="font/ttf" />' % (fname, fname)))
         rmtree(tmpdir)
-        os.chdir(cwd)
+        if cwd is not None:
+            os.chdir(cwd)
 
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
     title = document.book_info.title
@@ -507,4 +558,4 @@ def transform(wldoc, verbose=False,
     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
     zip.close()
 
-    return OutputFile.from_filename(output_file.name)
+    return IOFile.from_filename(output_file.name)