images in epub
[librarian.git] / librarian / formats / epub / __init__.py
index 38778ac..b9d1c7a 100644 (file)
@@ -4,11 +4,15 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 import os
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 import os
+import urllib
 from copy import deepcopy
 from copy import deepcopy
+from mimetypes import guess_type
 from tempfile import NamedTemporaryFile
 import zipfile
 from tempfile import NamedTemporaryFile
 import zipfile
+from urllib2 import urlopen
+
 from lxml import etree
 from lxml import etree
-from librarian import OPFNS, NCXNS, XHTMLNS
+from librarian import OPFNS, NCXNS, XHTMLNS, DCNS
 from librarian import core
 from librarian.formats import Format
 from librarian.formats.cover.wolnelektury import WLCover
 from librarian import core
 from librarian.formats import Format
 from librarian.formats.cover.wolnelektury import WLCover
@@ -30,12 +34,29 @@ class EpubFormat(Format):
         if cover is not None:
             self.cover = cover
 
         if cover is not None:
             self.cover = cover
 
-    def build(self):
+    def build(self, ctx=None):
+
+        def add_file(url, file_id):
+            """Fetch ``url`` and store it in the archive as ``OPS/<basename>``.
+
+            A matching ``<item>`` with id ``file_id`` is appended to the OPF
+            manifest.  ``file://`` URLs are rewritten relative to
+            ``ctx.files_path``; URLs that then start with ``/`` are fetched
+            from the milpeer.eu host.
+            """
+            # [-1] rather than [1]: a URL without any '/' is its own file name
+            # and must not raise IndexError.
+            filename = url.rsplit('/', 1)[-1]
+            if url.startswith('file://'):
+                url = ctx.files_path + urllib.quote(url[7:])
+            if url.startswith('/'):
+                url = 'http://milpeer.eu' + url
+            response = urlopen(url)
+            try:
+                file_content = response.read()
+            finally:
+                # Release the connection even if the read fails.
+                response.close()
+            zip.writestr(os.path.join('OPS', filename), file_content)
+            # Build the element through the API so that file names containing
+            # '&', '<' or '"' cannot produce malformed XML, and fall back to a
+            # generic type when the extension is unknown instead of emitting
+            # the literal string "None".
+            manifest.append(etree.Element(
+                'item', id=file_id, href=filename,
+                **{'media-type': guess_type(url)[0] or 'application/octet-stream'}))
+
         opf = etree.parse(get_resource('formats/epub/res/content.opf'))
         manifest = opf.find(OPFNS('manifest'))
         guide = opf.find(OPFNS('guide'))
         spine = opf.find(OPFNS('spine'))
 
         opf = etree.parse(get_resource('formats/epub/res/content.opf'))
         manifest = opf.find(OPFNS('manifest'))
         guide = opf.find(OPFNS('guide'))
         spine = opf.find(OPFNS('spine'))
 
+        author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
+        title = self.doc.meta.title()
+        opf.find('.//' + DCNS('creator')).text = author
+        opf.find('.//' + DCNS('title')).text = title
+
         output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
         zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
         output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
         zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
@@ -44,20 +65,21 @@ class EpubFormat(Format):
         mime.compress_type = zipfile.ZIP_STORED
         mime.extra = ''
         zip.writestr(mime, 'application/epub+zip')
         mime.compress_type = zipfile.ZIP_STORED
         mime.extra = ''
         zip.writestr(mime, 'application/epub+zip')
-        zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
-                       'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
-                       '<rootfiles><rootfile full-path="OPS/content.opf" ' \
-                       'media-type="application/oebps-package+xml" />' \
-                       '</rootfiles></container>')
-
-        toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
-                               '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
-                               '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
-                               'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
-                               '</navMap></ncx>')
-        nav_map = toc_file[-1]
+        zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
+                     'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+                     '<rootfiles><rootfile full-path="OPS/content.opf" '
+                     'media-type="application/oebps-package+xml" />'
+                     '</rootfiles></container>')
+
+        toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+                                    '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+                                    '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+                                    'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+                                    '</navMap></ncx>')
+        nav_map = toc_file[-1]
 
         if self.cover is not None:
 
         if self.cover is not None:
+            cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
             cover = self.cover(self.doc)
             cover_output = cover.build()
             cover_name = 'cover.%s' % cover.format_ext
             cover = self.cover(self.doc)
             cover_output = cover.build()
             cover_name = 'cover.%s' % cover.format_ext
@@ -71,9 +93,9 @@ class EpubFormat(Format):
 
             if cover.uses_dc_cover:
                 if self.doc.meta.get_one('cover_by'):
 
             if cover.uses_dc_cover:
                 if self.doc.meta.get_one('cover_by'):
-                    document.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
+                    self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
                 if self.doc.meta.get_one('cover_source'):
                 if self.doc.meta.get_one('cover_source'):
-                    document.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
+                    self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
 
             manifest.append(etree.fromstring(
                 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 
             manifest.append(etree.fromstring(
                 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
@@ -83,11 +105,14 @@ class EpubFormat(Format):
             opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
             guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 
             opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
             guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 
-
-        ctx = Context(format=self)
+        if not ctx:
+            ctx = Context(format=self)
+        else:
+            ctx.format = self
         ctx.toc = TOC()
         ctx.toc_level = 0
         ctx.footnotes = Footnotes()
         ctx.toc = TOC()
         ctx.toc_level = 0
         ctx.footnotes = Footnotes()
+        ctx.images = []
         ctx.part_no = 0
 
         wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
         ctx.part_no = 0
 
         wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
@@ -108,20 +133,22 @@ class EpubFormat(Format):
                     }))
             zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
 
                     }))
             zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
 
+        for i, url in enumerate(ctx.images):
+            add_file(url, 'image%s' % i)
+
         if len(ctx.footnotes.output):
             ctx.toc.add("Przypisy", "footnotes.html")
         if len(ctx.footnotes.output):
             ctx.toc.add("Przypisy", "footnotes.html")
-            manifest.append(etree.Element(OPFNS('item'),
-                    id='footnotes', href='footnotes.html',
-                    **{'media-type': "application/xhtml+xml"}))
+            manifest.append(etree.Element(
+                OPFNS('item'), id='footnotes', href='footnotes.html',
+                **{'media-type': "application/xhtml+xml"}))
             spine.append(etree.Element('itemref', idref='footnotes'))
             wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
             extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
             
             spine.append(etree.Element('itemref', idref='footnotes'))
             wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
             extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
             
-            #chars = chars.union(used_chars(html_tree.getroot()))
+            # chars = chars.union(used_chars(html_tree.getroot()))
             zip.writestr('OPS/footnotes.html', etree.tostring(
                                 wrap, method="html", pretty_print=True))
 
             zip.writestr('OPS/footnotes.html', etree.tostring(
                                 wrap, method="html", pretty_print=True))
 
-
         zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
         ctx.toc.render(toc_file[-1])
         zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
         zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
         ctx.toc.render(toc_file[-1])
         zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
@@ -180,8 +207,8 @@ class Footnotes(object):
 
     def append(self, items):
         self.counter += 1
 
     def append(self, items):
         self.counter += 1
-        e = etree.Element("a",
-            href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
+        e = etree.Element(
+            "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
             id="footnote-%d" % self.counter,
             style="float:left;margin-right:1em")
         e.text = "[%d]" % self.counter
             id="footnote-%d" % self.counter,
             style="float:left;margin-right:1em")
         e.text = "[%d]" % self.counter
@@ -189,9 +216,8 @@ class Footnotes(object):
         self.output.append(e)
         for item in items:
             extend_element(self.output, item)
         self.output.append(e)
         for item in items:
             extend_element(self.output, item)
-        anchor = etree.Element("a",
-            id="footnote-anchor-%d" % self.counter,
-            href="footnotes.html#footnote-%d" % self.counter)
+        anchor = etree.Element(
+            "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter)
         anchor.text = "[%d]" % self.counter
         return anchor
 
         anchor.text = "[%d]" % self.counter
         return anchor
 
@@ -239,7 +265,7 @@ class AsideR(EpubRenderer):
     def render(self, element, ctx):
         outputs = list(super(AsideR, self).render(element, ctx))
         anchor = ctx.footnotes.append(outputs)
     def render(self, element, ctx):
         outputs = list(super(AsideR, self).render(element, ctx))
         anchor = ctx.footnotes.append(outputs)
-        wrapper, inside = self.text_container()  #etree.Element('_', part_no=str(ctx.part_no))
+        wrapper, inside = self.text_container()  # etree.Element('_', part_no=str(ctx.part_no))
         inside.append(anchor)
         yield wrapper
 EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
         inside.append(anchor)
         yield wrapper
 EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
@@ -255,6 +281,22 @@ class DivR(EpubRenderer):
 EpubFormat.renderers.register(core.Div, None, DivR('div'))
 
 
 EpubFormat.renderers.register(core.Div, None, DivR('div'))
 
 
+class DivImageR(EpubRenderer):
+    """Renderer for image divs: records the source URL and rewrites ``src``.
+
+    The original URL is queued on ``ctx.images`` and the rendered element's
+    ``src`` attribute is set to the URL's last path segment.
+    """
+
+    def render(self, element, ctx):
+        """Queue the element's ``src`` on ``ctx.images`` and render it."""
+        src = element.attrib.get('src', '')
+        if src:
+            # An element without a source has nothing to fetch.
+            ctx.images.append(src)
+        # [-1] rather than [1]: a missing or slash-less ``src`` (such as the
+        # '' default above) must not raise IndexError.
+        src = src.rsplit('/', 1)[-1]
+        return super(DivImageR, self).render(element, Context(ctx, src=src))
+
+    def container(self, ctx):
+        """Return the parent's ``(root, inner)`` with ``src`` set on ``inner``."""
+        root, inner = super(DivImageR, self).container(ctx)
+        src = getattr(ctx, 'src', '')
+        inner.set('src', src)
+        # inner.set('style', 'display: block; width: 60%; margin: 3em auto')
+        return root, inner
+EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
+
+
 class HeaderR(EpubRenderer):
     def subcontext(self, element, ctx):
         return Context(ctx, inline=True)
 class HeaderR(EpubRenderer):
     def subcontext(self, element, ctx):
         return Context(ctx, inline=True)
@@ -276,4 +318,3 @@ EpubFormat.renderers.register(core.Section, None, SectionR())
 class SpanR(EpubRenderer):
     pass
 EpubFormat.renderers.register(core.Span, None, SpanR('span'))
 class SpanR(EpubRenderer):
     pass
 EpubFormat.renderers.register(core.Span, None, SpanR('span'))
-