report images without extensions, allow span.item

[librarian.git] / librarian / formats / epub / __init__.py
diff --git a/librarian/formats/epub/__init__.py b/librarian/formats/epub/__init__.py

index bf21a6f..4b6cf13 100644 (file)
--- a/librarian/formats/epub/__init__.py
+++ b/librarian/formats/epub/__init__.py
@@ -4,6 +4,7 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  import os
+import re
  import urllib
  from copy import deepcopy
  from mimetypes import guess_type
  import urllib
  from copy import deepcopy
  from mimetypes import guess_type
@@ -12,7 +13,7 @@ import zipfile
  from urllib2 import urlopen
  
  from lxml import etree
  from urllib2 import urlopen
  
  from lxml import etree
-from librarian import OPFNS, NCXNS, XHTMLNS, DCNS
+from librarian import OPFNS, NCXNS, XHTMLNS, DCNS, BuildError
  from librarian import core
  from librarian.formats import Format
  from librarian.formats.cover.evens import EvensCover
  from librarian import core
  from librarian.formats import Format
  from librarian.formats.cover.evens import EvensCover
@@ -34,6 +35,9 @@ class EpubFormat(Format):
          if cover is not None:
              self.cover = cover
  
          if cover is not None:
              self.cover = cover
  
+    def dc(self, tag):
+        """Look up the DCNS-qualified metadata field `tag` on the document.
+
+        `tag` is the bare element name (e.g. 'publisher'); it is wrapped
+        with DCNS() and passed to self.doc.meta.get_one().
+        """
+        return self.doc.meta.get_one(DCNS(tag))
+
      def build(self, ctx=None):
  
          def add_file(url, file_id):
      def build(self, ctx=None):
  
          def add_file(url, file_id):
@@ -52,7 +56,7 @@ class EpubFormat(Format):
          guide = opf.find(OPFNS('guide'))
          spine = opf.find(OPFNS('spine'))
  
          guide = opf.find(OPFNS('guide'))
          spine = opf.find(OPFNS('spine'))
  
-        author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
+        author = ", ". join(self.doc.meta.get(DCNS('creator')) or [])
          title = self.doc.meta.title()
          opf.find('.//' + DCNS('creator')).text = author
          opf.find('.//' + DCNS('title')).text = title
          title = self.doc.meta.title()
          opf.find('.//' + DCNS('creator')).text = author
          opf.find('.//' + DCNS('title')).text = title
@@ -150,6 +154,31 @@ class EpubFormat(Format):
              zip.writestr('OPS/footnotes.html', etree.tostring(
                                  wrap, method="html", pretty_print=True))
  
              zip.writestr('OPS/footnotes.html', etree.tostring(
                                  wrap, method="html", pretty_print=True))
  
+        footer_text = [
+            'Information about the resource',
+            'Publisher: %s' % self.dc('publisher'),
+            'Rights: %s' % self.dc('rights'),
+            'Intended audience: %s' % self.dc('audience'),
+            self.dc('description'),
+            'Resource prepared using MIL/PEER editing platform.',
+            'Source available at %s' % ctx.source_url,
+        ]
+        footer_wrap = deepcopy(wrap_tmpl)
+        footer_body = footer_wrap.find('//*[@id="book-text"]')
+        for line in footer_text:
+            footer_line = etree.Element('p')
+            footer_line.text = line
+            footer_body.append(footer_line)
+        manifest.append(manifest.makeelement(OPFNS('item'), attrib={
+            'id': 'footer',
+            'href': "footer.html",
+            'media-type': 'application/xhtml+xml',
+        }))
+        spine.append(spine.makeelement(OPFNS('itemref'), attrib={
+            'idref': 'footer',
+        }))
+        zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html'))
+
          zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
          ctx.toc.render(toc_file[-1])
          zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
          zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
          ctx.toc.render(toc_file[-1])
          zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
@@ -201,6 +230,24 @@ class EpubRenderer(TreeRenderer):
          yield wrapper
  
  
          yield wrapper
  
  
+class NaturalText(EpubRenderer):
+    """Renderer that glues a lone word character to the word after it."""
+
+    def render_text(self, text, ctx):
+        """Render `text`, turning the space after a single word character
+        into an &nbsp; entity.
+
+        Only a word character preceded by whitespace is matched, so one
+        standing at the very start of `text` keeps its plain space.
+        Returns the root element obtained from self.text_container().
+        """
+        root, inner = self.text_container()
+        # Raw string so that \s and \w reach the regex engine unchanged
+        # instead of relying on Python passing unknown escapes through.
+        chunks = re.split(r'(?<=\s\w) ', text)
+        inner.text = chunks[0]
+        for chunk in chunks[1:]:
+            # Each split point becomes an entity node; the text that
+            # followed the removed space hangs off it as its tail.
+            x = etree.Entity("nbsp")
+            x.tail = chunk
+            inner.append(x)
+        return root
+
+
+class Silent(EpubRenderer):
+    """Renderer that discards text content and emits only the container."""
+
+    def render_text(self, text, ctx):
+        """Ignore `text` and return the root from self.text_container()."""
+        root, inner = self.text_container()
+        return root
+
+
  class Footnotes(object):
      def __init__(self):
          self.counter = 0
  class Footnotes(object):
      def __init__(self):
          self.counter = 0
@@ -262,7 +309,7 @@ class TOC(object):
  
  # Renderers
  
  
  # Renderers
  
-class AsideR(EpubRenderer):
+class AsideR(NaturalText):
      def render(self, element, ctx):
          outputs = list(super(AsideR, self).render(element, ctx))
          anchor = ctx.footnotes.append(outputs)
      def render(self, element, ctx):
          outputs = list(super(AsideR, self).render(element, ctx))
          anchor = ctx.footnotes.append(outputs)
@@ -271,8 +318,10 @@ class AsideR(EpubRenderer):
          yield wrapper
  EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
  
          yield wrapper
  EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
  
+EpubFormat.renderers.register(core.Aside, 'comment', Silent())
+
  
  
-class DivR(EpubRenderer):
+class DivR(NaturalText):
      def container(self, ctx):
          root, inner = super(DivR, self).container(ctx)
          if getattr(ctx, 'inline', False):
      def container(self, ctx):
          root, inner = super(DivR, self).container(ctx)
          if getattr(ctx, 'inline', False):
@@ -280,12 +329,20 @@ class DivR(EpubRenderer):
              inner.set('style', 'display: block;')
          return root, inner
  EpubFormat.renderers.register(core.Div, None, DivR('div'))
              inner.set('style', 'display: block;')
          return root, inner
  EpubFormat.renderers.register(core.Div, None, DivR('div'))
+EpubFormat.renderers.register(core.Div, 'p', NaturalText('p'))
+
+EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
+EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
+EpubFormat.renderers.register(core.Div, 'item', NaturalText('li'))
+EpubFormat.renderers.register(core.Span, 'item', NaturalText('li'))
  
  
  class DivImageR(EpubRenderer):
      def render(self, element, ctx):
          src = element.attrib.get('src', '')
          ctx.images.append(src)
  
  
  class DivImageR(EpubRenderer):
      def render(self, element, ctx):
          src = element.attrib.get('src', '')
          ctx.images.append(src)
+        if '/' not in src:
+            raise BuildError('Bad image URL')
          src = src.rsplit('/', 1)[1]
          return super(DivImageR, self).render(element, Context(ctx, src=src))
  
          src = src.rsplit('/', 1)[1]
          return super(DivImageR, self).render(element, Context(ctx, src=src))
  
@@ -298,13 +355,28 @@ class DivImageR(EpubRenderer):
  EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
  
  
  EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
  
  
-class HeaderR(EpubRenderer):
+class DivVideoR(Silent):
+    """Renders a video Div as a textual link to a YouTube watch URL."""
+
+    def render(self, element, ctx):
+        """Build the watch URL from the element's `videoid` attribute and
+        delegate to the parent render with it exposed as `src` on a
+        derived Context.
+
+        A missing `videoid` yields a URL with an empty `v=` parameter.
+        """
+        src = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid', '')
+        return super(DivVideoR, self).render(element, Context(ctx, src=src))
+
+    def container(self, ctx):
+        """Return the parent's (root, inner) with an <a> link appended to inner."""
+        root, inner = super(DivVideoR, self).container(ctx)
+        # `src` is put on the context by render(); '' when it is absent.
+        src = getattr(ctx, 'src', '')
+        link = etree.Element('a', {'href': src})
+        # The URL doubles as the visible link text.
+        link.text = src
+        inner.append(link)
+        return root, inner
+EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p'))
+
+
+class HeaderR(NaturalText):
      def subcontext(self, element, ctx):
          return Context(ctx, inline=True)
  EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
  
  
      def subcontext(self, element, ctx):
          return Context(ctx, inline=True)
  EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
  
  
-class SectionR(EpubRenderer):
+class SectionR(NaturalText):
      epub_separate = True
  
      def render(self, element, ctx):
      epub_separate = True
  
      def render(self, element, ctx):
@@ -316,6 +388,21 @@ class SectionR(EpubRenderer):
  EpubFormat.renderers.register(core.Section, None, SectionR())
  
  
  EpubFormat.renderers.register(core.Section, None, SectionR())
  
  
-class SpanR(EpubRenderer):
+class SpanR(NaturalText):
      pass
  EpubFormat.renderers.register(core.Span, None, SpanR('span'))
      pass
  EpubFormat.renderers.register(core.Span, None, SpanR('span'))
+EpubFormat.renderers.register(core.Span, 'cite', SpanR('i'))
+EpubFormat.renderers.register(core.Span, 'emp', SpanR('b'))
+EpubFormat.renderers.register(core.Span, 'emph', SpanR('i'))
+
+
+class SpanLink(EpubRenderer):
+    """Renderer for link Spans that copies the source `href` to the output."""
+
+    def render(self, element, ctx):
+        """Yield the parent's rendered parts, setting `href` on the first
+        item of each part.
+
+        A 'file://' href is rewritten to ctx.files_path plus the rest of
+        the URL; any other href is copied unchanged ('' when missing).
+        """
+        parts = super(SpanLink, self).render(element, ctx)
+        for part in parts:
+            src = element.attrib.get('href', '')
+            if src.startswith('file://'):
+                # 7 == len('file://'): drop the scheme, keep the remainder.
+                src = ctx.files_path + src[7:]
+            # NOTE(review): part[0] is presumably the generated <a>
+            # element -- confirm against EpubRenderer.render.
+            part[0].attrib['href'] = src
+            yield part
+EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))