Preliminary math and tables support.
author Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Sat, 27 Dec 2014 23:35:25 +0000 (00:35 +0100)
committer Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Sat, 27 Dec 2014 23:35:25 +0000 (00:35 +0100)
16 files changed:
librarian/embeds/__init__.py [new file with mode: 0644]
librarian/embeds/latex.py [new file with mode: 0644]
librarian/embeds/mathml.py [new file with mode: 0644]
librarian/epub.py
librarian/epub/style.css
librarian/epub/xsltScheme.xsl
librarian/fb2/paragraphs.xslt
librarian/functions.py
librarian/pdf.py
librarian/pdf/wl.cls
librarian/pdf/wl2tex.xslt
librarian/res/embeds/latex/template.tex [new file with mode: 0644]
librarian/res/embeds/mathml/mathml2latex.xslt [new file with mode: 0644]
librarian/xslt/book2html.xslt
librarian/xslt/book2txt.xslt
setup.py

diff --git a/librarian/embeds/__init__.py b/librarian/embeds/__init__.py
new file mode 100644 (file)
index 0000000..3b1abdb
--- /dev/null
@@ -0,0 +1,56 @@
+import importlib
+from lxml import etree
+
+# Maps a MIME type to the dotted path of the Embed subclass that handles it.
+# The paths are strings so that the modules are imported only on demand
+# (see create_embed, which resolves them with importlib).
+known_types = {
+    'application/mathml+xml': 'librarian.embeds.mathml.MathML',
+    'application/x-latex': 'librarian.embeds.latex.LaTeX',
+}
+
+class Embed():
+    @classmethod
+    def transforms_to(cls, mime_types, downgrade=False):
+        matches = set()
+        for name, method in cls.__dict__.iteritems():
+            if hasattr(method, "embed_converts_to"):
+                conv_type, conv_downgrade = method.embed_converts_to
+                if downgrade == conv_downgrade and conv_type in mime_types:
+                    matches.add(conv_type)
+        return matches
+
+    def transform_to(self, mime_type, downgrade=False):
+        for name, method in type(cls).__dict__.iteritems():
+            if hasattr(method, "embed_converts_to"):
+                conv_type, conv_downgrade = method.embed_converts_to
+                if downgrade == conv_downgrade and conv_type == mime_type:
+                    return method(self)
+
+
+class DataEmbed(Embed):
+    """Embed whose content is kept as-is in ``self.data``."""
+    def __init__(self, data=None):
+        self.data = data
+
+class TreeEmbed(Embed):
+    def __init__(self, tree=None):
+        if isinstance(tree, etree._Element):
+            tree = etree.ElementTree(tree)
+        self.tree = tree
+
+def converts_to(mime_type, downgrade=False):
+    def decorator(method):
+        method.embed_converts_to = mime_type, downgrade
+        return method
+    return decorator
+
+def downgrades_to(mime_type):
+    return converts_to(mime_type, True)
+
+def create_embed(mime_type, tree=None, data=None):
+    embed = known_types.get(mime_type)
+    if embed is None:
+        embed = DataEmbed if tree is None else TreeEmbed
+    else:
+        mod_name, cls_name = embed.rsplit('.', 1)
+        mod = importlib.import_module(mod_name)
+        embed = getattr(mod, cls_name)
+
+    return embed(data if tree is None else tree)
diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py
new file mode 100644 (file)
index 0000000..e10d165
--- /dev/null
@@ -0,0 +1,21 @@
+import os
+import shutil
+from subprocess import call, PIPE
+from tempfile import mkdtemp
+from librarian import get_resource
+from . import DataEmbed, create_embed, downgrades_to, converts_to
+
+class LaTeX(DataEmbed):
+    @downgrades_to('image/png')
+    def to_png(self):
+        tmpl = open(get_resource('res/embeds/latex/template.tex')).read().decode('utf-8')
+        tempdir = mkdtemp('-librarian-embed-latex')
+        fpath = os.path.join(tempdir, 'doc.tex')
+        with open(fpath, 'w') as f:
+            f.write((tmpl % {'code': self.data}).encode('utf-8'))
+        call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
+        call(['convert', '-density', '150', os.path.join(tempdir, 'doc.pdf'), '-trim',
+                os.path.join(tempdir, 'doc.png')])
+        pngdata = open(os.path.join(tempdir, 'doc.png')).read()
+        shutil.rmtree(tempdir)
+        return create_embed('image/png', data=pngdata)
diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py
new file mode 100644 (file)
index 0000000..f99f979
--- /dev/null
@@ -0,0 +1,10 @@
+from lxml import etree
+from librarian import get_resource
+from . import TreeEmbed, create_embed, downgrades_to, converts_to
+
+class MathML(TreeEmbed):
+    @downgrades_to('application/x-latex')
+    def to_latex(self):
+        xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
+        output = self.tree.xslt(xslt)
+        return create_embed('application/x-latex', data=unicode(output))
index 1ea2688..bf58a9f 100644 (file)
@@ -520,6 +520,8 @@ def transform(wldoc, verbose=False,
     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
+    functions.reg_mathml_epub(zip)
+
     # write static elements
     mime = zipfile.ZipInfo()
     mime.filename = 'mimetype'
index 1f5d11b..57f5490 100644 (file)
@@ -368,3 +368,10 @@ p.minor-info {
 p.footer {
     margin-top: 2em;
 }
+
+table {
+    border-collapse: collapse;
+}
+td {
+    border: 1px solid black;
+}
index d2d7871..1c066d9 100644 (file)
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:stylesheet version="1.0"  xmlns="http://www.w3.org/1999/xhtml" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:wl="http://wolnelektury.pl/functions">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
   <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
   <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
 
   <xsl:template match="motyw" />
 
+<!-- Math: emit an <img> whose src is the value returned by the
+     wl:mathml extension function for this element. -->
+<xsl:template match="mat">
+    <img xmlns="http://www.w3.org/1999/xhtml">
+        <xsl:attribute name="src">
+            <xsl:value-of select="wl:mathml(.)" />
+        </xsl:attribute>
+    </img>
+</xsl:template>
+
+<xsl:template match="tabela">
+  <table xmlns="http://www.w3.org/1999/xhtml">
+    <xsl:apply-templates />
+  </table>
+</xsl:template>
+<xsl:template match="wiersz">
+  <tr xmlns="http://www.w3.org/1999/xhtml">
+      <xsl:apply-templates />
+  </tr>
+</xsl:template>
+<xsl:template match="kol">
+  <td xmlns="http://www.w3.org/1999/xhtml">
+     <xsl:apply-templates />
+  </td>
+</xsl:template>
+
+
   <!--===========================================================-->
   <!-- Tagi IGNOROWANE -->
   <!--===========================================================-->
index 68c6257..334412e 100644 (file)
                <empty-line/><p>————————</p><empty-line/>
        </xsl:template>
 
+       <xsl:template mode="para" match="tabela">
+               <table><xsl:apply-templates mode="para" /></table>
+       </xsl:template>
+       <xsl:template mode="para" match="wiersz">
+               <tr><xsl:apply-templates mode="para" /></tr>
+       </xsl:template>
+       <xsl:template mode="para" match="kol">
+               <td><xsl:apply-templates mode="inline" /></td>
+       </xsl:template>
+
 
 
        <xsl:template mode="para" match="*"/>
index b88a7fb..659bb94 100644 (file)
@@ -121,3 +121,24 @@ def reg_lang_code_3to2():
        _register_function(lang_code_3to2)
 
 
+def mathml_latex(context, trees):
+    from librarian.embeds.mathml import MathML
+    text = MathML(trees[0]).to_latex().data
+    # Remove invisible multiplications, they produce unwanted spaces.
+    text = text.replace(u'\u2062', '')
+    return text
+
+def reg_mathml_latex():
+    """Pass ``mathml_latex`` to ``_register_function``."""
+    _register_function(mathml_latex)
+
+def reg_mathml_epub(zipf):
+    """Pass a ``mathml`` function bound to ``zipf`` to ``_register_function``.
+
+    The function converts the first tree in ``trees`` from MathML through
+    LaTeX to PNG, writes the image into ``zipf`` as ``OPS/math<N>.png`` and
+    returns the bare file name (without the ``OPS/`` prefix).
+    """
+    from librarian.embeds.mathml import MathML
+    def mathml(context, trees):
+        data = MathML(trees[0]).to_latex().to_png().data
+        name = "math%d.png" % mathml.count
+        mathml.count += 1
+        zipf.writestr('OPS/' + name, data)
+        return name
+    # Image counter lives on the function object; it starts from 0 for
+    # every call of reg_mathml_epub.
+    mathml.count = 0
+    _register_function(mathml)
+
index 12c07ea..95883e1 100644 (file)
@@ -95,6 +95,17 @@ def fix_hanging(doc):
                 exclude=[DCNS("identifier.url"), DCNS("rights.license")]
                 )
 
+def fix_tables(doc):
+    for kol in doc.iter(tag='kol'):
+        if kol.tail is not None:
+            if not kol.tail.strip():
+                kol.tail = None
+    for table in doc.iter(tag='tabela'):
+        if table.get('ramka') == '1' or table.get('ramki') == '1':
+            table.set('_format', '|' + 'X|' * len(table[0]))
+        else:
+            table.set('_format', 'X' * len(table[0]))
+
 
 def move_motifs_inside(doc):
     """ moves motifs to be into block elements """
@@ -245,10 +256,12 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         parse_creator(document.edoc)
         substitute_hyphens(document.edoc)
         fix_hanging(document.edoc)
+        fix_tables(document.edoc)
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
         style = etree.parse(style_filename)
+        functions.reg_mathml_latex()
 
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
index 8907b08..a802e20 100644 (file)
 \usepackage{xunicode}
 \usepackage{xltxtra}
 
+\usepackage{longtable}
+\usepackage{tabu}
+\usepackage{unicode-math}
+\setmathfont{Latin Modern Math}
+
 \usepackage[overload]{textcase}
 \usepackage{scalefnt}
 \usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref}
index d39b61a..2548abc 100644 (file)
     </parm></cmd>
 </xsl:template>
 
+<xsl:template match="mat" mode="inline">
+    <TeXML escape="0">
+        <xsl:text>$</xsl:text>
+        <xsl:value-of select="wl:mathml_latex(.)" />
+        <xsl:text>$</xsl:text>
+    </TeXML>
+</xsl:template>
+
+<xsl:template match="mat">
+    <TeXML escape="0">
+        <xsl:text>$$</xsl:text>
+        <xsl:value-of select="wl:mathml_latex(.)" />
+        <xsl:text>$$</xsl:text>
+    </TeXML>
+</xsl:template>
+
+<xsl:template match="tabela|tabelka">
+    <cmd name="par" />
+    <cmd name="vspace"><parm>1em</parm></cmd>
+    <group><cmd name="raggedright" />
+    <env name="longtabu"> to <TeXML escape="0">\textwidth </TeXML>
+      <!--parm><cmd name="textwidth"/></parm-->
+      <parm><TeXML escape="0"><xsl:value-of select="@_format" /></TeXML></parm>
+        <xsl:choose>
+        <xsl:when test="@ramka='1' or @ramki='1'">
+          <cmd name="hline" />
+          <xsl:apply-templates mode="wiersze-ramki"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:apply-templates/>
+        </xsl:otherwise>
+        </xsl:choose>
+    </env>
+    </group>
+    <cmd name="vspace"><parm>1em</parm></cmd>
+</xsl:template>
+<xsl:template match="wiersz" mode="wiersze-ramki">
+    <xsl:apply-templates />
+    <spec cat="esc"/><spec cat="esc"/>
+    <cmd name="hline" gr="0" />
+</xsl:template>
+<xsl:template match="wiersz">
+    <xsl:apply-templates />
+    <spec cat="esc"/><spec cat="esc"/>
+</xsl:template>
+<xsl:template match="kol">
+    <xsl:apply-templates mode="inline"/>
+    <xsl:if test="position() &lt; last()">
+    <spec cat="align"/>
+    </xsl:if>
+</xsl:template>
+
+
 <!-- ============== -->
 <!-- = ADDED TAGS = -->
 <!-- ============== -->
diff --git a/librarian/res/embeds/latex/template.tex b/librarian/res/embeds/latex/template.tex
new file mode 100644 (file)
index 0000000..8e4b807
--- /dev/null
@@ -0,0 +1,9 @@
+\documentclass{article}
+\usepackage{unicode-math}
+\setmathfont{Latin Modern Math}
+\pagestyle{empty}
+\begin{document}
+
+$%(code)s$
+
+\end{document}
diff --git a/librarian/res/embeds/mathml/mathml2latex.xslt b/librarian/res/embeds/mathml/mathml2latex.xslt
new file mode 100644 (file)
index 0000000..76ccf95
--- /dev/null
@@ -0,0 +1,66 @@
+<xsl:stylesheet version = '1.0'
+xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
+xmlns:ldf="http://planet-sl.org/ldf"
+xmlns:mml="http://www.w3.org/1998/Math/MathML">
+
+<!-- The result is LaTeX source, so it must be serialized as plain text.
+     "txt" is not a valid XSLT 1.0 output method; the valid name is "text". -->
+<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>
+
+<xsl:template match="mml:mtext">
+       <xsl:text>\textrm{</xsl:text>
+       <xsl:apply-templates select="node()"/>
+       <xsl:text>}</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:mi">
+       <xsl:value-of select="."/>
+</xsl:template>
+
+<xsl:template match="mml:mn">
+       <xsl:value-of select="."/>
+</xsl:template>
+
+<xsl:template match="mml:mo">
+       <xsl:value-of select="."/>
+</xsl:template>
+
+<xsl:template match="mml:msup">
+       <xsl:text>{</xsl:text>
+       <xsl:apply-templates select="*[1]"/>
+       <xsl:text>}^{</xsl:text>
+       <xsl:apply-templates select="*[2]"/>
+       <xsl:text>}</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:msub">
+       <xsl:text>{</xsl:text>
+       <xsl:apply-templates select="*[1]"/>
+       <xsl:text>}_{</xsl:text>
+       <xsl:apply-templates select="*[2]"/>
+       <xsl:text>}</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:mrow">
+       <xsl:text>{</xsl:text>
+       <xsl:apply-templates select="node()"/>
+       <xsl:text>}</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:mfenced">
+       <xsl:text>(</xsl:text>
+       <xsl:apply-templates select="node()"/>
+       <xsl:text>)</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:mfrac">
+       <xsl:text>\frac{</xsl:text>
+       <xsl:apply-templates select="*[1]"/>
+       <xsl:text>}{</xsl:text>
+       <xsl:apply-templates select="*[2]"/>
+       <xsl:text>}</xsl:text>
+</xsl:template>
+
+<xsl:template match="mml:varepsilon">
+       <xsl:text>\varepsilon </xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
index 499a1dc..201381c 100644 (file)
     <p class="motto_podpis"><xsl:apply-templates mode="inline" /></p>
 </xsl:template>
 
+<xsl:template match="tabela">
+    <table><xsl:apply-templates /></table>
+</xsl:template>
+<xsl:template match="wiersz">
+    <tr><xsl:apply-templates /></tr>
+</xsl:template>
+<xsl:template match="kol">
+    <td><xsl:apply-templates mode="inline" /></td>
+</xsl:template>
+
+<xsl:template match="mat">
+    <xsl:copy-of select="*" />
+</xsl:template>
+
 
 <!-- ================================================ -->
 <!-- = INLINE TAGS                                  = -->
 
 <!-- Other inline tags -->
 <xsl:template match="mat" mode="inline">
-    <em class="math"><xsl:apply-templates mode="inline" /></em>
+    <xsl:copy-of select="*" />
 </xsl:template>
 
 <xsl:template match="didask_tekst" mode="inline">
index 317e581..a578492 100644 (file)
 <xsl:apply-templates mode="inline" />
 </xsl:template>
 
+<xsl:template match="tabela">
+       <xsl:text>
+
+</xsl:text>
+<xsl:apply-templates/>
+<xsl:text>
+</xsl:text>
+</xsl:template>
+<xsl:template match="wiersz">
+       <xsl:apply-templates />
+       <xsl:text>
+</xsl:text>
+</xsl:template>
+<xsl:template match="kol">
+       <xsl:apply-templates mode="inline" />
+       <xsl:text>    </xsl:text>
+</xsl:template>
+
 
 <!-- ================================================ -->
 <!-- = INLINE TAGS                                  = -->
index 732f145..10abe6e 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ setup(
     maintainer='Radek Czajka',
     maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
     url='http://github.com/fnp/librarian',
-    packages=['librarian'],
+    packages=['librarian', 'librarian.embeds'],
     package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] +
                                 whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') +
                                 whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},