From 5f92f5a341c58a30c0a5c0d64ba3ed498bb9db85 Mon Sep 17 00:00:00 2001
From: Radek Czajka
Date: Sun, 28 Dec 2014 00:35:25 +0100
Subject: [PATCH] Preliminary math and tables support.

---
 librarian/embeds/__init__.py                  | 82 +++++++++++++++++++
 librarian/embeds/latex.py                     | 36 ++++++++
 librarian/embeds/mathml.py                    | 13 +++
 librarian/epub.py                             |  2 +
 librarian/epub/style.css                      |  7 ++
 librarian/epub/xsltScheme.xsl                 | 27 +++++-
 librarian/fb2/paragraphs.xslt                 | 10 +++
 librarian/functions.py                        | 21 +++++
 librarian/pdf.py                              | 13 +++
 librarian/pdf/wl.cls                          |  5 ++
 librarian/pdf/wl2tex.xslt                     | 53 ++++++++++++
 librarian/res/embeds/latex/template.tex       |  9 ++
 librarian/res/embeds/mathml/mathml2latex.xslt | 66 +++++++++++++++
 librarian/xslt/book2html.xslt                 | 16 +++-
 librarian/xslt/book2txt.xslt                  | 18 ++++
 setup.py                                      |  2 +-
 16 files changed, 377 insertions(+), 3 deletions(-)
 create mode 100644 librarian/embeds/__init__.py
 create mode 100644 librarian/embeds/latex.py
 create mode 100644 librarian/embeds/mathml.py
 create mode 100644 librarian/res/embeds/latex/template.tex
 create mode 100644 librarian/res/embeds/mathml/mathml2latex.xslt

diff --git a/librarian/embeds/__init__.py b/librarian/embeds/__init__.py
new file mode 100644
index 0000000..3b1abdb
--- /dev/null
+++ b/librarian/embeds/__init__.py
@@ -0,0 +1,82 @@
+import importlib
+from lxml import etree
+
+# Maps a MIME type to the dotted path of the Embed subclass that handles it.
+known_types = {
+    'application/mathml+xml': 'librarian.embeds.mathml.MathML',
+    'application/x-latex': 'librarian.embeds.latex.LaTeX',
+}
+
+class Embed():
+    """Base class for embeds; converters are methods marked with converts_to."""
+
+    @classmethod
+    def transforms_to(cls, mime_types, downgrade=False):
+        """Return the set of `mime_types` that `cls` has a converter for.
+
+        Only methods found in the class's own `__dict__` and registered with
+        the same `downgrade` flag are considered.
+        """
+        matches = set()
+        for name, method in cls.__dict__.iteritems():
+            if hasattr(method, "embed_converts_to"):
+                conv_type, conv_downgrade = method.embed_converts_to
+                if downgrade == conv_downgrade and conv_type in mime_types:
+                    matches.add(conv_type)
+        return matches
+
+    def transform_to(self, mime_type, downgrade=False):
+        """Run this embed's converter to `mime_type` and return its result.
+
+        Returns None when no converter registered for `mime_type` with the
+        given `downgrade` flag is found.
+        """
+        # Converters live on the instance's class, as in transforms_to().
+        for name, method in self.__class__.__dict__.iteritems():
+            if hasattr(method, "embed_converts_to"):
+                conv_type, conv_downgrade = method.embed_converts_to
+                if downgrade == conv_downgrade and conv_type == mime_type:
+                    return method(self)
+
+
+class DataEmbed(Embed):
+    """Embed that keeps its content in `self.data`."""
+
+    def __init__(self, data=None):
+        self.data = data
+
+class TreeEmbed(Embed):
+    """Embed that keeps an XML tree in `self.tree`."""
+
+    def __init__(self, tree=None):
+        # A bare element is wrapped so that `self.tree` is an ElementTree.
+        if isinstance(tree, etree._Element):
+            tree = etree.ElementTree(tree)
+        self.tree = tree
+
+def converts_to(mime_type, downgrade=False):
+    """Mark the decorated method as a converter to `mime_type`."""
+    def decorator(method):
+        method.embed_converts_to = mime_type, downgrade
+        return method
+    return decorator
+
+def downgrades_to(mime_type):
+    """Mark the decorated method as a downgrading converter to `mime_type`."""
+    return converts_to(mime_type, True)
+
+def create_embed(mime_type, tree=None, data=None):
+    """Build an embed for `mime_type` from `tree` or, when it is None, `data`.
+
+    Types listed in `known_types` use their registered class (imported on
+    demand); any other type gets a plain TreeEmbed or DataEmbed.
+    """
+    embed = known_types.get(mime_type)
+    if embed is None:
+        embed = DataEmbed if tree is None else TreeEmbed
+    else:
+        mod_name, cls_name = embed.rsplit('.', 1)
+        mod = importlib.import_module(mod_name)
+        embed = getattr(mod, cls_name)
+
+    return embed(data if tree is None else tree)
diff --git a/librarian/embeds/latex.py b/librarian/embeds/latex.py
new file mode 100644
index 0000000..e10d165
--- /dev/null
+++ b/librarian/embeds/latex.py
@@ -0,0 +1,36 @@
+import os
+import shutil
+from subprocess import call, PIPE
+from tempfile import mkdtemp
+from librarian import get_resource
+from . import DataEmbed, create_embed, downgrades_to, converts_to
+
+class LaTeX(DataEmbed):
+    """LaTeX math source kept in `self.data`."""
+
+    @downgrades_to('image/png')
+    def to_png(self):
+        """Render the formula with xelatex and `convert` into a PNG embed."""
+        with open(get_resource('res/embeds/latex/template.tex'), 'rb') as f:
+            tmpl = f.read().decode('utf-8')
+        tempdir = mkdtemp('-librarian-embed-latex')
+        try:
+            fpath = os.path.join(tempdir, 'doc.tex')
+            with open(fpath, 'wb') as f:
+                f.write((tmpl % {'code': self.data}).encode('utf-8'))
+            # Nobody reads the child's output, so send it to the null device;
+            # an unread PIPE could fill up and block xelatex forever.
+            with open(os.devnull, 'wb') as devnull:
+                call(['xelatex', '-interaction=batchmode',
+                      '-output-directory', tempdir, fpath],
+                     stdout=devnull, stderr=devnull)
+            call(['convert', '-density', '150',
+                  os.path.join(tempdir, 'doc.pdf'), '-trim',
+                  os.path.join(tempdir, 'doc.png')])
+            # PNG is binary data, so read it in binary mode.
+            with open(os.path.join(tempdir, 'doc.png'), 'rb') as f:
+                pngdata = f.read()
+        finally:
+            # Remove the scratch directory even when a step above fails.
+            shutil.rmtree(tempdir)
+        return create_embed('image/png', data=pngdata)
diff --git a/librarian/embeds/mathml.py b/librarian/embeds/mathml.py
new file mode 100644
index 0000000..f99f979
--- /dev/null
+++ b/librarian/embeds/mathml.py
@@ -0,0 +1,13 @@
+from lxml import etree
+from librarian import get_resource
+from . import TreeEmbed, create_embed, downgrades_to, converts_to
+
+class MathML(TreeEmbed):
+    """MathML markup kept as a tree in `self.tree`."""
+
+    @downgrades_to('application/x-latex')
+    def to_latex(self):
+        """Convert the tree to a LaTeX embed using mathml2latex.xslt."""
+        xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
+        output = self.tree.xslt(xslt)
+        return create_embed('application/x-latex', data=unicode(output))
diff --git a/librarian/epub.py b/librarian/epub.py
index 1ea2688..bf58a9f 100644
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -520,6 +520,8 @@ def transform(wldoc, verbose=False,
     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
+    functions.reg_mathml_epub(zip)
+
     # write static elements
     mime = zipfile.ZipInfo()
     mime.filename = 'mimetype'
diff --git a/librarian/epub/style.css b/librarian/epub/style.css
index 1f5d11b..57f5490 100644
--- a/librarian/epub/style.css
+++ b/librarian/epub/style.css
@@ -368,3 +368,10 @@ p.minor-info {
 p.footer {
     margin-top: 2em;
 }
+
+table {
+    border-collapse: collapse;
+}
+td {
+    border: 1px solid black;
+}
diff --git a/librarian/epub/xsltScheme.xsl b/librarian/epub/xsltScheme.xsl
index d2d7871..1c066d9 100644
--- a/librarian/epub/xsltScheme.xsl
+++ b/librarian/epub/xsltScheme.xsl
@@ -1,5 +1,5 @@ - + @@ -284,6 +284,31 @@ + + + + + + + + + + + +
+
+ + + + + + + + + + + + diff --git a/librarian/fb2/paragraphs.xslt b/librarian/fb2/paragraphs.xslt index 68c6257..334412e 100644 --- a/librarian/fb2/paragraphs.xslt +++ b/librarian/fb2/paragraphs.xslt @@ -39,6 +39,16 @@

————————

+ +
+
+ + + + + + + +
diff --git a/librarian/functions.py b/librarian/functions.py
index b88a7fb..659bb94 100644
--- a/librarian/functions.py
+++ b/librarian/functions.py
@@ -121,3 +121,24 @@ def reg_lang_code_3to2():
     _register_function(lang_code_3to2)
 
 
+def mathml_latex(context, trees):
+    from librarian.embeds.mathml import MathML
+    text = MathML(trees[0]).to_latex().data
+    # Remove invisible multiplications, they produce unwanted spaces.
+    text = text.replace(u'\u2062', '')
+    return text
+
+def reg_mathml_latex():
+    _register_function(mathml_latex)
+
+def reg_mathml_epub(zipf):
+    from librarian.embeds.mathml import MathML
+    def mathml(context, trees):
+        data = MathML(trees[0]).to_latex().to_png().data
+        name = "math%d.png" % mathml.count
+        mathml.count += 1
+        zipf.writestr('OPS/' + name, data)
+        return name
+    mathml.count = 0
+    _register_function(mathml)
+
diff --git a/librarian/pdf.py b/librarian/pdf.py
index 12c07ea..95883e1 100644
--- a/librarian/pdf.py
+++ b/librarian/pdf.py
@@ -95,6 +95,17 @@ def fix_hanging(doc):
                 exclude=[DCNS("identifier.url"), DCNS("rights.license")]
                 )
 
+def fix_tables(doc):
+    """Drop blank `kol` tails and store a column spec in `tabela/@_format`."""
+    for kol in doc.iter(tag='kol'):
+        if kol.tail is not None and not kol.tail.strip():
+            kol.tail = None
+    for table in doc.iter(tag='tabela'):
+        # Count columns in the first child; an empty table has none.
+        cols = len(table[0]) if len(table) else 0
+        framed = table.get('ramka') == '1' or table.get('ramki') == '1'
+        table.set('_format', ('|' + 'X|' * cols) if framed else 'X' * cols)
+
 
 def move_motifs_inside(doc):
     """ moves motifs to be into block elements """
@@ -245,10 +256,12 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         parse_creator(document.edoc)
         substitute_hyphens(document.edoc)
         fix_hanging(document.edoc)
+        fix_tables(document.edoc)
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
         style = etree.parse(style_filename)
+        functions.reg_mathml_latex()
 
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
diff --git a/librarian/pdf/wl.cls b/librarian/pdf/wl.cls
index 8907b08..a802e20 100644
--- a/librarian/pdf/wl.cls
+++ 
b/librarian/pdf/wl.cls @@ -73,6 +73,11 @@ \usepackage{xunicode} \usepackage{xltxtra} +\usepackage{longtable} +\usepackage{tabu} +\usepackage{unicode-math} +\setmathfont{Latin Modern Math} + \usepackage[overload]{textcase} \usepackage{scalefnt} \usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref} diff --git a/librarian/pdf/wl2tex.xslt b/librarian/pdf/wl2tex.xslt index d39b61a..2548abc 100644 --- a/librarian/pdf/wl2tex.xslt +++ b/librarian/pdf/wl2tex.xslt @@ -435,6 +435,59 @@ + + + $ + + $ + + + + + + $$ + + $$ + + + + + + 1em + + to \textwidth + + + + + + + + + + + + + + 1em + + + + + + + + + + + + + + + + + + diff --git a/librarian/res/embeds/latex/template.tex b/librarian/res/embeds/latex/template.tex new file mode 100644 index 0000000..8e4b807 --- /dev/null +++ b/librarian/res/embeds/latex/template.tex @@ -0,0 +1,9 @@ +\documentclass{article} +\usepackage{unicode-math} +\setmathfont{Latin Modern Math} +\pagestyle{empty} +\begin{document} + +$%(code)s$ + +\end{document} diff --git a/librarian/res/embeds/mathml/mathml2latex.xslt b/librarian/res/embeds/mathml/mathml2latex.xslt new file mode 100644 index 0000000..76ccf95 --- /dev/null +++ b/librarian/res/embeds/mathml/mathml2latex.xslt @@ -0,0 +1,66 @@ + + + + + + \textrm{ + + } + + + + + + + + + + + + + + + + { + + }^{ + + } + + + + { + + }_{ + + } + + + + { + + } + + + + ( + + ) + + + + \frac{ + + }{ + + } + + + + \varepsilon + + + diff --git a/librarian/xslt/book2html.xslt b/librarian/xslt/book2html.xslt index 499a1dc..201381c 100644 --- a/librarian/xslt/book2html.xslt +++ b/librarian/xslt/book2html.xslt @@ -231,6 +231,20 @@

+ +
+
+ + + + + + + + + + + @@ -244,7 +258,7 @@ - + diff --git a/librarian/xslt/book2txt.xslt b/librarian/xslt/book2txt.xslt index 317e581..a578492 100644 --- a/librarian/xslt/book2txt.xslt +++ b/librarian/xslt/book2txt.xslt @@ -233,6 +233,24 @@ + + + + + + + + + + + + + + + + + + diff --git a/setup.py b/setup.py index 732f145..10abe6e 100755 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ setup( maintainer='Radek Czajka', maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl', url='http://github.com/fnp/librarian', - packages=['librarian'], + packages=['librarian', 'librarian.embeds'], package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*'] + whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'res') + whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')}, -- 2.20.1