Fix XML entities left from MathML.
[librarian.git] / librarian / embeds / mathml.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 from lxml import etree
5 import six
6 from librarian import get_resource
7 from . import TreeEmbed, create_embed, downgrades_to
8
9
class MathML(TreeEmbed):
    """Tree-based embed for MathML that can be downgraded to LaTeX."""

    @downgrades_to('application/x-latex')
    def to_latex(self):
        """
        Transform the MathML tree into an ``application/x-latex`` embed.

        The tree is run through the bundled ``mathml2latex.xslt`` stylesheet
        and the stringified result is used as the LaTeX source.  Markup
        characters that remain XML-escaped in that string are unescaped
        exactly once, so text that was double-escaped in the input keeps one
        level of escaping.

        >>> print(MathML(etree.fromstring('<mat>a &lt; b</mat>')).to_latex().data.strip())
        a < b

        >>> print(MathML(etree.fromstring('<mat>a &gt; b</mat>')).to_latex().data.strip())
        a > b

        >>> print(MathML(etree.fromstring('<mat>&lt; &amp; &amp;lt; &#65;</mat>')).to_latex().data.strip())
        < & &lt; A

        """
        xslt = etree.parse(get_resource('res/embeds/mathml/mathml2latex.xslt'))
        output = self.tree.xslt(xslt)
        text = six.text_type(output)
        # Workaround for entities being preserved in output. But there should
        # be a better way.
        # '&amp;' must be handled last: decoding it earlier would turn an
        # escaped '&amp;lt;' into '&lt;' and then wrongly into '<'.
        text = (
            text
            .replace('&lt;', '<')
            .replace('&gt;', '>')
            .replace('&amp;', '&')
        )
        return create_embed('application/x-latex', data=text)