1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
7 from collections import defaultdict
11 """Transforms XML to some text.
12 Used instead of XSLT which is difficult and cumbersome.
15 def __init__(self, options=None, state=None):
17 self.state = state or {}
19 self._options.append(options)
20 self.text_filters = []
21 self.escaped_text_filters = []
# Register a text filter: `fun` is appended to self.text_filters, the list
# that filter_text() walks with `for flt in self.text_filters`.
# Filters are kept in registration order (plain list append). Returns None.
23 def register_text_filter(self, fun):
24 self.text_filters.append(fun)
# Register an escaped-text filter: `fun` is appended to
# self.escaped_text_filters, the list walked by the
# `for flt in self.escaped_text_filters` loop that follows the
# etree.tostring() escaping step. Filters are kept in registration order.
# Returns None.
26 def register_escaped_text_filter(self, fun):
27 self.escaped_text_filters.append(fun)
29 def filter_text(self, text):
30 for flt in self.text_filters:
35 # TODO: just work on the tree and let lxml handle escaping.
36 e = etree.Element("x")
38 # This whole mixing text with ML is so wrong.
39 output = etree.tostring(e, encoding=unicode)[3:-4]
40 for flt in self.escaped_text_filters:
# Entry point: runs the handler machinery on `document` and returns the
# result as a single unicode string.
45 def generate(self, document):
46 """Generate text from node using handlers defined in class."""
# _handle_element() yields nested lists (and None for skipped nodes).
47 output = self._handle_element(document)
# flatten() (defined in this file) unnests the lists/tuples; None entries
# are dropped so that u''.join() only ever sees text pieces.
48 return u''.join([x for x in flatten(output) if x is not None])
52 """Returns merged scoped options for current node.
54 # Here we can see how a decision not to return the modified map
55 # leads to a need for a hack.
56 return reduce(lambda a, b: a.update(b) or a, self._options, defaultdict(lambda: None))
59 def options(self, opts):
60 """Sets options overrides for current and child nodes
62 self._options.append(opts)
65 def _handle_for_element(self, element):
68 # from nose.tools import set_trace
70 if element.tag[0] == '{':
71 for nshort, nhref in element.nsmap.items():
73 if element.tag.index('{%s}' % nhref) == 0:
75 tagname = element.tag[len('{%s}' % nhref):]
80 raise ValueError("Strange ns for tag: %s, nsmap: %s" %
81 (element.tag, element.nsmap))
86 meth_name = "handle_%s__%s" % (ns, tagname)
88 meth_name = "handle_%s" % (tagname,)
90 handler = getattr(self, meth_name, None)
93 def next(self, element):
98 sibling = element.getnext()
99 if sibling is not None: return sibling # found a new branch to dig into
100 element = element.getparent()
101 if element is None: return None # end of tree
103 def _handle_element(self, element):
104 if isinstance(element, etree._Comment): return None
106 handler = self._handle_for_element(element)
107 if self.state.get('mute') and not getattr(handler, 'unmuter', False): return None
110 options_scopes = len(self._options)
113 pre = [self.filter_text(element.text)]
114 post = [self.filter_text(element.tail)]
116 vals = handler(element)
117 # depending on number of returned values, vals can be None, a value, or a tuple.
118 # how poorly designed is that? 9 lines below are needed just to unpack this.
120 return [self.filter_text(element.tail)]
122 if not isinstance(vals, tuple):
123 return [vals, self.filter_text(element.tail)]
125 pre = [vals[0], self.filter_text(element.text)]
126 post = [vals[1], self.filter_text(element.tail)]
128 out = pre + [self._handle_element(child) for child in element] + post
130 # clean up option scopes if necessary
131 self._options = self._options[0:options_scopes]
136 def tag_open_close(name_, classes_=None, **attrs):
137 u"""Creates tag beginning and end.
139 >>> tag_open_close("a", "klass", x=u"ą<")
140 (u'<a x="\\u0105<" class="klass">', u'</a>')
144 if isinstance(classes_, (tuple, list)): classes_ = ' '.join(classes_)
145 attrs['class'] = classes_
147 e = etree.Element(name_)
149 for k, v in attrs.items():
151 pre, post = etree.tostring(e, encoding=unicode).split(u"> <")
152 return pre + u">", u"<" + post
154 def tag(name_, classes_=None, **attrs):
155 """Returns a handler which wraps node contents in tag `name', with class attribute
156 set to `classes' and other attributes according to keyword parameters
158 def _hnd(self, element):
159 return tag_open_close(name_, classes_, **attrs)
163 def tagged(name, classes=None, **attrs):
164 """Handler decorator which wraps handler output in tag `name', with class attribute
165 set to `classes' and other attributes according to keyword parameters
168 if isinstance(classes, (tuple,list)): classes = ' '.join(classes)
169 attrs['class'] = classes
170 a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()])
172 def _wrap(self, element):
176 prepend = "<%s%s>" % (name, a)
177 append = "</%s>" % name
179 if isinstance(r, tuple):
180 return prepend + r[0], r[1] + append
181 return prepend + r + append
186 def ifoption(**options):
187 """Decorator which enables node only when options are set
190 def _handler(self, *args, **kw):
192 for k, v in options.items():
195 return f(self, *args, **kw)
199 def flatten(l, ltypes=(list, tuple)):
200 """flatten function from BasicProperty/BasicTypes package
206 while isinstance(l[i], ltypes):