class Xmill(object):
- """Transforms XML to some text.
+ """Transforms XML to some text.
Used instead of XSLT which is difficult and cumbersome.
-
+
"""
def __init__(self, options=None, state=None):
    """Set up the option scope stack, the state and the text filter lists.

    options -- optional mapping of initial options; when truthy it becomes
               the first entry of the option scope stack.
    state   -- optional mutable mapping holding processing state; a fresh
               dict is created only when none is given.
    """
    self._options = []
    # Compare against None rather than using `state or {}`, so that an
    # empty mapping supplied by the caller is kept instead of being
    # silently replaced by a private dict.
    self.state = {} if state is None else state
    if options:
        self._options.append(options)
    # Filters applied to raw text, and to text after XML escaping.
    self.text_filters = []
    self.escaped_text_filters = []
+
def register_text_filter(self, fun):
    """Append `fun` to the filters run on text before it is escaped.

    Filters are kept, and later applied, in registration order.
    """
    self.text_filters.append(fun)
+
def register_escaped_text_filter(self, fun):
    """Append `fun` to the filters run on text after it has been escaped.

    Filters are kept, and later applied, in registration order.
    """
    self.escaped_text_filters.append(fun)
+
def filter_text(self, text):
    """Filter `text`, escape it for markup output, then filter it again.

    Each function in `self.text_filters` is applied in order to the raw
    text.  If the text is None when a filter is about to run (on entry, or
    because an earlier filter returned None), None is returned at once.
    The result is then serialized through an lxml element to escape it,
    and each function in `self.escaped_text_filters` is applied in order.
    """
    for flt in self.text_filters:
        if text is None:
            return None
        text = flt(text)
    # TODO: just work on the tree and let lxml handle escaping.
    e = etree.Element("x")
    e.text = text
    # This whole mixing text with ML is so wrong.
    # The slice drops the leading "<x>" and the trailing "</x>" of the
    # serialized wrapper, leaving only the escaped text.
    # NOTE(review): with no text filters a None text reaches this point;
    # presumably that serializes as an empty element and yields an empty
    # string instead of None -- confirm this asymmetry is intended.
    output = etree.tostring(e, encoding=unicode)[3:-4]
    for flt in self.escaped_text_filters:
        output = flt(output)
    return output
+
def generate(self, document):
"""Generate text from node using handlers defined in class."""
def options(self):
    """Return the merged scoped options for the current node.

    The scopes in `self._options` are merged in order into a new mapping,
    so later scopes override earlier ones.  Looking up an option that no
    scope defines gives None.
    """
    # dict.update() returns None, so the merge is written as a plain loop
    # instead of folding the scopes with a lambda.
    merged = defaultdict(lambda: None)
    for scope in self._options:
        merged.update(scope)
    return merged
@options.setter
def options(self, opts):
tagname = None
# from nose.tools import set_trace
- if isinstance(element, etree._Comment): return None
-
if element.tag[0] == '{':
for nshort, nhref in element.nsmap.items():
try:
except ValueError:
pass
if not ns:
- raise ValueError("Strange ns for tag: %s, nsmap: %s" %
- (element.tag, element.nsmap))
+ raise ValueError("Strange ns for tag: %s, nsmap: %s" %
+ (element.tag, element.nsmap))
else:
tagname = element.tag
meth_name = "handle_%s__%s" % (ns, tagname)
else:
meth_name = "handle_%s" % (tagname,)
-
+
handler = getattr(self, meth_name, None)
return handler
if element is None: return None # end of tree
def _handle_element(self, element):
    """Produce the output pieces for `element` and its descendants.

    Returns None for comment nodes, and for any element processed while
    `self.state['mute']` is truthy unless its handler has a truthy
    `unmuter` attribute (an element with no handler is therefore muted
    too).  Otherwise returns a list that may contain strings, None values
    and nested lists from child elements.

    A handler may return:
      * None            -- only the element's filtered tail is emitted;
      * a single value  -- that value followed by the filtered tail;
      * a (pre, post) tuple -- `pre`, the filtered text, the children's
        output, `post` and the filtered tail.
    Without a handler the filtered text, the children's output and the
    filtered tail are emitted.
    """
    if isinstance(element, etree._Comment):
        return None

    handler = self._handle_for_element(element)
    if self.state.get('mute') and not getattr(handler, 'unmuter', False):
        return None

    # Remember how many option scopes exist now, so that any scopes added
    # while this element is handled (presumably through the `options`
    # setter) can be dropped afterwards.  Taken before the `try` so the
    # `finally` clause can always rely on it.
    options_scopes = len(self._options)
    try:
        if handler is None:
            pre = [self.filter_text(element.text)]
            post = [self.filter_text(element.tail)]
        else:
            vals = handler(element)
            if vals is None:
                return [self.filter_text(element.tail)]
            if not isinstance(vals, tuple):
                return [vals, self.filter_text(element.tail)]
            pre = [vals[0], self.filter_text(element.text)]
            post = [vals[1], self.filter_text(element.tail)]

        out = pre + [self._handle_element(child) for child in element] + post
    finally:
        # clean up option scopes if necessary
        self._options = self._options[0:options_scopes]

    return out
def tag_open_close(name_, classes_=None, **attrs):
    u"""Creates tag beginning and end.

    name_    -- tag name.
    classes_ -- optional value of the `class` attribute: a string, or a
                tuple/list of class names which are joined with spaces.
    attrs    -- further attributes, given as keyword arguments.

    Returns a pair of unicode strings: the start tag and the end tag.
    Attribute values are escaped by the XML serializer:

    >>> tag_open_close("a", "klass", x=u"ą<")
    (u'<a x="\\u0105&lt;" class="klass">', u'</a>')

    """
    if classes_:
        if isinstance(classes_, (tuple, list)):
            classes_ = ' '.join(classes_)
        attrs['class'] = classes_

    e = etree.Element(name_)
    # A single space as content makes the serializer write separate start
    # and end tags, with a known "> <" boundary between them.
    e.text = " "
    for k, v in attrs.items():
        e.attrib[k] = v
    # Split at the last boundary only, so exactly two parts come back.
    pre, post = etree.tostring(e, encoding=unicode).rsplit(u"> <", 1)
    return pre + u">", u"<" + post
+
def tag(name_, classes_=None, **attrs):
    """Returns a handler which wraps node contents in tag `name_', with class
    attribute set to `classes_' and other attributes according to keyword
    parameters.

    The handler ignores the element it is given and returns the
    (start tag, end tag) pair built by `tag_open_close`.
    """
    def _hnd(self, element):
        return tag_open_close(name_, classes_, **attrs)
    return _hnd