class Xmill(object):
- """Transforms XML to some text.
+ """Transforms XML to some text.
Used instead of XSLT which is difficult and cumbersome.
-
+
"""
def __init__(self, options=None):
self._options = []
if options:
self._options.append(options)
+ self.text_filters = []
+
def register_text_filter(self, fun):
    """Add ``fun`` to the end of the text-filter list.

    Filters are stored in registration order, which is the order in
    which filter_text() calls them.
    """
    registered = self.text_filters
    registered.append(fun)
+
def filter_text(self, text):
    """Run ``text`` through every registered text filter, in order.

    Each filter receives the previous filter's result. As soon as the
    value is ``None`` (either the input itself or a filter's result),
    the remaining filters are not called and ``None`` is returned.
    With no filters registered, ``text`` is returned unchanged.
    """
    result = text
    for apply_filter in self.text_filters:
        if result is None:
            # Nothing left to filter; skip the remaining filters.
            break
        result = apply_filter(result)
    return result
def generate(self, document):
"""Generate text from node using handlers defined in class."""
def options(self):
    """Return the merged scoped options for the current node.

    The entries of ``self._options`` are folded, first to last, into a
    fresh ``defaultdict`` whose missing keys read as ``False``; an entry
    that comes later overrides the keys of earlier ones.
    """
    merged = defaultdict(lambda: False)
    # dict.update() mutates in place and returns None, so the scopes are
    # folded in with an explicit loop rather than a chained expression.
    for scope in self._options:
        merged.update(scope)
    return merged
tagname = None
# from nose.tools import set_trace
- if isinstance(element, etree._Comment): return None
-
if element.tag[0] == '{':
for nshort, nhref in element.nsmap.items():
try:
except ValueError:
pass
if not ns:
- raise ValueError("Strange ns for tag: %s, nsmap: %s" %
- (element.tag, element.nsmap))
+ raise ValueError("Strange ns for tag: %s, nsmap: %s" %
+ (element.tag, element.nsmap))
else:
tagname = element.tag
meth_name = "handle_%s__%s" % (ns, tagname)
else:
meth_name = "handle_%s" % (tagname,)
-
+
handler = getattr(self, meth_name, None)
return handler
if element is None: return None # end of tree
def _handle_element(self, element):
+ if isinstance(element, etree._Comment): return None
+
handler = self._handle_for_element(element)
# How many scopes
try:
options_scopes = len(self._options)
if handler is None:
- pre = [element.text]
- post = []
+ pre = [self.filter_text(element.text)]
+ post = [self.filter_text(element.tail)]
else:
vals = handler(element)
# depending on number of returned values, vals can be None, a value, or a tuple.
# how poorly designed is that? 9 lines below are needed just to unpack this.
if vals is None:
- return []
+ return [self.filter_text(element.tail)]
else:
if not isinstance(vals, tuple):
- return [vals, element.tail]
+ return [vals, self.filter_text(element.tail)]
else:
- pre = [vals[0], element.text]
- post = [vals[1], element.tail]
+ pre = [vals[0], self.filter_text(element.text)]
+ post = [vals[1], self.filter_text(element.tail)]
out = pre + [self._handle_element(child) for child in element] + post
finally: