X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/fe8e5b5e224d32baebbdaa2fecf4a847ed4e5354..3b0b98465bc1862306b05bb8305a1abbf40ca310:/librarian/xmlutils.py diff --git a/librarian/xmlutils.py b/librarian/xmlutils.py index d762320..ae3512a 100644 --- a/librarian/xmlutils.py +++ b/librarian/xmlutils.py @@ -12,21 +12,34 @@ class Xmill(object): Used instead of XSLT which is difficult and cumbersome. """ - def __init__(self, options=None): + def __init__(self, options=None, state=None): self._options = [] + self.state = state or {} if options: self._options.append(options) self.text_filters = [] + self.escaped_text_filters = [] def register_text_filter(self, fun): self.text_filters.append(fun) + def register_escaped_text_filter(self, fun): + self.escaped_text_filters.append(fun) + def filter_text(self, text): for flt in self.text_filters: if text is None: return None - text = flt(text) - return text + else: + text = flt(text) + # TODO: just work on the tree and let lxml handle escaping. + e = etree.Element("x") + e.text = text + # This whole mixing text with ML is so wrong. + output = etree.tostring(e, encoding=unicode)[3:-4] + for flt in self.escaped_text_filters: + output = flt(output) + return output def generate(self, document): """Generate text from node using handlers defined in class.""" @@ -39,7 +52,7 @@ class Xmill(object): """ # Here we can see how a decision not to return the modified map # leads to a need for a hack. - return reduce(lambda a, b: a.update(b) or a, self._options, defaultdict(lambda: False)) + return reduce(lambda a, b: a.update(b) or a, self._options, defaultdict(lambda: None)) @options.setter def options(self, opts): @@ -47,20 +60,17 @@ class Xmill(object): """ self._options.append(opts) - def _handle_for_element(self, element): ns = None tagname = None -# from nose.tools import set_trace - - if isinstance(element, etree._Comment): return None + # from nose.tools import set_trace if element.tag[0] == '{': for nshort, nhref in element.nsmap.items(): try: if element.tag.index('{%s}' % nhref) == 0: ns = nshort - tagname = element.tag[len('{%s}' % nhref):] + tagname = element.tag[len('{%s}' % nhref):] break except ValueError: pass @@ -84,25 +94,31 @@ class Xmill(object): while True: sibling = element.getnext() - if sibling is not None: return sibling # found a new branch to dig into + if sibling is not None: + return sibling # found a new branch to dig into element = element.getparent() - if element is None: return None # end of tree + if element is None: + return None # end of tree def _handle_element(self, element): + if isinstance(element, etree._Comment): + return None + handler = self._handle_for_element(element) + if self.state.get('mute') and not getattr(handler, 'unmuter', False): + return None # How many scopes + options_scopes = len(self._options) try: - options_scopes = len(self._options) - if handler is None: pre = [self.filter_text(element.text)] - post = [] + post = [self.filter_text(element.tail)] else: vals = handler(element) # depending on number of returned values, vals can be None, a value, or a tuple. # how poorly designed is that? 9 lines below are needed just to unpack this. if vals is None: - return [] + return [self.filter_text(element.tail)] else: if not isinstance(vals, tuple): return [vals, self.filter_text(element.tail)] @@ -114,19 +130,36 @@ class Xmill(object): finally: # clean up option scopes if necessary self._options = self._options[0:options_scopes] + return out -def tag(name, classes=None, **attrs): +def tag_open_close(name_, classes_=None, **attrs): + u"""Creates tag beginning and end. + + >>> tag_open_close("a", "klass", x=u"ą<") + (u'', u'') + + """ + if classes_: + if isinstance(classes_, (tuple, list)): + classes_ = ' '.join(classes_) + attrs['class'] = classes_ + + e = etree.Element(name_) + e.text = " " + for k, v in attrs.items(): + e.attrib[k] = v + pre, post = etree.tostring(e, encoding=unicode).split(u"> <") + return pre + u">", u"<" + post + + +def tag(name_, classes_=None, **attrs): """Returns a handler which wraps node contents in tag `name', with class attribute set to `classes' and other attributes according to keyword paramters """ - if classes: - if isinstance(classes, (tuple, list)): classes = ' '.join(classes) - attrs['class'] = classes - a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()]) def _hnd(self, element): - return "<%s%s>" % (name, a), "" % name + return tag_open_close(name_, classes_, **attrs) return _hnd @@ -135,13 +168,16 @@ def tagged(name, classes=None, **attrs): set to `classes' and other attributes according to keyword paramters """ if classes: - if isinstance(classes, (tuple,list)): classes = ' '.join(classes) + if isinstance(classes, (tuple, list)): + classes = ' '.join(classes) attrs['class'] = classes - a = ''.join([' %s="%s"' % (k,v) for (k,v) in attrs.items()]) + a = ''.join([' %s="%s"' % (k, v) for (k, v) in attrs.items()]) + def _decor(f): def _wrap(self, element): r = f(self, element) - if r is None: return + if r is None: + return prepend = "<%s%s>" % (name, a) append = "" % name @@ -166,6 +202,7 @@ def ifoption(**options): return _handler return _decor + def flatten(l, ltypes=(list, tuple)): """flatten function from BasicPropery/BasicTypes package """