X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/13480b3da2d3da87f1d99c6d340c1553ca9d89c1..0560b4a83f947a4d08f087d85759f05516f6e580:/librarian/document.py diff --git a/librarian/document.py b/librarian/document.py index 32148e3..462b1ba 100755 --- a/librarian/document.py +++ b/librarian/document.py @@ -11,21 +11,32 @@ from .parser import SSTParser class Document(object): - # Do I use meta_context? def __init__(self, edoc, meta_context=None): self.edoc = edoc - root_elem = edoc.getroot() + # Do I use meta_context? if meta_context is not None: root_elem.meta_context = meta_context + self.validate() + def validate(self): + root_elem = self.edoc.getroot() if not isinstance(root_elem, Section): if root_elem.tag != SSTNS('section'): - raise ValidationError("Invalid root element. Found '%s', should be '%s'" % ( - root_elem.tag, SSTNS('section'))) + if root_elem.tag == 'section': + for element in root_elem.iter(): + if element.tag in ('section', 'header', 'div', 'span', 'aside', 'metadata'): + element.tag = str(SSTNS(element.tag)) + + parser = SSTParser() + tree = etree.parse(StringIO(etree.tostring(root_elem)), parser) + tree.xinclude() + self.edoc = tree + else: + raise ValueError("Invalid root element. Found '%s', should be '%s'" % ( + root_elem.tag, SSTNS('section'))) else: - raise ValidationError("Invalid class of root element. " - "Use librarian.parser.SSTParser.") + raise ValueError("Invalid class of root element. Use librarian.parser.SSTParser.") @classmethod def from_string(cls, xml, *args, **kwargs): @@ -47,6 +58,8 @@ class Document(object): data = data.decode('utf-8') data = data.replace(u'\ufeff', '') + # This is bad. The editor shouldn't spew unknown HTML entities. + data = data.replace(u'&nbsp;', u'\u00a0') parser = SSTParser() tree = etree.parse(StringIO(data.encode('utf-8')), parser)