New Element-based builder API (WiP).
[librarian.git] / src / librarian / builders / sanitize.py
diff --git a/src/librarian/builders/sanitize.py b/src/librarian/builders/sanitize.py
new file mode 100644 (file)
index 0000000..4d7f7f9
--- /dev/null
@@ -0,0 +1,18 @@
+from lxml import etree
+from librarian import OutputFile
+
+
+class Sanitizer:
+    identifier = 'sanitize'
+    file_extension = 'xml2'
+
+    def build(self, document):
+        doc = document.tree.getroot() # TODO: copy
+        doc.sanitize()
+        return OutputFile.from_bytes(
+            etree.tostring(
+                doc,
+                encoding='utf-8',
+            )
+        )
+