Better handling of multipart DAISY.
[librarian.git] / src / librarian / builders / sanitize.py
1 from lxml import etree
2 from librarian import OutputFile
3
4
class Sanitizer:
    """Builder that serializes a sanitized copy of a document as XML."""

    # Short name of this builder and the extension used for its output;
    # presumably consumed by the builder registry -- verify against callers.
    identifier = 'sanitize'
    file_extension = 'xml2'

    def build(self, document, **kwargs):
        """Return the sanitized document serialized as UTF-8 XML.

        The root element is deep-copied before ``sanitize()`` is called,
        so the caller's ``document`` is left unmodified and can safely be
        reused afterwards.

        Args:
            document: Object exposing ``tree.getroot()``; the returned root
                element must provide a ``sanitize()`` method.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The value of ``OutputFile.from_bytes`` applied to the
            serialized bytes of the sanitized root element.
        """
        # Local import keeps this block self-contained.
        import copy

        # sanitize() works in place, so operate on a private copy rather
        # than on the tree owned by the caller.
        doc = copy.deepcopy(document.tree.getroot())
        doc.sanitize()
        return OutputFile.from_bytes(
            etree.tostring(
                doc,
                encoding='utf-8',
            )
        )
18