bit longer author field
[librarian.git] / src / librarian / builders / sanitize.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from lxml import etree
5 from librarian import OutputFile
6
7
class Sanitizer:
    """Builder that serializes a sanitized version of a document's XML tree."""

    # Builder metadata; presumably read by the surrounding builder
    # registry / output naming code -- confirm against callers.
    identifier = 'sanitize'
    file_extension = 'xml2'

    def build(self, document, **kwargs):
        """Return the sanitized document serialized as UTF-8 XML.

        The root element is deep-copied before ``sanitize()`` is called on
        it, so the tree held by ``document`` is not modified by this builder.

        :param document: object exposing ``tree.getroot()``; the returned
            root element must provide a ``sanitize()`` method.
        :param kwargs: accepted for call compatibility; not used here.
        :return: the result of ``OutputFile.from_bytes()`` called with the
            serialized bytes.
        """
        # Imported locally so the block does not depend on a new
        # module-level import.
        import copy

        # Work on a private copy: sanitize() operates in place and would
        # otherwise alter the caller's document.
        doc = copy.deepcopy(document.tree.getroot())
        doc.sanitize()
        return OutputFile.from_bytes(
            etree.tostring(
                doc,
                encoding='utf-8',
            )
        )
21