X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/011b98a4661b754a0789ae78e145437b5b86e5fe..23d025c8875cca1404f274aca7170c9db5e980e7:/src/librarian/document.py diff --git a/src/librarian/document.py b/src/librarian/document.py index d61abb4..7f468fa 100644 --- a/src/librarian/document.py +++ b/src/librarian/document.py @@ -1,7 +1,7 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # -from collections import Counter +from collections import defaultdict, Counter import gettext import os import re @@ -17,6 +17,7 @@ class WLDocument: source = filename or urllib.request.urlopen(url) tree = etree.parse(source, parser=parser) self.tree = tree + self.counters = defaultdict(lambda: 1) tree.getroot().document = self self.preprocess() @@ -41,6 +42,12 @@ class WLDocument: for _e, elem in etree.iterwalk(self.tree, ('start',), 'strofa'): elem.preprocess() + def assign_ids(self): + # Assign IDs depth-first, to account for any inside. + for _e, elem in etree.iterwalk(self.tree, events=('end',)): + if getattr(elem, 'NUMBERING', None): + elem.assign_id(self) + @property def children(self): for part_uri in self.meta.parts or []: