X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/118a8cf1045eee8369f21236e1400189f3ec2647..a91e41e489588ebad550cd9e22d157062effa2ff:/librarian/html.py diff --git a/librarian/html.py b/librarian/html.py index 8822f96..0eeb76b 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -134,14 +134,17 @@ def extract_fragments(input_filename): fragment = Fragment(id=element.get('fid'), themes=element.text) # Append parents - if element.getparent().get('id', None) != 'book-text': - parents = [element.getparent()] - while parents[-1].getparent().get('id', None) != 'book-text': - parents.append(parents[-1].getparent()) - - parents.reverse() - for parent in parents: - fragment.append('start', parent) + parent = element.getparent() + parents = [] + while parent.get('id', None) != 'book-text': + cparent = copy.deepcopy(parent) + cparent.text = None + parents.append(cparent) + parent = parent.getparent() + + parents.reverse() + for parent in parents: + fragment.append('start', parent) open_fragments[fragment.id] = fragment