X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/2991698067a246ea9b99b4e668d261af6d418eca..adde01d89c27fb02056fd5c901ffea9ba1d9882f:/src/librarian/document.py diff --git a/src/librarian/document.py b/src/librarian/document.py index 6ac2842..aa6f37f 100644 --- a/src/librarian/document.py +++ b/src/librarian/document.py @@ -1,8 +1,11 @@ +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. +# import gettext import os import re +import urllib.request from lxml import etree -import six from .parser import parser from . import dcparser, DCNS, DirDocProvider from .functions import lang_code_3to2 @@ -10,7 +13,7 @@ from .functions import lang_code_3to2 class WLDocument: def __init__(self, filename=None, url=None, provider=None): - source = filename or six.moves.urllib.request.urlopen(url) + source = filename or urllib.request.urlopen(url) tree = etree.parse(source, parser=parser) self.tree = tree tree.getroot().document = self @@ -32,10 +35,8 @@ class WLDocument: @property def children(self): for part_uri in self.meta.parts or []: - yield type(self)( - filename=self.provider.by_slug(part_uri.slug), - provider=self.provider - ) + with self.provider.by_slug(part_uri.slug) as f: + yield type(self)(filename=f, provider=self.provider) def build(self, builder, base_url=None, **kwargs): return builder(base_url=base_url).build(self, **kwargs) @@ -101,7 +102,10 @@ class WLDocument: def _compat_assigns_section_ids_in_elem(elem, prefix='sec'): for i, child in enumerate(elem): idfier = '{}{}'.format(prefix, i + 1) - child.attrib['_compat_section_id'] = idfier + try: + child.attrib['_compat_section_id'] = idfier + except: + pass _compat_assigns_section_ids_in_elem(child, idfier + '-') _compat_assigns_section_ids_in_elem(self.tree.getroot().master) @@ -115,3 +119,5 @@ class WLDocument: persons.remove(None) return persons + def references(self): + return self.tree.findall('.//ref')