parser = etree.HTMLParser(encoding='utf-8')
tree = etree.parse(html_path, parser)
footnotes = tree.find('//*[@id="footnotes"]')
- re_qualifier = re.compile(ur'[^\u2014]+\s+\((.+)\)\s+\u2014')
+ re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
if footnotes is not None:
for footnote in footnotes.findall('div'):
fn_type = footnote.get('class').split('-')[1]
),
'Standard footnote with qualifier and some emphasis.'),
+ ('<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>', (
+ 'pe',
+ 'łac.',
+ 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
+ '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
+ ),
+ 'Footnote with a second parentheses and mdash.'),
+
)
xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(