From: Radek Czajka Date: Thu, 9 Oct 2014 10:28:03 +0000 (+0200) Subject: Fix in extract_annotations X-Git-Tag: 1.7~93 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/ac7899052e10143e0548ad7de2f67a2c6ca2b50b?ds=inline Fix in extract_annotations --- diff --git a/librarian/html.py b/librarian/html.py index e084ed2..85b9003 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -298,7 +298,7 @@ def extract_annotations(html_path): parser = etree.HTMLParser(encoding='utf-8') tree = etree.parse(html_path, parser) footnotes = tree.find('//*[@id="footnotes"]') - re_qualifier = re.compile(ur'[^\u2014]+\s+\((.+)\)\s+\u2014') + re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014') if footnotes is not None: for footnote in footnotes.findall('div'): fn_type = footnote.get('class').split('-')[1] diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py index 87e9b01..851c5b0 100644 --- a/tests/test_html_annotations.py +++ b/tests/test_html_annotations.py @@ -86,6 +86,14 @@ def test_annotations(): ), 'Standard footnote with qualifier and some emphasis.'), + ('Definiendum (łac.) --- definiens (some) --- more text.', ( + 'pe', + 'łac.', + 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.', + '

Definiendum (łac.) \u2014 definiens (some) \u2014 more text.

', + ), + 'Footnote with a second parentheses and mdash.'), + ) xml_src = ''' %s ''' % "".join(