From ac7899052e10143e0548ad7de2f67a2c6ca2b50b Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Thu, 9 Oct 2014 12:28:03 +0200 Subject: [PATCH] Fix in extract_annotations --- librarian/html.py | 2 +- tests/test_html_annotations.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/librarian/html.py b/librarian/html.py index e084ed2..85b9003 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -298,7 +298,7 @@ def extract_annotations(html_path): parser = etree.HTMLParser(encoding='utf-8') tree = etree.parse(html_path, parser) footnotes = tree.find('//*[@id="footnotes"]') - re_qualifier = re.compile(ur'[^\u2014]+\s+\((.+)\)\s+\u2014') + re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014') if footnotes is not None: for footnote in footnotes.findall('div'): fn_type = footnote.get('class').split('-')[1] diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py index 87e9b01..851c5b0 100644 --- a/tests/test_html_annotations.py +++ b/tests/test_html_annotations.py @@ -86,6 +86,14 @@ def test_annotations(): ), 'Standard footnote with qualifier and some emphasis.'), + ('Definiendum (łac.) --- definens (some) --- more text.', ( + 'pe', + 'łac.', + 'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.', + '

Definiendum (łac.) \u2014 definiens (some) \u2014 more text.

', + ), + 'Footnote with a second parentheses and mdash.'), + ) xml_src = ''' %s ''' % "".join( -- 2.20.1