Fix in extract_annotations

author Radek Czajka <radekczajka@nowoczesnapolska.org.pl>

Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)

committer Radek Czajka <radekczajka@nowoczesnapolska.org.pl>

Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)
author Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)
committer Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)
diff --git a/librarian/html.py b/librarian/html.py

index e084ed2..85b9003 100644 (file)
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -298,7 +298,7 @@ def extract_annotations(html_path):
      parser = etree.HTMLParser(encoding='utf-8')
      tree = etree.parse(html_path, parser)
      footnotes = tree.find('//*[@id="footnotes"]')
-    re_qualifier = re.compile(ur'[^\u2014]+\s+\((.+)\)\s+\u2014')
+    re_qualifier = re.compile(ur'[^\u2014]+\s+\(([^\)]+)\)\s+\u2014')
      if footnotes is not None:
          for footnote in footnotes.findall('div'):
              fn_type = footnote.get('class').split('-')[1]
diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py

index 87e9b01..851c5b0 100644 (file)
--- a/tests/test_html_annotations.py
+++ b/tests/test_html_annotations.py
@@ -86,6 +86,14 @@ def test_annotations():
              ),
              'Standard footnote with qualifier and some emphasis.'),
  
+        ('<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>', (
+            'pe',
+            'łac.',
+            'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
+            '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
+            ),
+            'Footnote with a second parentheses and mdash.'),
+
      )
  
      xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(
author	Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
	Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)
committer	Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
	Thu, 9 Oct 2014 10:28:03 +0000 (12:28 +0200)
librarian/html.py		patch \| blob \| history
tests/test_html_annotations.py		patch \| blob \| history