From a6ee2dd83d3c4d5d2d3e8cb3401734ced2b12c22 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Fri, 16 Sep 2011 09:58:20 +0200 Subject: [PATCH] extract annotations: return anchor to annotation, not the word in text --- librarian/html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/librarian/html.py b/librarian/html.py index 9869513..5974d93 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -271,7 +271,7 @@ def extract_annotations(html_path): footnotes = tree.find('//*[@id="footnotes"]') if footnotes is not None: for footnote in footnotes.findall('div'): - anchor = footnote.find('a[@href]').get('href') + anchor = footnote.find('a[@name]').get('name') del footnote[:2] text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip() html_str = etree.tostring(footnote, method='html', encoding='utf-8') -- 2.20.1