From: Radek Czajka
Date: Fri, 16 Sep 2011 07:58:20 +0000 (+0200)
Subject: extract annotations: return anchor to annotation, not the word in text
X-Git-Tag: 1.7~196
X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/a6ee2dd83d3c4d5d2d3e8cb3401734ced2b12c22?ds=sidebyside;hp=--cc

extract annotations: return anchor to annotation, not the word in text
---

a6ee2dd83d3c4d5d2d3e8cb3401734ced2b12c22
diff --git a/librarian/html.py b/librarian/html.py
index 9869513..5974d93 100644
--- a/librarian/html.py
+++ b/librarian/html.py
@@ -271,7 +271,7 @@ def extract_annotations(html_path):
     footnotes = tree.find('//*[@id="footnotes"]')
     if footnotes is not None:
         for footnote in footnotes.findall('div'):
-            anchor = footnote.find('a[@href]').get('href')
+            anchor = footnote.find('a[@name]').get('name')
             del footnote[:2]
             text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip()
             html_str = etree.tostring(footnote, method='html', encoding='utf-8')