From 2c15db814c7f40406b6a86383e3e4bc8825b7faf Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Thu, 1 Aug 2019 10:49:49 +0200 Subject: [PATCH 1/1] Fix transform_abstrakt + tests. --- CHANGELOG.md | 7 +++++++ librarian/html.py | 6 +++--- tests/files/text/abstrakt.xml | 6 ++++++ tests/test_html_transform_abstrakt.py | 20 ++++++++++++++++++++ tests/test_picture.py | 2 +- 5 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 tests/files/text/abstrakt.xml create mode 100644 tests/test_html_transform_abstrakt.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c392530..009faf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ This document records all notable changes to Librarian. +## 1.7.4 (2019-08-01) + +### Fixed +- `html.transform_abstrakt` bytes vs text confusion. +- Tests. + + ## 1.7.3 (2019-07-31) ### Added diff --git a/librarian/html.py b/librarian/html.py index 67f0061..ed7b4d6 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -38,10 +38,10 @@ def html_has_content(text): def transform_abstrakt(abstrakt_element): style_filename = get_stylesheet('legacy') style = etree.parse(style_filename) - xml = etree.tostring(abstrakt_element) - document = etree.parse(six.BytesIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK + xml = etree.tostring(abstrakt_element, encoding='unicode') + document = etree.parse(six.StringIO(xml.replace('abstrakt', 'dlugi_cytat'))) # HACK result = document.xslt(style) - html = re.sub('', '', etree.tostring(result)) + html = re.sub('', '', etree.tostring(result, encoding='unicode')) return re.sub(']*>', '', html) diff --git a/tests/files/text/abstrakt.xml b/tests/files/text/abstrakt.xml new file mode 100644 index 0000000..7a468a1 --- /dev/null +++ b/tests/files/text/abstrakt.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/test_html_transform_abstrakt.py b/tests/test_html_transform_abstrakt.py new file mode 100644 index 0000000..dbe22c0 --- /dev/null +++ b/tests/test_html_transform_abstrakt.py @@ -0,0 +1,20 @@ 
+# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from __future__ import unicode_literals + +from librarian.parser import WLDocument +from librarian.html import transform_abstrakt +from nose.tools import * +from .utils import get_fixture + + +def test_fragments(): + transform_abstrakt( + WLDocument.from_file( + get_fixture('text', 'abstrakt.xml'), + parse_dublincore=False + ).edoc.getroot().find('.//abstrakt') + ) diff --git a/tests/test_picture.py b/tests/test_picture.py index f97609b..7d12b1d 100644 --- a/tests/test_picture.py +++ b/tests/test_picture.py @@ -36,7 +36,7 @@ def test_wlpicture(): assert path.exists(wlp.image_path) - f = wlp.image_file('r') + f = wlp.image_file() f.close() -- 2.20.1