X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/a04f11baee3eb7d090867c2d5639a120ec3217b8..4b0833f222da430d31356b6f065e56073b849d0f:/tests/test_html_annotations.py?ds=sidebyside diff --git a/tests/test_html_annotations.py b/tests/test_html_annotations.py index f269042..bdbe4fb 100644 --- a/tests/test_html_annotations.py +++ b/tests/test_html_annotations.py @@ -1,105 +1,128 @@ -# -*- coding: utf-8 -from __future__ import unicode_literals - -from StringIO import StringIO -import tempfile +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. +# +import unittest from librarian.parser import WLDocument from librarian.html import extract_annotations -from lxml import etree -from nose.tools import eq_ - -def _test_annotation(expected, got, name): - assert got[0].startswith('anchor-'), "%s: Unexpected anchor: '%s', should begin with 'anchor-'" % (name, got[0]) - eq_(expected[0], got[1], "%s: Unexpected type, expected '%s', got '%s'" % (name, expected[0], got[1])) - eq_(expected[1], got[2], "%s: Unexpected qualifier, expected '%s', got '%s'" % (name, expected[1], got[2])) - eq_(expected[2], got[3], "%s: Unexpected text representation, expected '%s', got '%s'" % (name, expected[2], got[3])) - exp_html = '<div class="fn-%s">%s</div>' % (expected[0], expected[3]) - eq_(exp_html, got[4], "%s: Unexpected html representation, expected '%s', got '%s'" % (name, exp_html, got[4])) - - -def test_annotations(): - annotations = ( +class AnnotationsTests(unittest.TestCase): + def _test_annotation(self, expected, got, name): + self.assertTrue( + got[0].startswith('anchor-'), + "%s: Unexpected anchor: '%s', should begin with 'anchor-'" % (name, got[0]) + ) + self.assertEqual( + expected[0], got[1], + "%s: Unexpected type, expected '%s', got '%s'" % (name, expected[0], got[1]) + ) + self.assertEqual( + expected[1], got[2], + "%s: Unexpected qualifier, expected '%s', got '%s'" % (name, expected[1], got[2]) + ) + self.assertEqual( + expected[2], got[3], + "%s: Unexpected text representation, expected '%s', got '%s'" % (name, expected[2], got[3]) + ) + exp_html = '<div class="fn-%s">%s</div>' % (expected[0], expected[3]) + self.assertEqual( + exp_html, got[4], + "%s: Unexpected html representation, expected '%s', got '%s'" % (name, exp_html, got[4]) + ) + + def test_annotations(self): + annotations = ( ('<pe/>', ( 'pe', - None, - '', - '<p></p>' + [], + '[przypis edytorski]', + '<p> [przypis edytorski]</p>' ), 'Empty footnote'), - ( - '<pr>Definiendum --- definiens.</pr>', ( + ('<pr>Definiendum --- definiens.</pr>', ( 'pr', - None, - 'Definiendum \u2014 definiens.', - '<p>Definiendum \u2014 definiens.</p>' + [], + 'Definiendum \u2014 definiens. [przypis redakcyjny]', + '<p>Definiendum \u2014 definiens. [przypis redakcyjny]</p>' ), 'Plain footnote.'), ('<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>', ( 'pt', - None, - 'Definiendum \u2014 definiens.', - '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>' + [], + 'Definiendum \u2014 definiens. [przypis tÅumacza]', + '<p><em class="foreign-word">Definiendum</em> \u2014 definiens. [przypis tÅumacza]</p>' ), 'Standard footnote.'), ('<pr>Definiendum (Åac.) --- definiens.</pr>', ( 'pr', - 'Åac.', - 'Definiendum (Åac.) \u2014 definiens.', - '<p>Definiendum (Åac.) \u2014 definiens.</p>' + ['Åac.'], + 'Definiendum (Åac.) \u2014 definiens. [przypis redakcyjny]', + '<p>Definiendum (Åac.) \u2014 definiens. [przypis redakcyjny]</p>' ), 'Plain footnote with qualifier'), ('<pe><slowo_obce>Definiendum</slowo_obce> (Åac.) --- definiens.</pe>', ( 'pe', - 'Åac.', - 'Definiendum (Åac.) \u2014 definiens.', - '<p><em class="foreign-word">Definiendum</em> (Åac.) \u2014 definiens.</p>' + ['Åac.'], + 'Definiendum (Åac.) \u2014 definiens. [przypis edytorski]', + '<p><em class="foreign-word">Definiendum</em> (Åac.) \u2014 definiens. [przypis edytorski]</p>' ), 'Standard footnote with qualifier.'), ('<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>', ( 'pt', - 'daw.', - 'Definiendum (daw.) \u2014 definiens.', - '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>' + ['daw.'], + 'Definiendum (daw.) \u2014 definiens. [przypis tÅumacza]', + '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens. [przypis tÅumacza]</p>' ), 'Standard footnote with leading whitespace and qualifier.'), ('<pr>Definiendum (Åac.) --- <slowo_obce>definiens</slowo_obce>.</pr>', ( 'pr', - 'Åac.', - 'Definiendum (Åac.) \u2014 definiens.', - '<p>Definiendum (Åac.) \u2014 <em class="foreign-word">definiens</em>.</p>' + ['Åac.'], + 'Definiendum (Åac.) \u2014 definiens. [przypis redakcyjny]', + '<p>Definiendum (Åac.) \u2014 <em class="foreign-word">definiens</em>. [przypis redakcyjny]</p>' ), 'Plain footnote with qualifier and some emphasis.'), ('<pe><slowo_obce>Definiendum</slowo_obce> (Åac.) --- <slowo_obce>definiens</slowo_obce>.</pe>', ( 'pe', - 'Åac.', - 'Definiendum (Åac.) \u2014 definiens.', - '<p><em class="foreign-word">Definiendum</em> (Åac.) \u2014 <em class="foreign-word">definiens</em>.</p>' + ['Åac.'], + 'Definiendum (Åac.) \u2014 definiens. [przypis edytorski]', + '<p><em class="foreign-word">Definiendum</em> (Åac.) \u2014 <em class="foreign-word">definiens</em>. [przypis edytorski]</p>' ), 'Standard footnote with qualifier and some emphasis.'), ('<pe>Definiendum (Åac.) --- definiens (some) --- more text.</pe>', ( 'pe', - 'Åac.', - 'Definiendum (Åac.) \u2014 definiens (some) \u2014 more text.', - '<p>Definiendum (Åac.) \u2014 definiens (some) \u2014 more text.</p>', + ['Åac.'], + 'Definiendum (Åac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]', + '<p>Definiendum (Åac.) \u2014 definiens (some) \u2014 more text. [przypis edytorski]</p>', ), 'Footnote with a second parentheses and mdash.'), - ) - - xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join( - t[0] for t in annotations) - html = WLDocument.from_string(xml_src, parse_dublincore=False).as_html().get_file() - res_annotations = list(extract_annotations(html)) - - for i, (src, expected, name) in enumerate(annotations): - yield _test_annotation, expected, res_annotations[i], name + ('<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykÅy) --- ' + 'czÄÅciej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>', ( + 'pe', + ['daw.', 'niem.'], + 'gemajna (daw., z niem. gemein: zwykÅy) \u2014 czÄÅciej: gemajn, ' + 'szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]', + '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykÅy) ' + '\u2014 czÄÅciej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu. [przypis edytorski]</p>' + ), + 'Footnote with multiple and qualifiers and emphasis.'), + ) + + xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join( + t[0] for t in annotations) + html = WLDocument.from_bytes( + xml_src.encode('utf-8'), + parse_dublincore=False).as_html().get_file() + res_annotations = list(extract_annotations(html)) + + for i, (src, expected, name) in enumerate(annotations): + with self.subTest(i=i): + self._test_annotation(expected, res_annotations[i], name)