1d698fdf852c0ba06f023471466a6d75e218399e
[librarian.git] / tests / test_html.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 import re
5 from unittest import TestCase
6 from librarian import NoDublinCore
7 from librarian.builders import builders
8 from librarian.document import WLDocument
9 from librarian.parser import WLDocument as LegacyWLDocument
10 from .utils import get_fixture
11
12
class TransformTest(TestCase):
    """Tests of HTML generation for both the legacy and the current document API."""

    # Show complete diffs when a comparison against an expected fixture fails.
    maxDiff = None

    def test_transform_legacy(self):
        """HTML produced through the legacy parser matches the stored fixture."""
        expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.legacy.html')

        html = LegacyWLDocument.from_file(
            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
        ).as_html().get_bytes().decode('utf-8')

        # Replace every `idm<digits>` token with a fixed placeholder before
        # comparing; presumably these are generated ids that are not stable
        # between runs -- TODO confirm.
        html = re.sub(r'idm\d+', 'idmNNN', html)

        # The output above is decoded as UTF-8, so read the fixture as UTF-8
        # too rather than relying on the locale's default encoding.
        with open(expected_output_file_path, encoding='utf-8') as f:
            expected = f.read()
        self.assertEqual(expected, html)

    def test_transform(self):
        """HTML produced by the 'html' builder matches the stored fixture."""
        expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
        html = WLDocument(
            filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
        ).build(builders['html']).get_bytes().decode('utf-8')

        # Read the fixture with the same encoding used to decode the output.
        with open(expected_output_file_path, encoding='utf-8') as f:
            expected = f.read()
        self.assertEqual(expected, html)

    def test_no_dublincore(self):
        """Converting the no-DublinCore fixture through the legacy API raises NoDublinCore."""
        with self.assertRaises(NoDublinCore):
            LegacyWLDocument.from_file(
                get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
            ).as_html()

    def test_passing_parse_dublincore_to_transform(self):
        """Passing parse_dublincore=False to transform omits DublinCore parsing."""
        # Same fixture as test_no_dublincore; here the call is expected to
        # complete without raising.
        LegacyWLDocument.from_file(
            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
            parse_dublincore=False,
        ).as_html()

    def test_empty(self):
        """as_html() returns None for a bare, empty <utwor /> document."""
        self.assertIsNone(
            LegacyWLDocument.from_bytes(
                b'<utwor />',
                parse_dublincore=False,
            ).as_html()
        )