X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/6642c1c71c5c6ce6ef3401c8c9da84cf076b018b..175c5cf4f727162fa5bddd2460d37595251bbe8e:/tests/test_dcparser.py?ds=sidebyside diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py old mode 100755 new mode 100644 index 62e664c..4dab764 --- a/tests/test_dcparser.py +++ b/tests/test_dcparser.py @@ -1,56 +1,61 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from __future__ import unicode_literals -import unittest - +from librarian import dcparser from lxml import etree -from utils import get_file_path -from librarian import dcparser, html, ParseError -from utils import AutoTestMetaclass - -class TestDCParser(unittest.TestCase): - __metaclass__ = AutoTestMetaclass - - TEST_DIR = 'dcparser' - - def run_auto_test(self, in_data, out_data): - info = dcparser.BookInfo.from_string(in_data).to_dict() - should_be = eval(out_data) - for key in should_be: - self.assertEqual( info[key], should_be[key] ) - -class TestDCSerialize(unittest.TestCase): - __metaclass__ = AutoTestMetaclass - - TEST_DIR = 'dcserialize' - - def run_auto_test(self, in_data, out_data): - import lxml.etree - # first parse the input - info = dcparser.BookInfo.from_string(in_data) - - # serialize - serialized = lxml.etree.tostring(info.to_etree(), encoding=unicode).encode('utf-8') - - # then parse again - info_bis = dcparser.BookInfo.from_string(serialized) - - # check if they are the same - for key in vars(info): - self.assertEqual( getattr(info, key), getattr(info_bis, key)) - - for key in vars(info_bis): - self.assertEqual( getattr(info, key), getattr(info_bis, key)) - -class TestParserErrors(unittest.TestCase): - def test_error(self): - try: - html.transform(get_file_path('erroneous', 'asnyk_miedzy_nami.xml'), - get_file_path('erroneous', 'asnyk_miedzy_nami.html')) - self.fail() - except ParseError: - pass - #self.assertEqual(e.position, (25, 13)) - -if __name__ == '__main__': - unittest.main() +from nose.tools import * +from os.path import splitext +from tests.utils import get_all_fixtures +import codecs +from datetime import date + + +def check_dcparser(xml_file, result_file): + xml = open(xml_file, 'rb').read() + result = codecs.open(result_file, encoding='utf-8').read() + info = dcparser.BookInfo.from_bytes(xml).to_dict() + should_be = eval(result) + for key in should_be: + assert_equals(info[key], should_be[key]) + + +def test_dcparser(): + for fixture in get_all_fixtures('dcparser', '*.xml'): + base_name = splitext(fixture)[0] + yield check_dcparser, fixture, base_name + '.out' + + +def check_serialize(xml_file): + xml = open(xml_file, 'rb').read() + info = dcparser.BookInfo.from_bytes(xml) + + # serialize + serialized = etree.tostring(info.to_etree(), encoding='unicode').encode('utf-8') + # then parse again + info_bis = dcparser.BookInfo.from_bytes(serialized) + + # check if they are the same + for key in vars(info): + assert_equals(getattr(info, key), getattr(info_bis, key)) + for key in vars(info_bis): + assert_equals(getattr(info, key), getattr(info_bis, key)) + + +def test_serialize(): + for fixture in get_all_fixtures('dcparser', '*.xml'): + yield check_serialize, fixture + + +def test_asdate(): + assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 3)) + assert_equals(dcparser.as_date(u"2011"), date(2011, 1, 1)) + assert_equals(dcparser.as_date(u"2 poł. XIX w."), date(1950, 1, 1)) + assert_equals(dcparser.as_date(u"XVII w., l. 20"), date(1720, 1, 1)) + assert_equals(dcparser.as_date(u"po 1460"), date(1460, 1, 1)) + assert_equals(dcparser.as_date(u"ok. 1813-1814"), date(1813, 1, 1)) + assert_equals(dcparser.as_date(u"ok.1876-ok.1886"), date(1876, 1, 1)) + assert_equals(dcparser.as_date(u"1893/1894"), date(1893, 1, 1))