From: Marcin Koziej
Date: Tue, 5 Nov 2013 11:26:10 +0000 (+0100)
Subject: tests for handling "2 poł XIX w." dates + utf8 check
X-Git-Tag: 1.7~124
X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/8f62bd3a1a38a25e860740dc4054fd034719b477

tests for handling "2 poł XIX w." dates + utf8 check
---

diff --git a/librarian/dcparser.py b/librarian/dcparser.py
index bcee932..079c434 100644
--- a/librarian/dcparser.py
+++ b/librarian/dcparser.py
@@ -65,7 +65,8 @@ class Person(object):
 def as_date(text):
     try:
         # check out the "N. poł X w." syntax
-        m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) .*[.]?", text)
+        if isinstance(text, str): text = text.decode("utf-8")
+        m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) *w[.]?", text)
         if m:
             half = int(m.groups()[0])
             century = roman_to_int(str(m.groups()[1]))
diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py
index ee29bc9..a0845a0 100644
--- a/tests/test_dcparser.py
+++ b/tests/test_dcparser.py
@@ -9,7 +9,7 @@ from nose.tools import *
 from os.path import splitext
 from tests.utils import get_all_fixtures
 import codecs
-
+from datetime import date
 
 def check_dcparser(xml_file, result_file):
     xml = file(xml_file).read()
@@ -46,3 +46,7 @@ def test_serialize():
     for fixture in get_all_fixtures('dcparser', '*.xml'):
         yield check_serialize, fixture
 
+def test_as_date():
+    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03))
+    assert_equals(dcparser.as_date("2011"), date(2011, 1, 1))
+    assert_equals(dcparser.as_date("2 poł. XIX w."), date(1950, 1, 1))