From 8f62bd3a1a38a25e860740dc4054fd034719b477 Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Tue, 5 Nov 2013 12:26:10 +0100 Subject: [PATCH] =?utf8?q?tests=20for=20handling=20"2=20po=C5=82=20XIX=20w?= =?utf8?q?."=20dates=20+=20utf8=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- librarian/dcparser.py | 3 ++- tests/test_dcparser.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/librarian/dcparser.py b/librarian/dcparser.py index bcee932..079c434 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -65,7 +65,8 @@ class Person(object): def as_date(text): try: # check out the "N. poł X w." syntax - m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) .*[.]?", text) + if isinstance(text, str): text = text.decode("utf-8") + m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) *w[.]?", text) if m: half = int(m.groups()[0]) century = roman_to_int(str(m.groups()[1])) diff --git a/tests/test_dcparser.py b/tests/test_dcparser.py index ee29bc9..a0845a0 100644 --- a/tests/test_dcparser.py +++ b/tests/test_dcparser.py @@ -9,7 +9,7 @@ from nose.tools import * from os.path import splitext from tests.utils import get_all_fixtures import codecs - +from datetime import date def check_dcparser(xml_file, result_file): xml = file(xml_file).read() @@ -46,3 +46,7 @@ def test_serialize(): for fixture in get_all_fixtures('dcparser', '*.xml'): yield check_serialize, fixture +def test_as_date(): + assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 03)) + assert_equals(dcparser.as_date("2011"), date(2011, 1, 1)) + assert_equals(dcparser.as_date("2 poł. XIX w."), date(1950, 1, 1)) -- 2.20.1