def as_date(text):
try:
# check out the "N. poł X w." syntax
- m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) .*[.]?", text)
+ if isinstance(text, str): text = text.decode("utf-8")
+ m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) *w[.]?", text)
if m:
half = int(m.groups()[0])
century = roman_to_int(str(m.groups()[1]))
from os.path import splitext
from tests.utils import get_all_fixtures
import codecs
-
+from datetime import date
def check_dcparser(xml_file, result_file):
xml = file(xml_file).read()
for fixture in get_all_fixtures('dcparser', '*.xml'):
yield check_serialize, fixture
+def test_as_date():
+    """Check dcparser.as_date on an ISO date, a bare year and a "N poł. X w." string."""
+    # The day is written as 3 rather than 03: a leading-zero integer literal is
+    # legacy octal syntax (a SyntaxError on Python 3; 08 and 09 are invalid
+    # even on Python 2), and the value is the same.
+    assert_equals(dcparser.as_date(u"2010-10-03"), date(2010, 10, 3))
+    assert_equals(dcparser.as_date("2011"), date(2011, 1, 1))
+    # "2 poł. XIX w." is expected to come back as 1950-01-01.
+    assert_equals(dcparser.as_date("2 poł. XIX w."), date(1950, 1, 1))