def as_date(text):
    """Turn a loosely formatted date description into a ``datetime.date``.

    Recognized inputs:

    * ``YYYY-MM-DD`` and ``YYYY``;
    * two values joined by ``-`` or ``/`` (e.g. ``1350-1450``,
      ``1893/1894``): only the part before the first separator is used;
    * any of the above with the markers ``po`` or ``ok``/``ok.``, which
      are stripped before parsing;
    * a Roman-numeral century followed by ``w`` / ``w.``, optionally
      preceded by ``1 poł.`` or ``2 poł.`` to select a half of it.

    ``text`` may be a byte string (decoded as UTF-8) or a text string.

    Returns a ``date``.  Inputs without month and day resolve to
    January 1st.  Century forms resolve to January 1st of the year
    ``century * 100 + (half - 1) * 50`` (``half`` defaults to 1).

    Raises ``ValueError`` when the text matches none of these forms.
    """
    try:
        # ``bytes`` is an alias of ``str`` on Python 2, so this is the same
        # check there; on Python 3 it decodes real byte strings instead of
        # calling the non-existent ``str.decode``.
        if isinstance(text, bytes):
            text = text.decode("utf-8")

        # check out the "N. poł X w." syntax
        m = re.match(u"(?:([12]) *poł[.]? )?([MCDXVI]+) *w[.]?", text)
        if m:
            half, numeral = m.groups()
            half = int(half) if half is not None else 1
            century = roman_to_int(str(numeral))
            # NOTE(review): this maps e.g. "XIX w." to 1900 and
            # "2 poł. XIX w." to 1950 -- confirm whether
            # (century - 1) * 100 was intended before changing it, since
            # callers may depend on the current values.
            t = (century * 100 + (half - 1) * 50, 1, 1)
        else:
            text = re.sub(r"(po|ok[.]?) *", "", text)
            try:
                t = time.strptime(text, '%Y-%m-%d')
            except ValueError:
                # Not a full date: fall back to the leading year of a
                # "YYYY", "YYYY-YYYY" or "YYYY/YYYY" value.
                t = time.strptime(re.split(r'[-/]', text)[0], '%Y')
        return date(t[0], t[1], t[2])
    except ValueError:
        # Every parsing failure above is reported with one uniform message.
        raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")