def as_date(text):
    """Convert a textual date description into a ``datetime.date``.

    Recognized inputs:

    * ``YYYY-MM-DD`` -- an exact day;
    * ``YYYY`` -- a year, mapped to January 1st of that year;
    * ``YYYY-YYYY`` / ``YYYY/YYYY`` -- only the part before the first
      ``-`` or ``/`` is used, as a year;
    * any of the above with "po" or "ok." markers, which are removed
      before parsing;
    * ``"<roman> w."`` optionally preceded by ``"1 poł. "`` or
      ``"2 poł. "`` -- a century, or a half of one.

    The conversion is lossy: every input collapses to a single day.

    :param text: date description, as text or as UTF-8 encoded bytes.
    :returns: a ``datetime.date``.
    :raises ValueError: if the text matches none of the formats above, or
        the resulting year/month/day is not a valid date.
    """
    try:
        # ``bytes`` is an alias of ``str`` on Python 2, so this decodes the
        # same inputs as before there, and works on Python 3 as well, where
        # text strings have no ``decode`` method.  A UnicodeDecodeError is a
        # ValueError, so undecodable bytes are reported like any other
        # unrecognized input.
        if isinstance(text, bytes):
            text = text.decode("utf-8")

        # check out the "N. poł X w." syntax
        m = re.match(u"(?:([12]) *poł[.]? )?([MCDXVI]+) *w[.]?", text)
        if m:
            half_text, century_text = m.groups()
            # With no explicit half given, the first half is assumed.
            half = int(half_text) if half_text is not None else 1
            century = roman_to_int(str(century_text))
            # NOTE(review): "XIX w." yields year 1900 and "2 poł. XIX w."
            # yields 1950 -- confirm this offset is the intended
            # approximation before changing it; callers may rely on it.
            t = ((century * 100 + (half - 1) * 50), 1, 1)
        else:
            # Drop the "po" / "ok." markers wherever they occur, so that
            # e.g. "ok.1876-ok.1886" reduces to "1876-1886".
            text = re.sub(r"(po|ok[.]?) *", "", text)
            try:
                t = time.strptime(text, '%Y-%m-%d')
            except ValueError:
                # Not a full date: use what precedes the first "-" or "/"
                # as a bare year (covers "YYYY", ranges and year turns).
                t = time.strptime(re.split(r'[-/]', text)[0], '%Y')
        return date(t[0], t[1], t[2])
    except ValueError:
        raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")