from xml.parsers.expat import ExpatError
from datetime import date
import time
+import re
+from librarian.util import roman_to_int
from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
WLURI)
def as_date(text):
    """Parse a textual date description into a ``datetime.date``.

    The value is reduced to a single date, so ranges and vague forms lose
    information. Forms handled:

    * ``YYYY-MM-DD`` - an exact date;
    * ``YYYY`` - a year, mapped to January 1st of that year;
    * a range or turn such as ``1350-1450`` or ``1893/1894`` - only the
      part before the first ``-`` or ``/`` is used;
    * any of the above with ``po`` or ``ok.`` markers, which are removed;
    * a century in Roman numerals with an optional half, e.g. ``XVIII w.``
      or ``2 poł. XVIII w.`` - mapped to January 1st of the year
      ``century * 100``, plus 50 for the second half.

    Byte strings are decoded as UTF-8 before parsing.

    Raises:
        ValueError: if the text cannot be interpreted as any of the forms
            above (including byte strings that are not valid UTF-8).
    """
    try:
        # ``bytes`` is the same type as ``str`` on Python 2, so this keeps
        # the old behaviour there while not touching text on Python 3.
        if isinstance(text, bytes):
            text = text.decode("utf-8")

        # check out the "N. poł X w." syntax
        m = re.match(u"(?:([12]) *poł[.]? )?([MCDXVI]+) *w[.]?", text)
        if m:
            half, numeral = m.groups()
            # No explicit half means the start of the century.
            half = 1 if half is None else int(half)
            century = roman_to_int(str(numeral))
            t = (century * 100 + (half - 1) * 50, 1, 1)
        else:
            # Drop the "po" (after) / "ok." (circa) markers.
            text = re.sub(r"(po|ok[.]?) *", "", text)
            try:
                t = time.strptime(text, '%Y-%m-%d')
            except ValueError:
                # Fall back to the leading year of "YYYY", "YYYY-YYYY"
                # or "YYYY/YYYY".
                t = time.strptime(re.split(r'[-/]', text)[0], '%Y')
        return date(t[0], t[1], t[2])
    except ValueError:
        raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
except ValueError, e:
raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
- def validate(self, fdict, strict=False):
+ def validate(self, fdict, fallbacks=None, strict=False):
+ if fallbacks is None:
+ fallbacks = {}
if not fdict.has_key(self.uri):
if not self.required:
- f = self.default
+ # Accept single value for single fields and saliases.
+ if self.name in fallbacks:
+ if self.multiple:
+ f = fallbacks[self.name]
+ else:
+ f = [fallbacks[self.name]]
+ elif self.salias and self.salias in fallbacks:
+ f = [fallbacks[self.salias]]
+ else:
+ f = self.default
else:
raise ValidationError("Required field %s not found" % self.uri)
else:
as_person, salias='editor', multiple=True, default=[]),
Field( DCNS('contributor.technical_editor'), 'technical_editors',
as_person, salias='technical_editor', multiple=True, default=[]),
+ Field( DCNS('contributor.funding'), 'funders',
+ salias='funder', multiple=True, default=[]),
+ Field( DCNS('contributor.thanks'), 'thanks', required=False),
Field( DCNS('date'), 'created_at', as_date),
Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
return cls(desc.attrib, field_dict, *args, **kwargs)
- def __init__(self, rdf_attrs, dc_fields, strict=False):
+ def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
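"""rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
dc_fields - dictionary mapping DC fields (with namespace) to a list of text values for the
given field.
fallbacks - optional dictionary, keyed by field name or salias, consulted for non-required
fields that are missing from dc_fields.
strict - passed through to each field's validate(). """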
self.fmap = {}
for field in self.FIELDS:
- value = field.validate(dc_fields, strict=strict)
+ value = field.validate(dc_fields, fallbacks=fallbacks,
+ strict=strict)
setattr(self, 'prop_' + field.name, value)
self.fmap[field.name] = field
if field.salias: self.fmap[field.salias] = field
if not field.multiple:
raise "OUCH!! for field %s" % name
- return value[0]
+ return value[0] if value else None
except (KeyError, AttributeError):
return object.__getattribute__(self, name)
Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
required=False),
- Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
- Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
- Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True),
+ Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
+ required=False),
+ Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
+ required=False),
+ Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
+ required=False),
Field( DCNS('contributor.translator'), 'translators', \
as_person, salias='translator', multiple=True, default=[]),
Field( DCNS('relation.isVariantOf'), 'variant_of',
WLURI, strict=as_wluri_strict, required=False),
- Field( DCNS('relation.cover_image.url'), 'cover_url', required=False),
- Field( DCNS('relation.cover_image.attribution'), 'cover_by', required=False),
- Field( DCNS('relation.cover_image.source'), 'cover_source', required=False),
+ Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
+ Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
+ Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
)