fixes for dates etc
[librarian.git] / librarian / dcparser.py
index f64317a..7418f70 100644 (file)
@@ -6,6 +6,8 @@
 from xml.parsers.expat import ExpatError
 from datetime import date
 import time
+import re
+from librarian.util import roman_to_int
 
 from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
                        WLURI)
@@ -62,10 +64,24 @@ class Person(object):
 
 def as_date(text):
     try:
-        try:
-            t = time.strptime(text, '%Y-%m-%d')
-        except ValueError:
-            t = time.strptime(text, '%Y')
+        # First try the Polish century notation, e.g. "XIX w." or "2 poł. XVI w."
+        if isinstance(text, str): text = text.decode("utf-8")
+        m = re.match(u"(?:([12]) *poł[.]? )?([MCDXVI]+) *w[.]?", text)
+        if m:
+            
+            half = m.groups()[0]
+            if half is not None: 
+                half = int(half)
+            else: 
+                half = 1
+            century = roman_to_int(str(m.groups()[1]))
+            t = ((century*100 + (half-1)*50), 1, 1)
+        else:
+            text = re.sub(r"(po|ok[.]?) *", "", text)
+            try:
+                t = time.strptime(text, '%Y-%m-%d')
+            except ValueError:
+                t = time.strptime(re.split(r'[-/]', text)[0], '%Y')
         return date(t[0], t[1], t[2])
     except ValueError, e:
         raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
@@ -115,10 +131,21 @@ class Field(object):
         except ValueError, e:
             raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
 
-    def validate(self, fdict, strict=False):
+    def validate(self, fdict, fallbacks=None, strict=False):
+        if fallbacks is None:
+            fallbacks = {}
         if not fdict.has_key(self.uri):
             if not self.required:
-                f = self.default
+                # Use caller-supplied fallbacks; wrap a bare value in a list for single-valued fields and saliases.
+                if self.name in fallbacks:
+                    if self.multiple:
+                        f = fallbacks[self.name]
+                    else:
+                        f = [fallbacks[self.name]]
+                elif self.salias and self.salias in fallbacks:
+                    f = [fallbacks[self.salias]]
+                else:
+                    f = self.default
             else:
                 raise ValidationError("Required field %s not found" % self.uri)
         else:
@@ -160,6 +187,9 @@ class WorkInfo(object):
             as_person, salias='editor', multiple=True, default=[]),
         Field( DCNS('contributor.technical_editor'), 'technical_editors',
             as_person, salias='technical_editor', multiple=True, default=[]),
+        Field( DCNS('contributor.funding'), 'funders',
+            salias='funder', multiple=True, default=[]),
+        Field( DCNS('contributor.thanks'), 'thanks', required=False),
 
         Field( DCNS('date'), 'created_at', as_date),
         Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
@@ -224,7 +254,7 @@ class WorkInfo(object):
 
         return cls(desc.attrib, field_dict, *args, **kwargs)
 
-    def __init__(self, rdf_attrs, dc_fields, strict=False):
+    def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
         given field. """
@@ -233,7 +263,8 @@ class WorkInfo(object):
         self.fmap = {}
 
         for field in self.FIELDS:
-            value = field.validate(dc_fields, strict=strict)
+            value = field.validate(dc_fields, fallbacks=fallbacks,
+                            strict=strict)
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias: self.fmap[field.salias] = field
@@ -248,7 +279,7 @@ class WorkInfo(object):
                 if not field.multiple:
                     raise "OUCH!! for field %s" % name
 
-                return value[0]
+                return value[0] if value else None
         except (KeyError, AttributeError):
             return object.__getattribute__(self, name)