Add lesson TOC. Also: don't just arbitrarily paste data into XML.
[librarian.git] / librarian / dcparser.py
index 21244ef..78f231c 100644 (file)
@@ -70,7 +70,8 @@ def as_date(text):
     except ValueError, e:
         raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
 
-as_person = Person.from_text
+def as_person(text):
+    return Person.from_text(text)
 
 def as_unicode(text):
     if isinstance(text, unicode):
@@ -78,12 +79,15 @@ def as_unicode(text):
     else:
         return text.decode('utf-8')
 
+def as_wluri_strict(text):
+    return WLURI.strict(text)
+
 class Field(object):
     def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
         self.uri = uri
         self.name = attr_name
-        self.validator = lambda x: validator(x)
-        self.strict = lambda x: strict(x)
+        self.validator = validator
+        self.strict = strict
         self.multiple = multiple
         self.salias = salias
 
@@ -91,6 +95,7 @@ class Field(object):
         self.default = kwargs.get('default', [] if multiple else [None])
 
     def validate_value(self, val, strict=False):
+        val = [v.strip() if v is not None else v for v in val]
         if strict and self.strict is not None:
             validator = self.strict
         else:
@@ -111,10 +116,21 @@ class Field(object):
         except ValueError, e:
             raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
 
-    def validate(self, fdict, strict=False):
+    def validate(self, fdict, fallbacks=None, strict=False):
+        if fallbacks is None:
+            fallbacks = {}
         if not fdict.has_key(self.uri):
             if not self.required:
-                f = self.default
+                # Accept single value for single fields and saliases.
+                if self.name in fallbacks:
+                    if self.multiple:
+                        f = fallbacks[self.name]
+                    else:
+                        f = [fallbacks[self.name]]
+                elif self.salias and self.salias in fallbacks:
+                    f = [fallbacks[self.salias]]
+                else:
+                    f = self.default
             else:
                 raise ValidationError("Required field %s not found" % self.uri)
         else:
@@ -123,7 +139,7 @@ class Field(object):
         return self.validate_value(f, strict=strict)
 
     def __eq__(self, other):
-        if isinstance(other, Field) and other.name == self.name:
+        if isinstance(other, Field) and other.name == self.name and other.uri == self.uri:
             return True
         return False
 
@@ -148,9 +164,11 @@ class WorkInfo(object):
     __metaclass__ = DCInfo
 
     FIELDS = (
-        Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
+        Field( DCNS('creator.expert'), 'authors_expert', as_person, salias='author', required=False, multiple=True),
+        Field( DCNS('creator.scenario'), 'authors_scenario', as_person, salias='author', required=False, multiple=True),
+        Field( DCNS('creator.textbook'), 'authors_textbook', as_person, salias='author', required=False, multiple=True),
         Field( DCNS('title'), 'title'),
-        Field( DCNS('type'), 'type', required=False, multiple=True),
+        Field( DCNS('type'), 'type', required=False),
 
         Field( DCNS('contributor.editor'), 'editors', \
             as_person, salias='editor', multiple=True, default=[]),
@@ -161,12 +179,15 @@ class WorkInfo(object):
         Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
         Field( DCNS('publisher'), 'publisher'),
 
+        Field( DCNS('subject.competence'), 'competences', multiple=True, required=False),
+        Field( DCNS('subject.curriculum'), 'curriculum', multiple=True, required=False),
+
         Field( DCNS('language'), 'language'),
         Field( DCNS('description'), 'description', required=False),
 
         Field( DCNS('source'), 'source_name', required=False),
         Field( DCNS('source.URL'), 'source_url', required=False),
-        Field( DCNS('identifier.url'), 'url', WLURI, strict=WLURI.strict),
+        Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
         Field( DCNS('rights.license'), 'license', required=False),
         Field( DCNS('rights'), 'license_description'),
     )
@@ -220,7 +241,7 @@ class WorkInfo(object):
 
         return cls(desc.attrib, field_dict, *args, **kwargs)
 
-    def __init__(self, rdf_attrs, dc_fields, strict=False):
+    def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
         given field. """
@@ -229,7 +250,10 @@ class WorkInfo(object):
         self.fmap = {}
 
         for field in self.FIELDS:
-            value = field.validate(dc_fields, strict=strict)
+            value = field.validate(dc_fields, fallbacks=fallbacks,
+                            strict=strict)
+            if field.multiple:
+                value = getattr(self, 'prop_' + field.name, []) + value
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias: self.fmap[field.salias] = field
@@ -244,7 +268,7 @@ class WorkInfo(object):
                 if not field.multiple:
                     raise "OUCH!! for field %s" % name
 
-                return value[0]
+                return value[0] if value else None
         except (KeyError, AttributeError):
             return object.__getattribute__(self, name)
 
@@ -343,20 +367,25 @@ class BookInfo(WorkInfo):
         Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
                 required=False),
 
-        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
-        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
-        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True),
-                
+        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
+                required=False),
+        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
+                required=False),
+        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
+                required=False),
+
         Field( DCNS('contributor.translator'), 'translators', \
             as_person,  salias='translator', multiple=True, default=[]),
-        Field( DCNS('relation.hasPart'), 'parts', 
-            WLURI, strict=WLURI.strict, multiple=True, required=False),
-        Field( DCNS('relation.isVariantOf'), 'variant_of', 
-            WLURI, strict=WLURI.strict, required=False),
-
-        Field( DCNS('relation.cover_image.url'), 'cover_url', required=False),
-        Field( DCNS('relation.cover_image.attribution'), 'cover_by', required=False),
-        Field( DCNS('relation.cover_image.source'), 'cover_source', required=False),
+        Field( DCNS('relation.hasPart'), 'parts',
+            WLURI, strict=as_wluri_strict, multiple=True, required=False),
+        Field( DCNS('relation.isVariantOf'), 'variant_of',
+            WLURI, strict=as_wluri_strict, required=False),
+        Field( DCNS('relation'), 'relations',
+            WLURI, strict=as_wluri_strict, multiple=True, required=False),
+
+        Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
+        Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
+        Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
     )