remove lang from URI and add relation.isVariantOf
author: Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 29 Dec 2011 14:02:42 +0000 (15:02 +0100)
committer: Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 29 Dec 2011 14:02:42 +0000 (15:02 +0100)
librarian/__init__.py
librarian/dcparser.py
librarian/parser.py
librarian/picture.py

index feb9974..dd09ce4 100644 (file)
@@ -66,42 +66,35 @@ WLNS = EmptyNamespace()
 
 
 class WLURI(object):
-    """Represents a WL URI. Extracts slug and language from it."""
-    DEFAULT_LANGUAGE = u'pol'
-
+    """Represents a WL URI. Extracts slug from it."""
     slug = None
-    language = None
 
     example = 'http://wolnelektury.pl/katalog/lektura/template/'
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/'
-            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
-
-    def __init__(self, uri=None):
-        if uri is not None:
-            uri = unicode(uri)
-            self.uri = uri
-            match = self._re_wl_uri.match(uri)
-            if not match:
-                raise ValueError('Supplied URI (%s) does not match '
-                        'the WL document URI template.' % uri)
-            self.slug = match.group('slug')
-            self.language = match.group('lang') or self.DEFAULT_LANGUAGE
+            '(?P<slug>[-a-z0-9]+)/?$')
+
+    def __init__(self, uri):
+        uri = unicode(uri)
+        self.uri = uri
+        self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
+
+    @classmethod
+    def strict(cls, uri):  # alternate constructor: only accepts URIs matching cls._re_wl_uri
+        match = cls._re_wl_uri.match(uri)
+        if not match:
+            raise ValueError('Supplied URI (%s) does not match '
+                'the template: %s.' % (uri, cls._re_wl_uri.pattern))  # .pattern: show the regex text, not the object repr
+        return cls(uri)
 
     @classmethod
-    def from_slug_and_lang(cls, slug, lang):
-        """Contructs an URI from slug and language code.
+    def from_slug(cls, slug):
+        """Constructs a URI from slug.
 
-        >>> WLURI.from_slug_and_lang('a-slug', WLURI.DEFAULT_LANGUAGE).uri
+        >>> WLURI.from_slug('a-slug').uri
         u'http://wolnelektury.pl/katalog/lektura/a-slug/'
-        >>> WLURI.from_slug_and_lang('a-slug', 'deu').uri
-        u'http://wolnelektury.pl/katalog/lektura/a-slug/deu/'
 
         """
-        if lang is None:
-            lang = cls.DEFAULT_LANGUAGE
         uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
-        if lang is not None and lang != cls.DEFAULT_LANGUAGE:
-            uri += lang + '/'
         return cls(uri)
 
     def __unicode__(self):
@@ -111,17 +104,7 @@ class WLURI(object):
         return self.uri
 
     def __eq__(self, other):
-        return self.slug, self.language == other.slug, other.language
-
-    def filename_stem(self):
-        stem = self.slug
-        if self.language != self.DEFAULT_LANGUAGE:
-            stem += '_' + self.language
-        return stem
-
-    def validate_language(self, language):
-        if language != self.language:
-            raise ValidationError("Incorrect language definition in URI")
+        return self.slug == other.slug
 
 
 class DocProvider(object):
@@ -130,18 +113,14 @@ class DocProvider(object):
     Used for generating joined files, like EPUBs.
     """
 
-    def by_slug_and_lang(self, slug, lang=None):
-        """Should return a file-like object with a WL document XML."""
-        raise NotImplementedError
-
     def by_slug(self, slug):
         """Should return a file-like object with a WL document XML."""
-        return self.by_slug_and_lang(slug)
+        raise NotImplementedError
 
     def by_uri(self, uri, wluri=WLURI):
         """Should return a file-like object with a WL document XML."""
         wluri = wluri(uri)
-        return self.by_slug_and_lang(wluri.slug, wluri.language)
+        return self.by_slug(wluri.slug)
 
 
 class DirDocProvider(DocProvider):
@@ -151,8 +130,8 @@ class DirDocProvider(DocProvider):
         self.dir = dir_
         self.files = {}
 
-    def by_slug_and_lang(self, slug, lang=None):
-        fname = WLURI.from_slug_and_lang(slug, lang).filename_stem() + '.xml'
+    def by_slug(self, slug):
+        fname = slug + '.xml'
         return open(os.path.join(self.dir, fname))
 
 
@@ -167,7 +146,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
           DCNS('subject.type'): [u'Unknown'],
           DCNS('subject.genre'): [u'Unknown'],
           DCNS('date'): ['1970-01-01'],
-          DCNS('language'): [WLURI.DEFAULT_LANGUAGE],
+          DCNS('language'): [u'pol'],
           # DCNS('date'): [creation_date],
           DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
           DCNS('description'):
index d99aaf0..21244ef 100644 (file)
@@ -70,8 +70,7 @@ def as_date(text):
     except ValueError, e:
         raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.")
 
-def as_person(text):
-    return Person.from_text(text)
+as_person = Person.from_text
 
 def as_unicode(text):
     if isinstance(text, unicode):
@@ -80,34 +79,39 @@ def as_unicode(text):
         return text.decode('utf-8')
 
 class Field(object):
-    def __init__(self, uri, attr_name, type=as_unicode, multiple=False, salias=None, **kwargs):
+    def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
         self.uri = uri
         self.name = attr_name
-        self.validator = type
+        self.validator = validator  # kept unwrapped so a None validator stays None for validate_value
+        self.strict = strict  # None when no strict validator was given; a lambda wrapper would hide that
         self.multiple = multiple
         self.salias = salias
 
         self.required = kwargs.get('required', True) and not kwargs.has_key('default')
         self.default = kwargs.get('default', [] if multiple else [None])
 
-    def validate_value(self, val):
+    def validate_value(self, val, strict=False):
+        if strict and self.strict is not None:
+            validator = self.strict
+        else:
+            validator = self.validator
         try:
             if self.multiple:
-                if self.validator is None:
+                if validator is None:
                     return val
-                return [ self.validator(v) if v is not None else v for v in val ]
+                return [ validator(v) if v is not None else v for v in val ]
             elif len(val) > 1:
                 raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
             elif len(val) == 0:
                 raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri)
             else:
-                if self.validator is None or val[0] is None:
+                if validator is None or val[0] is None:
                     return val[0]
-                return self.validator(val[0])
+                return validator(val[0])
         except ValueError, e:
             raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
 
-    def validate(self, fdict):
+    def validate(self, fdict, strict=False):
         if not fdict.has_key(self.uri):
             if not self.required:
                 f = self.default
@@ -116,7 +120,7 @@ class Field(object):
         else:
             f = fdict[self.uri]
 
-        return self.validate_value(f)
+        return self.validate_value(f, strict=strict)
 
     def __eq__(self, other):
         if isinstance(other, Field) and other.name == self.name:
@@ -162,18 +166,18 @@ class WorkInfo(object):
 
         Field( DCNS('source'), 'source_name', required=False),
         Field( DCNS('source.URL'), 'source_url', required=False),
-        Field( DCNS('identifier.url'), 'url', WLURI),
+        Field( DCNS('identifier.url'), 'url', WLURI, strict=WLURI.strict),
         Field( DCNS('rights.license'), 'license', required=False),
         Field( DCNS('rights'), 'license_description'),
     )
 
     @classmethod
-    def from_string(cls, xml):
+    def from_string(cls, xml, *args, **kwargs):
         from StringIO import StringIO
-        return cls.from_file(StringIO(xml))
+        return cls.from_file(StringIO(xml), *args, **kwargs)
 
     @classmethod
-    def from_file(cls, xmlfile):
+    def from_file(cls, xmlfile, *args, **kwargs):
         desc_tag = None
         try:
             iter = etree.iterparse(xmlfile, ['start', 'end'])
@@ -194,14 +198,14 @@ class WorkInfo(object):
             # if there is no end, Expat should yell at us with an ExpatError
 
             # extract data from the element and make the info
-            return cls.from_element(desc_tag)
+            return cls.from_element(desc_tag, *args, **kwargs)
         except XMLSyntaxError, e:
             raise ParseError(e)
         except ExpatError, e:
             raise ParseError(e)
 
     @classmethod
-    def from_element(cls, rdf_tag):
+    def from_element(cls, rdf_tag, *args, **kwargs):
         # the tree is already parsed, so we don't need to worry about Expat errors
         field_dict = {}
         desc = rdf_tag.find(".//" + RDFNS('Description'))
@@ -214,9 +218,9 @@ class WorkInfo(object):
             fv.append(e.text)
             field_dict[e.tag] = fv
 
-        return cls(desc.attrib, field_dict)
+        return cls(desc.attrib, field_dict, *args, **kwargs)
 
-    def __init__(self, rdf_attrs, dc_fields):
+    def __init__(self, rdf_attrs, dc_fields, strict=False):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
         given field. """
@@ -225,16 +229,11 @@ class WorkInfo(object):
         self.fmap = {}
 
         for field in self.FIELDS:
-            value = field.validate( dc_fields )
+            value = field.validate(dc_fields, strict=strict)
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias: self.fmap[field.salias] = field
 
-        self.validate()
-
-    def validate(self):
-        self.url.validate_language(self.language)
-
     def __getattribute__(self, name):
         try:
             field = object.__getattribute__(self, 'fmap')[name]
@@ -350,7 +349,10 @@ class BookInfo(WorkInfo):
                 
         Field( DCNS('contributor.translator'), 'translators', \
             as_person,  salias='translator', multiple=True, default=[]),
-        Field( DCNS('relation.hasPart'), 'parts', WLURI, multiple=True, required=False),
+        Field( DCNS('relation.hasPart'), 'parts', 
+            WLURI, strict=WLURI.strict, multiple=True, required=False),
+        Field( DCNS('relation.isVariantOf'), 'variant_of', 
+            WLURI, strict=WLURI.strict, required=False),
 
         Field( DCNS('relation.cover_image.url'), 'cover_url', required=False),
         Field( DCNS('relation.cover_image.attribution'), 'cover_by', required=False),
index 5ae06e2..2ece72f 100644 (file)
@@ -19,7 +19,7 @@ class WLDocument(object):
     LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
     provider = None
 
-    def __init__(self, edoc, parse_dublincore=True, provider=None):
+    def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False):
         self.edoc = edoc
         self.provider = provider
 
@@ -36,7 +36,8 @@ class WLDocument(object):
             if self.rdf_elem is None:
                 raise NoDublinCore('Document has no DublinCore - which is required.')
 
-            self.book_info = dcparser.BookInfo.from_element(self.rdf_elem)
+            self.book_info = dcparser.BookInfo.from_element(
+                    self.rdf_elem, strict=strict)
         else:
             self.book_info = None
 
@@ -192,7 +193,7 @@ class WLDocument(object):
                 save_path = os.path.join(save_path,
                         unicode(self.book_info.author).encode('utf-8'))
             save_path = os.path.join(save_path,
-                                self.book_info.uri.filename_stem())
+                                self.book_info.uri.slug)
             if ext:
                 save_path += '.%s' % ext
         else:
index 0f5c99a..ce1a551 100644 (file)
@@ -11,19 +11,13 @@ import re
 
 class WLPictureURI(WLURI):
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
-
-    def __init__(self, *args, **kw):
-        super(WLPictureURI, self).__init__(*args, **kw)
+            '(?P<slug>[-a-z0-9]+)/?$')
 
     @classmethod
-    def from_slug_and_lang(cls, slug, lang):
+    def from_slug(cls, slug):
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
-    def filename_stem(self):
-        return self.slug
-
 
 class PictureInfo(WorkInfo):
     """
@@ -39,15 +33,9 @@ class PictureInfo(WorkInfo):
         Field(DCNS('description.medium'), 'medium', required=False),
         Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI),
+        Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=WLPictureURI.strict),
         )
 
-    def validate(self):
-        """
-        WorkInfo has a language validation code only, which we do not need.
-        """
-        pass
-
 
 class ImageStore(object):
     EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'