#2044: handle weird whitespace for covers
[librarian.git] / librarian / picture.py
index edf541f..ee3c61d 100644 (file)
@@ -11,18 +11,15 @@ import re
 
 class WLPictureURI(WLURI):
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
-
-    def __init__(self, *args, **kw):
-        super(WLPictureURI, self).__init__(*args, **kw)
+            '(?P<slug>[-a-z0-9]+)/?$')
 
     @classmethod
-    def from_slug_and_lang(cls, slug, lang):
+    def from_slug(cls, slug):
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
-    def filename_stem(self):
-        return self.slug
+def as_wlpictureuri_strict(text):
+    return WLPictureURI.strict(text)
 
 
 class PictureInfo(WorkInfo):
@@ -39,15 +36,10 @@ class PictureInfo(WorkInfo):
         Field(DCNS('description.medium'), 'medium', required=False),
         Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI),
+        Field(DCNS('identifier.url'), 'url', WLPictureURI,
+            strict=as_wlpictureuri_strict),
         )
 
-    def validate(self):
-        """
-        WorkInfo has a language validation code only, which we do not need.
-        """
-        pass
-
 
 class ImageStore(object):
     EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
@@ -176,6 +168,6 @@ class WLPicture(object):
                 if parent.tag == 'sem':
                     if parent.get('type') == 'theme':
                         pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
-                    elif parent.get('type') == 'object' and not pd['object']:
-                        pd['object'] = parent.get('name')
+                    elif parent.get('type') == 'object' and pd['object'] is None:
+                        pd['object'] = parent.get('object')
             yield pd