fix expected html output to contain secX anchors
[librarian.git] / librarian / picture.py
index 7830528..ee3c61d 100644 (file)
@@ -11,18 +11,15 @@ import re
 
 class WLPictureURI(WLURI):
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
 
 class WLPictureURI(WLURI):
     _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
-
-    def __init__(self, *args, **kw):
-        super(WLPictureURI, self).__init__(*args, **kw)
+            '(?P<slug>[-a-z0-9]+)/?$')
 
     @classmethod
 
     @classmethod
-    def from_slug_and_lang(cls, slug, lang):
+    def from_slug(cls, slug):
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
-    def filename_stem(self):
-        return self.slug
+def as_wlpictureuri_strict(text):
+    return WLPictureURI.strict(text)
 
 
 class PictureInfo(WorkInfo):
 
 
 class PictureInfo(WorkInfo):
@@ -30,19 +27,19 @@ class PictureInfo(WorkInfo):
     Dublin core metadata for a picture
     """
     FIELDS = (
     Dublin core metadata for a picture
     """
     FIELDS = (
-        Field(DCNS('format.dimensions.digital'), 'dimensions', required=False),
-        Field(DCNS('format.dimensions.original'), 'dimensions_original', required=False),
-        Field(DCNS('format.physical'), 'physical', required=False),
+        Field(DCNS('language'), 'language', required=False),
+        Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
+        Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
+
+        Field(DCNS('format.dimensions'), 'dimensions', required=False),
+        Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
+        Field(DCNS('description.medium'), 'medium', required=False),
+        Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI),
+        Field(DCNS('identifier.url'), 'url', WLPictureURI,
+            strict=as_wlpictureuri_strict),
         )
 
         )
 
-    def validate(self):
-        """
-        WorkInfo has a language validation code only, which we do not need.
-        """
-        pass
-    
 
 class ImageStore(object):
     EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
 
 class ImageStore(object):
     EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
@@ -149,3 +146,28 @@ class WLPicture(object):
 
     def image_file(self, *args, **kwargs):
         return open(self.image_path, *args, **kwargs)
 
     def image_file(self, *args, **kwargs):
         return open(self.image_path, *args, **kwargs)
+
+    def partiter(self):
+        """
+        Iterates the parts of this picture and returns them and their metadata
+        """
+        for part in self.edoc.iter("div"):
+            pd = {}
+            pd['type'] = part.get('type')
+            if pd['type'] == 'area':
+                pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
+                                (int(part.get('x2')), int(part.get('y2'))))
+
+            pd['themes'] = []
+            pd['object'] = None
+            parent = part
+            while True:
+                parent = parent.getparent()
+                if parent is None:
+                    break
+                if parent.tag == 'sem':
+                    if parent.get('type') == 'theme':
+                        pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
+                    elif parent.get('type') == 'object' and pd['object'] is None:
+                        pd['object'] = parent.get('object')
+            yield pd