WLPicture parts iterator (partiter)
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 21 Dec 2011 12:01:41 +0000 (13:01 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 21 Dec 2011 12:02:11 +0000 (13:02 +0100)
librarian/picture.py
tests/files/picture/angelus-novus.xml
tests/test_picture.py

index 70b372f..b770030 100644 (file)
@@ -153,3 +153,28 @@ class WLPicture(object):
 
     def image_file(self, *args, **kwargs):
         """
         Open the file at ``self.image_path`` and return the file object.
         All positional and keyword arguments are passed straight to the
         builtin ``open()``; this method does not close the file itself.
         """
         return open(self.image_path, *args, **kwargs)
 
     def image_file(self, *args, **kwargs):
         """
         Open the file at ``self.image_path`` and return the file object.
         All positional and keyword arguments are passed straight to the
         builtin ``open()``; this method does not close the file itself.
         """
         return open(self.image_path, *args, **kwargs)
+
+    def partiter(self):
+        """
+        Iterate over the parts (``div`` elements) of this picture.
+
+        Yields one dict per ``div`` found in ``self.edoc``, with the keys:
+
+        * ``type``   - the div's ``type`` attribute,
+        * ``coords`` - ``((x1, y1), (x2, y2))`` as ints; present only when
+          ``type`` is ``'area'``,
+        * ``themes`` - list of stripped, comma-separated theme names taken
+          from every enclosing ``<sem type="theme">``, nearest ancestor first,
+        * ``object`` - label of the nearest enclosing ``<sem type="object">``
+          that carries one, or None.
+
+        Raises TypeError or ValueError when an ``area`` div has a missing or
+        non-integer coordinate attribute.
+        """
+        for part in self.edoc.iter("div"):
+            pd = {'type': part.get('type'), 'themes': [], 'object': None}
+            if pd['type'] == 'area':
+                pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
+                                (int(part.get('x2')), int(part.get('y2'))))
+
+            # Walk up through every enclosing <sem>, nearest ancestor first.
+            for sem in part.iterancestors('sem'):
+                sem_type = sem.get('type')
+                if sem_type == 'theme':
+                    theme = sem.get('theme')
+                    # A missing attribute must not leak in as the text u'None'.
+                    if theme is not None:
+                        pd['themes'] += [t.strip() for t in unicode(theme).split(',')]
+                elif sem_type == 'object' and not pd['object']:
+                    # Objects are labelled with object="..."; name="..." is
+                    # the older spelling and is kept as a fallback.
+                    pd['object'] = sem.get('object') or sem.get('name')
+            yield pd
index 032545d..b3b9ccb 100644 (file)
       <dc:language xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">lat</dc:language>
     </rdf:Description>
   </rdf:RDF>
       <dc:language xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">lat</dc:language>
     </rdf:Description>
   </rdf:RDF>
-
-  <sem type="motif" motif="anioł historii">
+  <sem type="object" object="obraz cały">
+    <div type="whole"/>
+  </sem>
+  <sem type="theme" theme="anioł historii">
     <div type="area" x1="462" y1="212" x2="1283" y2="1730"/>
   </sem>
     <div type="area" x1="462" y1="212" x2="1283" y2="1730"/>
   </sem>
-  <sem type="motif" motif="spojrzenie">
+  <sem type="theme" theme="spojrzenie">
     <div type="area" x1="688" y1="500" x2="1054" y2="618"/>
   </sem>
     <div type="area" x1="688" y1="500" x2="1054" y2="618"/>
   </sem>
-  <sem type="object" name="skrzydło">
+  <sem type="object" object="skrzydło">
     <div type="area" x1="468" y1="741" x2="694" y2="1027"/>
     <div type="area" x1="1044" y1="762" x2="1260" y2="1041"/>
   </sem>
     <div type="area" x1="468" y1="741" x2="694" y2="1027"/>
     <div type="area" x1="1044" y1="762" x2="1260" y2="1041"/>
   </sem>
index 4e3b252..35f2cf9 100644 (file)
@@ -38,3 +38,22 @@ def test_wlpicture():
     
     f = wlp.image_file('r')
     f.close()
     
     f = wlp.image_file('r')
     f.close()
+
+def test_picture_parts():
+    """Check the parts, theme names and object names that partiter() yields
+    for the angelus-novus fixture."""
+    wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml')))
+    parts = list(wlp.partiter())
+    assert len(parts) == 5, "there should be %d parts of the picture" % 5
+
+    themes = set()
+    names = set()
+    for p in parts:
+        # partiter() stores theme names under 'themes'; it sets no 'motifs' key.
+        themes.update(p['themes'])
+        if p['object']:
+            names.add(p['object'])
+
+    assert themes == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % themes
+    assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
+    
+