turns out properties are not callable
[librarian.git] / librarian / picture.py
index 7830528..b665a34 100644 (file)
@@ -1,5 +1,5 @@
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
+# -*- coding: utf-8 -*-
+from dcparser import Field, WorkInfo, DCNS
 from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
 from xml.parsers.expat import ExpatError
 from os import path
 from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
 from xml.parsers.expat import ExpatError
 from os import path
@@ -10,19 +10,16 @@ import re
 
 
 class WLPictureURI(WLURI):
 
 
 class WLPictureURI(WLURI):
-    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$')
-
-    def __init__(self, *args, **kw):
-        super(WLPictureURI, self).__init__(*args, **kw)
+    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/(?P<slug>[-a-z0-9]+)/?$')
 
     @classmethod
 
     @classmethod
-    def from_slug_and_lang(cls, slug, lang):
+    def from_slug(cls, slug):
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
         uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
         return cls(uri)
 
-    def filename_stem(self):
-        return self.slug
+
+def as_wlpictureuri_strict(text):
+    """
+    Return the result of ``WLPictureURI.strict(text)``.
+
+    Module-level wrapper so that a plain function can be handed to
+    ``Field(..., strict=...)`` for the picture URL field.
+    """
+    return WLPictureURI.strict(text)
 
 
 class PictureInfo(WorkInfo):
 
 
 class PictureInfo(WorkInfo):
@@ -30,24 +27,24 @@ class PictureInfo(WorkInfo):
     Dublin core metadata for a picture
     """
     FIELDS = (
     Dublin core metadata for a picture
     """
     FIELDS = (
-        Field(DCNS('format.dimensions.digital'), 'dimensions', required=False),
-        Field(DCNS('format.dimensions.original'), 'dimensions_original', required=False),
-        Field(DCNS('format.physical'), 'physical', required=False),
+        Field(DCNS('language'), 'language', required=False),
+        Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
+        Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
+
+        Field(DCNS('format.dimensions'), 'dimensions', required=False),
+        Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
+        Field(DCNS('description.medium'), 'medium', required=False),
+        Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
         Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI),
-        )
+        Field(DCNS('identifier.url'), 'url', WLPictureURI, strict=as_wlpictureuri_strict),
+    )
 
 
-    def validate(self):
-        """
-        WorkInfo has a language validation code only, which we do not need.
-        """
-        pass
-    
 
 class ImageStore(object):
 
 class ImageStore(object):
-    EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
-            'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
-            'aiff', 'wbmp', 'xbm']
+    # File extensions of the supported image types.  The entries appear to
+    # parallel MIME below position by position (TODO confirm against the
+    # lookup code), so the list must not lose or merge an element.
+    # NOTE: the comma after 'bmp' is essential -- without it Python joins the
+    # adjacent literals into the single string 'bmptiff', which drops the
+    # 'bmp' entry and shifts every following extension by one.
+    EXT = [
+        'gif', 'jpeg', 'png', 'swf', 'psd', 'bmp',
+        'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
+        'aiff', 'wbmp', 'xbm']
     MIME = ['image/gif', 'image/jpeg', 'image/png',
             'application/x-shockwave-flash', 'image/psd', 'image/bmp',
             'image/tiff', 'image/tiff', 'application/octet-stream',
     MIME = ['image/gif', 'image/jpeg', 'image/png',
             'application/x-shockwave-flash', 'image/psd', 'image/bmp',
             'image/tiff', 'image/tiff', 'application/octet-stream',
@@ -56,7 +53,7 @@ class ImageStore(object):
 
     def __init__(self, dir_):
         self.dir = dir_
 
     def __init__(self, dir_):
         self.dir = dir_
-        return super(ImageStore, self).__init__()
+        super(ImageStore, self).__init__()
 
     def path(self, slug, mime_type):
         """
 
     def path(self, slug, mime_type):
         """
@@ -97,20 +94,16 @@ class WLPicture(object):
         else:
             self.picture_info = None
 
         else:
             self.picture_info = None
 
-    @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
-
     @classmethod
     def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
 
         # first, prepare for parsing
         if isinstance(xmlfile, basestring):
     @classmethod
     def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
 
         # first, prepare for parsing
         if isinstance(xmlfile, basestring):
-            file = open(xmlfile, 'rb')
+            xmlfile = open(xmlfile, 'rb')
             try:
             try:
-                data = file.read()
+                data = xmlfile.read()
             finally:
             finally:
-                file.close()
+                xmlfile.close()
         else:
             data = xmlfile.read()
 
         else:
             data = xmlfile.read()
 
@@ -124,7 +117,7 @@ class WLPicture(object):
             image_store = ImageStore(path.dirname(xmlfile.name))
 
         try:
             image_store = ImageStore(path.dirname(xmlfile.name))
 
         try:
-            parser = etree.XMLParser(remove_blank_text=False)
+            parser = etree.XMLParser()
             tree = etree.parse(StringIO(data.encode('utf-8')), parser)
 
             return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
             tree = etree.parse(StringIO(data.encode('utf-8')), parser)
 
             return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
@@ -149,3 +142,25 @@ class WLPicture(object):
 
     def image_file(self, *args, **kwargs):
         return open(self.image_path, *args, **kwargs)
 
     def image_file(self, *args, **kwargs):
         return open(self.image_path, *args, **kwargs)
+
+    def partiter(self):
+        """
+        Iterate over the parts of this picture, yielding metadata for each.
+
+        Every ``div`` element found under ``self.edoc`` produces one dict with
+        the keys:
+
+        * ``'type'``   -- the ``type`` attribute of the ``div``,
+        * ``'themes'`` -- list of theme names collected from enclosing
+          ``sem`` elements whose ``type`` is ``'theme'``,
+        * ``'object'`` -- the ``object`` attribute of the nearest enclosing
+          ``sem`` element whose ``type`` is ``'object'``, or None,
+        * ``'coords'`` -- only when the type is ``'area'``: the pair
+          ``((x1, y1), (x2, y2))`` converted to integers.
+        """
+        for part in self.edoc.iter("div"):
+            pd = {'themes': [], 'object': None, 'type': part.get('type')}
+            if pd['type'] == 'area':
+                # Coordinates are read from the x1/y1/x2/y2 attributes.
+                pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
+                                (int(part.get('x2')), int(part.get('y2'))))
+
+            # Walk up through all ancestors of the div, innermost first.
+            parent = part
+            while True:
+                parent = parent.getparent()
+                if parent is None:
+                    # No further ancestor -- stop walking up.
+                    break
+                if parent.tag == 'sem':
+                    if parent.get('type') == 'theme':
+                        # The 'theme' attribute is a comma-separated list;
+                        # themes of every enclosing theme element accumulate.
+                        # NOTE(review): if the attribute is absent, get()
+                        # presumably returns None and unicode() turns it into
+                        # the literal string 'None' -- confirm.
+                        pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
+                    elif parent.get('type') == 'object' and pd['object'] is None:
+                        # Only the first (innermost) object element is kept.
+                        pd['object'] = parent.get('object')
+            yield pd