Epub: only attach images referenced in the text.
[librarian.git] / librarian / picture.py
index dbbb4de..10d2ae7 100644 (file)
@@ -1,14 +1,16 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-from dcparser import Field, WorkInfo, DCNS
+from __future__ import unicode_literals
+
+from operator import and_
+
+from .dcparser import Field, WorkInfo, DCNS
 from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
 from xml.parsers.expat import ExpatError
 from os import path
 from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
 from xml.parsers.expat import ExpatError
 from os import path
-from StringIO import StringIO
 from lxml import etree
 from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
 import re
 from lxml import etree
 from lxml.etree import (XMLSyntaxError, XSLTApplyError, Element)
 import re
-from functools import *
-from operator import *
+import six
 
 
 class WLPictureURI(WLURI):
 
 
 class WLPictureURI(WLURI):
@@ -96,16 +98,17 @@ class WLPicture(object):
             self.picture_info = PictureInfo.from_element(self.rdf_elem)
         else:
             self.picture_info = None
             self.picture_info = PictureInfo.from_element(self.rdf_elem)
         else:
             self.picture_info = None
+        self.frame = None
 
     @classmethod
 
     @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
+    def from_bytes(cls, xml, *args, **kwargs):
+        return cls.from_file(six.BytesIO(xml), *args, **kwargs)
 
     @classmethod
     def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
 
         # first, prepare for parsing
 
     @classmethod
     def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
 
         # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
+        if isinstance(xmlfile, six.text_type):
             file = open(xmlfile, 'rb')
             try:
                 data = file.read()
             file = open(xmlfile, 'rb')
             try:
                 data = file.read()
@@ -114,7 +117,7 @@ class WLPicture(object):
         else:
             data = xmlfile.read()
 
         else:
             data = xmlfile.read()
 
-        if not isinstance(data, unicode):
+        if not isinstance(data, six.text_type):
             data = data.decode('utf-8')
 
         data = data.replace(u'\ufeff', '')
             data = data.decode('utf-8')
 
         data = data.replace(u'\ufeff', '')
@@ -125,12 +128,12 @@ class WLPicture(object):
 
         try:
             parser = etree.XMLParser(remove_blank_text=False)
 
         try:
             parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+            tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
 
             me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
             me.load_frame_info()
             return me
 
             me = cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
             me.load_frame_info()
             return me
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+        except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
             raise ParseError(e)
 
     @property
             raise ParseError(e)
 
     @property
@@ -151,7 +154,7 @@ class WLPicture(object):
         return self.image_store.path(self.slug, self.mime_type)
 
     def image_file(self, *args, **kwargs):
         return self.image_store.path(self.slug, self.mime_type)
 
     def image_file(self, *args, **kwargs):
-        return open(self.image_path, *args, **kwargs)
+        return open(self.image_path, 'rb', *args, **kwargs)
 
     def get_sem_coords(self, sem):
         area = sem.find("div[@type='rect']")
 
     def get_sem_coords(self, sem):
         area = sem.find("div[@type='rect']")
@@ -183,7 +186,7 @@ class WLPicture(object):
             pd['coords'] = coords
 
             def want_unicode(x):
             pd['coords'] = coords
 
             def want_unicode(x):
-                if not isinstance(x, unicode):
+                if not isinstance(x, six.text_type):
                     return x.decode('utf-8')
                 else:
                     return x
                     return x.decode('utf-8')
                 else:
                     return x