import os
import re
import shutil
+import urllib
class UnicodeException(Exception):
    """Base exception that renders its message as bytes or unicode.

    Workaround for Python 2 Unicode handling problems: ``str()`` yields
    UTF-8 encoded bytes and ``unicode()`` yields a unicode string, even
    when the exception was constructed with a non-ASCII byte string.
    """

    def __str__(self):
        """Return the message encoded as a UTF-8 byte string."""
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        """Return the message as a unicode string.

        A single constructor argument is rendered on its own; any other
        number of arguments is rendered as the whole ``args`` tuple.
        """
        args = self.args[0] if len(self.args) == 1 else self.args
        try:
            message = unicode(args)
        except UnicodeDecodeError:
            # unicode(obj, encoding, errors) accepts only byte strings and
            # buffers and raises TypeError for anything else (for example
            # another exception carrying non-ASCII bytes).  Going through
            # str() first gives the same result for byte strings and keeps
            # the fallback working for other objects.
            message = str(args).decode('utf-8', 'ignore')
        return message
+
class ParseError(UnicodeException):
    """Exception type for parse errors; adds nothing to UnicodeException."""
class ValidationError(UnicodeException):
    """Exception type for validation errors; adds nothing to UnicodeException."""
class NoDublinCore(ValidationError):
    """The required DublinCore section is missing."""
class NoProvider(UnicodeException):
    """A DocProvider is needed but none was specified."""
+
class NoSponsorProvider(UnicodeException):
    """There's no SponsorProvider specified, and it's needed."""
    pass
# Default slug value.
slug = None
# Sample URI in the shape accepted by _re_wl_uri.
example = 'http://wolnelektury.pl/katalog/lektura/template/'
# Dots in the host name are escaped so that only a literal
# "wolnelektury.pl" (optionally prefixed with "www.") is accepted; an
# unescaped "." would match any character in that position.
_re_wl_uri = re.compile(
    r'http://(www\.)?wolnelektury\.pl/katalog/lektura/'
    r'(?P<slug>[-a-z0-9]+)/?$')
def __init__(self, uri):
def strict(cls, uri):
    """Build an instance from `uri` after checking it against `_re_wl_uri`.

    Raises ValidationError when `uri` does not match the pattern.
    """
    if cls._re_wl_uri.match(uri) is None:
        raise ValidationError(u'Invalid URI (%s). Should match: %s' % (
            uri, cls._re_wl_uri.pattern))
    return cls(uri)
@classmethod
fname = slug + '.xml'
return open(os.path.join(self.dir, fname))
class SponsorProvider(object):
    """Base class for by-name lookups; subclasses implement `by_name`."""

    class NoLogo(UnicodeException):
        """Error type reachable as ``NoLogo`` on the provider class."""

    def by_name(self, name):
        """Look up `name`; not implemented in this base class."""
        raise NotImplementedError
+
class DirSponsorProvider(SponsorProvider):
    """Sponsor provider that looks for logo files in a single directory."""

    # File extensions tried, in this order, for every lookup.
    exts = ["png", "jpg", "jpeg", "gif"]

    def __init__(self, dir_):
        """Remember `dir_` as the directory to search."""
        self.dir = dir_

    def by_name(self, name):
        """Return an OutputFile for the first existing logo file of `name`.

        Slashes in `name` are replaced with underscores to form the file
        base name.  Raises `NoLogo` when no candidate file exists.
        """
        base = name.replace("/", "_")
        for ext in self.exts:
            candidate = os.path.join(self.dir, "%s.%s" % (base, ext))
            if os.path.exists(candidate):
                return OutputFile.from_filename(candidate)
        raise self.NoLogo('Missing sponsor logo: "%s.[%s]"' % (base, ",".join(self.exts)))
import lxml.etree as etree
import dcparser
if not os.path.isdir(dirname):
os.makedirs(dirname)
shutil.copy(self.get_filename(), path)
+
+
class URLOpener(urllib.FancyURLopener):
    """FancyURLopener subclass carrying a project-specific `version` string."""

    version = 'FNP Librarian (http://github.com/fnp/librarian)'


# Install an instance as urllib's module-level opener.
urllib._urlopener = URLOpener()