librarian/__init__.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 import re
   7 import urllib
   8 from .utils import XMLNamespace
   9
  10
  11 class UnicodeException(Exception):
  12     def __str__(self):
  13         """ Dirty workaround for Python Unicode handling problems. """
  14         return unicode(self).encode('utf-8')
  15
  16     def __unicode__(self):
  17         """ Dirty workaround for Python Unicode handling problems. """
  18         args = self.args[0] if len(self.args) == 1 else self.args
  19         try:
  20             message = unicode(args)
  21         except UnicodeDecodeError:
  22             message = unicode(args, encoding='utf-8', errors='ignore')
  23         return message
  24
  25
  26 class ParseError(UnicodeException):
  27     pass
  28
  29
  30 class ValidationError(UnicodeException):
  31     pass
  32
  33
  34 # was deleted, but still used???
  35 class NoDublinCore(ValidationError):
  36     pass
  37
  38
  39 class BuildError(Exception):
  40     pass
  41
  42
  43 class EmptyNamespace(XMLNamespace):
  44     def __init__(self):
  45         super(EmptyNamespace, self).__init__('')
  46
  47     def __call__(self, tag):
  48         return tag
  49
  50 # some common namespaces we use
  51 RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
  52 DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
  53 XINS = XMLNamespace("http://www.w3.org/2001/XInclude")
  54 XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
  55 NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/")
  56 OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
  57
  58 SSTNS = XMLNamespace('http://nowoczesnapolska.org.pl/sst#')
  59
  60
  61 class WLURI(object):
  62     """Represents a WL URI. Extracts slug from it."""
  63     slug = None
  64
  65     example = 'http://wolnelektury.pl/katalog/lektura/template/'
  66     _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/(?P<slug>[-a-z0-9]+)/?$')
  67
  68     def __init__(self, uri):
  69         uri = unicode(uri)
  70         self.uri = uri
  71         self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
  72
  73     @classmethod
  74     def strict(cls, uri):
  75         match = cls._re_wl_uri.match(uri)
  76         if not match:
  77             raise ValidationError(u'Invalid URI (%s). Should match: %s' % (
  78                         uri, cls._re_wl_uri.pattern))
  79         return cls(uri)
  80
  81     @classmethod
  82     def from_slug(cls, slug):
  83         """Contructs an URI from slug.
  84
  85         >>> WLURI.from_slug('a-slug').uri
  86         u'http://wolnelektury.pl/katalog/lektura/a-slug/'
  87
  88         """
  89         uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
  90         return cls(uri)
  91
  92     def __unicode__(self):
  93         return self.uri
  94
  95     def __str__(self):
  96         return self.uri
  97
  98     def __eq__(self, other):
  99         return self.slug == other.slug
 100
 101
 102 class URLOpener(urllib.FancyURLopener):
 103     version = 'FNP Librarian (http://git.nowoczesnapolska.org.pl/?p=librarian.git)'
 104 urllib._urlopener = URLOpener()