librarian/__init__.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 import os
   7 import re
   8 import urllib
   9 from .utils import XMLNamespace
  10
  11
  12 class UnicodeException(Exception):
  13     def __str__(self):
  14         """ Dirty workaround for Python Unicode handling problems. """
  15         return unicode(self).encode('utf-8')
  16
  17     def __unicode__(self):
  18         """ Dirty workaround for Python Unicode handling problems. """
  19         args = self.args[0] if len(self.args) == 1 else self.args
  20         try:
  21             message = unicode(args)
  22         except UnicodeDecodeError:
  23             message = unicode(args, encoding='utf-8', errors='ignore')
  24         return message
  25
  26 class ParseError(UnicodeException):
  27     pass
  28
  29 class ValidationError(UnicodeException):
  30     pass
  31
  32
  33 class EmptyNamespace(XMLNamespace):
  34     def __init__(self):
  35         super(EmptyNamespace, self).__init__('')
  36
  37     def __call__(self, tag):
  38         return tag
  39
  40 # some common namespaces we use
  41 RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
  42 DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
  43 XINS = XMLNamespace("http://www.w3.org/2001/XInclude")
  44 XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
  45 NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/")
  46 OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
  47
  48 SSTNS = XMLNamespace('http://nowoczesnapolska.org.pl/sst#')
  49
  50
  51 class WLURI(object):
  52     """Represents a WL URI. Extracts slug from it."""
  53     slug = None
  54
  55     example = 'http://wolnelektury.pl/katalog/lektura/template/'
  56     _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/'
  57             '(?P<slug>[-a-z0-9]+)/?$')
  58
  59     def __init__(self, uri):
  60         uri = unicode(uri)
  61         self.uri = uri
  62         self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
  63
  64     @classmethod
  65     def strict(cls, uri):
  66         match = cls._re_wl_uri.match(uri)
  67         if not match:
  68             raise ValidationError(u'Invalid URI (%s). Should match: %s' % (
  69                         uri, cls._re_wl_uri.pattern))
  70         return cls(uri)
  71
  72     @classmethod
  73     def from_slug(cls, slug):
  74         """Contructs an URI from slug.
  75
  76         >>> WLURI.from_slug('a-slug').uri
  77         u'http://wolnelektury.pl/katalog/lektura/a-slug/'
  78
  79         """
  80         uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
  81         return cls(uri)
  82
  83     def __unicode__(self):
  84         return self.uri
  85
  86     def __str__(self):
  87         return self.uri
  88
  89     def __eq__(self, other):
  90         return self.slug == other.slug
  91
  92
class URLOpener(urllib.FancyURLopener):
    """URL opener that identifies itself as FNP Librarian."""
    # Overrides the opener's ``version`` attribute, which urllib documents
    # as the user agent of the opener object.
    version = 'FNP Librarian (http://git.nowoczesnapolska.org.pl/?p=librarian.git)'
# Install an instance as urllib's module-wide opener; this happens as a
# side effect of importing this module.  NOTE(review): ``_urlopener`` is the
# hook the Python 2 urllib docs show for urlopen()/urlretrieve() -- confirm
# before porting to another urllib API.
urllib._urlopener = URLOpener()
  96