src/librarian/__init__.py

   1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 import io
   5 import os
   6 import re
   7 import shutil
   8 from tempfile import NamedTemporaryFile
   9 import urllib
  10 from lxml import etree
  11 from urllib.request import FancyURLopener
  12 from .util import makedirs
  13
  14 # Compatibility imports.
  15 from .meta.types.wluri import WLURI
  16
  17
  18 class UnicodeException(Exception):
  19     def __str__(self):
  20         """ Dirty workaround for Python Unicode handling problems. """
  21         args = self.args[0] if len(self.args) == 1 else self.args
  22         try:
  23             message = str(args)
  24         except UnicodeDecodeError:
  25             message = str(args, encoding='utf-8', errors='ignore')
  26         return message
  27
  28
  29 class ParseError(UnicodeException):
  30     pass
  31
  32
  33 class ValidationError(UnicodeException):
  34     pass
  35
  36
  37 class NoDublinCore(ValidationError):
  38     """There's no DublinCore section, and it's required."""
  39     pass
  40
  41
  42 class NoProvider(UnicodeException):
  43     """There's no DocProvider specified, and it's needed."""
  44     pass
  45
  46
  47 class XMLNamespace:
  48     '''A handy structure to repsent names in an XML namespace.'''
  49
  50     def __init__(self, uri):
  51         self.uri = uri
  52
  53     def __call__(self, tag):
  54         return '{%s}%s' % (self.uri, tag)
  55
  56     def __contains__(self, tag):
  57         return tag.startswith('{' + str(self) + '}')
  58
  59     def __repr__(self):
  60         return 'XMLNamespace(%r)' % self.uri
  61
  62     def __str__(self):
  63         return '%s' % self.uri
  64
  65
  66 class EmptyNamespace(XMLNamespace):
  67     def __init__(self):
  68         super(EmptyNamespace, self).__init__('')
  69
  70     def __call__(self, tag):
  71         return tag
  72
  73
# Some common namespaces we use.  Each one is callable: NS('tag') gives
# the qualified tag name.
XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')
RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")

# Empty namespace: WLNS('tag') returns the tag unqualified.
WLNS = EmptyNamespace()
  82
  83
  84 class DocProvider:
  85     """Base class for a repository of XML files.
  86
  87     Used for generating joined files, like EPUBs.
  88     """
  89
  90     def by_slug(self, slug):
  91         """Should return a file-like object with a WL document XML."""
  92         raise NotImplementedError
  93
  94
  95 class DirDocProvider(DocProvider):
  96     """ Serve docs from a directory of files in form <slug>.xml """
  97
  98     def __init__(self, dir_):
  99         self.dir = dir_
 100         self.files = {}
 101
 102     def by_slug(self, slug):
 103         fname = slug + '.xml'
 104         return open(os.path.join(self.dir, fname), 'rb')
 105
 106
 107 def get_resource(path):
 108     return os.path.join(os.path.dirname(__file__), path)
 109
 110
 111 class OutputFile:
 112     """Represents a file returned by one of the converters."""
 113
 114     _bytes = None
 115     _filename = None
 116
 117     def __del__(self):
 118         if self._filename:
 119             os.unlink(self._filename)
 120
 121     def __nonzero__(self):
 122         return self._bytes is not None or self._filename is not None
 123
 124     @classmethod
 125     def from_bytes(cls, bytestring):
 126         """Converter returns contents of a file as a string."""
 127
 128         instance = cls()
 129         instance._bytes = bytestring
 130         return instance
 131
 132     @classmethod
 133     def from_filename(cls, filename):
 134         """Converter returns contents of a file as a named file."""
 135
 136         instance = cls()
 137         instance._filename = filename
 138         return instance
 139
 140     def get_bytes(self):
 141         """Get file's contents as a bytestring."""
 142
 143         if self._filename is not None:
 144             with open(self._filename, 'rb') as f:
 145                 return f.read()
 146         else:
 147             return self._bytes
 148
 149     def get_file(self):
 150         """Get file as a file-like object."""
 151
 152         if self._bytes is not None:
 153             return io.BytesIO(self._bytes)
 154         elif self._filename is not None:
 155             return open(self._filename, 'rb')
 156
 157     def get_filename(self):
 158         """Get file as a fs path."""
 159
 160         if self._filename is not None:
 161             return self._filename
 162         elif self._bytes is not None:
 163             temp = NamedTemporaryFile(prefix='librarian-', delete=False)
 164             temp.write(self._bytes)
 165             temp.close()
 166             self._filename = temp.name
 167             return self._filename
 168         else:
 169             return None
 170
 171     def save_as(self, path):
 172         """Save file to a path. Create directories, if necessary."""
 173
 174         dirname = os.path.dirname(os.path.abspath(path))
 175         makedirs(dirname)
 176         shutil.copy(self.get_filename(), path)
 177
 178
 179 class URLOpener(FancyURLopener):
 180     version = 'WL Librarian (http://github.com/fnp/librarian)'
 181
 182
# Store a URLOpener instance on the urllib package as ``_urlopener``.
# NOTE(review): this looks like the Python 2-era hook for replacing the
# default opener -- confirm that Python 3's urllib still reads this attribute.
urllib._urlopener = URLOpener()