1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
8 from tempfile import NamedTemporaryFile
10 from lxml import etree
11 from urllib.request import FancyURLopener
12 from .util import makedirs
14 # Compatibility imports.
15 from .meta.types.wluri import WLURI
18 class UnicodeException(Exception):
20 """ Dirty workaround for Python Unicode handling problems. """
21 args = self.args[0] if len(self.args) == 1 else self.args
24 except UnicodeDecodeError:
25 message = str(args, encoding='utf-8', errors='ignore')
29 class ParseError(UnicodeException):
33 class ValidationError(UnicodeException):
class NoDublinCore(ValidationError):
    """Validation error: the required DublinCore section is absent."""
class NoProvider(UnicodeException):
    """A DocProvider was needed, but none has been specified."""
'''A handy structure to represent names in an XML namespace.'''
50 def __init__(self, uri):
def __call__(self, tag):
    """Qualify `tag` with this namespace, producing ``{uri}tag``."""
    # Both pieces are rendered through %s, i.e. converted with str().
    parts = (self.uri, tag)
    return '{%s}%s' % parts
def __contains__(self, tag):
    """Tell whether `tag` begins with this namespace's ``{...}`` prefix."""
    # The prefix is built from str(self), so it follows whatever the
    # object's __str__ returns rather than reading an attribute directly.
    prefix = '{' + str(self) + '}'
    return tag.startswith(prefix)
60 return 'XMLNamespace(%r)' % self.uri
63 return '%s' % self.uri
66 class EmptyNamespace(XMLNamespace):
68 super(EmptyNamespace, self).__init__('')
70 def __call__(self, tag):
# Namespaces used throughout the package, one constant per vocabulary.
XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace')  # xml: namespace
RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')  # RDF syntax
DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')  # Dublin Core elements 1.1
XHTMLNS = XMLNamespace('http://www.w3.org/1999/xhtml')  # XHTML
PLMETNS = XMLNamespace('http://dl.psnc.pl/schemas/plmet/')  # PLMET schema

# Namespace object built without a URI argument.
# NOTE(review): presumably used for the project's own un-namespaced tags — confirm.
WLNS = EmptyNamespace()
85 """Base class for a repository of XML files.
87 Used for generating joined files, like EPUBs.
def by_slug(self, slug):
    """Return a file-like object holding the WL document XML for `slug`.

    This base version is only a placeholder: it always raises
    NotImplementedError, so subclasses have to provide the real lookup.
    """
    raise NotImplementedError()
95 class DirDocProvider(DocProvider):
96 """ Serve docs from a directory of files in form <slug>.xml """
98 def __init__(self, dir_):
def by_slug(self, slug):
    """Open ``<slug>.xml`` from this provider's directory in binary mode.

    The open file object is handed back as-is; closing it is left to
    the caller.
    """
    xml_path = os.path.join(self.dir, slug + '.xml')
    return open(xml_path, 'rb')
def get_resource(path):
    """Return `path` joined onto the directory that contains this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, path)
112 """Represents a file returned by one of the converters."""
119 os.unlink(self._filename)
def __bool__(self):
    """Report whether this object holds any content.

    Returns True when either the in-memory bytes or the backing file
    name is set (an empty bytestring still counts as set), and False
    when both are None.

    Python 3 consults ``__bool__`` for truth testing. ``__nonzero__``
    was the Python 2 spelling and is ignored there, which made every
    instance truthy regardless of its contents.
    """
    return self._bytes is not None or self._filename is not None

# Keep the old name available for code that calls it explicitly.
__nonzero__ = __bool__
125 def from_bytes(cls, bytestring):
126 """Converter returns contents of a file as a string."""
129 instance._bytes = bytestring
133 def from_filename(cls, filename):
134 """Converter returns contents of a file as a named file."""
137 instance._filename = filename
141 """Get file's contents as a bytestring."""
143 if self._filename is not None:
144 with open(self._filename, 'rb') as f:
150 """Get file as a file-like object."""
152 if self._bytes is not None:
153 return io.BytesIO(self._bytes)
154 elif self._filename is not None:
155 return open(self._filename, 'rb')
157 def get_filename(self):
158 """Get file as a fs path."""
160 if self._filename is not None:
161 return self._filename
162 elif self._bytes is not None:
163 temp = NamedTemporaryFile(prefix='librarian-', delete=False)
164 temp.write(self._bytes)
166 self._filename = temp.name
167 return self._filename
171 def save_as(self, path):
172 """Save file to a path. Create directories, if necessary."""
174 dirname = os.path.dirname(os.path.abspath(path))
176 shutil.copy(self.get_filename(), path)
class URLOpener(FancyURLopener):
    """FancyURLopener subclass carrying the WL Librarian version string."""

    # NOTE(review): FancyURLopener is documented as deprecated since
    # Python 3.3 — confirm it exists on every interpreter this must run on.
    # Overrides the base class's `version` attribute.
    version = 'WL Librarian (http://github.com/fnp/librarian)'


# Store one opener instance on the urllib package as `_urlopener`.
# NOTE(review): presumably meant to become the process-wide default
# opener — confirm that anything still reads this attribute on Python 3.
setattr(urllib, '_urlopener', URLOpener())