# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
from librarian import RDFNS
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
from librarian import dcparser
from xml.parsers.expat import ExpatError
import os
import re
-from StringIO import StringIO
+import six
class WLDocument(object):
self.rdf_elem = root_elem.find(dc_path)
if self.rdf_elem is None:
- raise NoDublinCore('Document has no DublinCore - which is required.')
+ raise NoDublinCore("Document must have a '%s' element." % RDFNS('RDF'))
self.book_info = dcparser.BookInfo.from_element(
self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
self.book_info = None
# Alternate constructor for XML that is already held in memory: the input
# is wrapped in an in-memory binary stream (six.BytesIO) and passed to
# from_file() together with any extra positional/keyword arguments.
# `xml` is expected to be a byte string, since BytesIO does not accept text
# on Python 3.
# NOTE(review): this change renames from_string to from_bytes; any caller
# still using from_string would break unless an alias is kept -- confirm
# that no such callers remain.
@classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ def from_bytes(cls, xml, *args, **kwargs):
+ return cls.from_file(six.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
# Parse `xmlfile` and return cls(tree, *args, **kwargs).
# A text-string argument is opened as a path in binary mode; any other
# argument is read through its .read() method. The listed parser errors
# are re-raised as ParseError.
# first, prepare for parsing
# NOTE(review): the old check was `basestring`. six.text_type is only
# `unicode` on Python 2, so a byte-string path there now falls through to
# the .read() branch; six.string_types would keep the old behaviour --
# confirm which is intended.
- if isinstance(xmlfile, basestring):
+ if isinstance(xmlfile, six.text_type):
file = open(xmlfile, 'rb')
try:
data = file.read()
# NOTE(review): the remainder of this try block (presumably the cleanup
# that closes the file) is elided in this view.
else:
data = xmlfile.read()
# Normalise to text: bytes are decoded as UTF-8, then every U+FEFF
# (byte-order mark) character is removed.
- if not isinstance(data, unicode):
+ if not isinstance(data, six.text_type):
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
try:
# The cleaned text is re-encoded to UTF-8 bytes and parsed from an
# in-memory stream; remove_blank_text=False keeps blank text as it is.
parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+ tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
return cls(tree, *args, **kwargs)
# `except ... as e` is the form accepted by both Python 2.6+ and Python 3.
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+ except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
def swap_endlines(self):
def serialize(self):
# Return the document (self.edoc) serialised as pretty-printed text.
# self.update_dc() is called first; it is defined outside this view.
self.update_dc()
# The string 'unicode' asks lxml for a text result without referring to
# the `unicode` builtin, which does not exist on Python 3.
- return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+ return etree.tostring(self.edoc, encoding='unicode', pretty_print=True)
def merge_chunks(self, chunk_dict):
# Replace nodes of self.edoc with new content and return a list describing
# the replacements that failed.
# NOTE(review): the loop and `try` headers are elided in this view;
# presumably `key`, `xpath` and `data` come from iterating chunk_dict --
# verify against the full file.
unmerged = []
# Only the first node matched by the XPath expression is replaced.
node = self.edoc.xpath(xpath)[0]
# The new content is wrapped in an element with the same tag as the node
# being replaced, parsed, and swapped in under the node's parent.
repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
# Any failure is recorded rather than raised.
- except Exception, e:
+ except Exception as e:
unmerged.append(repr((key, xpath, e)))
return unmerged
def as_cover(self, cover_class=None, *args, **kwargs):
# Build a cover from self.book_info and return the result of its
# output_file() call. `cover_class` is called with self.book_info plus the
# extra positional/keyword arguments; when it is None a default is used.
# NOTE(review): the default changes from the DefaultEbookCover class to
# make_cover, which is used here as a callable with the same call
# signature -- confirm that its result provides output_file().
if cover_class is None:
- cover_class = DefaultEbookCover
+ cover_class = make_cover
return cover_class(self.book_info, *args, **kwargs).output_file()
# Delegates to librarian.pdf.transform with the keyword latex_dir forced to
# True (any caller-supplied value for that key is overwritten); all other
# arguments are passed through unchanged. The pdf module is imported inside
# the method rather than at module level.
+ # for debugging only
+ def latex_dir(self, *args, **kwargs):
+ kwargs['latex_dir'] = True
+ from librarian import pdf
+ return pdf.transform(self, *args, **kwargs)
+
def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
- save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
- save_path = os.path.join(save_path, self.book_info.uri.slug)
+ save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8'))
+ save_path = os.path.join(save_path, self.book_info.url.slug)
if ext:
save_path += '.%s' % ext
else: