fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Epub: only attach images referenced in the text.
[librarian.git]
/
librarian
/
parser.py
diff --git a/librarian/parser.py b/librarian/parser.py
index 502192f..73ddd52 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,9 +3,11 @@
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import unicode_literals
+
from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
from librarian import RDFNS
from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
from librarian import RDFNS
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
from librarian import dcparser
from xml.parsers.expat import ExpatError
from librarian import dcparser
from xml.parsers.expat import ExpatError
@@ -14,7 +16,7 @@
from lxml.etree import XMLSyntaxError, XSLTApplyError
import os
import re
import os
import re
-from StringIO import StringIO
+import six
class WLDocument(object):
class WLDocument(object):
@@ -45,14 +47,14 @@
class WLDocument(object):
self.book_info = None
@classmethod
self.book_info = None
@classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
+ def from_bytes(cls, xml, *args, **kwargs):
+ return cls.from_file(six.BytesIO(xml), *args, **kwargs)
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
# first, prepare for parsing
@classmethod
def from_file(cls, xmlfile, *args, **kwargs):
# first, prepare for parsing
- if isinstance(xmlfile, basestring):
+ if isinstance(xmlfile, six.text_type):
file = open(xmlfile, 'rb')
try:
data = file.read()
file = open(xmlfile, 'rb')
try:
data = file.read()
@@ -61,17 +63,17 @@
class WLDocument(object):
else:
data = xmlfile.read()
else:
data = xmlfile.read()
- if not isinstance(data, unicode):
+ if not isinstance(data, six.text_type):
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
try:
parser = etree.XMLParser(remove_blank_text=False)
data = data.decode('utf-8')
data = data.replace(u'\ufeff', '')
try:
parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+ tree = etree.parse(six.BytesIO(data.encode('utf-8')), parser)
return cls(tree, *args, **kwargs)
return cls(tree, *args, **kwargs)
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+ except (ExpatError, XMLSyntaxError, XSLTApplyError) as e:
raise ParseError(e)
def swap_endlines(self):
raise ParseError(e)
def swap_endlines(self):
@@ -139,7 +141,7 @@
class WLDocument(object):
def serialize(self):
self.update_dc()
def serialize(self):
self.update_dc()
- return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
+ return etree.tostring(self.edoc, encoding='unicode', pretty_print=True)
def merge_chunks(self, chunk_dict):
unmerged = []
def merge_chunks(self, chunk_dict):
unmerged = []
@@ -150,7 +152,7 @@
class WLDocument(object):
node = self.edoc.xpath(xpath)[0]
repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
node = self.edoc.xpath(xpath)[0]
repl = etree.fromstring(u"<%s>%s</%s>" % (node.tag, data, node.tag))
node.getparent().replace(node, repl)
- except Exception, e:
+ except Exception as e:
unmerged.append(repr((key, xpath, e)))
return unmerged
unmerged.append(repr((key, xpath, e)))
return unmerged
@@ -207,15 +209,21 @@
class WLDocument(object):
def as_cover(self, cover_class=None, *args, **kwargs):
if cover_class is None:
def as_cover(self, cover_class=None, *args, **kwargs):
if cover_class is None:
- cover_class = DefaultEbookCover
+ cover_class = make_cover
return cover_class(self.book_info, *args, **kwargs).output_file()
return cover_class(self.book_info, *args, **kwargs).output_file()
+ # for debugging only
+ def latex_dir(self, *args, **kwargs):
+ kwargs['latex_dir'] = True
+ from librarian import pdf
+ return pdf.transform(self, *args, **kwargs)
+
def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
save_path = output_dir_path
if make_author_dir:
- save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8'))
- save_path = os.path.join(save_path, self.book_info.uri.slug)
+ save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8'))
+ save_path = os.path.join(save_path, self.book_info.url.slug)
if ext:
save_path += '.%s' % ext
else:
if ext:
save_path += '.%s' % ext
else: