X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/5329a22cd6643da657dd24546b382ada9e048b68..1e438727014e352799b4dfb8d3dfb2f8e7a58251:/librarian/pdf.py?ds=inline diff --git a/librarian/pdf.py b/librarian/pdf.py index af68a1b..1989239 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -21,7 +21,7 @@ from lxml.etree import XMLSyntaxError, XSLTApplyError from librarian.dcparser import Person from librarian.parser import WLDocument -from librarian import ParseError, DCNS +from librarian import ParseError, DCNS, get_resource from librarian import functions @@ -137,9 +137,6 @@ def parse_creator(doc): creator.getparent().insert(0, creator_parsed) -def get_resource(path): - return os.path.join(os.path.dirname(__file__), path) - def get_stylesheet(name): return get_resource(STYLESHEETS[name]) @@ -282,9 +279,11 @@ def load_including_children(provider, slug=None, uri=None, file_path=None): else: raise ValueError('Neither slug, URI nor file path provided for a book.') - document = WLDocument.from_file(f, True, - parse_dublincore=True, - preserve_lines=False) + text = f.read().decode('utf-8') + text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) + + document = WLDocument.from_string(text, True, + parse_dublincore=True) f.close()