X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/fc3d3f81bf77754e36543c8604bfa31fc5d0d2d6..1ce2c1255aee01fab9940fc26d251767bbf8c960:/librarian/pdf.py diff --git a/librarian/pdf.py b/librarian/pdf.py index 2d45372..07272f2 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -282,7 +282,10 @@ def load_including_children(provider, slug=None, uri=None, file_path=None): else: raise ValueError('Neither slug, URI nor file path provided for a book.') - document = WLDocument.from_file(f, True, + text = f.read().decode('utf-8') + text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) + + document = WLDocument.from_string(text, True, parse_dublincore=True) f.close()