fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Marquise: scale title text in all layouts.
[librarian.git]
/
src
/
librarian
/
pdf.py
diff --git
a/src/librarian/pdf.py
b/src/librarian/pdf.py
index
31dfe1e
..
b32395f
100644
(file)
--- a/
src/librarian/pdf.py
+++ b/
src/librarian/pdf.py
@@
-425,7
+425,7
@@
def load_including_children(wldoc=None, provider=None, uri=None):
"""
if uri and provider:
"""
if uri and provider:
- f = provider.by_uri(uri)
+ f = provider.by_slug(uri.slug)
text = f.read().decode('utf-8')
f.close()
elif wldoc is not None:
text = f.read().decode('utf-8')
f.close()
elif wldoc is not None:
@@
-436,7
+436,10
@@
def load_including_children(wldoc=None, provider=None, uri=None):
'Neither a WLDocument, nor provider and URI were provided.'
)
'Neither a WLDocument, nor provider and URI were provided.'
)
+ # Cyrillic.
text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+ # Geometric shapes.
+ text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
document = WLDocument.from_bytes(text.encode('utf-8'),
parse_dublincore=True, provider=provider)
document = WLDocument.from_bytes(text.encode('utf-8'),
parse_dublincore=True, provider=provider)