From b7ab4f2e27b8fd7fb89f550ed60e6af39d350712 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Wed, 5 Feb 2025 14:47:13 +0100 Subject: [PATCH] Add Hebrew support in pdfs. --- src/librarian/pdf.py | 2 ++ src/librarian/pdf/wl.cls | 10 ++++++++++ src/librarian/pdf/wl2tex.xslt | 13 +++++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py index de09755..647b18f 100644 --- a/src/librarian/pdf.py +++ b/src/librarian/pdf.py @@ -463,6 +463,8 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text) # Geometric shapes. text = re.sub(r"([\u25a0-\u25ff]+)", r"\1", text) + # Hebrew + text = re.sub(r"([\u0590-\u05ff]+)", r"\1", text) document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider) diff --git a/src/librarian/pdf/wl.cls b/src/librarian/pdf/wl.cls index bf1387c..ef12bff 100644 --- a/src/librarian/pdf/wl.cls +++ b/src/librarian/pdf/wl.cls @@ -173,6 +173,13 @@ Scale=0.85, LetterSpace=-1.0 ] {DejaVu Serif} +\newfontfamily\fallback[ +SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}}, +Numbers=OldStyle, +Scale=0.85, +LetterSpace=-1.0 +] {Open Sans} + \ifenablewlfont \setmainfont [ %ExternalLocation, @@ -788,17 +795,20 @@ Letters={Uppercase} \newcommand{\pe}[1]{\NoCaseChange{\endnote{#1 [przypis edytorski]}}} \newcommand{\pr}[1]{\NoCaseChange{\endnote{#1 [przypis redakcyjny]}}} \newcommand{\pt}[1]{\NoCaseChange{\endnote{#1 [przypis tłumacza]}}} + \newcommand{\ptrad}[1]{\NoCaseChange{\endnote{#1 [przypis z tradycji]}}} \else \newcommand{\pa}[1]{\NoCaseChange{\footnote{#1 [przypis autorski]}}} \newcommand{\pe}[1]{\NoCaseChange{\footnote{#1 [przypis edytorski]}}} \newcommand{\pr}[1]{\NoCaseChange{\footnote{#1 [przypis redakcyjny]}}} \newcommand{\pt}[1]{\NoCaseChange{\footnote{#1 [przypis tłumacza]}}} + \newcommand{\ptrad}[1]{\NoCaseChange{\footnote{#1 [przypis z tradycji]}}} \fi \else \newcommand{\pa}[1]{} \newcommand{\pe}[1]{} \newcommand{\pr}[1]{} \newcommand{\pt}[1]{} + \newcommand{\ptrad}[1]{} \fi \newcommand{\mat}[1]{$#1$} diff --git a/src/librarian/pdf/wl2tex.xslt b/src/librarian/pdf/wl2tex.xslt index 3388154..78e9c2b 100644 --- a/src/librarian/pdf/wl2tex.xslt +++ b/src/librarian/pdf/wl2tex.xslt @@ -448,7 +448,7 @@ + match="pa|pe|pr|pt|ptrad|mat|didask_tekst|slowo_obce|wyroznienie|osoba|indeks_dolny|wieksze_odstepy"> @@ -546,7 +546,7 @@ - + @@ -557,6 +557,9 @@ + + + @@ -658,6 +661,12 @@ + + + + + + -- 2.20.1