Geometric shapes in PDF.
author Radek Czajka <rczajka@rczajka.pl>
Thu, 12 May 2022 07:49:49 +0000 (09:49 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Thu, 12 May 2022 07:49:49 +0000 (09:49 +0200)
src/librarian/pdf.py

index 31dfe1e..7b93997 100644 (file)
@@ -436,7 +436,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
+    # Cyrillic
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)