Don't validate <uwaga> contents
[librarian.git] / src / librarian / pdf.py
index cad66a4..b32395f 100644 (file)
@@ -284,6 +284,14 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         elif package_available('morefloats', 'maxfloats=19'):
             root.set('morefloats', 'new')
 
+        if customizations is None:
+            customizations = []
+        else:
+            customizations = list(customizations)
+
+        if book_info.endnotes:
+            customizations.append('endnotes')
+
         # add customizations
         if customizations is not None:
             root.set('customizations', u','.join(customizations))
@@ -306,6 +314,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         fix_hanging(document.edoc)
         fix_tables(document.edoc)
         mark_subauthors(document.edoc)
+        document.fix_pa_akap()
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
@@ -320,8 +329,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
                 base_url,
                 ilustr.get('src')
             )
-            with six.moves.urllib.request.urlopen(url) as imgfile:
-                img = Image.open(imgfile)
+            imgfile = six.moves.urllib.request.urlopen(url)
+            img = Image.open(imgfile)
 
             th_format, ext, media_type = {
                 'GIF': ('GIF', 'gif', 'image/gif'),
@@ -338,6 +347,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
             th.save(os.path.join(temp, file_name))
             ilustr.set('src', file_name)
 
+            imgfile.close()
+
         for sponsor in book_info.sponsors:
             ins = etree.Element("data-sponsor", name=sponsor)
             logo = sponsor_logo(sponsor)
@@ -414,7 +425,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
     """
 
     if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
         text = f.read().decode('utf-8')
         f.close()
     elif wldoc is not None:
@@ -425,7 +436,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
+    # Cyrillic.
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)