X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/716a9ab552bffbb7df2cb31ae41ee196902c7653..261a3308d235fc252f30ae03603d0e964bb60223:/src/librarian/html.py?ds=sidebyside

diff --git a/src/librarian/html.py b/src/librarian/html.py
index 78f3dad..f0f11db 100644
--- a/src/librarian/html.py
+++ b/src/librarian/html.py
@@ -12,6 +12,7 @@ import copy
 from lxml import etree
 from librarian import XHTMLNS, ParseError, OutputFile
 from librarian import functions
+from PIL import Image
 
 from lxml.etree import XMLSyntaxError, XSLTApplyError
 import six
@@ -50,7 +51,66 @@ def transform_abstrakt(abstrakt_element):
     return re.sub(']*>', '', html)
 
 
-def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None):
+def add_image_sizes(tree, gallery_path, gallery_url, base_url):
+    """Create scaled copies of illustrations and reference them.
+
+    For every ``ilustr`` element, the image named by its ``src``
+    attribute (resolved against ``base_url``) is fetched and saved under
+    ``gallery_path`` at several widths; copies already on disk are
+    reused.  The element then gets a ``srcset`` attribute listing those
+    copies, and its ``src`` is pointed at the widest one.
+
+    ``gallery_url`` is prepended as-is to each generated file name, so
+    it is expected to end with a slash.
+    """
+    widths = [360, 600, 1200, 1800, 2400]
+
+    for i, ilustr in enumerate(tree.findall('.//ilustr')):
+        rel_path = ilustr.attrib['src']
+        img_url = six.moves.urllib.parse.urljoin(base_url, rel_path)
+
+        f = six.moves.urllib.request.urlopen(img_url)
+        # Close the response even when opening or scaling the image fails.
+        try:
+            img = Image.open(f)
+            ext = {'GIF': 'gif', 'PNG': 'png'}.get(img.format, 'jpg')
+            orig_width, orig_height = img.size
+
+            # Needed widths: predefined and original, limited by
+            # whichever is smaller, so that nothing gets upscaled.
+            max_width = min(widths[-1], orig_width)
+            img_widths = sorted(
+                w for w in set(widths + [orig_width])
+                if w <= max_width
+            )
+
+            srcset = []
+            largest_url = None
+            for w in img_widths:
+                fname = '%d.W%d.%s' % (i, w, ext)
+                fpath = os.path.join(gallery_path, fname)
+                if not os.path.exists(fpath):
+                    # round() returns a float on Python 2 and ``/`` may
+                    # truncate there; force true division and an int.
+                    height = max(
+                        1, int(round(orig_height * w / float(orig_width)))
+                    )
+                    th = img.resize((w, height))
+                    th.save(fpath)
+                th_url = gallery_url + fname
+                srcset.append('%s %dw' % (th_url, w))
+                # Widths ascend, so the last URL is the widest copy.
+                largest_url = th_url
+        finally:
+            f.close()
+
+        if srcset:
+            ilustr.attrib['srcset'] = ", ".join(srcset)
+            ilustr.attrib['src'] = largest_url
+
+
+def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None,
+              gallery_path='img/', gallery_url='img/', base_url='file://./'):
     """Transforms the WL document to XHTML.
 
     If output_filename is None, returns an XML,
@@ -75,7 +135,15 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None, css=None):
 
         if not options:
             options = {}
-        options.setdefault('gallery', "''")
+
+        try:
+            os.makedirs(gallery_path)
+        except OSError:
+            # An already existing directory is fine; anything else is not.
+            if not os.path.isdir(gallery_path):
+                raise
+
+        add_image_sizes(document.edoc, gallery_path, gallery_url, base_url)
 
         css = (
             css
@@ -183,6 +251,8 @@ def extract_fragments(input_filename):
                 while parent.get('id', None) != 'book-text':
                     cparent = copy.deepcopy(parent)
                     cparent.text = None
+                    if 'id' in cparent.attrib:
+                        del cparent.attrib['id']
                     parents.append(cparent)
                     parent = parent.getparent()
 
@@ -222,8 +292,11 @@ def extract_fragments(input_filename):
                     )
         else:
             for fragment_id in open_fragments:
+                celem = copy.copy(element)
+                if 'id' in celem.attrib:
+                    del celem.attrib['id']
                 open_fragments[fragment_id].append(
-                    event, copy.copy(element)
+                    event, celem
                 )
 
     return closed_fragments, open_fragments