X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/8df0bad21a18b56933084288320fa35e2a3fce6f..721dffe62ca6a37e6b496907af4eb007d59bff1c:/src/librarian/epub.py diff --git a/src/librarian/epub.py b/src/librarian/epub.py index e2cdae7..a3931b5 100644 --- a/src/librarian/epub.py +++ b/src/librarian/epub.py @@ -30,6 +30,7 @@ functions.reg_person_name() def squeeze_whitespace(s): + return s return re.sub(b'\\s+', b' ', s) @@ -62,33 +63,6 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph): parent.tail = newt -def inner_xml(node): - """ returns node's text and children as a string - - >>> print(inner_xml(etree.fromstring('xyz'))) - xyz - """ - - nt = node.text if node.text is not None else '' - return ''.join( - [nt] + [etree.tostring(child, encoding='unicode') for child in node] - ) - - -def set_inner_xml(node, text): - """ sets node's text and children from a string - - >>> e = etree.fromstring('bxx') - >>> set_inner_xml(e, 'xyz') - >>> print(etree.tostring(e, encoding='unicode')) - xyz - """ - - p = etree.fromstring('%s' % text) - node.text = p.text - node[:] = p[:] - - def node_name(node): """ Find out a node's name @@ -377,17 +351,8 @@ def remove_empty_lists_from_toc(toc): toc[i] = e[0] -def transform(wldoc, verbose=False, style=None, - sample=None, cover=None, flags=None, hyphenate=False, - base_url='file://./', output_type='epub'): - """ produces a EPUB file - - sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover factory or True for default - flags: less-advertising, without-fonts, working-copy - """ - def transform_file(wldoc, chunk_counter=1, first=True, sample=None): +def transform_file(wldoc, chunk_counter=1, first=True, sample=None, hyphenate=False, output_type='epub', spine=None, output=None, annotations=None): """ processes one input file and proceeds to its children """ replace_characters(wldoc.edoc.getroot()) @@ -518,12 +483,27 @@ def transform(wldoc, verbose=False, style=None, for child in wldoc.parts(): child_toc, chunk_counter, chunk_chars, sample = transform_file( - child, chunk_counter, first=False, sample=sample) + child, chunk_counter, first=False, sample=sample, + hyphenate=hyphenate, output_type=output_type, + spine=spine, output=output, annotations=annotations, + ) toc[-1][1].extend(child_toc) chars = chars.union(chunk_chars) return toc, chunk_counter, chars, sample + +def transform(wldoc, verbose=False, style=None, + sample=None, cover=None, flags=None, hyphenate=False, + base_url='file://./', output_type='epub'): + """ produces a EPUB file + + sample=n: generate sample e-book (with at least n paragraphs) + cover: a cover.Cover factory or True for default + flags: less-advertising, without-fonts, working-copy + """ + + document = deepcopy(wldoc) del wldoc @@ -584,8 +564,8 @@ def transform(wldoc, verbose=False, style=None, base_url, ilustr.get('src') ) - with six.moves.urllib.request.urlopen(url) as imgfile: - img = Image.open(imgfile) + imgfile = six.moves.urllib.request.urlopen(url) + img = Image.open(imgfile) th_format, ext, media_type = { 'GIF': ('GIF', 'gif', 'image/gif'), @@ -598,6 +578,8 @@ def transform(wldoc, verbose=False, style=None, else: th = img.resize((width, round(width * img.size[1] / img.size[0]))) + imgfile.close() + buffer = six.BytesIO() th.save(buffer, format=th_format) @@ -677,7 +659,11 @@ def transform(wldoc, verbose=False, style=None, annotations = etree.Element('annotations') - toc, chunk_counter, chars, sample = transform_file(document, sample=sample) + toc, chunk_counter, chars, sample = transform_file( + document, sample=sample, + hyphenate=hyphenate, output_type=output_type, + spine=spine, output=output, annotations=annotations + ) output.toc = toc[0][1] # Last modifications in container files and EPUB creation