X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/fe763827605d48b2ad27d313cbcd84b588066070..e9aeedc51047d8d5e9e45c5253c776f8994da965:/src/librarian/epub.py?ds=sidebyside
diff --git a/src/librarian/epub.py b/src/librarian/epub.py
index fc5ee16..a3931b5 100644
--- a/src/librarian/epub.py
+++ b/src/librarian/epub.py
@@ -15,6 +15,7 @@ from mimetypes import guess_type
from ebooklib import epub
from lxml import etree
+from PIL import Image
from tempfile import mkdtemp, NamedTemporaryFile
from shutil import rmtree
@@ -29,6 +30,7 @@ functions.reg_person_name()
def squeeze_whitespace(s):
+ return s
return re.sub(b'\\s+', b' ', s)
@@ -61,33 +63,6 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
parent.tail = newt
-def inner_xml(node):
- """ returns node's text and children as a string
-
- >>> print(inner_xml(etree.fromstring('xyz')))
- xyz
- """
-
- nt = node.text if node.text is not None else ''
- return ''.join(
- [nt] + [etree.tostring(child, encoding='unicode') for child in node]
- )
-
-
-def set_inner_xml(node, text):
- """ sets node's text and children from a string
-
- >>> e = etree.fromstring('bxx')
- >>> set_inner_xml(e, 'xyz')
- >>> print(etree.tostring(e, encoding='unicode'))
- xyz
- """
-
- p = etree.fromstring('%s' % text)
- node.text = p.text
- node[:] = p[:]
-
-
def node_name(node):
""" Find out a node's name
@@ -376,17 +351,8 @@ def remove_empty_lists_from_toc(toc):
toc[i] = e[0]
-def transform(wldoc, verbose=False, style=None,
- sample=None, cover=None, flags=None, hyphenate=False,
- ilustr_path='', output_type='epub'):
- """ produces a EPUB file
-
- sample=n: generate sample e-book (with at least n paragraphs)
- cover: a cover.Cover factory or True for default
- flags: less-advertising, without-fonts, working-copy
- """
- def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
+def transform_file(wldoc, chunk_counter=1, first=True, sample=None, hyphenate=False, output_type='epub', spine=None, output=None, annotations=None):
""" processes one input file and proceeds to its children """
replace_characters(wldoc.edoc.getroot())
@@ -447,6 +413,14 @@ def transform(wldoc, verbose=False, style=None,
output.add_item(item)
spine.append(item)
+ toc[-1][1].append(
+ epub.Link(
+ "part1.xhtml",
+ "Początek utworu",
+ "part1"
+ )
+ )
+
elif wldoc.book_info.parts:
# write title page for every parent
if sample is not None and sample <= 0:
@@ -509,12 +483,27 @@ def transform(wldoc, verbose=False, style=None,
for child in wldoc.parts():
child_toc, chunk_counter, chunk_chars, sample = transform_file(
- child, chunk_counter, first=False, sample=sample)
+ child, chunk_counter, first=False, sample=sample,
+ hyphenate=hyphenate, output_type=output_type,
+ spine=spine, output=output, annotations=annotations,
+ )
toc[-1][1].extend(child_toc)
chars = chars.union(chunk_chars)
return toc, chunk_counter, chars, sample
+
+def transform(wldoc, verbose=False, style=None,
+ sample=None, cover=None, flags=None, hyphenate=False,
+ base_url='file://./', output_type='epub'):
+ """ produces a EPUB file
+
+ sample=n: generate sample e-book (with at least n paragraphs)
+ cover: a cover.Cover factory or True for default
+ flags: less-advertising, without-fonts, working-copy
+ """
+
+
document = deepcopy(wldoc)
del wldoc
@@ -540,16 +529,18 @@ def transform(wldoc, verbose=False, style=None,
output.set_identifier(six.text_type(document.book_info.url))
output.set_language(functions.lang_code_3to2(document.book_info.language))
output.set_title(document.book_info.title)
- for author in document.book_info.authors:
+ for i, author in enumerate(document.book_info.authors):
output.add_author(
author.readable(),
- file_as=six.text_type(author)
+ file_as=six.text_type(author),
+ uid='creator{}'.format(i)
)
for translator in document.book_info.translators:
output.add_author(
translator.readable(),
file_as=six.text_type(translator),
- role='translator'
+ role='trl',
+ uid='translator{}'.format(i)
)
for publisher in document.book_info.publisher:
output.add_metadata("DC", "publisher", publisher)
@@ -567,23 +558,42 @@ def transform(wldoc, verbose=False, style=None,
functions.reg_mathml_epub(output)
- if os.path.isdir(ilustr_path):
- ilustr_elements = set(ilustr.get('src')
- for ilustr in document.edoc.findall('//ilustr'))
- for i, filename in enumerate(os.listdir(ilustr_path)):
- if filename not in ilustr_elements:
- continue
- file_path = os.path.join(ilustr_path, filename)
- with open(file_path, 'rb') as f:
- output.add_item(
- epub.EpubItem(
- uid='image%s' % i,
- file_name=filename,
- media_type=guess_type(file_path)[0],
- content=f.read()
- )
- )
+ # FIXME
+ for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
+ url = six.moves.urllib.parse.urljoin(
+ base_url,
+ ilustr.get('src')
+ )
+ imgfile = six.moves.urllib.request.urlopen(url)
+ img = Image.open(imgfile)
+ th_format, ext, media_type = {
+ 'GIF': ('GIF', 'gif', 'image/gif'),
+ 'PNG': ('PNG', 'png', 'image/png'),
+ }.get(img.format, ('JPEG', 'jpg', 'image/jpeg'))
+
+ width = 1200
+ if img.size[0] < width:
+ th = img
+ else:
+ th = img.resize((width, round(width * img.size[1] / img.size[0])))
+
+ imgfile.close()
+
+ buffer = six.BytesIO()
+ th.save(buffer, format=th_format)
+
+ file_name = 'image%d.%s' % (i, ext)
+ ilustr.set('src', file_name)
+ output.add_item(
+ epub.EpubItem(
+ uid='image%s' % i,
+ file_name=file_name,
+ media_type=media_type,
+ content=buffer.getvalue()
+ )
+ )
+
# write static elements
with open(get_resource('res/wl-logo-small.png'), 'rb') as f:
@@ -649,18 +659,13 @@ def transform(wldoc, verbose=False, style=None,
annotations = etree.Element('annotations')
- toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
+ toc, chunk_counter, chars, sample = transform_file(
+ document, sample=sample,
+ hyphenate=hyphenate, output_type=output_type,
+ spine=spine, output=output, annotations=annotations
+ )
output.toc = toc[0][1]
- if len(toc) < 2:
- output.toc.append(
- epub.Link(
- "part1.xhtml",
- "Początek utworu",
- "part1"
- )
- )
-
# Last modifications in container files and EPUB creation
if len(annotations) > 0:
output.toc.append(