# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import Image, ImageFont, ImageDraw
from librarian import get_resource


def cover(width, height, author, title):
    """Return a PIL Image of size (width, height): white background,
    author and title centered near the top, WL logo at the bottom.

    width, height -- cover dimensions in pixels
    author, title -- unicode strings to render
    """
    def draw_centered_text(text, draw, font, width, pos_y, lineskip):
        """Word-wrap `text` to `width` px, draw lines centered starting at
        pos_y, advancing lineskip per line; return the y below the last line."""
        while text:
            line = text
            # Greedy wrap: shed trailing words until the line fits.
            while draw.textsize(line, font=font)[0] > width:
                try:
                    line, ext = line.rsplit(' ', 1)
                except ValueError:
                    # Single unbreakable word wider than `width`:
                    # draw it anyway rather than loop forever.
                    break
            draw.text(((img.size[0] - draw.textsize(line, font=font)[0]) / 2, pos_y),
                      line, font=font, fill='#000')
            pos_y += lineskip
            text = text[len(line) + 1:]
        return pos_y

    img = Image.new('RGB', (width, height), (255, 255, 255))

    # WL logo: scaled to half the cover width (keeping aspect ratio),
    # pasted flush with the bottom edge, horizontally centered.
    logo = Image.open(get_resource('pdf/wl-logo.png'))
    logo = logo.resize((img.size[0] / 2, logo.size[1] * img.size[0] / 2 / logo.size[0]))
    img.paste(logo, (width / 4, img.size[1] - logo.size[1]))

    draw = ImageDraw.Draw(img)
    author_font = ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), width / 20)
    # Author block starts at height/10; title starts height/10 below it.
    title_y = draw_centered_text(author, draw, author_font,
                                 width * 9 / 10, height / 10, width / 15) + height / 10
    title_font = ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), width / 15)
    draw_centered_text(title, draw, title_font, width * 9 / 10, title_y, width / 11)

    return img
output file output_dir: path to directory to save output file to; either this or output_file must be present make_dir: writes output to //.epub instead of /.epub + sample=n: generate sample e-book (with at least n paragraphs) """ - def transform_file(input_xml, chunk_counter=1, first=True): + def transform_file(input_xml, chunk_counter=1, first=True, sample=None): """ processes one input file and proceeds to its children """ replace_characters(input_xml.getroot()) @@ -292,10 +299,14 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= etree.tostring(html_tree, method="html", pretty_print=True)) elif children: # write title page for every parent - html_tree = xslt(input_xml, res('xsltChunkTitle.xsl')) - chars = used_chars(html_tree.getroot()) - zip.writestr('OPS/part%d.html' % chunk_counter, - etree.tostring(html_tree, method="html", pretty_print=True)) + if sample is not None and sample <= 0: + chars = set() + html_string = open(res('emptyChunk.html')).read() + else: + html_tree = xslt(input_xml, res('xsltChunkTitle.xsl')) + chars = used_chars(html_tree.getroot()) + html_string = etree.tostring(html_tree, method="html", pretty_print=True) + zip.writestr('OPS/part%d.html' % chunk_counter, html_string) add_to_manifest(manifest, chunk_counter) add_to_spine(spine, chunk_counter) chunk_counter += 1 @@ -311,7 +322,14 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= if main_text is not None: for chunk_xml in chop(main_text): - chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations) + empty = False + if sample is not None: + if sample <= 0: + empty = True + else: + sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog')) + chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty) + toc.extend(chunk_toc) chars = chars.union(chunk_chars) zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html) @@ -322,11 +340,11 @@ 
def transform(provider, slug=None, file_path=None, output_file=None, output_dir= if children: for child in children: child_xml = etree.parse(provider.by_uri(child)) - child_toc, chunk_counter, chunk_chars = transform_file(child_xml, chunk_counter, first=False) + child_toc, chunk_counter, chunk_chars, sample = transform_file(child_xml, chunk_counter, first=False, sample=sample) toc.append(child_toc) chars = chars.union(chunk_chars) - return toc, chunk_counter, chars + return toc, chunk_counter, chars, sample # read metadata from the first file if file_path: @@ -391,7 +409,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '') nav_map = toc_file[-1] - toc, chunk_counter, chars = transform_file(input_xml) + toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample) if not toc.children: toc.add(u"Początek utworu", 1) diff --git a/librarian/epub/emptyChunk.html b/librarian/epub/emptyChunk.html new file mode 100644 index 0000000..14dd415 --- /dev/null +++ b/librarian/epub/emptyChunk.html @@ -0,0 +1,8 @@ + + + + + WolneLektury.pl + + + \ No newline at end of file diff --git a/librarian/pdf.py b/librarian/pdf.py index 07272f2..1989239 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -21,7 +21,7 @@ from lxml.etree import XMLSyntaxError, XSLTApplyError from librarian.dcparser import Person from librarian.parser import WLDocument -from librarian import ParseError, DCNS +from librarian import ParseError, DCNS, get_resource from librarian import functions @@ -137,9 +137,6 @@ def parse_creator(doc): creator.getparent().insert(0, creator_parsed) -def get_resource(path): - return os.path.join(os.path.dirname(__file__), path) - def get_stylesheet(name): return get_resource(STYLESHEETS[name]) diff --git a/scripts/book2partner b/scripts/book2partner new file mode 100755 index 0000000..5866cc3 --- /dev/null +++ b/scripts/book2partner @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of 
Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import os.path +import optparse +from copy import deepcopy +from lxml import etree + +from librarian import epub, DirDocProvider, ParseError, cover +from librarian.dcparser import BookInfo + + +def utf_trunc(text, limit): +""" truncates text to at most `limit' bytes in utf-8 """ + if text is None: + return text + orig_text = text + if len(text.encode('utf-8')) > limit: + newlimit = limit - 3 + while len(text.encode('utf-8')) > newlimit: + text = text[:(newlimit - len(text.encode('utf-8'))) / 4] + text += '...' + return text + + +def virtualo(filenames, output_dir, verbose): + xml = etree.fromstring(""" + """) + product = etree.fromstring(""" + + + + + + + Jan + Kowalski + + + 0.0 + PL + """) + + try: + for main_input in input_filenames: + if options.verbose: + print main_input + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + slug, ext = os.path.splitext(fname) + + outfile_dir = os.path.join(output_dir, slug) + os.makedirs(os.path.join(output_dir, slug)) + + info = BookInfo.from_file(main_input) + + product_elem = deepcopy(product) + product_elem[0].text = utf_trunc(slug, 100) + product_elem[1].text = utf_trunc(info.title, 255) + product_elem[2].text = utf_trunc(info.description, 255) + product_elem[3].text = utf_trunc(info.source_name, 3000) + product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100) + product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100) + xml.append(product_elem) + + cover.cover(600, 730, + u' '.join(info.author.first_names + (info.author.last_name,)), + info.title + ).save(os.path.join(outfile_dir, slug+'.jpg')) + outfile = os.path.join(outfile_dir, '1.epub') + outfile_sample = os.path.join(outfile_dir, '1.sample.epub') + epub.transform(provider, file_path=main_input, output_file=outfile) + epub.transform(provider, 
file_path=main_input, output_file=outfile_sample, sample=25) + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e.message + } + + xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w') + xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8')) + xml_file.close() + + + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Prepare SOURCE files for a partner.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='', + help='specifies the directory for output') + parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False, + help='prepare files for Virtualo API') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + if options.virtualo: + virtualo(input_filenames, options.output_dir, options.verbose) diff --git a/setup.py b/setup.py index db2551a..98488dd 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ setup( 'scripts/book2txt', 'scripts/book2epub', 'scripts/book2pdf', + 'scripts/book2partner', 'scripts/bookfragments', 'scripts/genslugs'], tests_require=['nose>=0.11', 'coverage>=3.0.1'],