def serialize_children(element, format='raw'):
return SERIALIZERS[format](element)
+
+def get_resource(path):
+    return os.path.join(os.path.dirname(__file__), path)  # resolve `path' relative to the librarian package directory
+
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import Image, ImageFont, ImageDraw
+from librarian import get_resource
+
+
+def cover(width, height, author, title):
+    """Draw a simple book cover image.
+
+    width, height -- cover size in pixels
+    author, title -- texts drawn centered, author above title
+    Returns a PIL Image object.
+    """
+    def draw_centered_text(text, draw, font, width, pos_y, lineskip):
+        """Draw `text' word-wrapped to `width', horizontally centered.
+
+        Returns the y position just below the last line drawn.
+        """
+        while text:
+            line = text
+            # Chop trailing words off until the line fits the width.
+            while draw.textsize(line, font=font)[0] > width:
+                try:
+                    line, ext = line.rsplit(' ', 1)
+                except ValueError:
+                    # single word wider than `width' -- draw it anyway
+                    break
+            draw.text(((img.size[0] - draw.textsize(line, font=font)[0]) / 2, pos_y), line, font=font, fill='#000')
+            pos_y += lineskip
+            text = text[len(line)+1:]
+        return pos_y
+
+
+    img = Image.new('RGB', (width, height), (255, 255, 255))
+
+    # WL logo, scaled to half the cover width, pasted bottom-centered
+    logo = Image.open(get_resource('pdf/wl-logo.png'))
+    logo = logo.resize((img.size[0] / 2, logo.size[1] * img.size[0] / 2 / logo.size[0]))
+    img.paste(logo, (width / 4, img.size[1] - logo.size[1]))
+
+    draw = ImageDraw.Draw(img)
+    author_font = ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), width/20)
+    title_y = draw_centered_text(author, draw, author_font, width*9/10, height/10, width/15) + height/10
+
+    title_font = ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), width/15)
+    draw_centered_text(title, draw, title_font, width*9/10, title_y, width/11)
+
+    return img
yield part_xml
-def transform_chunk(chunk_xml, chunk_no, annotations):
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):  # NOTE: mutable default deliberately caches the empty-chunk HTML across calls
     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
     toc = TOC()
     elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
         subnumber = toc.add(node_name(element), chunk_no, level=1, is_part=False)
         element.set('sub', str(subnumber))
-    find_annotations(annotations, chunk_xml, chunk_no)
-    replace_by_verse(chunk_xml)
-    html_tree = xslt(chunk_xml, res('xsltScheme.xsl'))
-    chars = used_chars(html_tree.getroot())
-    output_html = etree.tostring(html_tree, method="html", pretty_print=True)
+    if empty:  # sample mode past the quota: emit a placeholder chunk instead of content
+        if not _empty_html_static:
+            _empty_html_static.append(open(res('emptyChunk.html')).read())  # read the template file only once
+        chars = set()
+        output_html = _empty_html_static[0]
+    else:
+        find_annotations(annotations, chunk_xml, chunk_no)
+        replace_by_verse(chunk_xml)
+        html_tree = xslt(chunk_xml, res('xsltScheme.xsl'))
+        chars = used_chars(html_tree.getroot())
+        output_html = etree.tostring(html_tree, method="html", pretty_print=True)
     return output_html, toc, chars
-def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False):
+def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, sample=None):
     """ produces a EPUB file
     provider: a DocProvider
     output_file: file-like object or path to output file
     output_dir: path to directory to save output file to; either this or output_file must be present
     make_dir: writes output to <output_dir>/<author>/<slug>.epub instead of <output_dir>/<slug>.epub
+    sample=n: generate sample e-book (with at least n paragraphs)
     """
-    def transform_file(input_xml, chunk_counter=1, first=True):
+    def transform_file(input_xml, chunk_counter=1, first=True, sample=None):  # sample: remaining paragraph quota, threaded through recursive calls
         """ processes one input file and proceeds to its children """
         replace_characters(input_xml.getroot())
             etree.tostring(html_tree, method="html", pretty_print=True))
         elif children:
             # write title page for every parent
-            html_tree = xslt(input_xml, res('xsltChunkTitle.xsl'))
-            chars = used_chars(html_tree.getroot())
-            zip.writestr('OPS/part%d.html' % chunk_counter,
-                etree.tostring(html_tree, method="html", pretty_print=True))
+            if sample is not None and sample <= 0:  # quota exhausted: write a placeholder instead of the title page
+                chars = set()
+                html_string = open(res('emptyChunk.html')).read()
+            else:
+                html_tree = xslt(input_xml, res('xsltChunkTitle.xsl'))
+                chars = used_chars(html_tree.getroot())
+                html_string = etree.tostring(html_tree, method="html", pretty_print=True)
+            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
         if main_text is not None:
             for chunk_xml in chop(main_text):
-                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations)
+                empty = False
+                if sample is not None:
+                    if sample <= 0:
+                        empty = True  # quota used up: this chunk becomes a placeholder
+                    else:
+                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))  # count stanzas and paragraph variants against the quota
+                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
+
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
         if children:
             for child in children:
                 child_xml = etree.parse(provider.by_uri(child))
-                child_toc, chunk_counter, chunk_chars = transform_file(child_xml, chunk_counter, first=False)
+                child_toc, chunk_counter, chunk_chars, sample = transform_file(child_xml, chunk_counter, first=False, sample=sample)  # propagate the remaining quota across files
                 toc.append(child_toc)
                 chars = chars.union(chunk_chars)
-        return toc, chunk_counter, chars
+        return toc, chunk_counter, chars, sample
     # read metadata from the first file
     if file_path:
         '</navPoint></navMap></ncx>')
     nav_map = toc_file[-1]
-    toc, chunk_counter, chars = transform_file(input_xml)
+    toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample)
     if not toc.children:
         toc.add(u"Początek utworu", 1)
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\r
+<html xmlns="http://www.w3.org/1999/xhtml">\r
+ <head>\r
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />\r
+ <title>WolneLektury.pl</title>\r
+ </head>\r
+ <body></body>\r
+</html>
\ No newline at end of file
from librarian.dcparser import Person
from librarian.parser import WLDocument
-from librarian import ParseError, DCNS
+from librarian import ParseError, DCNS, get_resource
from librarian import functions
creator.getparent().insert(0, creator_parsed)
-def get_resource(path):
- return os.path.join(os.path.dirname(__file__), path)
-
def get_stylesheet(name):
return get_resource(STYLESHEETS[name])
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+import optparse
+from copy import deepcopy
+from lxml import etree
+
+from librarian import epub, DirDocProvider, ParseError, cover
+from librarian.dcparser import BookInfo
+
+
+def utf_trunc(text, limit):
+    """Truncate `text' to at most `limit' bytes in utf-8.
+
+    Appends '...' when anything was cut off; passes None through unchanged.
+    """
+    if text is None:
+        return text
+    if len(text.encode('utf-8')) > limit:
+        newlimit = limit - 3  # leave room for the '...' suffix
+        while len(text.encode('utf-8')) > newlimit:
+            # The slice bound is negative; Python 2 floor division of a
+            # negative number guarantees at least one character is dropped
+            # per pass (estimating up to 4 utf-8 bytes per character).
+            text = text[:(newlimit - len(text.encode('utf-8'))) / 4]
+        text += '...'
+    return text
+
+
+def virtualo(filenames, output_dir, verbose):
+    """Prepare files for the Virtualo API.
+
+    For each input file writes <output_dir>/<slug>/ containing a cover
+    JPEG, a full EPUB ('1.epub') and a 25-paragraph sample EPUB
+    ('1.sample.epub'), plus one import_products.xml describing all books.
+    """
+    xml = etree.fromstring("""<?xml version="1.0" encoding="utf-8"?>
+    <products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></products>""")
+    product = etree.fromstring("""<product>
+        <publisherProductId></publisherProductId>
+        <title></title>
+        <info></info>
+        <description></description>
+        <authors>
+            <author>
+                <names>Jan</names>
+                <lastName>Kowalski</lastName>
+            </author>
+        </authors>
+        <price>0.0</price>
+        <language>PL</language>
+    </product>""")
+
+    try:
+        for main_input in filenames:  # fixed: was iterating the global `input_filenames'
+            if verbose:  # fixed: was reading the global `options.verbose'
+                print main_input
+            path, fname = os.path.realpath(main_input).rsplit('/', 1)
+            provider = DirDocProvider(path)
+            slug, ext = os.path.splitext(fname)
+
+            outfile_dir = os.path.join(output_dir, slug)
+            if not os.path.isdir(outfile_dir):  # avoid OSError on a pre-existing directory
+                os.makedirs(outfile_dir)
+
+            info = BookInfo.from_file(main_input)
+
+            # Fill a copy of the product template; byte limits follow the API.
+            # NOTE(review): index 2 is <info> and 3 is <description>, so
+            # description/source_name land in oddly-named elements -- confirm
+            # against the Virtualo API spec before changing.
+            product_elem = deepcopy(product)
+            product_elem[0].text = utf_trunc(slug, 100)
+            product_elem[1].text = utf_trunc(info.title, 255)
+            product_elem[2].text = utf_trunc(info.description, 255)
+            product_elem[3].text = utf_trunc(info.source_name, 3000)
+            product_elem[4][0][0].text = utf_trunc(u' '.join(info.author.first_names), 100)
+            product_elem[4][0][1].text = utf_trunc(info.author.last_name, 100)
+            xml.append(product_elem)
+
+            cover.cover(600, 730,
+                u' '.join(info.author.first_names + (info.author.last_name,)),
+                info.title
+                ).save(os.path.join(outfile_dir, slug+'.jpg'))
+            outfile = os.path.join(outfile_dir, '1.epub')
+            outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
+            epub.transform(provider, file_path=main_input, output_file=outfile)
+            epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
+    except ParseError, e:
+        print '%(file)s:%(name)s:%(message)s' % {
+            'file': main_input,
+            'name': e.__class__.__name__,
+            'message': e.message
+        }
+
+    xml_file = open(os.path.join(output_dir, 'import_products.xml'), 'w')
+    xml_file.write(etree.tostring(xml, pretty_print=True, encoding=unicode).encode('utf-8'))
+    xml_file.close()
+
+
+
+
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Prepare SOURCE files for a partner."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+    parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', default='',
+        help='specifies the directory for output')
+    parser.add_option('--virtualo', action='store_true', dest='virtualo', default=False,
+        help='prepare files for Virtualo API')
+
+    options, input_filenames = parser.parse_args()
+
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+
+    if options.virtualo:  # Virtualo is currently the only partner; without the flag nothing is produced
+        virtualo(input_filenames, options.output_dir, options.verbose)
'scripts/book2txt',
'scripts/book2epub',
'scripts/book2pdf',
+ 'scripts/book2partner',
'scripts/bookfragments',
'scripts/genslugs'],
tests_require=['nose>=0.11', 'coverage>=3.0.1'],