From: Marcin Koziej Date: Wed, 7 Aug 2013 10:03:14 +0000 (+0200) Subject: Merge changes from master to Aigrain publishing code - that will be used for new... X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/f318053fb3349c5364cfb866b2a3d33c2423e12a?hp=6795396c107ee145c80d0dc24f197ecc3ba25b30 Merge changes from master to Aigrain publishing code - that will be used for new publishing code for few publications Summer/Autumn 2013 --- diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/librarian/__init__.py b/librarian/__init__.py index 8a69d00..c46d5d1 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -8,15 +8,22 @@ from __future__ import with_statement import os import re import shutil +import urllib + class UnicodeException(Exception): def __str__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + return unicode(self).encode('utf-8') def __unicode__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + args = self.args[0] if len(self.args) == 1 else self.args + try: + message = unicode(args) + except UnicodeDecodeError: + message = unicode(args, encoding='utf-8', errors='ignore') + return message class ParseError(UnicodeException): pass @@ -267,3 +274,8 @@ class OutputFile(object): if not os.path.isdir(dirname): os.makedirs(dirname) shutil.copy(self.get_filename(), path) + + +class URLOpener(urllib.FancyURLopener): + version = 'FNP Librarian (http://github.com/fnp/librarian)' +urllib._urlopener = URLOpener() diff --git a/librarian/book2anything.py b/librarian/book2anything.py new file mode 100755 index 0000000..b8b8d27 --- /dev/null +++ b/librarian/book2anything.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
+# +from collections import namedtuple +import os.path +import optparse + +from librarian import DirDocProvider, ParseError +from librarian.parser import WLDocument +from librarian.cover import WLCover + + +class Option(object): + """Option for optparse. Use it like `optparse.OptionParser.add_option`.""" + def __init__(self, *names, **options): + self.names = names + self.options = options + + def add(self, parser): + parser.add_option(*self.names, **self.options) + + def name(self): + return self.options['dest'] + + def value(self, options): + return getattr(options, self.name()) + + +class Book2Anything(object): + """A class for creating book2... scripts. + + Subclass it for any format you want to convert to. + """ + format_name = None # Set format name, like "PDF". + ext = None # Set file extension, like "pdf". + uses_cover = False # Can it add a cover? + cover_optional = True # Only relevant if uses_cover + uses_provider = False # Does it need a DocProvider? + transform = None # Transform method. Uses WLDocument.as_{ext} by default. + parser_options = [] # List of Option objects for additional parser args. + transform_options = [] # List of Option objects for additional transform args. + transform_flags = [] # List of Option objects for supported transform flags. + + + @classmethod + def run(cls): + # Parse commandline arguments + usage = """Usage: %%prog [options] SOURCE [SOURCE...] 
+ Convert SOURCE files to %s format.""" % cls.format_name + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', + action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-d', '--make-dir', + action='store_true', dest='make_dir', default=False, + help='create a directory for author and put the output file in it') + parser.add_option('-o', '--output-file', + dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', + dest='output_dir', metavar='DIR', + help='specifies the directory for output') + if cls.uses_cover: + if cls.cover_optional: + parser.add_option('-c', '--with-cover', + action='store_true', dest='with_cover', default=False, + help='create default cover') + parser.add_option('-C', '--image-cache', + dest='image_cache', metavar='URL', + help='prefix for image download cache' + + (' (implies --with-cover)' if cls.cover_optional else '')) + for option in cls.parser_options + cls.transform_options + cls.transform_flags: + option.add(parser) + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + return(1) + + # Prepare additional args for parser. + parser_args = {} + for option in cls.parser_options: + parser_args[option.name()] = option.value(options) + # Prepare additional args for transform method. + transform_args = {} + for option in cls.transform_options: + transform_args[option.name()] = option.value(options) + # Add flags to transform_args, if any. + transform_flags = [flag.name() for flag in cls.transform_flags + if flag.value(options)] + if transform_flags: + transform_args['flags'] = transform_flags + # Add cover support, if any. 
+ if cls.uses_cover: + if options.image_cache: + def cover_class(*args, **kwargs): + return WLCover(image_cache=options.image_cache, *args, **kwargs) + transform_args['cover'] = cover_class + elif not cls.cover_optional or options.with_cover: + transform_args['cover'] = WLCover + + + # Do some real work + try: + for main_input in input_filenames: + if options.verbose: + print main_input + + # Where to find input? + if cls.uses_provider: + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + else: + provider = None + + # Where to write output? + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.' + cls.ext + else: + output_file = None + + # Do the transformation. + doc = WLDocument.from_file(main_input, provider=provider, **parser_args) + transform = cls.transform + if transform is None: + transform = getattr(WLDocument, 'as_%s' % cls.ext) + output = transform(doc, **transform_args) + + doc.save_output_file(output, + output_file, options.output_dir, options.make_dir, cls.ext) + + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e + } diff --git a/librarian/cover.py b/librarian/cover.py index 741436a..dfd451b 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -4,8 +4,22 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# import re -import Image, ImageFont, ImageDraw, ImageFilter -from librarian import get_resource +from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance +from StringIO import StringIO +from librarian import get_resource, OutputFile, URLOpener + + +class Metric(object): + """Gets metrics from an object, scaling it by a factor.""" + def __init__(self, obj, scale): + self._obj = obj + self._scale = float(scale) + + def __getattr__(self, name): + src = getattr(self._obj, name) + if src and self._scale: + src = type(src)(self._scale * src) + return src class TextBox(object): @@ -93,7 +107,8 @@ class Cover(object): author_lineskip = 40 author_color = '#000' author_shadow = None - author_font = None + author_font_ttf = get_resource('fonts/DejaVuSerif.ttf') + author_font_size = 30 title_top = 100 title_margin_left = 20 @@ -101,13 +116,15 @@ class Cover(object): title_lineskip = 54 title_color = '#000' title_shadow = None - title_font = None + title_font_ttf = get_resource('fonts/DejaVuSerif.ttf') + title_font_size = 40 logo_bottom = None logo_width = None uses_dc_cover = False format = 'JPEG' + scale = 1 exts = { 'JPEG': 'jpg', @@ -119,9 +136,14 @@ class Cover(object): 'PNG': 'image/png', } - def __init__(self, book_info): - #self.author = ", ".join(auth.readable() for auth in book_info.authors) + def __init__(self, book_info, format=None, width=None, height=None): + self.author = ", ".join(auth.readable() for auth in book_info.authors) self.title = book_info.title + if format is not None: + self.format = format + scale = max(float(width or 0) / self.width, float(height or 0) / self.height) + if scale: + self.scale = scale def pretty_author(self): """Allows for decorating author's name.""" @@ -132,7 +154,8 @@ class Cover(object): return self.title def image(self): - img = Image.new('RGB', (self.width, self.height), self.background_color) + metr = Metric(self, self.scale) + img = Image.new('RGB', (metr.width, metr.height), self.background_color) if 
self.background_img: background = Image.open(self.background_img) @@ -140,34 +163,35 @@ class Cover(object): del background # WL logo - if self.logo_width: + if metr.logo_width: logo = Image.open(get_resource('res/wl-logo.png')) - logo = logo.resize((self.logo_width, logo.size[1] * self.logo_width / logo.size[0])) - img.paste(logo, ((self.width - self.logo_width) / 2, img.size[1] - logo.size[1] - self.logo_bottom)) + logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0])) + img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom)) - top = self.author_top + top = metr.author_top tbox = TextBox( - self.width - self.author_margin_left - self.author_margin_right, - self.height - top, + metr.width - metr.author_margin_left - metr.author_margin_right, + metr.height - top, ) - author_font = self.author_font or ImageFont.truetype( - get_resource('fonts/DejaVuSerif.ttf'), 30) + + author_font = ImageFont.truetype( + self.author_font_ttf, metr.author_font_size) tbox.text(self.pretty_author(), self.author_color, author_font, - self.author_lineskip, self.author_shadow) + metr.author_lineskip, self.author_shadow) text_img = tbox.image() - img.paste(text_img, (self.author_margin_left, top), text_img) + img.paste(text_img, (metr.author_margin_left, top), text_img) - top += text_img.size[1] + self.title_top + top += text_img.size[1] + metr.title_top tbox = TextBox( - self.width - self.title_margin_left - self.title_margin_right, - self.height - top, + metr.width - metr.title_margin_left - metr.title_margin_right, + metr.height - top, ) - title_font = self.author_font or ImageFont.truetype( - get_resource('fonts/DejaVuSerif.ttf'), 40) + title_font = ImageFont.truetype( + self.title_font_ttf, metr.title_font_size) tbox.text(self.pretty_title(), self.title_color, title_font, - self.title_lineskip, self.title_shadow) + metr.title_lineskip, self.title_shadow) text_img = tbox.image() - img.paste(text_img, 
(self.title_margin_left, top), text_img) + img.paste(text_img, (metr.title_margin_left, top), text_img) return img @@ -178,7 +202,12 @@ class Cover(object): return self.exts[self.format] def save(self, *args, **kwargs): - return self.image().save(format=self.format, *args, **kwargs) + return self.image().save(format=self.format, quality=95, *args, **kwargs) + + def output_file(self, *args, **kwargs): + imgstr = StringIO() + self.save(imgstr, *args, **kwargs) + return OutputFile.from_string(imgstr.getvalue()) class WLCover(Cover): @@ -186,13 +215,26 @@ class WLCover(Cover): width = 600 height = 833 uses_dc_cover = True - author_font = ImageFont.truetype( - get_resource('fonts/JunicodeWL-Regular.ttf'), 20) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 20 author_lineskip = 30 - title_font = ImageFont.truetype( - get_resource('fonts/DejaVuSerif-Bold.ttf'), 30) + title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf') + title_font_size = 30 title_lineskip = 40 title_box_width = 350 + + box_top_margin = 100 + box_bottom_margin = 100 + box_padding_y = 20 + box_above_line = 10 + box_below_line = 15 + box_line_left = 75 + box_line_right = 275 + box_line_width = 2 + + logo_top = 15 + logo_width = 140 + bar_width = 35 background_color = '#444' author_color = '#444' @@ -212,16 +254,15 @@ class WLCover(Cover): u'Współczesność': '#06393d', } - def __init__(self, book_info): - super(WLCover, self).__init__(book_info) + def __init__(self, book_info, format=None, width=None, height=None, with_logo=False): + super(WLCover, self).__init__(book_info, format=format, width=width, height=height) self.kind = book_info.kind self.epoch = book_info.epoch - print book_info.cover_url if book_info.cover_url: - from urllib2 import urlopen - from StringIO import StringIO - - bg_src = urlopen(book_info.cover_url) + url = book_info.cover_url + bg_src = None + if bg_src is None: + bg_src = URLOpener().open(url) self.background_img = StringIO(bg_src.read()) 
bg_src.close() else: @@ -231,25 +272,26 @@ class WLCover(Cover): return self.author.upper() def image(self): - img = Image.new('RGB', (self.width, self.height), self.background_color) + metr = Metric(self, self.scale) + img = Image.new('RGB', (metr.width, metr.height), self.background_color) draw = ImageDraw.Draw(img) if self.epoch in self.epoch_colors: epoch_color = self.epoch_colors[self.epoch] else: epoch_color = '#000' - draw.rectangle((0, 0, self.bar_width, self.height), fill=epoch_color) + draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color) if self.background_img: src = Image.open(self.background_img) - trg_size = (self.width - self.bar_width, self.height) + trg_size = (metr.width - metr.bar_width, metr.height) if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]: resized = ( trg_size[0], src.size[1] * trg_size[0] / src.size[0] ) cut = (resized[1] - trg_size[1]) / 2 - src = src.resize(resized) + src = src.resize(resized, Image.ANTIALIAS) src = src.crop((0, cut, src.size[0], src.size[1] - cut)) else: resized = ( @@ -257,52 +299,69 @@ class WLCover(Cover): trg_size[1], ) cut = (resized[0] - trg_size[0]) / 2 - src = src.resize(resized) + src = src.resize(resized, Image.ANTIALIAS) src = src.crop((cut, 0, src.size[0] - cut, src.size[1])) - img.paste(src, (self.bar_width, 0)) + img.paste(src, (metr.bar_width, 0)) del src - box = TextBox(self.title_box_width, self.height, padding_y=20) + box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y) + author_font = ImageFont.truetype( + self.author_font_ttf, metr.author_font_size) box.text(self.pretty_author(), - font=self.author_font, - line_height=self.author_lineskip, + font=author_font, + line_height=metr.author_lineskip, color=self.author_color, shadow_color=self.author_shadow, ) - box.skip(10) - box.draw.line((75, box.height, 275, box.height), - fill=self.author_color, width=2) - box.skip(15) + box.skip(metr.box_above_line) + box.draw.line((metr.box_line_left, box.height, 
metr.box_line_right, box.height), + fill=self.author_color, width=metr.box_line_width) + box.skip(metr.box_below_line) + title_font = ImageFont.truetype( + self.title_font_ttf, metr.title_font_size) box.text(self.pretty_title(), - line_height=self.title_lineskip, - font=self.title_font, + line_height=metr.title_lineskip, + font=title_font, color=epoch_color, shadow_color=self.title_shadow, ) + + if self.with_logo: + logo = Image.open(get_resource('res/wl-logo-mono.png')) + logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS) + alpha = logo.split()[3] + alpha = ImageEnhance.Brightness(alpha).enhance(.75) + logo.putalpha(alpha) + box.skip(metr.logo_top + logo.size[1]) + box_img = box.image() if self.kind == 'Liryka': # top - box_top = 100 + box_top = metr.box_top_margin elif self.kind == 'Epika': # bottom - box_top = self.height - 100 - box_img.size[1] + box_top = metr.height - metr.box_bottom_margin - box_img.size[1] else: # center - box_top = (self.height - box_img.size[1]) / 2 + box_top = (metr.height - box_img.size[1]) / 2 - box_left = self.bar_width + (self.width - self.bar_width - + box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2 draw.rectangle((box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff') img.paste(box_img, (box_left, box_top), box_img) - return img + if self.with_logo: + img.paste(logo, + (box_left + (box_img.size[0] - logo.size[0]) / 2, + box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo) + return img class VirtualoCover(Cover): @@ -325,7 +384,8 @@ class PrestigioCover(Cover): author_lineskip = 60 author_color = '#fff' author_shadow = '#000' - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Italic.ttf'), 50) + author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf') + author_font_size = 50 title_top = 0 title_margin_left = 118 @@ -333,7 +393,8 @@ class PrestigioCover(Cover): title_lineskip = 
60 title_color = '#fff' title_shadow = '#000' - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Italic.ttf'), 50) + title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf') + title_font_size = 50 def pretty_title(self): return u"„%s”" % self.title @@ -349,14 +410,16 @@ class BookotekaCover(Cover): author_margin_right = 233 author_lineskip = 156 author_color = '#d9d919' - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 130) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 130 title_top = 400 title_margin_left = 307 title_margin_right = 233 title_lineskip = 168 title_color = '#d9d919' - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 140) + title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + title_font_size = 140 format = 'PNG' @@ -365,18 +428,10 @@ class GandalfCover(Cover): width = 600 height = 730 background_img = get_resource('res/cover-gandalf.png') - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 30) - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 40) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 30 + title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + title_font_size = 40 logo_bottom = 25 logo_width = 250 format = 'PNG' - -class ImageCover(WLCover): - format = 'JPEG' - def __init__(self, *args, **kwargs): - super(ImageCover, self).__init__(*args, **kwargs) - self.im = Image.open(self.background_img) - self.width, self.height = self.im.size - - def image(self): - return self.im diff --git a/librarian/dcparser.py b/librarian/dcparser.py index d4bdc67..3e6ac1b 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -115,10 +115,21 @@ class Field(object): except ValueError, e: raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) - def validate(self, fdict, strict=False): + def 
validate(self, fdict, fallbacks=None, strict=False): + if fallbacks is None: + fallbacks = {} if not fdict.has_key(self.uri): if not self.required: - f = self.default + # Accept single value for single fields and saliases. + if self.name in fallbacks: + if self.multiple: + f = fallbacks[self.name] + else: + f = [fallbacks[self.name]] + elif self.salias and self.salias in fallbacks: + f = [fallbacks[self.salias]] + else: + f = self.default else: raise ValidationError("Required field %s not found" % self.uri) else: @@ -224,7 +235,7 @@ class WorkInfo(object): return cls(desc.attrib, field_dict, *args, **kwargs) - def __init__(self, rdf_attrs, dc_fields, strict=False): + def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False): """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the given field. """ @@ -233,7 +244,8 @@ class WorkInfo(object): self.fmap = {} for field in self.FIELDS: - value = field.validate(dc_fields, strict=strict) + value = field.validate(dc_fields, fallbacks=fallbacks, + strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: self.fmap[field.salias] = field diff --git a/librarian/epub.py b/librarian/epub.py index 034d82d..223bde9 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -7,6 +7,7 @@ from __future__ import with_statement import os import os.path +import re import subprocess from StringIO import StringIO from copy import deepcopy @@ -16,7 +17,7 @@ from tempfile import mkdtemp, NamedTemporaryFile from shutil import rmtree from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile -from librarian.cover import ImageCover as WLCover +from librarian.cover import WLCover from librarian import functions, get_resource @@ -79,7 +80,7 @@ def replace_characters(node): return text.replace(u"\ufeff", u"")\ .replace("---", u"\u2014")\ .replace("--", 
u"\u2013")\ - .replace(",,", u"“")\ + .replace(",,", u"\u201E")\ .replace('"', u"\u201D")\ .replace("'", u"\u2019") if node.tag in ('uwaga', 'extra'): @@ -109,31 +110,74 @@ def find_annotations(annotations, source, part_no): find_annotations(annotations, child, part_no) +class Stanza(object): + """ + Converts / verse endings into verse elements in a stanza. + + Slashes may only occur directly in the stanza. Any slashes in subelements + will be ignored, and the subelements will be put inside verse elements. + + >>> s = etree.fromstring("a c c/\\nbx/\\nyc/ \\nd") + >>> Stanza(s).versify() + >>> print etree.tostring(s) + a c cbx/ + ycd + + """ + def __init__(self, stanza_elem): + self.stanza = stanza_elem + self.verses = [] + self.open_verse = None + + def versify(self): + self.push_text(self.stanza.text) + for elem in self.stanza: + self.push_elem(elem) + self.push_text(elem.tail) + tail = self.stanza.tail + self.stanza.clear() + self.stanza.tail = tail + self.stanza.extend(self.verses) + + def open_normal_verse(self): + self.open_verse = self.stanza.makeelement("wers_normalny") + self.verses.append(self.open_verse) + + def get_open_verse(self): + if self.open_verse is None: + self.open_normal_verse() + return self.open_verse + + def push_text(self, text): + if not text: + return + for i, verse_text in enumerate(re.split(r"/\s*\n", text)): + if i: + self.open_normal_verse() + verse = self.get_open_verse() + if len(verse): + verse[-1].tail = (verse[-1].tail or "") + verse_text + else: + verse.text = (verse.text or "") + verse_text + + def push_elem(self, elem): + if elem.tag.startswith("wers"): + verse = deepcopy(elem) + verse.tail = None + self.verses.append(verse) + self.open_verse = verse + else: + appended = deepcopy(elem) + appended.tail = None + self.get_open_verse().append(appended) + + def replace_by_verse(tree): """ Find stanzas and create new verses in place of a '/' character """ stanzas = tree.findall('.//' + WLNS('strofa')) - for node in stanzas: - for 
child_node in node: - if child_node.tag in ('slowo_obce', 'wyroznienie'): - foreign_verses = inner_xml(child_node).split('/\n') - if len(foreign_verses) > 1: - new_foreign = '' - for foreign_verse in foreign_verses: - if foreign_verse.startswith('', foreign_verse, '')) - set_inner_xml(child_node, new_foreign) - verses = inner_xml(node).split('/\n') - if len(verses) > 1: - modified_inner_xml = '' - for verse in verses: - if verse.startswith('', verse, '')) - set_inner_xml(node, modified_inner_xml) + for stanza in stanzas: + Stanza(stanza).versify() def add_to_manifest(manifest, partno): @@ -248,15 +292,14 @@ def chop(main_text): last_node_part = False for one_part in main_text: name = one_part.tag - #if name == 'naglowek_czesc': - # yield part_xml - # last_node_part = True - # main_xml_part[:] = [deepcopy(one_part)] - #elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): - # yield part_xml - # main_xml_part[:] = [deepcopy(one_part)] - #else: - if True: + if name == 'naglowek_czesc': + yield part_xml + last_node_part = True + main_xml_part[:] = [deepcopy(one_part)] + elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): + yield part_xml + main_xml_part[:] = [deepcopy(one_part)] + else: main_xml_part.append(deepcopy(one_part)) last_node_part = False yield part_xml @@ -266,24 +309,21 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """ toc = TOC() - #for element in chunk_xml[0]: - # if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): - # toc.add(node_name(element), "part%d.html" % chunk_no) - # elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - # subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) - # element.set('sub', str(subnumber)) + for element in chunk_xml[0]: + if element.tag in 
("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): + toc.add(node_name(element), "part%d.html" % chunk_no) + elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): + subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) + element.set('sub', str(subnumber)) if empty: if not _empty_html_static: _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read()) chars = set() output_html = _empty_html_static[0] else: - if chunk_no == 1: - html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme-FoC.xsl')) - else: - find_annotations(annotations, chunk_xml, chunk_no) - replace_by_verse(chunk_xml) - html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl')) + find_annotations(annotations, chunk_xml, chunk_no) + replace_by_verse(chunk_xml) + html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl')) chars = used_chars(html_tree.getroot()) output_html = etree.tostring(html_tree, method="html", pretty_print=True) return output_html, toc, chars @@ -295,7 +335,7 @@ def transform(wldoc, verbose=False, """ produces a EPUB file sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object or True for default + cover: a cover.Cover factory or True for default flags: less-advertising, without-fonts, working-copy """ @@ -306,16 +346,7 @@ def transform(wldoc, verbose=False, # every input file will have a TOC entry, # pointing to starting chunk - - # hack for FoC: - if wldoc.book_info.author is not None: - toc_title = "%s, %s" % (wldoc.book_info.author.readable(), wldoc.book_info.title) - note = wldoc.edoc.find('//dzielo_nadrzedne') - if note is not None: - toc_title += " (%s)" % note.text - else: - toc_title = wldoc.book_info.title - toc = TOC(toc_title, "part%d.html" % chunk_counter) + toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter) chars = set() if first: # write book title page @@ -324,8 +355,7 @@ def transform(wldoc, verbose=False, 
zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True)) # add a title page TOC entry - toc.add(u"Title page", "title.html") - toc.add(u"Dear readers!", "part1.html") + toc.add(u"Strona tytułowa", "title.html") elif wldoc.book_info.parts: # write title page for every parent if sample is not None and sample <= 0: @@ -382,6 +412,10 @@ def transform(wldoc, verbose=False, for flag in flags: document.edoc.getroot().set(flag, 'yes') + # add editors info + document.edoc.getroot().set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) + opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) guide = opf.find('.//' + OPFNS('guide')) @@ -401,9 +435,8 @@ def transform(wldoc, verbose=False, '' \ '') - #zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) - #zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) - zip.write('logo.png', os.path.join('OPS', 'logo.png')) + zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) + zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) if not style: style = get_resource('epub/style.css') zip.write(style, os.path.join('OPS', 'style.css')) @@ -411,31 +444,29 @@ def transform(wldoc, verbose=False, if cover: if cover is True: cover = WLCover - if cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) cover_file = StringIO() c = cover(document.book_info) - import Image - c.im = Image.open('cover.jpg') - c.ext = lambda: 'jpg' c.save(cover_file) c_name = 'cover.%s' % c.ext() zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue()) del cover_file 
cover_tree = etree.parse(get_resource('epub/cover.html')) - cover_tree.find('//' + XHTMLNS('img')).set('src', c_name) + cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name) zip.writestr('OPS/cover.html', etree.tostring( cover_tree, method="html", pretty_print=True)) + if bound_cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + manifest.append(etree.fromstring( '')) manifest.append(etree.fromstring( - '' % (c_name, c.mime_type()))) + '' % (cover_name, bound_cover.mime_type()))) spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring('')) @@ -455,7 +486,7 @@ def transform(wldoc, verbose=False, '')) spine.append(etree.fromstring( '')) - guide.append(etree.fromstring('')) + guide.append(etree.fromstring('')) toc, chunk_counter, chars, sample = transform_file(document, sample=sample) @@ -475,7 +506,16 @@ def transform(wldoc, verbose=False, zip.writestr('OPS/annotations.html', etree.tostring( html_tree, method="html", pretty_print=True)) - toc.add("Editorial page", "last.html") + toc.add("Weprzyj Wolne Lektury", "support.html") + manifest.append(etree.fromstring( + '')) + spine.append(etree.fromstring( + '')) + html_string = open(get_resource('epub/support.html')).read() + chars.update(used_chars(etree.fromstring(html_string))) + zip.writestr('OPS/support.html', html_string) + + toc.add("Strona redakcyjna", "last.html") manifest.append(etree.fromstring( '')) spine.append(etree.fromstring( @@ -488,7 +528,10 @@ def transform(wldoc, verbose=False, if not flags or not 'without-fonts' in flags: # strip fonts tmpdir = mkdtemp('-librarian-epub') - cwd = os.getcwd() + try: + cwd = os.getcwd() + except OSError: + cwd = None os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) for 
fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': @@ -503,7 +546,8 @@ def transform(wldoc, verbose=False, manifest.append(etree.fromstring( '' % (fname, fname))) rmtree(tmpdir) - os.chdir(cwd) + if cwd is not None: + os.chdir(cwd) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) title = document.book_info.title @@ -519,7 +563,7 @@ def transform(wldoc, verbose=False, # write TOC if html_toc: - toc.add(u"Table of Contents", "toc.html", index=1) + toc.add(u"Spis treści", "toc.html", index=1) zip.writestr('OPS/toc.html', toc.html().encode('utf-8')) toc.write_to_xml(nav_map) zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) diff --git a/librarian/epub/support.html b/librarian/epub/support.html new file mode 100755 index 0000000..8ca550b --- /dev/null +++ b/librarian/epub/support.html @@ -0,0 +1,48 @@ + + + + + Wesprzyj Wolne Lektury + + + +
+ +

Wesprzyj Wolne Lektury!

+ +

+ Wolne Lektury to projekt fundacji Nowoczesna Polska – organizacji + pożytku publicznego działającej na rzecz wolności korzystania + z dóbr kultury.

+ +

+ Co roku do domeny publicznej przechodzi twórczość kolejnych autorów. + Dzięki Twojemu wsparciu będziemy je mogli udostępnić wszystkim bezpłatnie. +

+ +

+ Jak możesz pomóc? +

+ +

+ Logo 1%
+ Przekaż 1% podatku na rozwój Wolnych Lektur:
+ Fundacja Nowoczesna Polska
+ KRS 0000070056 +

+ +

+ Pomóż uwolnić konkretną książkę, wspierając + zbiórkę + na stronie wolnelektury.pl. +

+ +

+ Przekaż darowiznę na konto: + szczegóły + na stronie Fundacji. +

+ +
+ + diff --git a/librarian/epub/xsltLast.xsl b/librarian/epub/xsltLast.xsl index 09dbc15..f6802d8 100644 --- a/librarian/epub/xsltLast.xsl +++ b/librarian/epub/xsltLast.xsl @@ -15,7 +15,7 @@ - <xsl:text>Editorial page</xsl:text> + <xsl:text>Strona redakcyjna</xsl:text> @@ -23,13 +23,13 @@

- This book is available under the terms of + Ten utwór jest udostępniony na licencji - . + Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie @@ -43,12 +43,28 @@

-

Published by Modern Poland Foundation, 2012.

+

Źródło: + + + + + , + + +

+ + +

Tekst opracowany na podstawie:

+
+ + +

+
-

Cover image: +

Okładka na podstawie: @@ -64,6 +80,19 @@

+ +
+ Logo 1% +
Przekaż 1% podatku na rozwój Wolnych Lektur.
+
Nazwa organizacji: Fundacja Nowoczesna Polska
+
KRS 0000070056
+
+ +

 

+

+ Plik wygenerowany dnia . +

+ @@ -74,15 +103,10 @@ - +

- Technical editors: - - - , - - . -

+ Opracowanie redakcyjne i przypisy: + .

diff --git a/librarian/fb2.py b/librarian/fb2.py new file mode 100644 index 0000000..d979566 --- /dev/null +++ b/librarian/fb2.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import os.path +from copy import deepcopy +from lxml import etree + +from librarian import functions, OutputFile +from .epub import replace_by_verse + + +functions.reg_substitute_entities() +functions.reg_person_name() + + +def sectionify(tree): + """Finds section headers and adds a tree of _section tags.""" + sections = ['naglowek_czesc', + 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', + 'naglowek_podrozdzial'] + section_level = dict((v,k) for (k,v) in enumerate(sections)) + + # We can assume there are just subelements an no text at section level. + for level, section_name in reversed(list(enumerate(sections))): + for header in tree.findall('//' + section_name): + section = header.makeelement("_section") + header.addprevious(section) + section.append(header) + sibling = section.getnext() + while (sibling is not None and + section_level.get(sibling.tag, 1000) > level): + section.append(sibling) + sibling = section.getnext() + + +def transform(wldoc, verbose=False, + cover=None, flags=None): + """ produces a FB2 file + + cover: a cover.Cover object or True for default + flags: less-advertising, working-copy + """ + + document = deepcopy(wldoc) + del wldoc + + if flags: + for flag in flags: + document.edoc.getroot().set(flag, 'yes') + + style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt') + style = etree.parse(style_filename) + + replace_by_verse(document.edoc) + sectionify(document.edoc) + + result = document.transform(style) + + return OutputFile.from_string(unicode(result).encode('utf-8')) + +# vim:et diff --git a/librarian/fb2/description.xslt b/librarian/fb2/description.xslt new file mode 100644 index 
0000000..312df2d --- /dev/null +++ b/librarian/fb2/description.xslt @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + literature + + + + + + + + + + + book2fb2 + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/drama.xslt b/librarian/fb2/drama.xslt new file mode 100755 index 0000000..ab8fb06 --- /dev/null +++ b/librarian/fb2/drama.xslt @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + +

+
+ + +

+
+ + +

+
+ + + + + +
diff --git a/librarian/fb2/fb2.xslt b/librarian/fb2/fb2.xslt new file mode 100644 index 0000000..950b526 --- /dev/null +++ b/librarian/fb2/fb2.xslt @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:apply-templates mode="title" + select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul"/> + <xsl:call-template name="translators" /> + + + + +

+ Utwór opracowany został w ramach projektu + Wolne Lektury + przez fundację + Nowoczesna Polska. +

+ + + + + + + + + + + + +

+
+ + + +

+ tłum. + + , + + +

+
+
+ + + + + + + + + diff --git a/librarian/fb2/footnotes.xslt b/librarian/fb2/footnotes.xslt new file mode 100644 index 0000000..09270b9 --- /dev/null +++ b/librarian/fb2/footnotes.xslt @@ -0,0 +1,42 @@ + + + + + + + + + + + fn + +

+ + [przypis autorski] +

+
+
+ + + + + + + note + #fn + + [ + + ] + + +
diff --git a/librarian/fb2/inline.xslt b/librarian/fb2/inline.xslt new file mode 100644 index 0000000..03c6b65 --- /dev/null +++ b/librarian/fb2/inline.xslt @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + „ + + ” + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/paragraphs.xslt b/librarian/fb2/paragraphs.xslt new file mode 100644 index 0000000..68c6257 --- /dev/null +++ b/librarian/fb2/paragraphs.xslt @@ -0,0 +1,46 @@ + + + + + + + + + +

+
+ + + + + + +

+
+ + + + + + +

*

+
+ + +

————————

+
+ + + + + +
diff --git a/librarian/fb2/poems.xslt b/librarian/fb2/poems.xslt new file mode 100644 index 0000000..31b05b4 --- /dev/null +++ b/librarian/fb2/poems.xslt @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/sections.xslt b/librarian/fb2/sections.xslt new file mode 100644 index 0000000..80ffb65 --- /dev/null +++ b/librarian/fb2/sections.xslt @@ -0,0 +1,47 @@ + + + + + + + +
+ + + + + + + + + + +
+
+ + + +
+ + + +
+ +
+
+ + + + <p><xsl:apply-templates mode="inline"/></p> + +
diff --git a/librarian/html.py b/librarian/html.py index c1a5e5b..70fc6e5 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -57,6 +57,7 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None): if html_has_content(result): add_anchors(result.getroot()) + add_table_of_themes(result.getroot()) add_table_of_contents(result.getroot()) return OutputFile.from_string(etree.tostring(result, method='html', @@ -263,6 +264,34 @@ def add_table_of_contents(root): root.insert(0, toc) + +def add_table_of_themes(root): + try: + from sortify import sortify + except ImportError: + sortify = lambda x: x + + book_themes = {} + for fragment in root.findall('.//a[@class="theme-begin"]'): + if not fragment.text: + continue + theme_names = [s.strip() for s in fragment.text.split(',')] + for theme_name in theme_names: + book_themes.setdefault(theme_name, []).append(fragment.get('name')) + book_themes = book_themes.items() + book_themes.sort(key=lambda s: sortify(s[0])) + themes_div = etree.Element('div', id="themes") + themes_ol = etree.SubElement(themes_div, 'ol') + for theme_name, fragments in book_themes: + themes_li = etree.SubElement(themes_ol, 'li') + themes_li.text = "%s: " % theme_name + for i, fragment in enumerate(fragments): + item = etree.SubElement(themes_li, 'a', href="#%s" % fragment) + item.text = str(i + 1) + item.tail = ' ' + root.insert(0, themes_div) + + def extract_annotations(html_path): """For each annotation, yields a tuple: anchor, text, html.""" diff --git a/librarian/mobi.py b/librarian/mobi.py old mode 100755 new mode 100644 index 6de72b3..99b724e --- a/librarian/mobi.py +++ b/librarian/mobi.py @@ -9,7 +9,7 @@ import subprocess from tempfile import NamedTemporaryFile from librarian import OutputFile -from librarian.cover import ImageCover as WLCover +from librarian.cover import WLCover from librarian import get_resource @@ -19,7 +19,7 @@ def transform(wldoc, verbose=False, wldoc: a WLDocument sample=n: generate sample e-book (with at least 
n paragraphs) - cover: a cover.Cover object + cover: a cover.Cover factory overriding default flags: less-advertising, """ @@ -30,15 +30,12 @@ def transform(wldoc, verbose=False, # provide a cover by default if not cover: cover = WLCover + cover_file = NamedTemporaryFile(suffix='.png', delete=False) c = cover(book_info) - import Image - c.im = Image.open('cover.jpg') - c.ext = lambda: 'jpg' - cover_file = NamedTemporaryFile(suffix='.' + c.ext(), delete=False) c.save(cover_file) - if cover.uses_dc_cover: - if document.book_info.cover_by: + if bound_cover.uses_dc_cover: + if document.remobook_info.cover_by: document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) if document.book_info.cover_source: document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) diff --git a/librarian/mobi/style.css b/librarian/mobi/style.css old mode 100755 new mode 100644 diff --git a/librarian/parser.py b/librarian/parser.py index 2ece72f..a9e8c65 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -5,6 +5,7 @@ # from librarian import ValidationError, NoDublinCore, ParseError, NoProvider from librarian import RDFNS +from librarian.cover import WLCover from librarian import dcparser from xml.parsers.expat import ExpatError @@ -19,7 +20,8 @@ class WLDocument(object): LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) provider = None - def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False): + def __init__(self, edoc, parse_dublincore=True, provider=None, + strict=False, meta_fallbacks=None): self.edoc = edoc self.provider = provider @@ -37,7 +39,7 @@ class WLDocument(object): raise NoDublinCore('Document has no DublinCore - which is required.') self.book_info = dcparser.BookInfo.from_element( - self.rdf_elem, strict=strict) + self.rdf_elem, fallbacks=meta_fallbacks, strict=strict) else: self.book_info = None @@ -46,7 +48,7 @@ class WLDocument(object): return cls.from_file(StringIO(xml), *args, **kwargs) 
@classmethod - def from_file(cls, xmlfile, parse_dublincore=True, provider=None): + def from_file(cls, xmlfile, *args, **kwargs): # first, prepare for parsing if isinstance(xmlfile, basestring): @@ -67,7 +69,7 @@ class WLDocument(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(StringIO(data.encode('utf-8')), parser) - return cls(tree, parse_dublincore=parse_dublincore, provider=provider) + return cls(tree, *args, **kwargs) except (ExpatError, XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) @@ -147,7 +149,7 @@ class WLDocument(object): xpath = self.path_to_xpath(key) node = self.edoc.xpath(xpath)[0] repl = etree.fromstring(u"<%s>%s" %(node.tag, data, node.tag) ) - node.getparent().replace(node, repl); + node.getparent().replace(node, repl) except Exception, e: unmerged.append( repr( (key, xpath, e) ) ) @@ -163,6 +165,21 @@ class WLDocument(object): node.tag = 'span' node.tail = tail + def editors(self): + """Returns a set of all editors for book and its children. 
+ + :returns: set of dcparser.Person objects + """ + if self.book_info is None: + raise NoDublinCore('No Dublin Core in document.') + persons = set(self.book_info.editors + + self.book_info.technical_editors) + for child in self.parts(): + persons.update(child.editors()) + if None in persons: + persons.remove(None) + return persons + # Converters def as_html(self, *args, **kwargs): @@ -185,6 +202,15 @@ class WLDocument(object): from librarian import mobi return mobi.transform(self, *args, **kwargs) + def as_fb2(self, *args, **kwargs): + from librarian import fb2 + return fb2.transform(self, *args, **kwargs) + + def as_cover(self, cover_class=None, *args, **kwargs): + if cover_class is None: + cover_class = WLCover + return cover_class(self.book_info, *args, **kwargs).output_file() + def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None): if output_dir_path: diff --git a/librarian/pdf.py b/librarian/pdf.py index af3d1df..9fb92b1 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -3,6 +3,12 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +"""PDF creation library. + +Creates one big XML from the book and its children, converts it to LaTeX +with TeXML, then runs it by XeLaTeX. + +""" from __future__ import with_statement import os import os.path @@ -21,7 +27,7 @@ from librarian.dcparser import Person from librarian.parser import WLDocument from librarian import ParseError, DCNS, get_resource, OutputFile from librarian import functions -from librarian.cover import ImageCover as WLCover +from librarian.cover import WLCover functions.reg_substitute_entities() @@ -135,9 +141,13 @@ def hack_motifs(doc): def parse_creator(doc): - """ find all dc:creator and dc.contributor tags and add *_parsed versions with forenames first """ + """Generates readable versions of creator and translator tags. 
+ + Finds all dc:creator and dc.contributor.translator tags + and adds *_parsed versions with forenames first. + """ for person in doc.xpath("|".join('//dc:'+(tag) for tag in ( - 'creator', 'contributor.translator', 'contributor.editor', 'contributor.technical_editor')), + 'creator', 'contributor.translator')), namespaces = {'dc': str(DCNS)})[::-1]: if not person.text: continue @@ -174,54 +184,59 @@ def package_available(package, args='', verbose=False): def transform(wldoc, verbose=False, save_tex=None, morefloats=None, - cover=None, flags=None, customizations=None, - imgdir=""): + cover=None, flags=None, customizations=None): """ produces a PDF file with XeLaTeX wldoc: a WLDocument verbose: prints all output from LaTeX save_tex: path to save the intermediary LaTeX file to morefloats (old/new/none): force specific morefloats - cover: a cover.Cover object + cover: a cover.Cover factory or True for default flags: less-advertising, customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) """ # Parse XSLT try: + book_info = wldoc.book_info document = load_including_children(wldoc) + root = document.edoc.getroot() if cover: if cover is True: cover = WLCover - the_cover = cover(document.book_info) - document.edoc.getroot().set('data-cover-width', str(the_cover.width)) - document.edoc.getroot().set('data-cover-height', str(the_cover.height)) - if the_cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + bound_cover = cover(book_info) + root.set('data-cover-width', str(bound_cover.width)) + root.set('data-cover-height', str(bound_cover.height)) + if bound_cover.uses_dc_cover: + if book_info.cover_by: + root.set('data-cover-by', book_info.cover_by) + if book_info.cover_source: + root.set('data-cover-source', + 
book_info.cover_source) if flags: for flag in flags: - document.edoc.getroot().set('flag-' + flag, 'yes') + root.set('flag-' + flag, 'yes') # check for LaTeX packages if morefloats: - document.edoc.getroot().set('morefloats', morefloats.lower()) + root.set('morefloats', morefloats.lower()) elif package_available('morefloats', 'maxfloats=19'): - document.edoc.getroot().set('morefloats', 'new') + root.set('morefloats', 'new') # add customizations if customizations is not None: - document.edoc.getroot().set('customizations', u','.join(customizations)) + root.set('customizations', u','.join(customizations)) + + # add editors info + root.set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) # hack the tree - #move_motifs_inside(document.edoc) - #hack_motifs(document.edoc) + move_motifs_inside(document.edoc) + hack_motifs(document.edoc) parse_creator(document.edoc) - if document.book_info.language == 'pol': - substitute_hyphens(document.edoc) + substitute_hyphens(document.edoc) fix_hanging(document.edoc) # wl -> TeXML @@ -229,20 +244,13 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, style = etree.parse(style_filename) texml = document.transform(style) - # etree.dump(texml.getroot()) + # TeXML -> LaTeX temp = mkdtemp('-wl2pdf') if cover: - with open(os.path.join(temp, 'cover.jpg'), 'w') as f: - the_cover.save(f) - - shutil.copy("cce_trust.eps", temp) - shutil.copy("logo.eps", temp) - for img in document.edoc.findall('//ilustr'): - # print "--->> %s %s %s" % (imgdir, img, img.get('src')) - shutil.copy(os.path.join(imgdir, img.get('src')), temp) - + with open(os.path.join(temp, 'cover.png'), 'w') as f: + bound_cover.save(f) del document # no longer needed large object :) @@ -258,9 +266,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, # LaTeX -> PDF shutil.copy(get_resource('pdf/wl.cls'), temp) shutil.copy(get_resource('res/wl-logo.png'), temp) - shutil.copy('logo.eps', temp) - cwd = 
os.getcwd() + try: + cwd = os.getcwd() + except OSError: + cwd = None os.chdir(temp) if verbose: @@ -270,7 +280,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, if p: raise ParseError("Error parsing .tex file") - os.chdir(cwd) + if cwd is not None: + os.chdir(cwd) output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False) pdf_path = os.path.join(temp, 'doc.pdf') @@ -279,7 +290,6 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, return OutputFile.from_filename(output_file.name) except (XMLSyntaxError, XSLTApplyError), e: - print e raise ParseError(e) @@ -301,7 +311,8 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - document = WLDocument.from_string(text, parse_dublincore=True) + document = WLDocument.from_string(text, + parse_dublincore=True, provider=provider) document.swap_endlines() for child_uri in document.book_info.parts: diff --git a/librarian/pdf/wl.cls b/librarian/pdf/wl.cls index 82e1362..cb53a3f 100644 --- a/librarian/pdf/wl.cls +++ b/librarian/pdf/wl.cls @@ -77,7 +77,6 @@ \usepackage{xunicode} \usepackage{xltxtra} - \usepackage[overload]{textcase} \usepackage{scalefnt} \usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref} @@ -178,10 +177,8 @@ Letters={SmallCaps,UppercaseSmallCaps} \raisebox{0pt}[0pt][0pt]{\makebox[0pt][r]{\usebox{\xglyphbox}}}} \newcommand{\makecover}[2]{ - - - %\pdfpagewidth=#1 - %\pdfpageheight=#2 + \pdfpagewidth=#1 + \pdfpageheight=#2 \thispagestyle{empty} \newlength{\PictHOffset} @@ -196,16 +193,16 @@ Letters={SmallCaps,UppercaseSmallCaps} \addtolength{\PictVOffset}{\headheight} \addtolength{\PictVOffset}{\headsep} \addtolength{\PictVOffset}{\topskip} - - \addtolength{\PictVOffset}{-#2} + \addtolength{\PictVOffset}{-\pdfpageheight} \noindent\hspace*{-\PictHOffset}% \raisebox{\PictVOffset}[0pt][0pt]{\makebox[0pt][l]{% - 
\includegraphics[height=#2,width=#1]{cover.jpg}}} - %\clearpage - \vspace{#2} + \includegraphics[height=\pdfpageheight,width=\pdfpagewidth]{cover.png}}} + \clearpage - \hspace{-36mm}\parbox{16cm}{ + +%% Strona tytułowa %% XXX + \hspace{-36mm}\parbox{16cm}{ {\addfontfeature{LetterSpace=-4.0}{\scalefont{4}% \noindent Philippe Aigrain}} @@ -222,15 +219,10 @@ Letters={SmallCaps,UppercaseSmallCaps} {\addfontfeature{LetterSpace=-4.0}{\scalefont{3}% Kultura i gospodarka epoki internetu }} - - -%\emph{\thankyou}\\ -%\indent\emph{You made this book possible.} } - - %\setlength{\pdfpagewidth}{210mm} - %\setlength{\pdfpageheight}{297mm} + \setlength{\pdfpagewidth}{210mm} + \setlength{\pdfpageheight}{297mm} } @@ -260,13 +252,14 @@ Letters={SmallCaps,UppercaseSmallCaps} \vspace{.6em} \color{black} - } } \usepackage{printlen} \newcommand{\editorialsection}{ +%% XXX % sprawdzic czy czegos nie zgubilem z master +%% szczegolnie jesli chodzi o makra wstawiajace dane z DC, jak np \editors \clearpage \thispagestyle{empty} @@ -328,6 +321,8 @@ Letters={SmallCaps,UppercaseSmallCaps} \includegraphics[scale=.2]{logo.eps} } + Przekaż darowiznę na konto: + \href{http://nowoczesnapolska.org.pl/pomoz-nam/wesprzyj-nas/}{szczegóły na stronie Fundacji}. \clearpage \thispagestyle{empty} @@ -435,13 +430,10 @@ Letters={Uppercase} %{\addfontfeature{Scale=2.0, FakeStretch=0.98, LetterSpace=-2.0}\emph{#1}} } + \newcommand{\tytul}[1]{% #1% -%\vspace{1em}% -} - -\newcommand{\autorpodutworu}[1]{% -\section*{\typosection{#1}}% +\vspace{1em}% } \newcommand{\nazwapodutworu}[1]{% @@ -449,7 +441,6 @@ Letters={Uppercase} } \newcommand{\autorutworu}[1]{% -\addcontentsline{toc}{part}{???} \subsection*{\typosubsection{#1}}% } @@ -473,6 +464,7 @@ Letters={Uppercase} \subsection*{\typosubsubsection{tłum. 
#1}}% } + \newcommand{\powiesc}[1]{#1} \newcommand{\opowiadanie}[1]{#1} \newcommand{\lirykal}[1]{#1} @@ -482,9 +474,7 @@ Letters={Uppercase} \newcommand{\dramatwspolczesny}[1]{#1} \newcommand{\nota}[1]{% -\begin{quotation}% -#1% -\end{quotation}% +\par{#1}% } \newcommand{\dedykacja}[1]{% @@ -535,25 +525,25 @@ Letters={Uppercase} \subsection*{\typosubsection{#1}}% } \newcommand{\naglowekczesc}[1]{% -%\pagebreak +\pagebreak \subsection*{\typosubsection{#1}}% } -\newcommand{\naglowekrozdzial}[1]{% +\newcommand{\srodtytul}[1]{% \subsection*{\typosubsection{#1}}% } \newcommand{\naglowekscena}[1]{% \subsubsection*{\typosubsubsection{#1}}% } -\newcommand{\naglowekpodrozdzial}[1]{% +\newcommand{\naglowekrozdzial}[1]{% \subsubsection*{\typosubsubsection{#1}}% } \newcommand{\naglowekosoba}[1]{% \par{\textsc{#1}}\nopagebreak% } -\newcommand{\srodtytul}[1]{% -\vskip 1em \par{\large \it \noindent #1}\vskip .5em\nopagebreak% +\newcommand{\naglowekpodrozdzial}[1]{% +\par{#1}\nopagebreak% } \newcommand{\miejsceczas}[1]{% @@ -637,7 +627,6 @@ Letters={Uppercase} \begin{center}% \par{*}% \end{center}% -\noindent% } \newcommand{\separatorlinia}{% @@ -659,11 +648,3 @@ Letters={Uppercase} \fi } -\newcommand{\ilustr}[2]{ - -\vspace{1em}% -\begin{center}% -\par{\includegraphics[width=\textwidth]{#1}\\#2}% -\end{center}% -\vspace{1em}% -} diff --git a/librarian/pdf/wl2tex.xslt b/librarian/pdf/wl2tex.xslt index 58bb68d..d3b8faf 100644 --- a/librarian/pdf/wl2tex.xslt +++ b/librarian/pdf/wl2tex.xslt @@ -58,77 +58,62 @@ - - \def\thankyou{% - - Thank you for your contribution, ! - Thank you for all your contributions! - - } - - + mm 210mm - mm - + - - + - + - + + + - \def\coverby{ - Obraz na okładce: + + \def\coverby{Okładka na podstawie: - \href{\datacoversource}{\datacoverby}. + \href{\datacoversource}{\datacoverby} - \datacoverby{}. + \datacoverby{} - } + + \def\editors{} - -
- - - - - - - @@ -170,7 +155,7 @@ \href{http://creativecommons.org/licenses/by-sa/3.0/}{Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL}.} - \def\rightsinfo{Ta książka jest udostpęniona na licencji + \def\rightsinfo{Ten utwór jest udostepniony na licencji \href{}{}.} @@ -362,75 +347,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tableh! - - - - - - table - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -462,13 +378,10 @@
- - Redakcja techniczna: - - - , - - . + + Opracowanie redakcyjne i przypisy: + + . diff --git a/librarian/res/wl-logo-mono.png b/librarian/res/wl-logo-mono.png new file mode 100644 index 0000000..c99447e Binary files /dev/null and b/librarian/res/wl-logo-mono.png differ diff --git a/librarian/res/wl-logo-small.png b/librarian/res/wl-logo-small.png index 104d56a..b32c46d 100644 Binary files a/librarian/res/wl-logo-small.png and b/librarian/res/wl-logo-small.png differ diff --git a/librarian/xslt/book2html.xslt b/librarian/xslt/book2html.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/book2txt.xslt b/librarian/xslt/book2txt.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/config.xml b/librarian/xslt/config.xml old mode 100755 new mode 100644 diff --git a/librarian/xslt/normalize.xslt b/librarian/xslt/normalize.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_base.xslt b/librarian/xslt/wl2html_base.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_full.xslt b/librarian/xslt/wl2html_full.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_partial.xslt b/librarian/xslt/wl2html_partial.xslt old mode 100755 new mode 100644 diff --git a/scripts/book2cover b/scripts/book2cover index 49cd539..758ab0e 100755 --- a/scripts/book2cover +++ b/scripts/book2cover @@ -4,36 +4,31 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import os -import optparse +from StringIO import StringIO +from librarian import OutputFile +from librarian.book2anything import Book2Anything, Option -from librarian import ParseError -from librarian.parser import WLDocument -from librarian.cover import WLCover +class Book2Cover(Book2Anything): + format_name = "JPEG" + ext = "jpg" + uses_cover = True + cover_optional = False -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Create cover images for SOURCE files.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') + transform_options = [ + Option('-W', '--width', action='store', type='int', dest='width', default=None, + help='Set width.'), + Option('-H', '--height', action='store', type='int', dest='height', default=None, + help='Set height.'), + Option('-l', '--with-logo', dest='with_logo', + action='store_true', default=False, + help='Add WL logo in white box.'), + ] - options, input_filenames = parser.parse_args() + @staticmethod + def transform(wldoc, cover, *args, **kwargs): + return wldoc.as_cover(cover_class=cover, *args, **kwargs) - if len(input_filenames) < 1: - parser.print_help() - exit(1) - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.jpg' - - doc = WLDocument.from_file(input_filename) - WLCover(doc.book_info).save(output_filename) +if __name__ == '__main__': + Book2Cover.run() diff --git a/scripts/book2epub b/scripts/book2epub index e60b932..01ca79a 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -4,65 +4,20 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import os.path -import optparse +from librarian.book2anything import Book2Anything, Option -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Epub(Book2Anything): + format_name = "EPUB" + ext = "epub" + uses_cover = True + uses_provider = True + transform_flags = [ + Option('-w', '--working-copy', dest='working-copy', + action='store_true', default=False, + help='mark the output as a working copy') + ] -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to EPUB format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False, - help='specifies the directory for output') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - flags = [] - if options.working_copy: - flags.append('working-copy') - # Do some real work - try: - for main_input in input_filenames: - if options.verbose: - print main_input - - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.epub' - else: - output_file = None 
- - doc = WLDocument.from_file(main_input, provider=provider) - epub = doc.as_epub(cover=options.with_cover, flags=flags) - - doc.save_output_file(epub, - output_file, options.output_dir, options.make_dir, 'epub') - - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Epub.run() diff --git a/scripts/book2fb2 b/scripts/book2fb2 new file mode 100755 index 0000000..584ae99 --- /dev/null +++ b/scripts/book2fb2 @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from librarian.book2anything import Book2Anything + + +class Book2Fb2(Book2Anything): + format_name = "FB2" + ext = "fb2" + uses_cover = False + uses_provider = True + + +if __name__ == '__main__': + Book2Fb2.run() diff --git a/scripts/book2html b/scripts/book2html index 8adeb38..5d48eec 100755 --- a/scripts/book2html +++ b/scripts/book2html @@ -4,62 +4,25 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument +from librarian.book2anything import Book2Anything, Option + + +class Book2Html(Book2Anything): + format_name = "HTML" + ext = "html" + uses_cover = False + uses_provider = False + transform_flags = [ + Option('-r', '--raw', dest='full-page', + action='store_false', default=True, + help='output raw text for use in templates') + ] + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - parser.add_option('-r', '--raw', action='store_false', dest='full_page', default=True, - help='outpu raw text for use in templates') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - flags = ('full-page',) if options.full_page else None - html = doc.as_html(flags=flags) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e, - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise - + Book2Html.run() diff --git a/scripts/book2ihtml b/scripts/book2ihtml deleted file mode 100755 index 779f245..0000000 --- a/scripts/book2ihtml +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
-# -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_html(flags=('full-page',), stylesheet='partial') - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.message.encode('utf-8') - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e.message.encode('utf-8'), - } - raise - diff --git a/scripts/book2mobi b/scripts/book2mobi index 665dcfa..f477a83 100755 --- a/scripts/book2mobi +++ b/scripts/book2mobi @@ -4,53 +4,16 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import os.path -import optparse +from librarian.book2anything import Book2Anything -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Mobi(Book2Anything): + format_name = "MOBI" + ext = "mobi" + uses_cover = True + cover_optional = False + uses_provider = True -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to MOBI format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - try: - for main_input in input_filenames: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.mobi' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - mobi = doc.as_mobi() - - doc.save_output_file(mobi, - output_file, options.output_dir, options.make_dir, 'mobi') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Mobi.run() diff --git a/scripts/book2pdf b/scripts/book2pdf index 37fcb17..11c5c04 100755 --- a/scripts/book2pdf +++ b/scripts/book2pdf @@ -4,61 +4,23 @@ # This file is part of 
Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from librarian.book2anything import Book2Anything, Option import os.path from optparse import OptionParser -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Pdf(Book2Anything): + format_name = "PDF" + ext = "pdf" + uses_cover = True + uses_provider = True + transform_args = [ + Option('-t', '--save-tex', dest='save_tex', metavar='FILE', + help='path to save the intermediary LaTeX file to'), + Option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', + help='force morefloats in old (<1.0c), new (>=1.0c) or none') + ] -if __name__ == '__main__': - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to PDF format.""" - - parser = OptionParser(usage) - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='make lots of noise and revert to default interaction in LaTeX') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE', - help='path to save the intermediary LaTeX file to') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - parser.add_option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', - help='force morefloats in old (<1.0c), new (>=1.0c) or none') - (options, args) = parser.parse_args() - - if len(args) < 1: - parser.print_help() - exit(1) - - if options.output_dir and options.output_file: - raise 
ValueError("Either --output-dir or --output file should be specified") - try: - for main_input in args: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - output_file, output_dir = options.output_file, options.output_dir - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.pdf' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - pdf = doc.as_pdf(save_tex=options.save_tex, - cover=options.with_cover, - morefloats=options.morefloats, verbose=options.verbose) - - doc.save_output_file(pdf, - output_file, options.output_dir, options.make_dir, 'pdf') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s; use -v to see more output' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Pdf.run() diff --git a/scripts/book2txt b/scripts/book2txt index 9cfdef2..1b4c0ef 100755 --- a/scripts/book2txt +++ b/scripts/book2txt @@ -4,60 +4,26 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse - -from librarian import ParseError +from librarian.book2anything import Book2Anything, Option from librarian.parser import WLDocument -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Convert SOURCE files to TXT format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, - help='set line wrap column') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') +class Book2Txt(Book2Anything): + format_name = "TXT" + ext = "txt" + uses_cover = False + uses_provider = False + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] + transform_args = [ + Option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, + help='set line wrap column') + ] + transform = WLDocument.as_text - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.txt' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_text(wrapping=str(options.wrapping)) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise +if __name__ == '__main__': + Book2Txt.run() diff --git a/scripts/master.css 
b/scripts/master.css old mode 100755 new mode 100644 diff --git a/scripts/master.plain.css b/scripts/master.plain.css old mode 100755 new mode 100644 diff --git a/setup.py b/setup.py index 7ea9ed0..b20040a 100755 --- a/setup.py +++ b/setup.py @@ -21,23 +21,27 @@ def whole_tree(prefix, path): setup( name='librarian', - version='1.4.2a', + version='1.5.1', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', maintainer='Radek Czajka', - maintainer_email='radek.czajka@gmail.com', + maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl', url='http://github.com/fnp/librarian', packages=['librarian'], - package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fonts/*', 'res/*'] + + package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] + whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')}, include_package_data=True, - install_requires=['lxml>=2.2'], + install_requires=[ + 'lxml>=2.2', + 'Pillow', + ], scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/book2epub', 'scripts/book2mobi', 'scripts/book2pdf', + 'scripts/book2fb2', 'scripts/book2partner', 'scripts/book2cover', 'scripts/bookfragments', diff --git a/tests/files/example-wl.xml b/tests/files/example-wl.xml new file mode 100644 index 0000000..d2fd87b --- /dev/null +++ b/tests/files/example-wl.xml @@ -0,0 +1,170 @@ + + + + + +Utworu, Autor +Tytuł w DC +Utworu, Tłumacz +Literacki, Redaktor +Techniczny, Redaktor +Fundacja Nowoczesna Polska +period +type +genre +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana + przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. 
+http://wolnelektury.pl/katalog/lektura/test1 +source +Domena publiczna +1500 +xml +text +text +2000 +pol + + + + +nota_red + +autor_utworu +dzielo_nadrzedne +nazwa_utworu +podtytul + +[powyżej: +nota_red (nie pojawia się w tekście, może być podana osobno), +autor_utworu, dzielo_nadrzedne, nazwa_utworu, podtytul, tłumacz (z DC)] + +[Noty: nota/akap, dedykacja/akap, motto/akap, motto_podpis] + + +nota/akap +dedykacja/akap +motto/akap +motto_podpis + +[Początek dramatu: lista_osob, naglowek_listy, lista_osoba, miejsce_czas] + + + lista_osob/naglowek_listy + lista_osob/lista_osoba + lista_osob/lista_osoba + +miejsce_czas + +[naglowek_czesc, naglowek_rozdzial, naglowek_podrozdzial, srodtytul] + +naglowek_czesc +naglowek_rozdzial +naglowek_podrozdzial +srodtytul + +[akap, akap_cd, akap_dialog, motyw] + +akapmotyw +akap_cd +akap_dialog + +[strofa, wers_akap, wers_wciety,typ=1-6, wers_cd, zastepnik_wersu] + +strofa/ +wers_akap/ +wers_wciety@typ=1/ +wers_wciety@typ=2/ +wers_wciety@typ=3 + +wers_wciety@typ=4/ +wers_wciety@typ=5/ +wers_wciety@typ=6/ +wers_cd/ +. . . . . . . . . . . . . . . . + + +[dlugi_cytat/akap] + +Cytowany akapit powinien wyglądać jak cytowany akapit. +Znaczy, może mieć jakieś dodatkowe wcięcie, jakiś rodzaj wyróżnienia czy coś. + +[poezja_cyt/strofa] + +To jest poezja/ +cytowana/ +ma być porządnie/ +wyrównana + +[naglowek_akt, naglowek_scena] + +naglowek_akt +naglowek_scena + +[Kwestia: naglowek_osoba, kwestia, didask_tekst, didaskalia, strofa, akap] + +naglowek_osoba + + +didask_tekst +didaskalia +Strofa w dramacie/ +jak amen w pacie/ +rzu. +Powyższy kawałek wiersza jest najzupełniej bez sensu i tak naprawdę wcale nie trzyma rytmu ani rymu. Być może należy skoncentrować się na dramacie prozą, jak ta tutaj niniejsza wypowiedź. + +[didaskalia, osoba] + +odezwał się autor. 
+ +[Wyróżnienia: tytul_dziela, tytul_dziela@typ=1, wyroznienie, slowo_obce] + + +tytul_dziela, +tytul_dziela@typ=1, +wyroznienie, +slowo_obce + + +[Przypisy: pa, pt, pr, pe] + + +pa - - - przypis autorski +pt - - - przypis tłumacza +pr - - - przypis redakcyjny +pe - - - przypis edytorski + + +[Separatory] + +[sekcja_swiatlo:] + + + +[sekcja_asterysk:] + + + +[separator_linia:] + + + + + +[Komentarze: uwaga, extra] +uwaga +extra + +[Nieużywane] + +wyp_osoba +wywiad_pyt/akap +wywiad_odp/akap +mat +www + + + diff --git a/tests/files/picture/angelus-novus.jpeg b/tests/files/picture/angelus-novus.jpeg new file mode 100644 index 0000000..fd0394f Binary files /dev/null and b/tests/files/picture/angelus-novus.jpeg differ diff --git a/tests/files/picture/angelus-novus.png b/tests/files/picture/angelus-novus.png deleted file mode 100644 index 9925dad..0000000 Binary files a/tests/files/picture/angelus-novus.png and /dev/null differ diff --git a/tests/files/picture/angelus-novus.xml b/tests/files/picture/angelus-novus.xml index 0f26730..964faed 100644 --- a/tests/files/picture/angelus-novus.xml +++ b/tests/files/picture/angelus-novus.xml @@ -18,9 +18,9 @@ Domena publiczna - Paul Klee zm. 1940 1940 Image - image/png - 1645 x 2000 px - d9ead48f3442ac4e1add602aacdffa4638ae8e21 + image/jpeg + 329 x 400 px + 5ed8e8d24d92017c6341c0b8cfcc414dec55b8bf 1920 lat @@ -29,14 +29,14 @@
-
+
-
+
-
-
+
+
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.html b/tests/files/text/asnyk_miedzy_nami_expected.html index 6bc7649..fd18174 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.html +++ b/tests/files/text/asnyk_miedzy_nami_expected.html @@ -3,29 +3,33 @@

Spis treści

    +
      +
    1. Miłość platoniczna: 1 2
    2. +
    3. Natura: 1
    4. +

    Adam AsnykMiłość platonicznaMiędzy nami nic nie było

    Miłość platoniczna
    -

    1Między nami nic nie było!

    +

    1Między nami nic nie było!

    Żadnych zwierzeń, wyznań żadnych!

    Nic nas z sobą nie łączyło —

    Prócz wiosennych marzeń zdradnych;

    -

    5NaturaPrócz tych woni, barw i blasków,

    +

    5NaturaPrócz tych woni, barw i blasków,

    Unoszących się w przestrzeni;

    Prócz szumiących śpiewem lasków

    I tej świeżej łąk zieleni;

    -

    Prócz tych kaskad i potoków,

    +

    Prócz tych kaskad i potoków,

    10Zraszających każdy parów,

    Prócz girlandy tęcz, obłoków,

    Prócz natury słodkich czarów;

    -

    Prócz tych wspólnych, jasnych zdrojów,

    +

    Prócz tych wspólnych, jasnych zdrojów,

    Z których serce zachwyt piło;

    15Prócz pierwiosnków i powojów,—

    Między nami nic nie było!

    diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt index 70c3185..d300b3e 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.txt +++ b/tests/files/text/asnyk_miedzy_nami_expected.txt @@ -39,4 +39,4 @@ Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. -Opracowanie redakcyjne i przypisy: Aleksandra Sekuła, Olga Sutkowska +Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml index c585a8b..6a781f3 100755 --- a/tests/files/text/asnyk_zbior.xml +++ b/tests/files/text/asnyk_zbior.xml @@ -9,9 +9,11 @@ Pozytywizm Liryka Wiersz +Fikcyjny, Adam Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. http://wolnelektury.pl/katalog/lektura/poezye http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo +http://wolnelektury.pl/katalog/lektura/do-mlodych http://www.polona.pl/Content/5164 (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 Domena publiczna - Adam Asnyk zm. 1897 diff --git a/tests/files/text/do-mlodych.xml b/tests/files/text/do-mlodych.xml new file mode 100755 index 0000000..21fa522 --- /dev/null +++ b/tests/files/text/do-mlodych.xml @@ -0,0 +1,70 @@ + + +Asnyk, Adam +Do młodych +Sekuła, Aleksandra +Sutkowska, Olga +Fundacja Nowoczesna Polska +Pozytywizm +Liryka +Wiersz +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. 
+http://wolnelektury.pl/katalog/lektura/do-mlodych +http://www.polona.pl/Content/8616 +El...y (Adam Asnyk), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa 1898 +Domena publiczna - Adam Asnyk zm. 1897 +1897 +xml +text +text +2009-04-07 +L +pol +http://redakcja.wolnelektury.pl/media/dynamic/cover/image/35.jpg +leboski@Flickr, CC BY 2.0 +http://redakcja.wolnelektury.pl/cover/image/35 + + + +Adam Asnyk + +Do młodych + + + + +Szukajcie prawdy jasnego płomienia,/ +Szukajcie nowych, nieodkrytych dróg!/ +Za każdym krokiem w tajniki stworzenia/ +Coraz się dusza ludzka rozprzestrzenia/ +I większym staje się Bóg! + + +Choć otrząśniecie kwiaty barwnych mitów,/ +Choć rozproszycie legendowy mrok,/ +Choć mgłę urojeń zedrzecie z błękitów, ---/ +Ludziom niebiańskich nie zbraknie zachwytów,/ +Lecz dalej sięgnie ich wzrok. + + +Czas, Kondycja ludzka, PrzemijanieKażda epoka ma swe własne cele/ +I zapomina o wczorajszych snach:/ +Nieście więc wiedzy pochodnię na czele/ +I nowy udział bierzcie w wieków dziele,---/ +Przyszłości podnoście gmach! + + +Ale nie depczcie przeszłości ołtarzy,/ +Choć macie sami doskonalsze wznieść:/ +Na nich się jeszcze święty ogień żarzy,/ +I miłość ludzka stoi tam na straży,/ +I wy winniście im cześć! + + +Ze światem, który w ciemność już zachodzi/ +Wraz z całą tęczą idealnych snów,/ +Prawdziwa mądrość niechaj was pogodzi:/ +I wasze gwiazdy, o zdobywcy młodzi,/ +W ciemnościach pogasną znów! 
+ + \ No newline at end of file diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml index 124940e..a94b8f0 100644 --- a/tests/files/text/miedzy-nami-nic-nie-bylo.xml +++ b/tests/files/text/miedzy-nami-nic-nie-bylo.xml @@ -9,6 +9,8 @@ Sekuła, Aleksandra Sutkowska, Olga +Fikcyjny, Adam +Fikcyjny, Adam Fundacja Nowoczesna Polska Pozytywizm Liryka diff --git a/tests/test_epub.py b/tests/test_epub.py index 9fc5637..faa76e7 100644 --- a/tests/test_epub.py +++ b/tests/test_epub.py @@ -3,14 +3,29 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from zipfile import ZipFile +from lxml import html +from nose.tools import * from librarian import DirDocProvider from librarian.parser import WLDocument -from nose.tools import * -from utils import get_fixture +from tests.utils import get_fixture def test_transform(): - WLDocument.from_file( + epub = WLDocument.from_file( get_fixture('text', 'asnyk_zbior.xml'), provider=DirDocProvider(get_fixture('text', '')) - ).as_epub(flags=['without_fonts']) + ).as_epub(flags=['without_fonts']).get_file() + zipf = ZipFile(epub) + + # Check contributor list. + last = zipf.open('OPS/last.html') + tree = html.parse(last) + editors_attribution = False + for par in tree.findall("//p"): + if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'): + editors_attribution = True + assert_equal(par.text.rstrip(), + u'Opracowanie redakcyjne i przypisy: ' + u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') + assert_true(editors_attribution) diff --git a/tests/test_pdf.py b/tests/test_pdf.py new file mode 100644 index 0000000..75b73bc --- /dev/null +++ b/tests/test_pdf.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
+# +import re +from tempfile import NamedTemporaryFile +from nose.tools import * +from librarian import DirDocProvider +from librarian.parser import WLDocument +from utils import get_fixture + + +def test_transform(): + temp = NamedTemporaryFile(delete=False) + temp.close() + WLDocument.from_file( + get_fixture('text', 'asnyk_zbior.xml'), + provider=DirDocProvider(get_fixture('text', '')) + ).as_pdf(save_tex=temp.name) + tex = open(temp.name).read().decode('utf-8') + print tex + + # Check contributor list. + editors = re.search(ur'\\def\\editors\{' + ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) + assert_equal(editors.group(1), + u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") diff --git a/tests/test_picture.py b/tests/test_picture.py index 71a77dc..f64f624 100644 --- a/tests/test_picture.py +++ b/tests/test_picture.py @@ -31,7 +31,7 @@ def test_wlpicture(): # from nose.tools import set_trace; set_trace() assert pi.type[0] == u"Image" - assert pi.mime_type == u'image/png' == wlp.mime_type + assert pi.mime_type == u'image/jpeg' == wlp.mime_type assert wlp.slug == 'angelus-novus' assert path.exists(wlp.image_path)