From: Marcin Koziej Date: Tue, 5 Nov 2013 09:04:45 +0000 (+0100) Subject: Merge branch '__images' X-Git-Tag: 1.7~125 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/e57b146bf49e38b3bb57615110b27de5b4d1ae69?hp=00906f2fc0432cede204c7870e6caecf427d7024 Merge branch '__images' --- diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/librarian/__init__.py b/librarian/__init__.py index feb9974..c46d5d1 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -8,24 +8,34 @@ from __future__ import with_statement import os import re import shutil +import urllib -class ParseError(Exception): + +class UnicodeException(Exception): def __str__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + return unicode(self).encode('utf-8') def __unicode__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message + args = self.args[0] if len(self.args) == 1 else self.args + try: + message = unicode(args) + except UnicodeDecodeError: + message = unicode(args, encoding='utf-8', errors='ignore') + return message + +class ParseError(UnicodeException): + pass -class ValidationError(Exception): +class ValidationError(UnicodeException): pass class NoDublinCore(ValidationError): """There's no DublinCore section, and it's required.""" pass -class NoProvider(Exception): +class NoProvider(UnicodeException): """There's no DocProvider specified, and it's needed.""" pass @@ -66,42 +76,35 @@ WLNS = EmptyNamespace() class WLURI(object): - """Represents a WL URI. Extracts slug and language from it.""" - DEFAULT_LANGUAGE = u'pol' - + """Represents a WL URI. 
Extracts slug from it.""" slug = None - language = None example = 'http://wolnelektury.pl/katalog/lektura/template/' - _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/' - '(?P<slug>[-a-z0-9]+)(/(?P<lang>[a-z]{3}))?/?$') - - def __init__(self, uri=None): - if uri is not None: - uri = unicode(uri) - self.uri = uri - match = self._re_wl_uri.match(uri) - if not match: - raise ValueError('Supplied URI (%s) does not match ' - 'the WL document URI template.' % uri) - self.slug = match.group('slug') - self.language = match.group('lang') or self.DEFAULT_LANGUAGE + _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektura/' + '(?P<slug>[-a-z0-9]+)/?$') + + def __init__(self, uri): + uri = unicode(uri) + self.uri = uri + self.slug = uri.rstrip('/').rsplit('/', 1)[-1] + + @classmethod + def strict(cls, uri): + match = cls._re_wl_uri.match(uri) + if not match: + raise ValidationError(u'Invalid URI (%s). Should match: %s' % ( + uri, cls._re_wl_uri.pattern)) + return cls(uri) @classmethod - def from_slug_and_lang(cls, slug, lang): - """Contructs an URI from slug and language code. + def from_slug(cls, slug): + """Contructs an URI from slug.
- >>> WLURI.from_slug_and_lang('a-slug', WLURI.DEFAULT_LANGUAGE).uri + >>> WLURI.from_slug('a-slug').uri u'http://wolnelektury.pl/katalog/lektura/a-slug/' - >>> WLURI.from_slug_and_lang('a-slug', 'deu').uri - u'http://wolnelektury.pl/katalog/lektura/a-slug/deu/' """ - if lang is None: - lang = cls.DEFAULT_LANGUAGE uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug - if lang is not None and lang != cls.DEFAULT_LANGUAGE: - uri += lang + '/' return cls(uri) def __unicode__(self): @@ -111,17 +114,7 @@ class WLURI(object): return self.uri def __eq__(self, other): - return self.slug, self.language == other.slug, other.language - - def filename_stem(self): - stem = self.slug - if self.language != self.DEFAULT_LANGUAGE: - stem += '_' + self.language - return stem - - def validate_language(self, language): - if language != self.language: - raise ValidationError("Incorrect language definition in URI") + return self.slug == other.slug class DocProvider(object): @@ -130,18 +123,14 @@ class DocProvider(object): Used for generating joined files, like EPUBs. 
""" - def by_slug_and_lang(self, slug, lang=None): - """Should return a file-like object with a WL document XML.""" - raise NotImplementedError - def by_slug(self, slug): """Should return a file-like object with a WL document XML.""" - return self.by_slug_and_lang(slug) + raise NotImplementedError def by_uri(self, uri, wluri=WLURI): """Should return a file-like object with a WL document XML.""" wluri = wluri(uri) - return self.by_slug_and_lang(wluri.slug, wluri.language) + return self.by_slug(wluri.slug) class DirDocProvider(DocProvider): @@ -151,8 +140,8 @@ class DirDocProvider(DocProvider): self.dir = dir_ self.files = {} - def by_slug_and_lang(self, slug, lang=None): - fname = WLURI.from_slug_and_lang(slug, lang).filename_stem() + '.xml' + def by_slug(self, slug): + fname = slug + '.xml' return open(os.path.join(self.dir, fname)) @@ -167,7 +156,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo( DCNS('subject.type'): [u'Unknown'], DCNS('subject.genre'): [u'Unknown'], DCNS('date'): ['1970-01-01'], - DCNS('language'): [WLURI.DEFAULT_LANGUAGE], + DCNS('language'): [u'pol'], # DCNS('date'): [creation_date], DCNS('publisher'): [u"Fundacja Nowoczesna Polska"], DCNS('description'): @@ -285,3 +274,8 @@ class OutputFile(object): if not os.path.isdir(dirname): os.makedirs(dirname) shutil.copy(self.get_filename(), path) + + +class URLOpener(urllib.FancyURLopener): + version = 'FNP Librarian (http://github.com/fnp/librarian)' +urllib._urlopener = URLOpener() diff --git a/librarian/book2anything.py b/librarian/book2anything.py new file mode 100755 index 0000000..b8b8d27 --- /dev/null +++ b/librarian/book2anything.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
+# +from collections import namedtuple +import os.path +import optparse + +from librarian import DirDocProvider, ParseError +from librarian.parser import WLDocument +from librarian.cover import WLCover + + +class Option(object): + """Option for optparse. Use it like `optparse.OptionParser.add_option`.""" + def __init__(self, *names, **options): + self.names = names + self.options = options + + def add(self, parser): + parser.add_option(*self.names, **self.options) + + def name(self): + return self.options['dest'] + + def value(self, options): + return getattr(options, self.name()) + + +class Book2Anything(object): + """A class for creating book2... scripts. + + Subclass it for any format you want to convert to. + """ + format_name = None # Set format name, like "PDF". + ext = None # Set file extension, like "pdf". + uses_cover = False # Can it add a cover? + cover_optional = True # Only relevant if uses_cover + uses_provider = False # Does it need a DocProvider? + transform = None # Transform method. Uses WLDocument.as_{ext} by default. + parser_options = [] # List of Option objects for additional parser args. + transform_options = [] # List of Option objects for additional transform args. + transform_flags = [] # List of Option objects for supported transform flags. + + + @classmethod + def run(cls): + # Parse commandline arguments + usage = """Usage: %%prog [options] SOURCE [SOURCE...] 
+ Convert SOURCE files to %s format.""" % cls.format_name + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', + action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-d', '--make-dir', + action='store_true', dest='make_dir', default=False, + help='create a directory for author and put the output file in it') + parser.add_option('-o', '--output-file', + dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', + dest='output_dir', metavar='DIR', + help='specifies the directory for output') + if cls.uses_cover: + if cls.cover_optional: + parser.add_option('-c', '--with-cover', + action='store_true', dest='with_cover', default=False, + help='create default cover') + parser.add_option('-C', '--image-cache', + dest='image_cache', metavar='URL', + help='prefix for image download cache' + + (' (implies --with-cover)' if cls.cover_optional else '')) + for option in cls.parser_options + cls.transform_options + cls.transform_flags: + option.add(parser) + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + return(1) + + # Prepare additional args for parser. + parser_args = {} + for option in cls.parser_options: + parser_args[option.name()] = option.value(options) + # Prepare additional args for transform method. + transform_args = {} + for option in cls.transform_options: + transform_args[option.name()] = option.value(options) + # Add flags to transform_args, if any. + transform_flags = [flag.name() for flag in cls.transform_flags + if flag.value(options)] + if transform_flags: + transform_args['flags'] = transform_flags + # Add cover support, if any. 
+ if cls.uses_cover: + if options.image_cache: + def cover_class(*args, **kwargs): + return WLCover(image_cache=options.image_cache, *args, **kwargs) + transform_args['cover'] = cover_class + elif not cls.cover_optional or options.with_cover: + transform_args['cover'] = WLCover + + + # Do some real work + try: + for main_input in input_filenames: + if options.verbose: + print main_input + + # Where to find input? + if cls.uses_provider: + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + else: + provider = None + + # Where to write output? + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.' + cls.ext + else: + output_file = None + + # Do the transformation. + doc = WLDocument.from_file(main_input, provider=provider, **parser_args) + transform = cls.transform + if transform is None: + transform = getattr(WLDocument, 'as_%s' % cls.ext) + output = transform(doc, **transform_args) + + doc.save_output_file(output, + output_file, options.output_dir, options.make_dir, cls.ext) + + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e + } diff --git a/librarian/cover.py b/librarian/cover.py index 63e4aa0..8b770ca 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -3,8 +3,23 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import Image, ImageFont, ImageDraw, ImageFilter -from librarian import get_resource +import re +from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance +from StringIO import StringIO +from librarian import get_resource, OutputFile, URLOpener + + +class Metric(object): + """Gets metrics from an object, scaling it by a factor.""" + def __init__(self, obj, scale): + self._obj = obj + self._scale = float(scale) + + def __getattr__(self, name): + src = getattr(self._obj, name) + if src and self._scale: + src = type(src)(self._scale * src) + return src class TextBox(object): @@ -33,9 +48,10 @@ class TextBox(object): """Skips some vertical space.""" self.height += height - def text(self, text, color='#000', font=None, line_height=20, + def text(self, text, color='#000', font=None, line_height=20, shadow_color=None): """Writes some centered text.""" + text = re.sub(r'\s+', ' ', text) if shadow_color: if not self.shadow_img: self.shadow_img = Image.new('RGBA', self.img.size) @@ -91,7 +107,8 @@ class Cover(object): author_lineskip = 40 author_color = '#000' author_shadow = None - author_font = None + author_font_ttf = get_resource('fonts/DejaVuSerif.ttf') + author_font_size = 30 title_top = 100 title_margin_left = 20 @@ -99,13 +116,15 @@ class Cover(object): title_lineskip = 54 title_color = '#000' title_shadow = None - title_font = None + title_font_ttf = get_resource('fonts/DejaVuSerif.ttf') + title_font_size = 40 logo_bottom = None logo_width = None uses_dc_cover = False format = 'JPEG' + scale = 1 exts = { 'JPEG': 'jpg', @@ -117,9 +136,14 @@ class Cover(object): 'PNG': 'image/png', } - def __init__(self, book_info): + def __init__(self, book_info, format=None, width=None, height=None): self.author = ", ".join(auth.readable() for auth in book_info.authors) self.title = book_info.title + if format is not None: + self.format = format + scale = max(float(width or 0) / self.width, float(height or 0) / self.height) + if scale: + self.scale = scale def 
pretty_author(self): """Allows for decorating author's name.""" @@ -130,7 +154,8 @@ class Cover(object): return self.title def image(self): - img = Image.new('RGB', (self.width, self.height), self.background_color) + metr = Metric(self, self.scale) + img = Image.new('RGB', (metr.width, metr.height), self.background_color) if self.background_img: background = Image.open(self.background_img) @@ -138,34 +163,35 @@ class Cover(object): del background # WL logo - if self.logo_width: + if metr.logo_width: logo = Image.open(get_resource('res/wl-logo.png')) - logo = logo.resize((self.logo_width, logo.size[1] * self.logo_width / logo.size[0])) - img.paste(logo, ((self.width - self.logo_width) / 2, img.size[1] - logo.size[1] - self.logo_bottom)) + logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0])) + img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom)) - top = self.author_top + top = metr.author_top tbox = TextBox( - self.width - self.author_margin_left - self.author_margin_right, - self.height - top, + metr.width - metr.author_margin_left - metr.author_margin_right, + metr.height - top, ) - author_font = self.author_font or ImageFont.truetype( - get_resource('fonts/DejaVuSerif.ttf'), 30) + + author_font = ImageFont.truetype( + self.author_font_ttf, metr.author_font_size) tbox.text(self.pretty_author(), self.author_color, author_font, - self.author_lineskip, self.author_shadow) + metr.author_lineskip, self.author_shadow) text_img = tbox.image() - img.paste(text_img, (self.author_margin_left, top), text_img) - - top += text_img.size[1] + self.title_top + img.paste(text_img, (metr.author_margin_left, top), text_img) + + top += text_img.size[1] + metr.title_top tbox = TextBox( - self.width - self.title_margin_left - self.title_margin_right, - self.height - top, + metr.width - metr.title_margin_left - metr.title_margin_right, + metr.height - top, ) - title_font = self.author_font or 
ImageFont.truetype( - get_resource('fonts/DejaVuSerif.ttf'), 40) + title_font = ImageFont.truetype( + self.title_font_ttf, metr.title_font_size) tbox.text(self.pretty_title(), self.title_color, title_font, - self.title_lineskip, self.title_shadow) + metr.title_lineskip, self.title_shadow) text_img = tbox.image() - img.paste(text_img, (self.title_margin_left, top), text_img) + img.paste(text_img, (metr.title_margin_left, top), text_img) return img @@ -176,74 +202,97 @@ class Cover(object): return self.exts[self.format] def save(self, *args, **kwargs): - return self.image().save(format=self.format, *args, **kwargs) + return self.image().save(format=self.format, quality=95, *args, **kwargs) + + def output_file(self, *args, **kwargs): + imgstr = StringIO() + self.save(imgstr, *args, **kwargs) + return OutputFile.from_string(imgstr.getvalue()) class WLCover(Cover): """Default Wolne Lektury cover generator.""" + width = 600 + height = 833 uses_dc_cover = True - author_font = ImageFont.truetype( - get_resource('fonts/JunicodeWL-Regular.ttf'), 20) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 20 author_lineskip = 30 - title_font = ImageFont.truetype( - get_resource('fonts/DejaVuSerif-Bold.ttf'), 30) + title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf') + title_font_size = 30 title_lineskip = 40 title_box_width = 350 + + box_top_margin = 100 + box_bottom_margin = 100 + box_padding_y = 20 + box_above_line = 10 + box_below_line = 15 + box_line_left = 75 + box_line_right = 275 + box_line_width = 2 + + logo_top = 15 + logo_width = 140 + bar_width = 35 background_color = '#444' author_color = '#444' + default_background = get_resource('res/cover.png') + format = 'JPEG' - epochs = { - u'Starożytność': 0, - u'Średniowiecze': 30, - u'Renesans': 60, - u'Barok': 90, - u'Oświecenie': 120, - u'Romantyzm': 150, - u'Pozytywizm': 180, - u'Modernizm': 210, - u'Dwudziestolecie międzywojenne': 240, - u'Współczesność': 270, + epoch_colors = { + 
u'Starożytność': '#9e3610', + u'Średniowiecze': '#564c09', + u'Renesans': '#8ca629', + u'Barok': '#a6820a', + u'Oświecenie': '#f2802e', + u'Romantyzm': '#db4b16', + u'Pozytywizm': '#961060', + u'Modernizm': '#7784e0', + u'Dwudziestolecie międzywojenne': '#3044cf', + u'Współczesność': '#06393d', } - def __init__(self, book_info): - super(WLCover, self).__init__(book_info) + def __init__(self, book_info, format=None, width=None, height=None, with_logo=False): + super(WLCover, self).__init__(book_info, format=format, width=width, height=height) self.kind = book_info.kind self.epoch = book_info.epoch + self.with_logo = with_logo if book_info.cover_url: - from urllib2 import urlopen - from StringIO import StringIO - - bg_src = urlopen(book_info.cover_url) + url = book_info.cover_url + bg_src = None + if bg_src is None: + bg_src = URLOpener().open(url) self.background_img = StringIO(bg_src.read()) bg_src.close() + else: + self.background_img = self.default_background def pretty_author(self): return self.author.upper() def image(self): - from colorsys import hsv_to_rgb - - img = Image.new('RGB', (self.width, self.height), self.background_color) + metr = Metric(self, self.scale) + img = Image.new('RGB', (metr.width, metr.height), self.background_color) draw = ImageDraw.Draw(img) - if self.epoch in self.epochs: - epoch_color = tuple(int(255 * c) for c in hsv_to_rgb( - float(self.epochs[self.epoch]) / 360, .7, .7)) + if self.epoch in self.epoch_colors: + epoch_color = self.epoch_colors[self.epoch] else: epoch_color = '#000' - draw.rectangle((0, 0, self.bar_width, self.height), fill=epoch_color) + draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color) if self.background_img: src = Image.open(self.background_img) - trg_size = (self.width - self.bar_width, self.height) + trg_size = (metr.width - metr.bar_width, metr.height) if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]: resized = ( trg_size[0], src.size[1] * trg_size[0] / src.size[0] ) cut = 
(resized[1] - trg_size[1]) / 2 - src = src.resize(resized) + src = src.resize(resized, Image.ANTIALIAS) src = src.crop((0, cut, src.size[0], src.size[1] - cut)) else: resized = ( @@ -251,52 +300,69 @@ class WLCover(Cover): trg_size[1], ) cut = (resized[0] - trg_size[0]) / 2 - src = src.resize(resized) + src = src.resize(resized, Image.ANTIALIAS) src = src.crop((cut, 0, src.size[0] - cut, src.size[1])) - - img.paste(src, (self.bar_width, 0)) + + img.paste(src, (metr.bar_width, 0)) del src - box = TextBox(self.title_box_width, self.height, padding_y=20) - box.text(self.pretty_author(), - font=self.author_font, - line_height=self.author_lineskip, + box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y) + author_font = ImageFont.truetype( + self.author_font_ttf, metr.author_font_size) + box.text(self.pretty_author(), + font=author_font, + line_height=metr.author_lineskip, color=self.author_color, shadow_color=self.author_shadow, ) - box.skip(10) - box.draw.line((75, box.height, 275, box.height), - fill=self.author_color, width=2) - box.skip(15) + box.skip(metr.box_above_line) + box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height), + fill=self.author_color, width=metr.box_line_width) + box.skip(metr.box_below_line) + title_font = ImageFont.truetype( + self.title_font_ttf, metr.title_font_size) box.text(self.pretty_title(), - line_height=self.title_lineskip, - font=self.title_font, + line_height=metr.title_lineskip, + font=title_font, color=epoch_color, shadow_color=self.title_shadow, ) + + if self.with_logo: + logo = Image.open(get_resource('res/wl-logo-mono.png')) + logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS) + alpha = logo.split()[3] + alpha = ImageEnhance.Brightness(alpha).enhance(.75) + logo.putalpha(alpha) + box.skip(metr.logo_top + logo.size[1]) + box_img = box.image() if self.kind == 'Liryka': # top - box_top = 100 + box_top = metr.box_top_margin elif 
self.kind == 'Epika': # bottom - box_top = self.height - 100 - box_img.size[1] + box_top = metr.height - metr.box_bottom_margin - box_img.size[1] else: # center - box_top = (self.height - box_img.size[1]) / 2 + box_top = (metr.height - box_img.size[1]) / 2 - box_left = self.bar_width + (self.width - self.bar_width - + box_left = metr.bar_width + (metr.width - metr.bar_width - box_img.size[0]) / 2 - draw.rectangle((box_left, box_top, + draw.rectangle((box_left, box_top, box_left + box_img.size[0], box_top + box_img.size[1]), fill='#fff') img.paste(box_img, (box_left, box_top), box_img) - return img + if self.with_logo: + img.paste(logo, + (box_left + (box_img.size[0] - logo.size[0]) / 2, + box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo) + return img class VirtualoCover(Cover): @@ -319,7 +385,8 @@ class PrestigioCover(Cover): author_lineskip = 60 author_color = '#fff' author_shadow = '#000' - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Italic.ttf'), 50) + author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf') + author_font_size = 50 title_top = 0 title_margin_left = 118 @@ -327,7 +394,8 @@ class PrestigioCover(Cover): title_lineskip = 60 title_color = '#fff' title_shadow = '#000' - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Italic.ttf'), 50) + title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf') + title_font_size = 50 def pretty_title(self): return u"„%s”" % self.title @@ -343,14 +411,16 @@ class BookotekaCover(Cover): author_margin_right = 233 author_lineskip = 156 author_color = '#d9d919' - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 130) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 130 title_top = 400 title_margin_left = 307 title_margin_right = 233 title_lineskip = 168 title_color = '#d9d919' - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 140) + title_font_ttf = 
get_resource('fonts/JunicodeWL-Regular.ttf') + title_font_size = 140 format = 'PNG' @@ -359,8 +429,10 @@ class GandalfCover(Cover): width = 600 height = 730 background_img = get_resource('res/cover-gandalf.png') - author_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 30) - title_font = ImageFont.truetype(get_resource('fonts/JunicodeWL-Regular.ttf'), 40) + author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + author_font_size = 30 + title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf') + title_font_size = 40 logo_bottom = 25 logo_width = 250 format = 'PNG' diff --git a/librarian/dcparser.py b/librarian/dcparser.py index fff8ac2..bcee932 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -88,44 +88,63 @@ def as_unicode(text): else: return text.decode('utf-8') +def as_wluri_strict(text): + return WLURI.strict(text) + class Field(object): - def __init__(self, uri, attr_name, type=as_unicode, multiple=False, salias=None, **kwargs): + def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs): self.uri = uri self.name = attr_name - self.validator = type + self.validator = validator + self.strict = strict self.multiple = multiple self.salias = salias self.required = kwargs.get('required', True) and not kwargs.has_key('default') self.default = kwargs.get('default', [] if multiple else [None]) - def validate_value(self, val): + def validate_value(self, val, strict=False): + if strict and self.strict is not None: + validator = self.strict + else: + validator = self.validator try: if self.multiple: - if self.validator is None: + if validator is None: return val - return [ self.validator(v) if v is not None else v for v in val ] + return [ validator(v) if v is not None else v for v in val ] elif len(val) > 1: raise ValidationError("Multiple values not allowed for field '%s'" % self.uri) elif len(val) == 0: raise ValidationError("Field %s has no value to assign. 
Check your defaults." % self.uri) else: - if self.validator is None or val[0] is None: + if validator is None or val[0] is None: return val[0] - return self.validator(val[0]) + return validator(val[0]) except ValueError, e: raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) - def validate(self, fdict): + def validate(self, fdict, fallbacks=None, strict=False): + if fallbacks is None: + fallbacks = {} if not fdict.has_key(self.uri): if not self.required: - f = self.default + # Accept single value for single fields and saliases. + if self.name in fallbacks: + if self.multiple: + f = fallbacks[self.name] + else: + f = [fallbacks[self.name]] + elif self.salias and self.salias in fallbacks: + f = [fallbacks[self.salias]] + else: + f = self.default else: raise ValidationError("Required field %s not found" % self.uri) else: f = fdict[self.uri] - return self.validate_value(f) + return self.validate_value(f, strict=strict) def __eq__(self, other): if isinstance(other, Field) and other.name == self.name: @@ -161,6 +180,9 @@ class WorkInfo(object): as_person, salias='editor', multiple=True, default=[]), Field( DCNS('contributor.technical_editor'), 'technical_editors', as_person, salias='technical_editor', multiple=True, default=[]), + Field( DCNS('contributor.funding'), 'funders', + salias='funder', multiple=True, default=[]), + Field( DCNS('contributor.thanks'), 'thanks', required=False), Field( DCNS('date'), 'created_at', as_date), Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), @@ -171,18 +193,18 @@ class WorkInfo(object): Field( DCNS('source'), 'source_name', required=False), Field( DCNS('source.URL'), 'source_url', required=False), - Field( DCNS('identifier.url'), 'url', WLURI), + Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), Field( DCNS('rights.license'), 'license', required=False), Field( DCNS('rights'), 'license_description'), ) @classmethod - def from_string(cls, xml): + def 
from_string(cls, xml, *args, **kwargs): from StringIO import StringIO - return cls.from_file(StringIO(xml)) + return cls.from_file(StringIO(xml), *args, **kwargs) @classmethod - def from_file(cls, xmlfile): + def from_file(cls, xmlfile, *args, **kwargs): desc_tag = None try: iter = etree.iterparse(xmlfile, ['start', 'end']) @@ -203,14 +225,14 @@ class WorkInfo(object): # if there is no end, Expat should yell at us with an ExpatError # extract data from the element and make the info - return cls.from_element(desc_tag) + return cls.from_element(desc_tag, *args, **kwargs) except XMLSyntaxError, e: raise ParseError(e) except ExpatError, e: raise ParseError(e) @classmethod - def from_element(cls, rdf_tag): + def from_element(cls, rdf_tag, *args, **kwargs): # the tree is already parsed, so we don't need to worry about Expat errors field_dict = {} desc = rdf_tag.find(".//" + RDFNS('Description')) @@ -223,9 +245,9 @@ class WorkInfo(object): fv.append(e.text) field_dict[e.tag] = fv - return cls(desc.attrib, field_dict) + return cls(desc.attrib, field_dict, *args, **kwargs) - def __init__(self, rdf_attrs, dc_fields): + def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False): """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the given field. """ @@ -234,16 +256,12 @@ class WorkInfo(object): self.fmap = {} for field in self.FIELDS: - value = field.validate( dc_fields ) + value = field.validate(dc_fields, fallbacks=fallbacks, + strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: self.fmap[field.salias] = field - self.validate() - - def validate(self): - self.url.validate_language(self.language) - def __getattribute__(self, name): try: field = object.__getattribute__(self, 'fmap')[name] @@ -254,7 +272,7 @@ class WorkInfo(object): if not field.multiple: raise "OUCH!! 
for field %s" % name - return value[0] + return value[0] if value else None except (KeyError, AttributeError): return object.__getattribute__(self, name) @@ -353,17 +371,23 @@ class BookInfo(WorkInfo): Field( DCNS('audience'), 'audiences', salias='audience', multiple=True, required=False), - Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True), - Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True), - Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True), + Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, + required=False), + Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True, + required=False), + Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True, + required=False), Field( DCNS('contributor.translator'), 'translators', \ as_person, salias='translator', multiple=True, default=[]), - Field( DCNS('relation.hasPart'), 'parts', WLURI, multiple=True, required=False), - - Field( DCNS('relation.cover_image.url'), 'cover_url', required=False), - Field( DCNS('relation.cover_image.attribution'), 'cover_by', required=False), - Field( DCNS('relation.cover_image.source'), 'cover_source', required=False), + Field( DCNS('relation.hasPart'), 'parts', + WLURI, strict=as_wluri_strict, multiple=True, required=False), + Field( DCNS('relation.isVariantOf'), 'variant_of', + WLURI, strict=as_wluri_strict, required=False), + + Field( DCNS('relation.coverImage.url'), 'cover_url', required=False), + Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False), + Field( DCNS('relation.coverImage.source'), 'cover_source', required=False), ) diff --git a/librarian/epub.py b/librarian/epub.py index 80941eb..bfd7570 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -7,6 +7,7 @@ from __future__ import with_statement import os import os.path +import re import subprocess from StringIO import StringIO from copy import deepcopy @@ -109,31 +110,74 @@ def 
find_annotations(annotations, source, part_no): find_annotations(annotations, child, part_no) +class Stanza(object): + """ + Converts / verse endings into verse elements in a stanza. + + Slashes may only occur directly in the stanza. Any slashes in subelements + will be ignored, and the subelements will be put inside verse elements. + + >>> s = etree.fromstring("a c c/\\nbx/\\nyc/ \\nd") + >>> Stanza(s).versify() + >>> print etree.tostring(s) + a c cbx/ + ycd + + """ + def __init__(self, stanza_elem): + self.stanza = stanza_elem + self.verses = [] + self.open_verse = None + + def versify(self): + self.push_text(self.stanza.text) + for elem in self.stanza: + self.push_elem(elem) + self.push_text(elem.tail) + tail = self.stanza.tail + self.stanza.clear() + self.stanza.tail = tail + self.stanza.extend(self.verses) + + def open_normal_verse(self): + self.open_verse = self.stanza.makeelement("wers_normalny") + self.verses.append(self.open_verse) + + def get_open_verse(self): + if self.open_verse is None: + self.open_normal_verse() + return self.open_verse + + def push_text(self, text): + if not text: + return + for i, verse_text in enumerate(re.split(r"/\s*\n", text)): + if i: + self.open_normal_verse() + verse = self.get_open_verse() + if len(verse): + verse[-1].tail = (verse[-1].tail or "") + verse_text + else: + verse.text = (verse.text or "") + verse_text + + def push_elem(self, elem): + if elem.tag.startswith("wers"): + verse = deepcopy(elem) + verse.tail = None + self.verses.append(verse) + self.open_verse = verse + else: + appended = deepcopy(elem) + appended.tail = None + self.get_open_verse().append(appended) + + def replace_by_verse(tree): """ Find stanzas and create new verses in place of a '/' character """ stanzas = tree.findall('.//' + WLNS('strofa')) - for node in stanzas: - for child_node in node: - if child_node.tag in ('slowo_obce', 'wyroznienie'): - foreign_verses = inner_xml(child_node).split('/\n') - if len(foreign_verses) > 1: - new_foreign = '' - 
for foreign_verse in foreign_verses: - if foreign_verse.startswith('', foreign_verse, '')) - set_inner_xml(child_node, new_foreign) - verses = inner_xml(node).split('/\n') - if len(verses) > 1: - modified_inner_xml = '' - for verse in verses: - if verse.startswith('', verse, '')) - set_inner_xml(node, modified_inner_xml) + for stanza in stanzas: + Stanza(stanza).versify() def add_to_manifest(manifest, partno): @@ -291,8 +335,8 @@ def transform(wldoc, verbose=False, """ produces a EPUB file sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object or True for default - flags: less-advertising, without-fonts + cover: a cover.Cover factory or True for default + flags: less-advertising, without-fonts, working-copy """ def transform_file(wldoc, chunk_counter=1, first=True, sample=None): @@ -368,6 +412,15 @@ def transform(wldoc, verbose=False, for flag in flags: document.edoc.getroot().set(flag, 'yes') + # add editors info + document.edoc.getroot().set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) + if document.book_info.funders: + document.edoc.getroot().set('funders', u', '.join( + document.book_info.funders)) + if document.book_info.thanks: + document.edoc.getroot().set('thanks', document.book_info.thanks) + opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) guide = opf.find('.//' + OPFNS('guide')) @@ -396,28 +449,29 @@ def transform(wldoc, verbose=False, if cover: if cover is True: cover = WLCover - if cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) cover_file = StringIO() - c = cover(document.book_info) - c.save(cover_file) - c_name = 'cover.%s' % c.ext() - zip.writestr(os.path.join('OPS', c_name), 
cover_file.getvalue()) + bound_cover = cover(document.book_info) + bound_cover.save(cover_file) + cover_name = 'cover.%s' % bound_cover.ext() + zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue()) del cover_file cover_tree = etree.parse(get_resource('epub/cover.html')) - cover_tree.find('//' + XHTMLNS('img')).set('src', c_name) + cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name) zip.writestr('OPS/cover.html', etree.tostring( cover_tree, method="html", pretty_print=True)) + if bound_cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + manifest.append(etree.fromstring( '')) manifest.append(etree.fromstring( - '' % (c_name, c.mime_type()))) + '' % (cover_name, bound_cover.mime_type()))) spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring('')) @@ -457,6 +511,15 @@ def transform(wldoc, verbose=False, zip.writestr('OPS/annotations.html', etree.tostring( html_tree, method="html", pretty_print=True)) + toc.add("Wesprzyj Wolne Lektury", "support.html") + manifest.append(etree.fromstring( + '')) + spine.append(etree.fromstring( + '')) + html_string = open(get_resource('epub/support.html')).read() + chars.update(used_chars(etree.fromstring(html_string))) + zip.writestr('OPS/support.html', html_string) + toc.add("Strona redakcyjna", "last.html") manifest.append(etree.fromstring( '')) @@ -470,7 +533,10 @@ def transform(wldoc, verbose=False, if not flags or not 'without-fonts' in flags: # strip fonts tmpdir = mkdtemp('-librarian-epub') - cwd = os.getcwd() + try: + cwd = os.getcwd() + except OSError: + cwd = None os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 
'DejaVuSerif-BoldItalic.ttf': @@ -485,7 +551,8 @@ def transform(wldoc, verbose=False, manifest.append(etree.fromstring( '' % (fname, fname))) rmtree(tmpdir) - os.chdir(cwd) + if cwd is not None: + os.chdir(cwd) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) title = document.book_info.title diff --git a/librarian/epub/style.css b/librarian/epub/style.css index 622c8da..a862dce 100644 --- a/librarian/epub/style.css +++ b/librarian/epub/style.css @@ -364,7 +364,9 @@ em.author-emphasis margin-right: 2em; } -p.minor { +p.minor-info { + text-align: center; + margin-bottom: 1em; font-size: 0.75em; } p.footer { diff --git a/librarian/epub/support.html b/librarian/epub/support.html new file mode 100755 index 0000000..8ca550b --- /dev/null +++ b/librarian/epub/support.html @@ -0,0 +1,48 @@ + + + + + Wesprzyj Wolne Lektury + + + +
+ +

Wesprzyj Wolne Lektury!

+ +

+ Wolne Lektury to projekt fundacji Nowoczesna Polska – organizacji + pożytku publicznego działającej na rzecz wolności korzystania + z dóbr kultury.

+ +

+ Co roku do domeny publicznej przechodzi twórczość kolejnych autorów. + Dzięki Twojemu wsparciu będziemy je mogli udostępnić wszystkim bezpłatnie. +

+ +

+ Jak możesz pomóc? +

+ +

+ Logo 1%
+ Przekaż 1% podatku na rozwój Wolnych Lektur:
+ Fundacja Nowoczesna Polska
+ KRS 0000070056 +

+ +

+ Pomóż uwolnić konkretną książkę, wspierając + zbiórkę + na stronie wolnelektury.pl. +

+ +

+ Przekaż darowiznę na konto: + szczegóły + na stronie Fundacji. +

+ +
+ + diff --git a/librarian/epub/xsltLast.xsl b/librarian/epub/xsltLast.xsl index 751f97a..41afd36 100644 --- a/librarian/epub/xsltLast.xsl +++ b/librarian/epub/xsltLast.xsl @@ -32,7 +32,7 @@ - Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie + Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to @@ -63,6 +63,8 @@ + +

Okładka na podstawie: @@ -81,15 +83,8 @@

-
- Logo 1% -
Przekaż 1% podatku na rozwój Wolnych Lektur.
-
Nazwa organizacji: Fundacja Nowoczesna Polska
-
KRS 0000070056
-
-

 

-

+

Plik wygenerowany dnia .

@@ -103,20 +98,18 @@ - +

Opracowanie redakcyjne i przypisy: - - - , - - . -

+ .

- -
+ + +

Publikację ufundowali i ufundowały: + .

+
diff --git a/librarian/epub/xsltTitle.xsl b/librarian/epub/xsltTitle.xsl index 74ef64a..3d734be 100644 --- a/librarian/epub/xsltTitle.xsl +++ b/librarian/epub/xsltTitle.xsl @@ -36,6 +36,10 @@ + +

[Kopia robocza]

+
+

@@ -48,6 +52,10 @@

+ +

+
+

Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska.

diff --git a/librarian/fb2.py b/librarian/fb2.py new file mode 100644 index 0000000..d979566 --- /dev/null +++ b/librarian/fb2.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import os.path +from copy import deepcopy +from lxml import etree + +from librarian import functions, OutputFile +from .epub import replace_by_verse + + +functions.reg_substitute_entities() +functions.reg_person_name() + + +def sectionify(tree): + """Finds section headers and adds a tree of _section tags.""" + sections = ['naglowek_czesc', + 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', + 'naglowek_podrozdzial'] + section_level = dict((v,k) for (k,v) in enumerate(sections)) + + # We can assume there are just subelements an no text at section level. + for level, section_name in reversed(list(enumerate(sections))): + for header in tree.findall('//' + section_name): + section = header.makeelement("_section") + header.addprevious(section) + section.append(header) + sibling = section.getnext() + while (sibling is not None and + section_level.get(sibling.tag, 1000) > level): + section.append(sibling) + sibling = section.getnext() + + +def transform(wldoc, verbose=False, + cover=None, flags=None): + """ produces a FB2 file + + cover: a cover.Cover object or True for default + flags: less-advertising, working-copy + """ + + document = deepcopy(wldoc) + del wldoc + + if flags: + for flag in flags: + document.edoc.getroot().set(flag, 'yes') + + style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt') + style = etree.parse(style_filename) + + replace_by_verse(document.edoc) + sectionify(document.edoc) + + result = document.transform(style) + + return OutputFile.from_string(unicode(result).encode('utf-8')) + +# vim:et diff --git a/librarian/fb2/description.xslt b/librarian/fb2/description.xslt new file mode 100644 index 
0000000..312df2d --- /dev/null +++ b/librarian/fb2/description.xslt @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + literature + + + + + + + + + + + book2fb2 + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/drama.xslt b/librarian/fb2/drama.xslt new file mode 100755 index 0000000..ab8fb06 --- /dev/null +++ b/librarian/fb2/drama.xslt @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + +

+
+ + +

+
+ + +

+
+ + + + + +
diff --git a/librarian/fb2/fb2.xslt b/librarian/fb2/fb2.xslt new file mode 100644 index 0000000..950b526 --- /dev/null +++ b/librarian/fb2/fb2.xslt @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:apply-templates mode="title" + select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul"/> + <xsl:call-template name="translators" /> + + + + +

+ Utwór opracowany został w ramach projektu + Wolne Lektury + przez fundację + Nowoczesna Polska. +

+
+ + + +
+ + + + + + + +

+
+ + + +

+ tłum. + + , + + +

+
+
+ + + + + + + + +
diff --git a/librarian/fb2/footnotes.xslt b/librarian/fb2/footnotes.xslt new file mode 100644 index 0000000..09270b9 --- /dev/null +++ b/librarian/fb2/footnotes.xslt @@ -0,0 +1,42 @@ + + + + + + + + + + + fn + +

+ + [przypis autorski] +

+
+
+ + + + + + + note + #fn + + [ + + ] + + +
diff --git a/librarian/fb2/inline.xslt b/librarian/fb2/inline.xslt new file mode 100644 index 0000000..03c6b65 --- /dev/null +++ b/librarian/fb2/inline.xslt @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + „ + + ” + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/paragraphs.xslt b/librarian/fb2/paragraphs.xslt new file mode 100644 index 0000000..68c6257 --- /dev/null +++ b/librarian/fb2/paragraphs.xslt @@ -0,0 +1,46 @@ + + + + + + + + + +

+
+ + + + + + +

+
+ + + + + + +

*

+
+ + +

————————

+
+ + + + + +
diff --git a/librarian/fb2/poems.xslt b/librarian/fb2/poems.xslt new file mode 100644 index 0000000..31b05b4 --- /dev/null +++ b/librarian/fb2/poems.xslt @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/fb2/sections.xslt b/librarian/fb2/sections.xslt new file mode 100644 index 0000000..80ffb65 --- /dev/null +++ b/librarian/fb2/sections.xslt @@ -0,0 +1,47 @@ + + + + + + + +
+ + + + + + + + + + +
+
+ + + +
+ + + +
+ +
+
+ + + + <p><xsl:apply-templates mode="inline"/></p> + +
diff --git a/librarian/html.py b/librarian/html.py index 997f904..70fc6e5 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -57,6 +57,7 @@ def transform(wldoc, stylesheet='legacy', options=None, flags=None): if html_has_content(result): add_anchors(result.getroot()) + add_table_of_themes(result.getroot()) add_table_of_contents(result.getroot()) return OutputFile.from_string(etree.tostring(result, method='html', @@ -163,7 +164,8 @@ def extract_fragments(input_filename): # Process all elements except begin and end else: # Omit annotation tags - if len(element.get('name', '')) or element.get('class', '') == 'annotation': + if (len(element.get('name', '')) or + element.get('class', '') in ('annotation', 'anchor')): if event == 'end' and element.tail: for fragment_id in open_fragments: open_fragments[fragment_id].append('text', element.tail) @@ -220,18 +222,27 @@ def add_anchors(root): counter += 1 +def raw_printable_text(element): + working = copy.deepcopy(element) + for e in working.findall('a'): + if e.get('class') == 'annotation': + e.text = '' + return etree.tostring(working, method='text', encoding=unicode).strip() + + def add_table_of_contents(root): sections = [] counter = 1 for element in root.iterdescendants(): if element.tag in ('h2', 'h3'): - if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)): + if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)): continue + element_text = raw_printable_text(element) if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2': - sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), [])) + sections[-1][3].append((counter, element.tag, element_text, [])) else: - sections.append((counter, element.tag, ''.join(element.xpath('text()')), [])) + sections.append((counter, element.tag, element_text, [])) add_anchor(element, "s%d" % counter, with_link=False) counter += 1 @@ 
-253,6 +264,34 @@ def add_table_of_contents(root): root.insert(0, toc) + +def add_table_of_themes(root): + try: + from sortify import sortify + except ImportError: + sortify = lambda x: x + + book_themes = {} + for fragment in root.findall('.//a[@class="theme-begin"]'): + if not fragment.text: + continue + theme_names = [s.strip() for s in fragment.text.split(',')] + for theme_name in theme_names: + book_themes.setdefault(theme_name, []).append(fragment.get('name')) + book_themes = book_themes.items() + book_themes.sort(key=lambda s: sortify(s[0])) + themes_div = etree.Element('div', id="themes") + themes_ol = etree.SubElement(themes_div, 'ol') + for theme_name, fragments in book_themes: + themes_li = etree.SubElement(themes_ol, 'li') + themes_li.text = "%s: " % theme_name + for i, fragment in enumerate(fragments): + item = etree.SubElement(themes_li, 'a', href="#%s" % fragment) + item.text = str(i + 1) + item.tail = ' ' + root.insert(0, themes_div) + + def extract_annotations(html_path): """For each annotation, yields a tuple: anchor, text, html.""" diff --git a/librarian/mobi.py b/librarian/mobi.py old mode 100755 new mode 100644 index 1e7569b..d98b838 --- a/librarian/mobi.py +++ b/librarian/mobi.py @@ -19,7 +19,7 @@ def transform(wldoc, verbose=False, wldoc: a WLDocument sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object + cover: a cover.Cover factory overriding default flags: less-advertising, """ @@ -31,10 +31,10 @@ def transform(wldoc, verbose=False, if not cover: cover = WLCover cover_file = NamedTemporaryFile(suffix='.png', delete=False) - c = cover(book_info) - c.save(cover_file) + bound_cover = cover(book_info) + bound_cover.save(cover_file) - if cover.uses_dc_cover: + if bound_cover.uses_dc_cover: if document.book_info.cover_by: document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) if document.book_info.cover_source: diff --git a/librarian/mobi/style.css b/librarian/mobi/style.css old mode 
100755 new mode 100644 diff --git a/librarian/packagers.py b/librarian/packagers.py index 36a7b60..ddfd7c8 100644 --- a/librarian/packagers.py +++ b/librarian/packagers.py @@ -75,7 +75,7 @@ class PrestigioPdfPackager(PdfPackager): flags = ('less-advertising',) -class VirtualoEpubPackager(Packager): +class VirtualoPackager(Packager): @staticmethod def utf_trunc(text, limit): """ truncates text to at most `limit' bytes in utf-8 """ @@ -133,9 +133,16 @@ class VirtualoEpubPackager(Packager): cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) outfile = os.path.join(outfile_dir, '1.epub') outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - doc.save_output_file(epub.transform(doc), + doc.save_output_file(doc.as_epub(), output_path=outfile) - doc.save_output_file(epub.transform(doc, sample=25), + doc.save_output_file(doc.as_epub(doc, sample=25), + output_path=outfile_sample) + outfile = os.path.join(outfile_dir, '1.mobi') + outfile_sample = os.path.join(outfile_dir, '1.sample.mobi') + doc.save_output_file(doc.as_mobi(cover=cover.VirtualoCover), + output_path=outfile) + doc.save_output_file( + doc.as_mobi(doc, cover=cover.VirtualoCover, sample=25), output_path=outfile_sample) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { diff --git a/librarian/parser.py b/librarian/parser.py index 5ae06e2..a9e8c65 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -5,6 +5,7 @@ # from librarian import ValidationError, NoDublinCore, ParseError, NoProvider from librarian import RDFNS +from librarian.cover import WLCover from librarian import dcparser from xml.parsers.expat import ExpatError @@ -19,7 +20,8 @@ class WLDocument(object): LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) provider = None - def __init__(self, edoc, parse_dublincore=True, provider=None): + def __init__(self, edoc, parse_dublincore=True, provider=None, + strict=False, meta_fallbacks=None): self.edoc = edoc self.provider = provider @@ -36,7 +38,8 @@ 
class WLDocument(object): if self.rdf_elem is None: raise NoDublinCore('Document has no DublinCore - which is required.') - self.book_info = dcparser.BookInfo.from_element(self.rdf_elem) + self.book_info = dcparser.BookInfo.from_element( + self.rdf_elem, fallbacks=meta_fallbacks, strict=strict) else: self.book_info = None @@ -45,7 +48,7 @@ class WLDocument(object): return cls.from_file(StringIO(xml), *args, **kwargs) @classmethod - def from_file(cls, xmlfile, parse_dublincore=True, provider=None): + def from_file(cls, xmlfile, *args, **kwargs): # first, prepare for parsing if isinstance(xmlfile, basestring): @@ -66,7 +69,7 @@ class WLDocument(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(StringIO(data.encode('utf-8')), parser) - return cls(tree, parse_dublincore=parse_dublincore, provider=provider) + return cls(tree, *args, **kwargs) except (ExpatError, XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) @@ -146,7 +149,7 @@ class WLDocument(object): xpath = self.path_to_xpath(key) node = self.edoc.xpath(xpath)[0] repl = etree.fromstring(u"<%s>%s" %(node.tag, data, node.tag) ) - node.getparent().replace(node, repl); + node.getparent().replace(node, repl) except Exception, e: unmerged.append( repr( (key, xpath, e) ) ) @@ -162,6 +165,21 @@ class WLDocument(object): node.tag = 'span' node.tail = tail + def editors(self): + """Returns a set of all editors for book and its children. 
+ + :returns: set of dcparser.Person objects + """ + if self.book_info is None: + raise NoDublinCore('No Dublin Core in document.') + persons = set(self.book_info.editors + + self.book_info.technical_editors) + for child in self.parts(): + persons.update(child.editors()) + if None in persons: + persons.remove(None) + return persons + # Converters def as_html(self, *args, **kwargs): @@ -184,6 +202,15 @@ class WLDocument(object): from librarian import mobi return mobi.transform(self, *args, **kwargs) + def as_fb2(self, *args, **kwargs): + from librarian import fb2 + return fb2.transform(self, *args, **kwargs) + + def as_cover(self, cover_class=None, *args, **kwargs): + if cover_class is None: + cover_class = WLCover + return cover_class(self.book_info, *args, **kwargs).output_file() + def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None): if output_dir_path: @@ -192,7 +219,7 @@ class WLDocument(object): save_path = os.path.join(save_path, unicode(self.book_info.author).encode('utf-8')) save_path = os.path.join(save_path, - self.book_info.uri.filename_stem()) + self.book_info.uri.slug) if ext: save_path += '.%s' % ext else: diff --git a/librarian/pdf.py b/librarian/pdf.py index bcf8d9a..b4edfdb 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -3,6 +3,12 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +"""PDF creation library. + +Creates one big XML from the book and its children, converts it to LaTeX +with TeXML, then runs it by XeLaTeX. 
+ +""" from __future__ import with_statement import os import os.path @@ -34,13 +40,14 @@ STYLESHEETS = { 'wl2tex': 'pdf/wl2tex.xslt', } -CUSTOMIZATIONS = [ - 'nofootnotes', - 'nothemes', - 'onehalfleading', - 'doubleleading', - 'nowlfont', - ] +#CUSTOMIZATIONS = [ +# 'nofootnotes', +# 'nothemes', +# 'defaultleading', +# 'onehalfleading', +# 'doubleleading', +# 'nowlfont', +# ] def insert_tags(doc, split_re, tagname, exclude=None): """ inserts for every occurence of `split_re' in text nodes in the `doc' tree @@ -134,9 +141,13 @@ def hack_motifs(doc): def parse_creator(doc): - """ find all dc:creator and dc.contributor tags and add *_parsed versions with forenames first """ + """Generates readable versions of creator and translator tags. + + Finds all dc:creator and dc.contributor.translator tags + and adds *_parsed versions with forenames first. + """ for person in doc.xpath("|".join('//dc:'+(tag) for tag in ( - 'creator', 'contributor.translator', 'contributor.editor', 'contributor.technical_editor')), + 'creator', 'contributor.translator')), namespaces = {'dc': str(DCNS)})[::-1]: if not person.text: continue @@ -180,38 +191,50 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, verbose: prints all output from LaTeX save_tex: path to save the intermediary LaTeX file to morefloats (old/new/none): force specific morefloats - cover: a cover.Cover object + cover: a cover.Cover factory or True for default flags: less-advertising, customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) """ # Parse XSLT try: + book_info = wldoc.book_info document = load_including_children(wldoc) + root = document.edoc.getroot() if cover: if cover is True: cover = WLCover - document.edoc.getroot().set('data-cover-width', str(cover.width)) - document.edoc.getroot().set('data-cover-height', str(cover.height)) - if cover.uses_dc_cover: - if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', 
document.book_info.cover_by) - if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + bound_cover = cover(book_info) + root.set('data-cover-width', str(bound_cover.width)) + root.set('data-cover-height', str(bound_cover.height)) + if bound_cover.uses_dc_cover: + if book_info.cover_by: + root.set('data-cover-by', book_info.cover_by) + if book_info.cover_source: + root.set('data-cover-source', + book_info.cover_source) if flags: for flag in flags: - document.edoc.getroot().set('flag-' + flag, 'yes') + root.set('flag-' + flag, 'yes') # check for LaTeX packages if morefloats: - document.edoc.getroot().set('morefloats', morefloats.lower()) + root.set('morefloats', morefloats.lower()) elif package_available('morefloats', 'maxfloats=19'): - document.edoc.getroot().set('morefloats', 'new') + root.set('morefloats', 'new') # add customizations if customizations is not None: - document.edoc.getroot().set('customizations', u','.join(customizations)) + root.set('customizations', u','.join(customizations)) + + # add editors info + root.set('editors', u', '.join(sorted( + editor.readable() for editor in document.editors()))) + if document.book_info.funders: + root.set('funders', u', '.join(document.book_info.funders)) + if document.book_info.thanks: + root.set('thanks', document.book_info.thanks) # hack the tree move_motifs_inside(document.edoc) @@ -230,9 +253,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, temp = mkdtemp('-wl2pdf') if cover: - c = cover(document.book_info) with open(os.path.join(temp, 'cover.png'), 'w') as f: - c.save(f) + bound_cover.save(f) del document # no longer needed large object :) @@ -249,7 +271,10 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, shutil.copy(get_resource('pdf/wl.cls'), temp) shutil.copy(get_resource('res/wl-logo.png'), temp) - cwd = os.getcwd() + try: + cwd = os.getcwd() + except OSError: + cwd = None os.chdir(temp) if 
verbose: @@ -259,7 +284,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, if p: raise ParseError("Error parsing .tex file") - os.chdir(cwd) + if cwd is not None: + os.chdir(cwd) output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False) pdf_path = os.path.join(temp, 'doc.pdf') @@ -289,7 +315,8 @@ def load_including_children(wldoc=None, provider=None, uri=None): text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - document = WLDocument.from_string(text, parse_dublincore=True) + document = WLDocument.from_string(text, + parse_dublincore=True, provider=provider) document.swap_endlines() for child_uri in document.book_info.parts: diff --git a/librarian/pdf/wl.cls b/librarian/pdf/wl.cls index c9305ca..a9ace8e 100644 --- a/librarian/pdf/wl.cls +++ b/librarian/pdf/wl.cls @@ -6,6 +6,7 @@ % % nofootnotes - disable generation of footnotes % nothemes - disable generation of themes +% defaultleading - default leading % onehalfleading - leading of 1.5 (interlinia) % doubleleading - double leading (interlinia) % a4paper,... - paper size as required by LaTeX @@ -39,6 +40,7 @@ %% \DeclareOption{14pt}{\renewcommand{\normalsize}{\AtEndOfClass{\fontsize{14}{17}\selectfont}}} +\DeclareOption{defaultleading}{} \DeclareOption{doubleleading}{\AtBeginDocument{\doublespacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}} \DeclareOption{onehalfleading}{\AtBeginDocument{\onehalfspacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}} @@ -213,6 +215,11 @@ Letters={SmallCaps,UppercaseSmallCaps} \vspace{.5em} \fi + \ifdefined\thanknote + \thanknote + \vspace{.5em} + \fi + Utwór opracowany został w ramach projektu \href{http://www.wolnelektury.pl/}{Wolne Lektury} przez \href{http://nowoczesnapolska.org.pl}{fundację Nowoczesna Polska}. 
@@ -244,8 +251,37 @@ Letters={SmallCaps,UppercaseSmallCaps} \editors + \ifdefined\funders + \vspace{.6em} + \funders + \fi + + \ifdefined\coverby + \vspace{.6em} + \coverby + \fi + \vspace{.6em} - \coverby + \emph{Wesprzyj Wolne Lektury!} + + Wolne Lektury to projekt fundacji Nowoczesna Polska – organizacji + pożytku publicznego działającej na rzecz wolności korzystania + z dóbr kultury. + + Co roku do domeny publicznej przechodzi twórczość kolejnych autorów. + Dzięki Twojemu wsparciu będziemy je mogli udostępnić wszystkim bezpłatnie. + + \vspace{.6em} + \emph{Jak możesz pomóc?} + + Przekaż 1\% podatku na rozwój Wolnych Lektur: + Fundacja Nowoczesna Polska, KRS 0000070056. + + Pomóż uwolnić konkretną książkę, wspierając + \href{http://www.wolnelektury.pl/wesprzyj/}{zbiórkę na stronie wolnelektury.pl}. + + Przekaż darowiznę na konto: + \href{http://nowoczesnapolska.org.pl/pomoz-nam/wesprzyj-nas/}{szczegóły na stronie Fundacji}. \color{black} } diff --git a/librarian/pdf/wl2tex.xslt b/librarian/pdf/wl2tex.xslt index 1a675ba..ca948da 100644 --- a/librarian/pdf/wl2tex.xslt +++ b/librarian/pdf/wl2tex.xslt @@ -64,6 +64,11 @@ 210mm + + + \def\thanknote{} + + @@ -88,8 +93,8 @@ - \def\coverby{ - Okładka na podstawie: + + \def\coverby{Okładka na podstawie: \href{\datacoversource}{\datacoverby} @@ -98,11 +103,17 @@ \datacoverby{}
- } + + \def\editors{} + + \def\funders{Publikację ufundowali i ufundowały: + .} + + @@ -145,7 +156,7 @@ \def\bookurl{} - \def\rightsinfo{Ten utwór nie jest chroniony prawem autorskim i~znajduje się w~domenie + \def\rightsinfo{Ten utwór nie jest objęty majątkowym prawem autorskim i~znajduje się w~domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i~rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to @@ -163,7 +174,6 @@ \vspace{.6em} } \def\description{} - \def\editors{} @@ -376,13 +386,10 @@ - + Opracowanie redakcyjne i przypisy: - - - , - - . + + . diff --git a/librarian/picture.py b/librarian/picture.py index 0f5c99a..ee3c61d 100644 --- a/librarian/picture.py +++ b/librarian/picture.py @@ -11,18 +11,15 @@ import re class WLPictureURI(WLURI): _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/' - '(?P[-a-z0-9]+)(/(?P[a-z]{3}))?/?$') - - def __init__(self, *args, **kw): - super(WLPictureURI, self).__init__(*args, **kw) + '(?P[-a-z0-9]+)/?$') @classmethod - def from_slug_and_lang(cls, slug, lang): + def from_slug(cls, slug): uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug return cls(uri) - def filename_stem(self): - return self.slug +def as_wlpictureuri_strict(text): + return WLPictureURI.strict(text) class PictureInfo(WorkInfo): @@ -39,15 +36,10 @@ class PictureInfo(WorkInfo): Field(DCNS('description.medium'), 'medium', required=False), Field(DCNS('description.dimensions'), 'original_dimensions', required=False), Field(DCNS('format'), 'mime_type', required=False), - Field(DCNS('identifier.url'), 'url', WLPictureURI), + Field(DCNS('identifier.url'), 'url', WLPictureURI, + strict=as_wlpictureuri_strict), ) - def validate(self): - """ - WorkInfo has a language validation code only, which we do not need. 
- """ - pass - class ImageStore(object): EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp' diff --git a/librarian/res/cover.png b/librarian/res/cover.png new file mode 100644 index 0000000..0a1511e Binary files /dev/null and b/librarian/res/cover.png differ diff --git a/librarian/res/webtreatsetc-5647576127-ccby.png b/librarian/res/webtreatsetc-5647576127-ccby.png new file mode 100644 index 0000000..6919e93 Binary files /dev/null and b/librarian/res/webtreatsetc-5647576127-ccby.png differ diff --git a/librarian/res/wl-logo-mono.png b/librarian/res/wl-logo-mono.png new file mode 100644 index 0000000..c99447e Binary files /dev/null and b/librarian/res/wl-logo-mono.png differ diff --git a/librarian/res/wl-logo-small.png b/librarian/res/wl-logo-small.png index 104d56a..b32c46d 100644 Binary files a/librarian/res/wl-logo-small.png and b/librarian/res/wl-logo-small.png differ diff --git a/librarian/text.py b/librarian/text.py index d99e7cf..9a4fd7a 100644 --- a/librarian/text.py +++ b/librarian/text.py @@ -26,7 +26,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz %(license_description)s.%(source)s -%(description)s%(contributors)s +%(description)s%(contributors)s%(funders)s """ def transform(wldoc, flags=None, **options): @@ -59,7 +59,7 @@ def transform(wldoc, flags=None, **options): if license: license_description = u"Ten utwór jest udostepniony na licencji %s: \n%s" % (license_description, license) else: - license_description = u"Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. 
Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)" + license_description = u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/)" source = parsed_dc.source_name if source: @@ -70,7 +70,10 @@ def transform(wldoc, flags=None, **options): contributors = ', '.join(person.readable() for person in sorted(set(p for p in (parsed_dc.technical_editors + parsed_dc.editors) if p))) if contributors: - contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors + contributors = "\n\nOpracowanie redakcyjne i przypisy: %s." % contributors + funders = ', '.join(parsed_dc.funders) + if funders: + funders = u"\n\nPublikację ufundowali i ufundowały: %s." % funders else: description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).' 
url = '*' * 10 @@ -78,6 +81,7 @@ def transform(wldoc, flags=None, **options): license_description = "" source = "" contributors = "" + funders = "" return OutputFile.from_string((TEMPLATE % { 'description': description, 'url': url, @@ -85,6 +89,7 @@ def transform(wldoc, flags=None, **options): 'text': unicode(result), 'source': source, 'contributors': contributors, + 'funders': funders, }).encode('utf-8')) else: return OutputFile.from_string(unicode(result).encode('utf-8')) diff --git a/librarian/xslt/book2html.xslt b/librarian/xslt/book2html.xslt old mode 100755 new mode 100644 index 8e1a574..81e865b --- a/librarian/xslt/book2html.xslt +++ b/librarian/xslt/book2html.xslt @@ -10,6 +10,7 @@ xmlns:dc="http://purl.org/dc/elements/1.1/" > + @@ -507,23 +508,28 @@ +

+

+

+

+
@@ -532,11 +538,15 @@
-

+

+ + +

+ @@ -711,5 +721,19 @@ + + + + + + + - \ No newline at end of file + diff --git a/librarian/xslt/book2txt.xslt b/librarian/xslt/book2txt.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/config.xml b/librarian/xslt/config.xml old mode 100755 new mode 100644 diff --git a/librarian/xslt/normalize.xslt b/librarian/xslt/normalize.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_base.xslt b/librarian/xslt/wl2html_base.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_full.xslt b/librarian/xslt/wl2html_full.xslt old mode 100755 new mode 100644 diff --git a/librarian/xslt/wl2html_partial.xslt b/librarian/xslt/wl2html_partial.xslt old mode 100755 new mode 100644 diff --git a/scripts/book2cover b/scripts/book2cover index d2befc3..758ab0e 100755 --- a/scripts/book2cover +++ b/scripts/book2cover @@ -4,36 +4,31 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse +from StringIO import StringIO +from librarian import OutputFile +from librarian.book2anything import Book2Anything, Option -from librarian import ParseError -from librarian.parser import WLDocument -from librarian.cover import WLCover +class Book2Cover(Book2Anything): + format_name = "JPEG" + ext = "jpg" + uses_cover = True + cover_optional = False -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Create cover images for SOURCE files.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') + transform_options = [ + Option('-W', '--width', action='store', type='int', dest='width', default=None, + help='Set width.'), + Option('-H', '--height', action='store', type='int', dest='height', default=None, + help='Set height.'), + Option('-l', '--with-logo', dest='with_logo', + action='store_true', default=False, + help='Add WL logo in white box.'), + ] - options, input_filenames = parser.parse_args() + @staticmethod + def transform(wldoc, cover, *args, **kwargs): + return wldoc.as_cover(cover_class=cover, *args, **kwargs) - if len(input_filenames) < 1: - parser.print_help() - exit(1) - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.png' - - doc = WLDocument.from_file(input_filename) - WLCover(doc.book_info).save(output_filename) +if __name__ == '__main__': + Book2Cover.run() diff --git a/scripts/book2epub b/scripts/book2epub index bdb5ac6..01ca79a 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -4,59 +4,20 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import os.path -import optparse +from librarian.book2anything import Book2Anything, Option -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Epub(Book2Anything): + format_name = "EPUB" + ext = "epub" + uses_cover = True + uses_provider = True + transform_flags = [ + Option('-w', '--working-copy', dest='working-copy', + action='store_true', default=False, + help='mark the output as a working copy') + ] -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to EPUB format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - # Do some real work - try: - for main_input in input_filenames: - if options.verbose: - print main_input - - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.epub' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - epub = doc.as_epub(cover=options.with_cover) - - doc.save_output_file(epub, - output_file, options.output_dir, options.make_dir, 'epub') - - except 
ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Epub.run() diff --git a/scripts/book2fb2 b/scripts/book2fb2 new file mode 100755 index 0000000..584ae99 --- /dev/null +++ b/scripts/book2fb2 @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from librarian.book2anything import Book2Anything + + +class Book2Fb2(Book2Anything): + format_name = "FB2" + ext = "fb2" + uses_cover = False + uses_provider = True + + +if __name__ == '__main__': + Book2Fb2.run() diff --git a/scripts/book2html b/scripts/book2html index 1e88823..5d48eec 100755 --- a/scripts/book2html +++ b/scripts/book2html @@ -4,59 +4,25 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument +from librarian.book2anything import Book2Anything, Option + + +class Book2Html(Book2Anything): + format_name = "HTML" + ext = "html" + uses_cover = False + uses_provider = False + transform_flags = [ + Option('-r', '--raw', dest='full-page', + action='store_false', default=True, + help='output raw text for use in templates') + ] + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_html(flags=('full-page',)) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e, - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise - + Book2Html.run() diff --git a/scripts/book2ihtml b/scripts/book2ihtml deleted file mode 100755 index 779f245..0000000 --- a/scripts/book2ihtml +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. -# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. -# -import os -import optparse - -from librarian import ParseError -from librarian.parser import WLDocument - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = doc.as_html(flags=('full-page',), stylesheet='partial') - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.message.encode('utf-8') - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e.message.encode('utf-8'), - } - raise - diff --git a/scripts/book2mobi b/scripts/book2mobi index 665dcfa..f477a83 100755 --- a/scripts/book2mobi +++ b/scripts/book2mobi @@ -4,53 +4,16 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# -import os.path -import optparse +from librarian.book2anything import Book2Anything -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Mobi(Book2Anything): + format_name = "MOBI" + ext = "mobi" + uses_cover = True + cover_optional = False + uses_provider = True -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to MOBI format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - try: - for main_input in input_filenames: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.mobi' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - mobi = doc.as_mobi() - - doc.save_output_file(mobi, - output_file, options.output_dir, options.make_dir, 'mobi') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': main_input, - 'name': e.__class__.__name__, - 'message': e - } +if __name__ == '__main__': + Book2Mobi.run() diff --git a/scripts/book2partner b/scripts/book2partner index 0488273..4b84c2f 100755 --- a/scripts/book2partner +++ b/scripts/book2partner @@ -46,7 +46,7 @@ if __name__ 
== '__main__': if options.gandalf_pdf: packagers.GandalfPdfPackager.prepare(input_filenames, options.output_dir, options.verbose) if options.virtualo: - packagers.VirtualoEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) + packagers.VirtualoPackager.prepare(input_filenames, options.output_dir, options.verbose) if options.prestigio: packagers.PrestigioEpubPackager.prepare(input_filenames, options.output_dir, options.verbose) if options.prestigio_pdf: diff --git a/scripts/book2pdf b/scripts/book2pdf index 258c20d..68e2d08 100755 --- a/scripts/book2pdf +++ b/scripts/book2pdf @@ -4,61 +4,21 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os.path -from optparse import OptionParser +from librarian.book2anything import Book2Anything, Option -from librarian import DirDocProvider, ParseError -from librarian.parser import WLDocument +class Book2Pdf(Book2Anything): + format_name = "PDF" + ext = "pdf" + uses_cover = True + uses_provider = True + transform_args = [ + Option('-t', '--save-tex', dest='save_tex', metavar='FILE', + help='path to save the intermediary LaTeX file to'), + Option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', + help='force morefloats in old (<1.0c), new (>=1.0c) or none') + ] -if __name__ == '__main__': - usage = """Usage: %prog [options] SOURCE [SOURCE...] 
- Convert SOURCE files to PDF format.""" - - parser = OptionParser(usage) - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='make lots of noise and revert to default interaction in LaTeX') - parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, - help='create default cover') - parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, - help='create a directory for author and put the PDF in it') - parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE', - help='path to save the intermediary LaTeX file to') - parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', - help='specifies the output file') - parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', - help='specifies the directory for output') - parser.add_option('-m', '--morefloats', dest='morefloats', metavar='old/new/none', - help='force morefloats in old (<1.0c), new (>=1.0c) or none') - (options, args) = parser.parse_args() - - if len(args) < 1: - parser.print_help() - exit(1) - - if options.output_dir and options.output_file: - raise ValueError("Either --output-dir or --output file should be specified") - try: - for main_input in args: - path, fname = os.path.realpath(main_input).rsplit('/', 1) - provider = DirDocProvider(path) - output_file, output_dir = options.output_file, options.output_dir - if not (options.output_file or options.output_dir): - output_file = os.path.splitext(main_input)[0] + '.pdf' - else: - output_file = None - - doc = WLDocument.from_file(main_input, provider=provider) - pdf = doc.as_pdf(save_tex=options.save_tex, - cover=options.with_cover, - morefloats=options.morefloats) - - doc.save_output_file(pdf, - output_file, options.output_dir, options.make_dir, 'pdf') - except ParseError, e: - print '%(file)s:%(name)s:%(message)s; use -v to see more output' % { - 'file': main_input, - 'name': e.__class__.__name__, 
- 'message': e - } +if __name__ == '__main__': + Book2Pdf.run() diff --git a/scripts/book2txt b/scripts/book2txt index 9cfdef2..1b4c0ef 100755 --- a/scripts/book2txt +++ b/scripts/book2txt @@ -4,60 +4,26 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import os -import optparse - -from librarian import ParseError +from librarian.book2anything import Book2Anything, Option from librarian.parser import WLDocument -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to TXT format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, - help='set line wrap column') - parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, - help='don\'t try to parse dublin core metadata') +class Book2Txt(Book2Anything): + format_name = "TXT" + ext = "txt" + uses_cover = False + uses_provider = False + parser_args = [ + Option('-i', '--ignore-dublin-core', dest='parse_dublincore', + action='store_false', default=True, + help='don\'t try to parse dublin core metadata') + ] + transform_args = [ + Option('-w', '--wrap', action='store', type='int', dest='wrapping', default=0, + help='set line wrap column') + ] + transform = WLDocument.as_text - options, input_filenames = parser.parse_args() - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.txt' - try: - doc = WLDocument.from_file(input_filename, - parse_dublincore=options.parse_dublincore) - html = 
doc.as_text(wrapping=str(options.wrapping)) - doc.save_output_file(html, output_path=output_filename) - except ParseError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e - } - except IOError, e: - print '%(file)s:%(name)s:%(message)s' % { - 'file': input_filename, - 'name': e.__class__.__name__, - 'message': e.strerror, - } - except BaseException, e: - print '%(file)s:%(etype)s:%(message)s' % { - 'file': input_filename, - 'etype': e.__class__.__name__, - 'message': e, - } - raise +if __name__ == '__main__': + Book2Txt.run() diff --git a/scripts/master.css b/scripts/master.css old mode 100755 new mode 100644 diff --git a/scripts/master.plain.css b/scripts/master.plain.css old mode 100755 new mode 100644 diff --git a/setup.py b/setup.py index b1ea926..b20040a 100755 --- a/setup.py +++ b/setup.py @@ -21,23 +21,27 @@ def whole_tree(prefix, path): setup( name='librarian', - version='1.4.1', + version='1.5.1', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', maintainer='Radek Czajka', - maintainer_email='radek.czajka@gmail.com', + maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl', url='http://github.com/fnp/librarian', packages=['librarian'], - package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fonts/*', 'res/*'] + + package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] + whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')}, include_package_data=True, - install_requires=['lxml>=2.2'], + install_requires=[ + 'lxml>=2.2', + 'Pillow', + ], scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/book2epub', 'scripts/book2mobi', 'scripts/book2pdf', + 'scripts/book2fb2', 'scripts/book2partner', 'scripts/book2cover', 'scripts/bookfragments', diff --git a/tests/files/example-wl.xml 
b/tests/files/example-wl.xml new file mode 100644 index 0000000..d2fd87b --- /dev/null +++ b/tests/files/example-wl.xml @@ -0,0 +1,170 @@ + + + + + +Utworu, Autor +Tytuł w DC +Utworu, Tłumacz +Literacki, Redaktor +Techniczny, Redaktor +Fundacja Nowoczesna Polska +period +type +genre +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana + przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. +http://wolnelektury.pl/katalog/lektura/test1 +source +Domena publiczna +1500 +xml +text +text +2000 +pol + + + + +nota_red + +autor_utworu +dzielo_nadrzedne +nazwa_utworu +podtytul + +[powyżej: +nota_red (nie pojawia się w tekście, może być podana osobno), +autor_utworu, dzielo_nadrzedne, nazwa_utworu, podtytul, tłumacz (z DC)] + +[Noty: nota/akap, dedykacja/akap, motto/akap, motto_podpis] + + +nota/akap +dedykacja/akap +motto/akap +motto_podpis + +[Początek dramatu: lista_osob, naglowek_listy, lista_osoba, miejsce_czas] + + + lista_osob/naglowek_listy + lista_osob/lista_osoba + lista_osob/lista_osoba + +miejsce_czas + +[naglowek_czesc, naglowek_rozdzial, naglowek_podrozdzial, srodtytul] + +naglowek_czesc +naglowek_rozdzial +naglowek_podrozdzial +srodtytul + +[akap, akap_cd, akap_dialog, motyw] + +akapmotyw +akap_cd +akap_dialog + +[strofa, wers_akap, wers_wciety,typ=1-6, wers_cd, zastepnik_wersu] + +strofa/ +wers_akap/ +wers_wciety@typ=1/ +wers_wciety@typ=2/ +wers_wciety@typ=3 + +wers_wciety@typ=4/ +wers_wciety@typ=5/ +wers_wciety@typ=6/ +wers_cd/ +. . . . . . . . . . . . . . . . + + +[dlugi_cytat/akap] + +Cytowany akapit powinien wyglądać jak cytowany akapit. +Znaczy, może mieć jakieś dodatkowe wcięcie, jakiś rodzaj wyróżnienia czy coś. 
+ +[poezja_cyt/strofa] + +To jest poezja/ +cytowana/ +ma być porządnie/ +wyrównana + +[naglowek_akt, naglowek_scena] + +naglowek_akt +naglowek_scena + +[Kwestia: naglowek_osoba, kwestia, didask_tekst, didaskalia, strofa, akap] + +naglowek_osoba + + +didask_tekst +didaskalia +Strofa w dramacie/ +jak amen w pacie/ +rzu. +Powyższy kawałek wiersza jest najzupełniej bez sensu i tak naprawdę wcale nie trzyma rytmu ani rymu. Być może należy skoncentrować się na dramacie prozą, jak ta tutaj niniejsza wypowiedź. + +[didaskalia, osoba] + +odezwał się autor. + +[Wyróżnienia: tytul_dziela, tytul_dziela@typ=1, wyroznienie, slowo_obce] + + +tytul_dziela, +tytul_dziela@typ=1, +wyroznienie, +slowo_obce + + +[Przypisy: pa, pt, pr, pe] + + +pa - - - przypis autorski +pt - - - przypis tłumacza +pr - - - przypis redakcyjny +pe - - - przypis edytorski + + +[Separatory] + +[sekcja_swiatlo:] + + + +[sekcja_asterysk:] + + + +[separator_linia:] + + + + + +[Komentarze: uwaga, extra] +uwaga +extra + +[Nieużywane] + +wyp_osoba +wywiad_pyt/akap +wywiad_odp/akap +mat +www + + + diff --git a/tests/files/picture/angelus-novus.jpeg b/tests/files/picture/angelus-novus.jpeg new file mode 100644 index 0000000..fd0394f Binary files /dev/null and b/tests/files/picture/angelus-novus.jpeg differ diff --git a/tests/files/picture/angelus-novus.png b/tests/files/picture/angelus-novus.png deleted file mode 100644 index 9925dad..0000000 Binary files a/tests/files/picture/angelus-novus.png and /dev/null differ diff --git a/tests/files/picture/angelus-novus.xml b/tests/files/picture/angelus-novus.xml index 0f26730..964faed 100644 --- a/tests/files/picture/angelus-novus.xml +++ b/tests/files/picture/angelus-novus.xml @@ -18,9 +18,9 @@ Domena publiczna - Paul Klee zm. 1940 1940 Image - image/png - 1645 x 2000 px - d9ead48f3442ac4e1add602aacdffa4638ae8e21 + image/jpeg + 329 x 400 px + 5ed8e8d24d92017c6341c0b8cfcc414dec55b8bf 1920 lat @@ -29,14 +29,14 @@
-
+
-
+
-
-
+
+
diff --git a/tests/files/text/asnyk_miedzy_nami_expected.html b/tests/files/text/asnyk_miedzy_nami_expected.html index 6bc7649..fd18174 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.html +++ b/tests/files/text/asnyk_miedzy_nami_expected.html @@ -3,29 +3,33 @@

Spis treści

    +

    Adam AsnykMiłość platonicznaMiędzy nami nic nie było

    Miłość platoniczna
    -

    1Między nami nic nie było!

    +

    1Między nami nic nie było!

    Żadnych zwierzeń, wyznań żadnych!

    Nic nas z sobą nie łączyło —

    Prócz wiosennych marzeń zdradnych;

    -

    5NaturaPrócz tych woni, barw i blasków,

    +

    5NaturaPrócz tych woni, barw i blasków,

    Unoszących się w przestrzeni;

    Prócz szumiących śpiewem lasków

    I tej świeżej łąk zieleni;

    -

    Prócz tych kaskad i potoków,

    +

    Prócz tych kaskad i potoków,

    10Zraszających każdy parów,

    Prócz girlandy tęcz, obłoków,

    Prócz natury słodkich czarów;

    -

    Prócz tych wspólnych, jasnych zdrojów,

    +

    Prócz tych wspólnych, jasnych zdrojów,

    Z których serce zachwyt piło;

    15Prócz pierwiosnków i powojów,—

    Między nami nic nie było!

    diff --git a/tests/files/text/asnyk_miedzy_nami_expected.txt b/tests/files/text/asnyk_miedzy_nami_expected.txt index 70c3185..89e310a 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.txt +++ b/tests/files/text/asnyk_miedzy_nami_expected.txt @@ -33,10 +33,10 @@ Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dost Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska. -Ten utwór nie jest chroniony prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/). +Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to te dodatkowe materiały udostępnione są na licencji Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL (http://creativecommons.org/licenses/by-sa/3.0/). Tekst opracowany na podstawie: (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. 
-Opracowanie redakcyjne i przypisy: Aleksandra Sekuła, Olga Sutkowska +Opracowanie redakcyjne i przypisy: Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml index c585a8b..6a781f3 100755 --- a/tests/files/text/asnyk_zbior.xml +++ b/tests/files/text/asnyk_zbior.xml @@ -9,9 +9,11 @@ Pozytywizm Liryka Wiersz +Fikcyjny, Adam Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. http://wolnelektury.pl/katalog/lektura/poezye http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo +http://wolnelektury.pl/katalog/lektura/do-mlodych http://www.polona.pl/Content/5164 (Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 Domena publiczna - Adam Asnyk zm. 1897 diff --git a/tests/files/text/do-mlodych.xml b/tests/files/text/do-mlodych.xml new file mode 100755 index 0000000..21fa522 --- /dev/null +++ b/tests/files/text/do-mlodych.xml @@ -0,0 +1,70 @@ + + +Asnyk, Adam +Do młodych +Sekuła, Aleksandra +Sutkowska, Olga +Fundacja Nowoczesna Polska +Pozytywizm +Liryka +Wiersz +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. +http://wolnelektury.pl/katalog/lektura/do-mlodych +http://www.polona.pl/Content/8616 +El...y (Adam Asnyk), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa 1898 +Domena publiczna - Adam Asnyk zm. 
1897 +1897 +xml +text +text +2009-04-07 +L +pol +http://redakcja.wolnelektury.pl/media/dynamic/cover/image/35.jpg +leboski@Flickr, CC BY 2.0 +http://redakcja.wolnelektury.pl/cover/image/35 + + + +Adam Asnyk + +Do młodych + + + + +Szukajcie prawdy jasnego płomienia,/ +Szukajcie nowych, nieodkrytych dróg!/ +Za każdym krokiem w tajniki stworzenia/ +Coraz się dusza ludzka rozprzestrzenia/ +I większym staje się Bóg! + + +Choć otrząśniecie kwiaty barwnych mitów,/ +Choć rozproszycie legendowy mrok,/ +Choć mgłę urojeń zedrzecie z błękitów, ---/ +Ludziom niebiańskich nie zbraknie zachwytów,/ +Lecz dalej sięgnie ich wzrok. + + +Czas, Kondycja ludzka, PrzemijanieKażda epoka ma swe własne cele/ +I zapomina o wczorajszych snach:/ +Nieście więc wiedzy pochodnię na czele/ +I nowy udział bierzcie w wieków dziele,---/ +Przyszłości podnoście gmach! + + +Ale nie depczcie przeszłości ołtarzy,/ +Choć macie sami doskonalsze wznieść:/ +Na nich się jeszcze święty ogień żarzy,/ +I miłość ludzka stoi tam na straży,/ +I wy winniście im cześć! + + +Ze światem, który w ciemność już zachodzi/ +Wraz z całą tęczą idealnych snów,/ +Prawdziwa mądrość niechaj was pogodzi:/ +I wasze gwiazdy, o zdobywcy młodzi,/ +W ciemnościach pogasną znów! + + \ No newline at end of file diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml index 124940e..a94b8f0 100644 --- a/tests/files/text/miedzy-nami-nic-nie-bylo.xml +++ b/tests/files/text/miedzy-nami-nic-nie-bylo.xml @@ -9,6 +9,8 @@ Sekuła, Aleksandra Sutkowska, Olga +Fikcyjny, Adam +Fikcyjny, Adam Fundacja Nowoczesna Polska Pozytywizm Liryka diff --git a/tests/test_epub.py b/tests/test_epub.py index 9fc5637..faa76e7 100644 --- a/tests/test_epub.py +++ b/tests/test_epub.py @@ -3,14 +3,29 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. 
# +from zipfile import ZipFile +from lxml import html +from nose.tools import * from librarian import DirDocProvider from librarian.parser import WLDocument -from nose.tools import * -from utils import get_fixture +from tests.utils import get_fixture def test_transform(): - WLDocument.from_file( + epub = WLDocument.from_file( get_fixture('text', 'asnyk_zbior.xml'), provider=DirDocProvider(get_fixture('text', '')) - ).as_epub(flags=['without_fonts']) + ).as_epub(flags=['without_fonts']).get_file() + zipf = ZipFile(epub) + + # Check contributor list. + last = zipf.open('OPS/last.html') + tree = html.parse(last) + editors_attribution = False + for par in tree.findall("//p"): + if par.text.startswith(u'Opracowanie redakcyjne i przypisy:'): + editors_attribution = True + assert_equal(par.text.rstrip(), + u'Opracowanie redakcyjne i przypisy: ' + u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') + assert_true(editors_attribution) diff --git a/tests/test_pdf.py b/tests/test_pdf.py new file mode 100644 index 0000000..75b73bc --- /dev/null +++ b/tests/test_pdf.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import re +from tempfile import NamedTemporaryFile +from nose.tools import * +from librarian import DirDocProvider +from librarian.parser import WLDocument +from utils import get_fixture + + +def test_transform(): + temp = NamedTemporaryFile(delete=False) + temp.close() + WLDocument.from_file( + get_fixture('text', 'asnyk_zbior.xml'), + provider=DirDocProvider(get_fixture('text', '')) + ).as_pdf(save_tex=temp.name) + tex = open(temp.name).read().decode('utf-8') + print tex + + # Check contributor list. 
+ editors = re.search(ur'\\def\\editors\{' + ur'Opracowanie redakcyjne i przypisy: ([^}]*?)\.\s*\}', tex) + assert_equal(editors.group(1), + u"Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska") diff --git a/tests/test_picture.py b/tests/test_picture.py index 71a77dc..f64f624 100644 --- a/tests/test_picture.py +++ b/tests/test_picture.py @@ -31,7 +31,7 @@ def test_wlpicture(): # from nose.tools import set_trace; set_trace() assert pi.type[0] == u"Image" - assert pi.mime_type == u'image/png' == wlp.mime_type + assert pi.mime_type == u'image/jpeg' == wlp.mime_type assert wlp.slug == 'angelus-novus' assert path.exists(wlp.image_path)