From: Radek Czajka Date: Thu, 19 Jan 2012 15:35:59 +0000 (+0100) Subject: Merge branch 'pretty' into commerce X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/65916b0958b55a24073cb592e31b6bb7ac0585b9?hp=04007a695c00e4b9191dcd873b6a6d7b96939279 Merge branch 'pretty' into commerce Conflicts: librarian/cover.py librarian/epub.py librarian/epub/xsltContent.xsl librarian/epub/xsltScheme.xsl scripts/book2epub --- diff --git a/librarian/__init__.py b/librarian/__init__.py index 5b6981d..dd09ce4 100644 --- a/librarian/__init__.py +++ b/librarian/__init__.py @@ -3,21 +3,30 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from __future__ import with_statement + import os +import re +import shutil class ParseError(Exception): def __str__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message.message + return self.message def __unicode__(self): """ Dirty workaround for Python Unicode handling problems. """ - return self.message.message + return self.message class ValidationError(Exception): pass class NoDublinCore(ValidationError): + """There's no DublinCore section, and it's required.""" + pass + +class NoProvider(Exception): + """There's no DocProvider specified, and it's needed.""" pass class XMLNamespace(object): @@ -56,43 +65,88 @@ OPFNS = XMLNamespace("http://www.idpf.org/2007/opf") WLNS = EmptyNamespace() +class WLURI(object): + """Represents a WL URI. Extracts slug from it.""" + slug = None + + example = 'http://wolnelektury.pl/katalog/lektura/template/' + _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/' + '(?P[-a-z0-9]+)/?$') + + def __init__(self, uri): + uri = unicode(uri) + self.uri = uri + self.slug = uri.rstrip('/').rsplit('/', 1)[-1] + + @classmethod + def strict(cls, uri): + match = cls._re_wl_uri.match(uri) + if not match: + raise ValueError('Supplied URI (%s) does not match ' + 'the template: %s.' % (uri, cls._re_wl_uri)) + return cls(uri) + + @classmethod + def from_slug(cls, slug): + """Contructs an URI from slug. + + >>> WLURI.from_slug('a-slug').uri + u'http://wolnelektury.pl/katalog/lektura/a-slug/' + + """ + uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug + return cls(uri) + + def __unicode__(self): + return self.uri + + def __str__(self): + return self.uri + + def __eq__(self, other): + return self.slug == other.slug + + class DocProvider(object): - """ Base class for a repository of XML files. - Used for generating joined files, like EPUBs + """Base class for a repository of XML files. + + Used for generating joined files, like EPUBs. """ def by_slug(self, slug): - raise NotImplemented - - def __getitem__(self, slug): - return self.by_slug(slug) + """Should return a file-like object with a WL document XML.""" + raise NotImplementedError - def by_uri(self, uri): - return self.by_slug(uri.rsplit('/', 1)[1]) + def by_uri(self, uri, wluri=WLURI): + """Should return a file-like object with a WL document XML.""" + wluri = wluri(uri) + return self.by_slug(wluri.slug) class DirDocProvider(DocProvider): """ Serve docs from a directory of files in form .xml """ - def __init__(self, dir): - self.dir = dir + def __init__(self, dir_): + self.dir = dir_ self.files = {} def by_slug(self, slug): - return open(os.path.join(self.dir, '%s.xml' % slug)) + fname = slug + '.xml' + return open(os.path.join(self.dir, fname)) import lxml.etree as etree import dcparser DEFAULT_BOOKINFO = dcparser.BookInfo( - { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'}, \ + { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'}, { DCNS('creator'): [u'Some, Author'], DCNS('title'): [u'Some Title'], DCNS('subject.period'): [u'Unknown'], DCNS('subject.type'): [u'Unknown'], DCNS('subject.genre'): [u'Unknown'], DCNS('date'): ['1970-01-01'], + DCNS('language'): [u'pol'], # DCNS('date'): [creation_date], DCNS('publisher'): [u"Fundacja Nowoczesna Polska"], DCNS('description'): @@ -100,8 +154,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo( Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN."""], - DCNS('identifier.url'): - [u"http://wolnelektury.pl/katalog/lektura/template"], + DCNS('identifier.url'): [WLURI.example], DCNS('rights'): [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] }) @@ -118,14 +171,15 @@ def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO): method='xml', encoding=unicode, pretty_print=True) return u'\n' + dcstring + u'\n\n' + ocrtext + \ - u'\n\n'; + u'\n\n' def serialize_raw(element): b = u'' + (element.text or '') for child in element.iterchildren(): - e = etree.tostring(child, method='xml', encoding=unicode, pretty_print=True) + e = etree.tostring(child, method='xml', encoding=unicode, + pretty_print=True) b += e return b @@ -140,3 +194,73 @@ def serialize_children(element, format='raw'): def get_resource(path): return os.path.join(os.path.dirname(__file__), path) + +class OutputFile(object): + """Represents a file returned by one of the converters.""" + + _string = None + _filename = None + + def __del__(self): + if self._filename: + os.unlink(self._filename) + + def __nonzero__(self): + return self._string is not None or self._filename is not None + + @classmethod + def from_string(cls, string): + """Converter returns contents of a file as a string.""" + + instance = cls() + instance._string = string + return instance + + @classmethod + def from_filename(cls, filename): + """Converter returns contents of a file as a named file.""" + + instance = cls() + instance._filename = filename + return instance + + def get_string(self): + """Get file's contents as a string.""" + + if self._filename is not None: + with open(self._filename) as f: + return f.read() + else: + return self._string + + def get_file(self): + """Get file as a file-like object.""" + + if self._string is not None: + from StringIO import StringIO + return StringIO(self._string) + elif self._filename is not None: + return open(self._filename) + + def get_filename(self): + """Get file as a fs path.""" + + if self._filename is not None: + return self._filename + elif self._string is not None: + from tempfile import NamedTemporaryFile + temp = NamedTemporaryFile(prefix='librarian-', delete=False) + temp.write(self._string) + temp.close() + self._filename = temp.name + return self._filename + else: + return None + + def save_as(self, path): + """Save file to a path. Create directories, if necessary.""" + + dirname = os.path.dirname(os.path.abspath(path)) + if not os.path.isdir(dirname): + os.makedirs(dirname) + shutil.copy(self.get_filename(), path) diff --git a/librarian/cover.py b/librarian/cover.py index 8e61fe3..e7a8e1b 100644 --- a/librarian/cover.py +++ b/librarian/cover.py @@ -7,7 +7,110 @@ import Image, ImageFont, ImageDraw, ImageFilter from librarian import get_resource +class TextBox(object): + """Creates an Image with a series of centered strings.""" + + SHADOW_X = 3 + SHADOW_Y = 3 + SHADOW_BLUR = 3 + + def __init__(self, max_width, max_height, padding_x=None, padding_y=None): + if padding_x is None: + padding_x = self.SHADOW_X + self.SHADOW_BLUR + if padding_y is None: + padding_y = self.SHADOW_Y + self.SHADOW_BLUR + + self.max_width = max_width + self.max_text_width = max_width - 2 * padding_x + self.padding_y = padding_y + self.height = padding_y + self.img = Image.new('RGBA', (max_width, max_height)) + self.draw = ImageDraw.Draw(self.img) + self.shadow_img = None + self.shadow_draw = None + + def skip(self, height): + """Skips some vertical space.""" + self.height += height + + def text(self, text, color='#000', font=None, line_height=20, + shadow_color=None, shortener=None): + """Writes some centered text.""" + if shadow_color: + if not self.shadow_img: + self.shadow_img = Image.new('RGBA', self.img.size) + self.shadow_draw = ImageDraw.Draw(self.shadow_img) + while text: + if shortener: + for line in shortener(text): + if text_draw.textsize(line, font=font)[0] <= self.max_text_width: + break + text = '' + else: + line = text + line_width = self.draw.textsize(line, font=font)[0] + while line_width > self.max_text_width: + parts = line.rsplit(' ', 1) + if len(parts) == 1: + line_width = self.max_text_width + break + line = parts[0] + line_width = self.draw.textsize(line, font=font)[0] + + line = line.strip() + ' ' + + pos_x = (self.max_width - line_width) / 2 + + if shadow_color: + self.shadow_draw.text( + (pos_x + self.SHADOW_X, self.height + self.SHADOW_Y), + line, font=font, fill=shadow_color + ) + + self.draw.text((pos_x, self.height), line, font=font, fill=color) + self.height += line_height + # go to next line + text = text[len(line):] + + @staticmethod + def person_shortener(text): + yield text + chunks = text.split() + n_chunks = len(chunks) + # make initials from given names, starting from last + for i in range(n_chunks - 2, -1, -1): + chunks[i] = chunks[i][0] + '.' + yield " ".join(chunks) + # remove given names initials, starting from last + while len(chunks) > 2: + del chunks[1] + yield " ".join(chunks) + + @staticmethod + def title_shortener(text): + yield text + chunks = text.split() + n_chunks = len(chunks) + # remove words, starting from last one + while len(chunks) > 1: + del chunks[-1] + yield " ".join(chunks) + u'…' + + def image(self): + """Creates the actual Image object.""" + image = Image.new('RGBA', (self.max_width, + self.height + self.padding_y)) + if self.shadow_img: + shadow = self.shadow_img.filter(ImageFilter.BLUR) + image.paste(shadow, (0, 0), shadow) + image.paste(self.img, (0, 0), self.img) + else: + image.paste(self.img, (0, 0)) + return image + + class Cover(object): + """Abstract base class for cover images generator.""" width = 600 height = 800 background_color = '#fff' @@ -35,10 +138,10 @@ class Cover(object): logo_bottom = None logo_width = None + uses_dc_cover = False format = 'JPEG' - exts = { 'JPEG': 'jpg', 'PNG': 'png', @@ -49,75 +152,16 @@ class Cover(object): 'PNG': 'image/png', } - @staticmethod - def person_shortener(text): - yield text - chunks = text.split() - n_chunks = len(chunks) - # make initials from given names, starting from last - for i in range(n_chunks - 2, -1, -1): - chunks[i] = chunks[i][0] + '.' - yield " ".join(chunks) - # remove given names initials, starting from last - while len(chunks) > 2: - del chunks[1] - yield " ".join(chunks) - - @staticmethod - def title_shortener(text): - yield text - chunks = text.split() - n_chunks = len(chunks) - # remove words, starting from last one - while len(chunks) > 1: - del chunks[-1] - yield " ".join(chunks) + u'…' - - @staticmethod - def draw_text(text, img, font, align, shortener, margin_left, width, pos_y, lineskip, color, shadow_color): - if shadow_color: - shadow_img = Image.new('RGBA', img.size) - shadow_draw = ImageDraw.Draw(shadow_img) - text_img = Image.new('RGBA', img.size) - text_draw = ImageDraw.Draw(text_img) - while text: - if shortener: - for line in shortener(text): - if text_draw.textsize(line, font=font)[0] <= width: - break - text = '' - else: - line = text - while text_draw.textsize(line, font=font)[0] > width: - try: - line, ext = line.rsplit(' ', 1) - except: - break - text = text[len(line)+1:] - pos_x = margin_left - if align == 'c': - pos_x += (width - text_draw.textsize(line, font=font)[0]) / 2 - elif align == 'r': - pos_x += (width - text_draw.textsize(line, font=font)[0]) - if shadow_color: - shadow_draw.text((pos_x + 3, pos_y + 3), line, font=font, fill=shadow_color) - text_draw.text((pos_x, pos_y), line, font=font, fill=color) - pos_y += lineskip - if shadow_color: - shadow_img = shadow_img.filter(ImageFilter.BLUR) - img.paste(shadow_img, None, shadow_img) - img.paste(text_img, None, text_img) - return pos_y - - - def __init__(self, author='', title=''): - self.author = author - self.title = title + def __init__(self, book_info): + self.author = ", ".join(auth.readable() for auth in book_info.authors) + self.title = book_info.title def pretty_author(self): + """Allows for decorating author's name.""" return self.author def pretty_title(self): + """Allows for decorating title.""" return self.title def image(self): @@ -137,16 +181,31 @@ class Cover(object): logo = logo.resize((self.logo_width, logo.size[1] * self.logo_width / logo.size[0])) img.paste(logo, ((self.width - self.logo_width) / 2, img.size[1] - logo.size[1] - self.logo_bottom)) - author_font = self.author_font or ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), 30) - author_shortener = None if self.author_wrap else self.person_shortener - title_y = self.draw_text(self.pretty_author(), img, author_font, self.author_align, author_shortener, - self.author_margin_left, self.width - self.author_margin_left - self.author_margin_right, self.author_top, - self.author_lineskip, self.author_color, self.author_shadow) + self.title_top - title_shortener = None if self.title_wrap else self.title_shortener - title_font = self.title_font or ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), 40) - self.draw_text(self.pretty_title(), img, title_font, self.title_align, title_shortener, - self.title_margin_left, self.width - self.title_margin_left - self.title_margin_right, title_y, - self.title_lineskip, self.title_color, self.title_shadow) + top = self.author_top + tbox = TextBox( + self.width - self.author_margin_left - self.author_margin_right, + self.height - top, + ) + author_font = self.author_font or ImageFont.truetype( + get_resource('fonts/DejaVuSerif.ttf'), 30) + author_shortener = None if self.author_wrap else TextBox.person_shortener + tbox.text(self.pretty_author(), self.author_color, author_font, + self.author_lineskip, self.author_shadow, author_shortener) + text_img = tbox.image() + img.paste(text_img, (self.author_margin_left, top), text_img) + + top += text_img.size[1] + self.title_top + tbox = TextBox( + self.width - self.title_margin_left - self.title_margin_right, + self.height - top, + ) + title_font = self.author_font or ImageFont.truetype( + get_resource('fonts/DejaVuSerif.ttf'), 40) + title_shortener = None if self.title_wrap else TextBox.title_shortener + tbox.text(self.pretty_title(), self.title_color, title_font, + self.title_lineskip, self.title_shadow, title_shortener) + text_img = tbox.image() + img.paste(text_img, (self.title_margin_left, top), text_img) return img @@ -160,6 +219,125 @@ class Cover(object): return self.image().save(format=self.format, *args, **kwargs) +class WLCover(Cover): + """Default Wolne Lektury cover generator.""" + uses_dc_cover = True + author_font = ImageFont.truetype( + get_resource('fonts/JunicodeWL-Regular.ttf'), 20) + author_lineskip = 30 + title_font = ImageFont.truetype( + get_resource('fonts/DejaVuSerif-Bold.ttf'), 30) + title_lineskip = 40 + title_box_width = 350 + bar_width = 35 + background_color = '#444' + author_color = '#444' + + epochs = { + u'Starożytność': 0, + u'Średniowiecze': 30, + u'Renesans': 60, + u'Barok': 90, + u'Oświecenie': 120, + u'Romantyzm': 150, + u'Pozytywizm': 180, + u'Modernizm': 210, + u'Dwudziestolecie międzywojenne': 240, + u'Współczesność': 270, + } + + def __init__(self, book_info): + super(WLCover, self).__init__(book_info) + self.kind = book_info.kind + self.epoch = book_info.epoch + if book_info.cover_url: + from urllib2 import urlopen + from StringIO import StringIO + + bg_src = urlopen(book_info.cover_url) + self.background_img = StringIO(bg_src.read()) + bg_src.close() + + def pretty_author(self): + return self.author.upper() + + def image(self): + from colorsys import hsv_to_rgb + + img = Image.new('RGB', (self.width, self.height), self.background_color) + draw = ImageDraw.Draw(img) + + if self.epoch in self.epochs: + epoch_color = tuple(int(255 * c) for c in hsv_to_rgb( + float(self.epochs[self.epoch]) / 360, .7, .7)) + else: + epoch_color = '#000' + draw.rectangle((0, 0, self.bar_width, self.height), fill=epoch_color) + + if self.background_img: + src = Image.open(self.background_img) + trg_size = (self.width - self.bar_width, self.height) + if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]: + resized = ( + trg_size[0], + src.size[1] * trg_size[0] / src.size[0] + ) + cut = (resized[1] - trg_size[1]) / 2 + src = src.resize(resized) + src = src.crop((0, cut, src.size[0], src.size[1] - cut)) + else: + resized = ( + src.size[0] * trg_size[1] / src.size[1], + trg_size[1], + ) + cut = (resized[0] - trg_size[0]) / 2 + src = src.resize(resized) + src = src.crop((cut, 0, src.size[0] - cut, src.size[1])) + + img.paste(src, (self.bar_width, 0)) + del src + + box = TextBox(self.title_box_width, self.height, padding_y=20) + box.text(self.pretty_author(), + font=self.author_font, + line_height=self.author_lineskip, + color=self.author_color, + shadow_color=self.author_shadow, + ) + + box.skip(10) + box.draw.line((75, box.height, 275, box.height), + fill=self.author_color, width=2) + box.skip(15) + + box.text(self.pretty_title(), + line_height=self.title_lineskip, + font=self.title_font, + color=epoch_color, + shadow_color=self.title_shadow, + ) + box_img = box.image() + + if self.kind == 'Liryka': + # top + box_top = 100 + elif self.kind == 'Epika': + # bottom + box_top = self.height - 100 - box_img.size[1] + else: + # center + box_top = (self.height - box_img.size[1]) / 2 + + box_left = self.bar_width + (self.width - self.bar_width - + box_img.size[0]) / 2 + draw.rectangle((box_left, box_top, + box_left + box_img.size[0], box_top + box_img.size[1]), + fill='#fff') + img.paste(box_img, (box_left, box_top), box_img) + + return img + + class VirtualoCover(Cover): width = 600 diff --git a/librarian/dcparser.py b/librarian/dcparser.py index 9faffe8..f64317a 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -7,7 +7,8 @@ from xml.parsers.expat import ExpatError from datetime import date import time -from librarian import ValidationError, NoDublinCore, ParseError, DCNS, RDFNS +from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS, + WLURI) import lxml.etree as etree # ElementTree API using libxml2 from lxml.etree import XMLSyntaxError @@ -78,35 +79,43 @@ def as_unicode(text): else: return text.decode('utf-8') +def as_wluri_strict(text): + return WLURI.strict(text) + class Field(object): - def __init__(self, uri, attr_name, type=as_unicode, multiple=False, salias=None, **kwargs): + def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs): self.uri = uri self.name = attr_name - self.validator = type + self.validator = validator + self.strict = strict self.multiple = multiple self.salias = salias self.required = kwargs.get('required', True) and not kwargs.has_key('default') self.default = kwargs.get('default', [] if multiple else [None]) - def validate_value(self, val): + def validate_value(self, val, strict=False): + if strict and self.strict is not None: + validator = self.strict + else: + validator = self.validator try: if self.multiple: - if self.validator is None: + if validator is None: return val - return [ self.validator(v) if v is not None else v for v in val ] + return [ validator(v) if v is not None else v for v in val ] elif len(val) > 1: raise ValidationError("Multiple values not allowed for field '%s'" % self.uri) elif len(val) == 0: raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri) else: - if self.validator is None or val[0] is None: + if validator is None or val[0] is None: return val[0] - return self.validator(val[0]) + return validator(val[0]) except ValueError, e: raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message)) - def validate(self, fdict): + def validate(self, fdict, strict=False): if not fdict.has_key(self.uri): if not self.required: f = self.default @@ -115,47 +124,64 @@ class Field(object): else: f = fdict[self.uri] - return self.validate_value(f) + return self.validate_value(f, strict=strict) + + def __eq__(self, other): + if isinstance(other, Field) and other.name == self.name: + return True + return False + +class DCInfo(type): + def __new__(meta, classname, bases, class_dict): + fields = list(class_dict['FIELDS']) + for base in bases[::-1]: + if hasattr(base, 'FIELDS'): + for field in base.FIELDS[::-1]: + try: + fields.index(field) + except ValueError: + fields.insert(0, field) + class_dict['FIELDS'] = tuple(fields) + return super(DCInfo, meta).__new__(meta, classname, bases, class_dict) + + +class WorkInfo(object): + __metaclass__ = DCInfo -class BookInfo(object): FIELDS = ( - Field( DCNS('creator'), 'author', as_person), + Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True), Field( DCNS('title'), 'title'), - Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True), - Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True), - Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True), - Field( DCNS('date'), 'created_at', as_date), - Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), + Field( DCNS('type'), 'type', required=False, multiple=True), + Field( DCNS('contributor.editor'), 'editors', \ as_person, salias='editor', multiple=True, default=[]), - Field( DCNS('contributor.translator'), 'translators', \ - as_person, salias='translator', multiple=True, default=[]), Field( DCNS('contributor.technical_editor'), 'technical_editors', as_person, salias='technical_editor', multiple=True, default=[]), + + Field( DCNS('date'), 'created_at', as_date), + Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False), Field( DCNS('publisher'), 'publisher'), + + Field( DCNS('language'), 'language'), + Field( DCNS('description'), 'description', required=False), + Field( DCNS('source'), 'source_name', required=False), Field( DCNS('source.URL'), 'source_url', required=False), - Field( DCNS('identifier.url'), 'url'), - Field( DCNS('relation.hasPart'), 'parts', multiple=True, required=False), + Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict), Field( DCNS('rights.license'), 'license', required=False), Field( DCNS('rights'), 'license_description'), - Field( DCNS('description'), 'description', required=False), ) - @property - def slug(self): - return self.url.rsplit('/', 1)[1] - @classmethod - def from_string(cls, xml): + def from_string(cls, xml, *args, **kwargs): from StringIO import StringIO - return cls.from_file(StringIO(xml)) + return cls.from_file(StringIO(xml), *args, **kwargs) @classmethod - def from_file(cls, xmlfile): + def from_file(cls, xmlfile, *args, **kwargs): desc_tag = None try: iter = etree.iterparse(xmlfile, ['start', 'end']) @@ -176,17 +202,17 @@ class BookInfo(object): # if there is no end, Expat should yell at us with an ExpatError # extract data from the element and make the info - return cls.from_element(desc_tag) + return cls.from_element(desc_tag, *args, **kwargs) except XMLSyntaxError, e: raise ParseError(e) except ExpatError, e: raise ParseError(e) @classmethod - def from_element(cls, rdf_tag): + def from_element(cls, rdf_tag, *args, **kwargs): # the tree is already parsed, so we don't need to worry about Expat errors field_dict = {} - desc = rdf_tag.find(".//" + RDFNS('Description') ) + desc = rdf_tag.find(".//" + RDFNS('Description')) if desc is None: raise NoDublinCore("No DublinCore section found.") @@ -196,9 +222,9 @@ class BookInfo(object): fv.append(e.text) field_dict[e.tag] = fv - return cls( desc.attrib, field_dict ) + return cls(desc.attrib, field_dict, *args, **kwargs) - def __init__(self, rdf_attrs, dc_fields): + def __init__(self, rdf_attrs, dc_fields, strict=False): """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description. dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the given field. """ @@ -207,7 +233,7 @@ class BookInfo(object): self.fmap = {} for field in self.FIELDS: - value = field.validate( dc_fields ) + value = field.validate(dc_fields, strict=strict) setattr(self, 'prop_' + field.name, value) self.fmap[field.name] = field if field.salias: self.fmap[field.salias] = field @@ -278,7 +304,6 @@ class BookInfo(object): return root - def serialize(self): rdf = {} rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about } @@ -316,5 +341,31 @@ class BookInfo(object): return result -def parse(file_name): - return BookInfo.from_file(file_name) + +class BookInfo(WorkInfo): + FIELDS = ( + Field( DCNS('audience'), 'audiences', salias='audience', multiple=True, + required=False), + + Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True, + required=False), + Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True, + required=False), + Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True, + required=False), + + Field( DCNS('contributor.translator'), 'translators', \ + as_person, salias='translator', multiple=True, default=[]), + Field( DCNS('relation.hasPart'), 'parts', + WLURI, strict=as_wluri_strict, multiple=True, required=False), + Field( DCNS('relation.isVariantOf'), 'variant_of', + WLURI, strict=as_wluri_strict, required=False), + + Field( DCNS('relation.coverImage.url'), 'cover_url', required=False), + Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False), + Field( DCNS('relation.coverImage.source'), 'cover_source', required=False), + ) + + +def parse(file_name, cls=BookInfo): + return cls.from_file(file_name) diff --git a/librarian/epub.py b/librarian/epub.py index 348df0c..48bb2f2 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -13,14 +13,11 @@ from StringIO import StringIO from copy import deepcopy from lxml import etree import zipfile -from tempfile import mkdtemp +from tempfile import mkdtemp, NamedTemporaryFile from shutil import rmtree -import sys - -from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, XHTMLNS, NoDublinCore -from librarian.dcparser import BookInfo -from librarian.cover import ImageCover +from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile +from librarian.cover import WLCover from librarian import functions, get_resource @@ -160,19 +157,23 @@ def add_to_spine(spine, partno): class TOC(object): - def __init__(self, name=None, part_number=None): + def __init__(self, name=None, part_href=None): self.children = [] self.name = name - self.part_number = part_number + self.part_href = part_href self.sub_number = None - def add(self, name, part_number, level=0, is_part=True): + def add(self, name, part_href, level=0, is_part=True, index=None): + assert level == 0 or index is None if level > 0 and self.children: - return self.children[-1].add(name, part_number, level-1, is_part) + return self.children[-1].add(name, part_href, level-1, is_part) else: t = TOC(name) - t.part_number = part_number - self.children.append(t) + t.part_href = part_href + if index is not None: + self.children.insert(index, t) + else: + self.children.append(t) if not is_part: t.sub_number = len(self.children) + 1 return t.sub_number @@ -189,7 +190,13 @@ class TOC(object): else: return 0 - def write_to_xml(self, nav_map, counter): + def href(self): + src = self.part_href + if self.sub_number is not None: + src += '#sub%d' % self.sub_number + return src + + def write_to_xml(self, nav_map, counter=1): for child in self.children: nav_point = nav_map.makeelement(NCXNS('navPoint')) nav_point.set('id', 'NavPoint-%d' % counter) @@ -202,15 +209,26 @@ class TOC(object): nav_point.append(nav_label) content = nav_map.makeelement(NCXNS('content')) - src = 'part%d.html' % child.part_number - if child.sub_number is not None: - src += '#sub%d' % child.sub_number - content.set('src', src) + content.set('src', child.href()) nav_point.append(content) nav_map.append(nav_point) counter = child.write_to_xml(nav_point, counter + 1) return counter + def html_part(self, depth=0): + texts = [] + for child in self.children: + texts.append( + "
%s
" % + (depth, child.href(), child.name)) + texts.append(child.html_part(depth+1)) + return "\n".join(texts) + + def html(self): + with open(get_resource('epub/toc.html')) as f: + t = unicode(f.read(), 'utf-8') + return t % self.html_part() + def used_chars(element): """ Lists characters used in an ETree Element """ @@ -250,9 +268,9 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s toc = TOC() for element in chunk_xml[0]: if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): - toc.add(node_name(element), chunk_no) + toc.add(node_name(element), "part%d.html" % chunk_no) elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - subnumber = toc.add(node_name(element), chunk_no, level=1, is_part=False) + subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) element.set('sub', str(subnumber)) if empty: if not _empty_html_static: @@ -268,44 +286,40 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s return output_html, toc, chars -def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, +def transform(wldoc, verbose=False, + style=None, html_toc=False, sample=None, cover=None, flags=None): """ produces a EPUB file - provider: a DocProvider - slug: slug of file to process, available by provider - output_file: file-like object or path to output file - output_dir: path to directory to save output file to; either this or output_file must be present - make_dir: writes output to //.epub instead of /.epub sample=n: generate sample e-book (with at least n paragraphs) - cover: a cover.Cover object - flags: less-advertising, images, not-wl + cover: a cover.Cover object or True for default + flags: less-advertising, without-fonts, images, not-wl """ - def transform_file(input_xml, chunk_counter=1, first=True, sample=None): + def transform_file(wldoc, chunk_counter=1, first=True, sample=None): """ processes one input file and proceeds to its children """ - replace_characters(input_xml.getroot()) - - children = [child.text for child in input_xml.findall('.//'+DCNS('relation.hasPart'))] + replace_characters(wldoc.edoc.getroot()) # every input file will have a TOC entry, # pointing to starting chunk - toc = TOC(node_name(input_xml.find('.//'+DCNS('title'))), chunk_counter) + toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter) chars = set() if first: # write book title page - html_tree = xslt(input_xml, get_resource('epub/xsltTitle.xsl')) + html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl')) chars = used_chars(html_tree.getroot()) zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True)) - elif children: + # add a title page TOC entry + toc.add(u"Strona tytułowa", "title.html") + elif wldoc.book_info.parts: # write title page for every parent if sample is not None and sample <= 0: chars = set() html_string = open(get_resource('epub/emptyChunk.html')).read() else: - html_tree = xslt(input_xml, get_resource('epub/xsltChunkTitle.xsl')) + html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl')) chars = used_chars(html_tree.getroot()) html_string = etree.tostring(html_tree, method="html", pretty_print=True) zip.writestr('OPS/part%d.html' % chunk_counter, html_string) @@ -313,12 +327,12 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= add_to_spine(spine, chunk_counter) chunk_counter += 1 - if len(input_xml.getroot()) > 1: + if len(wldoc.edoc.getroot()) > 1: # rdf before style master - main_text = input_xml.getroot()[1] + main_text = wldoc.edoc.getroot()[1] else: # rdf in style master - main_text = input_xml.getroot()[0] + main_text = wldoc.edoc.getroot()[0] if main_text.tag == RDFNS('RDF'): main_text = None @@ -339,55 +353,29 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= add_to_spine(spine, chunk_counter) chunk_counter += 1 - if children: - for child in children: - child_xml = etree.parse(provider.by_uri(child)) - child_toc, chunk_counter, chunk_chars, sample = transform_file(child_xml, chunk_counter, first=False, sample=sample) - toc.append(child_toc) - chars = chars.union(chunk_chars) + for child in wldoc.parts(): + child_toc, chunk_counter, chunk_chars, sample = transform_file( + child, chunk_counter, first=False, sample=sample) + toc.append(child_toc) + chars = chars.union(chunk_chars) return toc, chunk_counter, chars, sample - # read metadata from the first file - if file_path: - if slug: - raise ValueError('slug or file_path should be specified, not both') - f = open(file_path, 'r') - input_xml = etree.parse(f) - f.close() - else: - if not slug: - raise ValueError('either slug or file_path should be specified') - input_xml = etree.parse(provider[slug]) + + document = deepcopy(wldoc) + del wldoc if flags: for flag in flags: - input_xml.getroot().set(flag, 'yes') - - metadata = input_xml.find('.//'+RDFNS('Description')) - if metadata is None: - raise NoDublinCore('Document has no DublinCore - which is required.') - book_info = BookInfo.from_element(input_xml) - metadata = etree.ElementTree(metadata) - - # if output to dir, create the file - if output_dir is not None: - if make_dir: - author = unicode(book_info.author) - output_dir = os.path.join(output_dir, author) - try: - os.makedirs(output_dir) - except OSError: - pass - if slug: - output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w') - else: - output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w') + document.edoc.getroot().set(flag, 'yes') - opf = xslt(metadata, get_resource('epub/xsltContent.xsl')) + opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) + guide = opf.find('.//' + OPFNS('guide')) spine = opf.find('.//' + OPFNS('spine')) + output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False) + zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) # write static elements @@ -401,15 +389,29 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '' \ '') - zip.write(get_resource('epub/style.css'), os.path.join('OPS', 'style.css')) if not flags or 'not-wl' not in flags: manifest.append(etree.fromstring( '')) + manifest.append(etree.fromstring( + '')) zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) + zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) + + if not style: + style = get_resource('epub/style.css') + zip.write(style, os.path.join('OPS', 'style.css')) if cover: + if cover is True: + cover = WLCover + if cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + cover_file = StringIO() - c = cover(book_info.author.readable(), book_info.title) + c = cover(document.book_info) c.save(cover_file) c_name = 'cover.%s' % c.ext() zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue()) @@ -424,12 +426,12 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '')) manifest.append(etree.fromstring( '' % (c_name, c.mime_type()))) - spine.insert(0, etree.fromstring('')) + spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) - opf.getroot().append(etree.fromstring('')) + guide.append(etree.fromstring('')) if flags and 'images' in flags: - for ilustr in input_xml.findall('//ilustr'): + for ilustr in document.edoc.findall('//ilustr'): src = ilustr.get('src') mime = ImageCover(src)().mime_type() zip.write(src, os.path.join('OPS', src)) @@ -446,7 +448,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= moved.tail = None after.addnext(moved) else: - for ilustr in input_xml.findall('//ilustr'): + for ilustr in document.edoc.findall('//ilustr'): ilustr.tag = 'extra' annotations = etree.Element('annotations') @@ -455,23 +457,24 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \ '' \ - '' \ - 'Strona tytułowa' \ - '') + '') nav_map = toc_file[-1] - toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample) + if html_toc: + manifest.append(etree.fromstring( + '')) + spine.append(etree.fromstring( + '')) + guide.append(etree.fromstring('')) + + toc, chunk_counter, chars, sample = transform_file(document, sample=sample) - if not toc.children: - toc.add(u"Początek utworu", 1) - toc_counter = toc.write_to_xml(nav_map, 2) + if len(toc.children) < 2: + toc.add(u"Początek utworu", "part1.html") # Last modifications in container files and EPUB creation if len(annotations) > 0: - nav_map.append(etree.fromstring( - 'Przypisy'\ - '' % {'i': toc_counter})) - toc_counter += 1 + toc.add("Przypisy", "annotations.html") manifest.append(etree.fromstring( '')) spine.append(etree.fromstring( @@ -482,44 +485,44 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= zip.writestr('OPS/annotations.html', etree.tostring( html_tree, method="html", pretty_print=True)) - nav_map.append(etree.fromstring( - 'Strona redakcyjna'\ - '' % {'i': toc_counter})) + toc.add("Strona redakcyjna", "last.html") manifest.append(etree.fromstring( '')) spine.append(etree.fromstring( '')) - stopka = input_xml.find('//stopka') + stopka = document.edoc.find('//stopka') if stopka is not None: stopka.tag = 'stopka_' replace_by_verse(stopka) html_tree = xslt(stopka, get_resource('epub/xsltScheme.xsl')) else: - html_tree = xslt(input_xml, get_resource('epub/xsltLast.xsl')) + html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl')) chars.update(used_chars(html_tree.getroot())) zip.writestr('OPS/last.html', etree.tostring( html_tree, method="html", pretty_print=True)) - # strip fonts - tmpdir = mkdtemp('-librarian-epub') - cwd = os.getcwd() - - os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) - for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': - optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), - get_resource('fonts/' + fname), os.path.join(tmpdir, fname)] - if verbose: - print "Running font-optimizer" - subprocess.check_call(optimizer_call) - else: - subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) - rmtree(tmpdir) - os.chdir(cwd) + if not flags or not 'without-fonts' in flags: + # strip fonts + tmpdir = mkdtemp('-librarian-epub') + cwd = os.getcwd() + + os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) + for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': + optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), + get_resource('fonts/' + fname), os.path.join(tmpdir, fname)] + if verbose: + print "Running font-optimizer" + subprocess.check_call(optimizer_call) + else: + subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) + manifest.append(etree.fromstring( + '' % (fname, fname))) + rmtree(tmpdir) + os.chdir(cwd) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) - contents = [] - title = node_name(etree.ETXPath('.//'+DCNS('title'))(input_xml)[0]) + title = document.book_info.title attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber" for st in attributes: meta = toc_file.makeelement(NCXNS('meta')) @@ -529,5 +532,13 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl'))) toc_file[0][1].set('content', str(toc.depth())) set_inner_xml(toc_file[1], ''.join(('', title, ''))) + + # write TOC + if html_toc: + toc.add(u"Spis treści", "toc.html", index=1) + zip.writestr('OPS/toc.html', toc.html().encode('utf-8')) + toc.write_to_xml(nav_map) zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) zip.close() + + return OutputFile.from_filename(output_file.name) diff --git a/librarian/epub/style.css b/librarian/epub/style.css index 7fb53a3..a4c61c8 100644 --- a/librarian/epub/style.css +++ b/librarian/epub/style.css @@ -108,28 +108,24 @@ p text-align: left; } -.annotation +.annotation-anchor { font-style: normal; font-weight: normal; font-size: 0.875em; -} - -#footnotes .annotation -{ display: block; float: left; width: 2.5em; clear: both; } -#footnotes div +.annotation { margin: 0; margin-top: 1.5em; } -#footnotes p +.annotation-body { margin-left: 2.5em; font-size: 0.875em; @@ -352,13 +348,17 @@ em.author-emphasis text-transform: uppercase; } -p.info +.info { text-align: center; margin-bottom: 1em; } +.info div +{ + text-align: center; +} -p.info img +.info img { margin: 0; margin-left: 2em; diff --git a/librarian/epub/toc.html b/librarian/epub/toc.html new file mode 100755 index 0000000..69d8724 --- /dev/null +++ b/librarian/epub/toc.html @@ -0,0 +1,11 @@ + + + + + WolneLektury.pl + + +

Spis treści

+ %s + + diff --git a/librarian/epub/xsltAnnotations.xsl b/librarian/epub/xsltAnnotations.xsl index c66730a..f3e6443 100644 --- a/librarian/epub/xsltAnnotations.xsl +++ b/librarian/epub/xsltAnnotations.xsl @@ -31,12 +31,12 @@ -
+

- + [] -

+

[przypis autorski]

diff --git a/librarian/epub/xsltContent.xsl b/librarian/epub/xsltContent.xsl index 65bf808..ef7ae74 100644 --- a/librarian/epub/xsltContent.xsl +++ b/librarian/epub/xsltContent.xsl @@ -29,14 +29,13 @@ - - - - + + + diff --git a/librarian/epub/xsltLast.xsl b/librarian/epub/xsltLast.xsl index dd44a30..751f97a 100644 --- a/librarian/epub/xsltLast.xsl +++ b/librarian/epub/xsltLast.xsl @@ -9,7 +9,7 @@ - + @@ -63,6 +63,31 @@ + +

Okładka na podstawie: + + + + + + + + + + + + + +

+
+ +
+ Logo 1% +
Przekaż 1% podatku na rozwój Wolnych Lektur.
+
Nazwa organizacji: Fundacja Nowoczesna Polska
+
KRS 0000070056
+
+

 

Plik wygenerowany dnia . diff --git a/librarian/epub/xsltScheme.xsl b/librarian/epub/xsltScheme.xsl index 3065cac..395e950 100644 --- a/librarian/epub/xsltScheme.xsl +++ b/librarian/epub/xsltScheme.xsl @@ -168,7 +168,7 @@

-
 
+
 
@@ -319,11 +319,11 @@ - + - + diff --git a/librarian/html.py b/librarian/html.py index 5f832e3..39e5a01 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -5,12 +5,10 @@ # import os import cStringIO -import re import copy from lxml import etree -from librarian.parser import WLDocument -from librarian import XHTMLNS, ParseError +from librarian import XHTMLNS, ParseError, OutputFile from librarian import functions from lxml.etree import XMLSyntaxError, XSLTApplyError @@ -30,9 +28,8 @@ def get_stylesheet(name): def html_has_content(text): return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text) -def transform(input, output_filename=None, is_file=True, \ - parse_dublincore=True, stylesheet='legacy', options={}, flags=None): - """Transforms file input_filename in XML to output_filename in XHTML. +def transform(wldoc, stylesheet='legacy', options=None, flags=None): + """Transforms the WL document to XHTML. If output_filename is None, returns an XML, otherwise returns True if file has been written,False if it hasn't. @@ -43,12 +40,9 @@ def transform(input, output_filename=None, is_file=True, \ style_filename = get_stylesheet(stylesheet) style = etree.parse(style_filename) - if is_file: - document = WLDocument.from_file(input, True, \ - parse_dublincore=parse_dublincore) - else: - document = WLDocument.from_string(input, True, \ - parse_dublincore=parse_dublincore) + document = copy.deepcopy(wldoc) + del wldoc + document.swap_endlines() if flags: for flag in flags: @@ -56,6 +50,8 @@ def transform(input, output_filename=None, is_file=True, \ document.clean_ed_note() + if not options: + options = {} result = document.transform(style, **options) del document # no longer needed large object :) @@ -63,16 +59,10 @@ def transform(input, output_filename=None, is_file=True, \ add_anchors(result.getroot()) add_table_of_contents(result.getroot()) - if output_filename is not None: - result.write(output_filename, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8') - else: - return result - return True + return OutputFile.from_string(etree.tostring(result, method='html', + xml_declaration=False, pretty_print=True, encoding='utf-8')) else: - if output_filename is not None: - return False - else: - return "" + return None except KeyError: raise ValueError("'%s' is not a valid stylesheet.") except (XMLSyntaxError, XSLTApplyError), e: @@ -238,10 +228,12 @@ def add_table_of_contents(root): if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)): continue + element_text = etree.tostring(element, method='text', + encoding=unicode).strip() if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2': - sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), [])) + sections[-1][3].append((counter, element.tag, element_text, [])) else: - sections.append((counter, element.tag, ''.join(element.xpath('text()')), [])) + sections.append((counter, element.tag, element_text, [])) add_anchor(element, "s%d" % counter, with_link=False) counter += 1 @@ -263,3 +255,17 @@ def add_table_of_contents(root): root.insert(0, toc) + +def extract_annotations(html_path): + """For each annotation, yields a tuple: anchor, text, html.""" + parser = etree.HTMLParser(encoding='utf-8') + tree = etree.parse(html_path, parser) + footnotes = tree.find('//*[@id="footnotes"]') + if footnotes is not None: + for footnote in footnotes.findall('div'): + anchor = footnote.find('a[@name]').get('name') + del footnote[:2] + text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip() + html_str = etree.tostring(footnote, method='html', encoding='utf-8') + yield anchor, text_str, html_str + diff --git a/librarian/mobi.py b/librarian/mobi.py new file mode 100755 index 0000000..1e7569b --- /dev/null +++ b/librarian/mobi.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from copy import deepcopy +import os +import subprocess +from tempfile import NamedTemporaryFile + +from librarian import OutputFile +from librarian.cover import WLCover +from librarian import get_resource + + +def transform(wldoc, verbose=False, + sample=None, cover=None, flags=None): + """ produces a MOBI file + + wldoc: a WLDocument + sample=n: generate sample e-book (with at least n paragraphs) + cover: a cover.Cover object + flags: less-advertising, + """ + + document = deepcopy(wldoc) + del wldoc + book_info = document.book_info + + # provide a cover by default + if not cover: + cover = WLCover + cover_file = NamedTemporaryFile(suffix='.png', delete=False) + c = cover(book_info) + c.save(cover_file) + + if cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + + if not flags: + flags = [] + flags = list(flags) + ['without-fonts'] + epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True, + flags=flags, style=get_resource('mobi/style.css')) + + if verbose: + kwargs = {} + else: + devnull = open("/dev/null", 'w') + kwargs = {"stdout": devnull, "stderr": devnull} + + output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False) + output_file.close() + subprocess.check_call(['ebook-convert', epub.get_filename(), output_file.name, + '--no-inline-toc', '--cover=%s' % cover_file.name], **kwargs) + os.unlink(cover_file.name) + return OutputFile.from_filename(output_file.name) \ No newline at end of file diff --git a/librarian/mobi/style.css b/librarian/mobi/style.css new file mode 100755 index 0000000..99792e5 --- /dev/null +++ b/librarian/mobi/style.css @@ -0,0 +1,306 @@ +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ + + +.h2 +{ + font-size: 2em; + margin: 0; + margin-top: 1.5em; + font-weight: bold; + line-height: 1.5em; +} + +.h3 +{ + text-align:left; + font-size: 1.5em; + margin-top: 1.5em; + font-weight: normal; + line-height: 1.5em; +} + +.h4 +{ + font-size: 1em; + margin: 0; + margin-top: 1em; + line-height: 1.5em; +} + +.paragraph +{ + margin-top: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ + +.annotation-anchor +{ + font-style: normal; + font-weight: normal; + font-size: 0.875em; + display: block; + float: left; + width: 2.5em; + clear: both; +} + +.annotation +{ + margin: 0; + margin-top: 1.5em; +} + +.annotation-body +{ + margin-left: 2.5em; + font-size: 0.875em; +} + +.block +{ + font-size: 0.875em; + padding: 1em; +} + +/* ============= */ +/* = Numbering = */ +/* ============= */ + +.anchor +{ + margin: -0.25em -0.5em; + color: #777; + font-size: 0.875em; + width: 2em; + text-align: center; + padding: 0.25em 0.5em; + line-height: 1.5em; +} + +/* =================== */ +/* = Custom elements = */ +/* =================== */ + +.title-page +{ + margin-top: 1.5em; +} + +.title +{ + font-size: 3em; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +.author +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.5em; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +.intitle +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.5em; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +.insubtitle +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1em; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +.collection +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.125em; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +.subtitle +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.5em; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia +{ + font-style: italic; + margin-top: 0.5em; + margin-left: 1.5em; +} + +div.kwestia +{ + margin-top: 0.5em; +} + +.stanza +{ + margin-bottom: 1em; +} + +.stanza-spacer +{ + display: none; +} + +.motto +{ + text-align: justify; + font-style: italic; + margin-top: 1.5em; +} + +.motto_podpis +{ + font-size: 0.875em; + text-align: right; +} + +div.fragment +{ + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note +{ + text-align: right; + font-style: italic; +} +div.note div.paragraph +{ + text-align: right; + font-style: italic; +} +div.dedication +{ + text-align: right; + font-style: italic; +} +div.dedication div.paragaph +{ + text-align: right; + font-style: italic; +} + + +hr.spacer +{ + height: 3em; + visibility: hidden; +} + +hr.spacer-line +{ + margin: 0; + margin-top: 1.5em; + margin-bottom: 1.5em; + border: none; + border-bottom: 0.1em solid #000; +} + +.spacer-asterisk +{ + padding: 0; + margin: 0; + margin-top: 1.5em; + margin-bottom: 1.5em; + text-align: center; +} + +div.person-list ol +{ + list-style: none; + padding: 0; + padding-left: 1.5em; +} + +.place-and-time +{ + font-style: italic; +} + +em.math +{ + font-style: italic; +} +em.foreign-word +{ + font-style: italic; +} +em.book-title +{ + font-style: italic; +} +em.didaskalia +{ + font-style: italic; +} + +em.author-emphasis +{ + letter-spacing: 0.1em; +} + +.person-list em.person +{ + font-style: normal; + text-transform: uppercase; +} + +.info +{ + text-align: center; + margin-bottom: 1em; +} +.info div +{ + text-align: center; +} + +.info img +{ + margin: 0; + margin-left: 2em; + margin-right: 2em; +} + +p.minor { + font-size: 0.75em; +} +p.footer { + margin-top: 2em; +} diff --git a/librarian/packagers.py b/librarian/packagers.py index 2c543da..9a93e56 100644 --- a/librarian/packagers.py +++ b/librarian/packagers.py @@ -6,8 +6,8 @@ import os from copy import deepcopy from lxml import etree -from librarian import epub, pdf, DirDocProvider, ParseError, cover -from librarian.dcparser import BookInfo +from librarian import pdf, epub, DirDocProvider, ParseError, cover +from librarian.parser import WLDocument class Packager(object): @@ -26,8 +26,11 @@ class Packager(object): except: pass outfile = os.path.join(output_dir, slug + '.' + cls.ext) - cls.converter.transform(provider, file_path=main_input, output_file=outfile, + + doc = WLDocument.from_file(main_input, provider=provider) + output_file = cls.converter.transform(doc, cover=cls.cover, flags=cls.flags) + doc.save_output_file(output_file, output_path=outfile) @classmethod @@ -84,7 +87,6 @@ class VirtualoEpubPackager(Packager): """ truncates text to at most `limit' bytes in utf-8 """ if text is None: return text - orig_text = text if len(text.encode('utf-8')) > limit: newlimit = limit - 3 while len(text.encode('utf-8')) > newlimit: @@ -122,7 +124,8 @@ class VirtualoEpubPackager(Packager): outfile_dir = os.path.join(output_dir, slug) os.makedirs(os.path.join(output_dir, slug)) - info = BookInfo.from_file(main_input) + doc = WLDocument.from_file(main_input, provider=provider) + info = doc.book_info product_elem = deepcopy(product) product_elem[0].text = cls.utf_trunc(slug, 100) @@ -133,14 +136,13 @@ class VirtualoEpubPackager(Packager): product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100) xml.append(product_elem) - cover.VirtualoCover( - u' '.join(info.author.first_names + (info.author.last_name,)), - info.title - ).save(os.path.join(outfile_dir, slug+'.jpg')) + cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg')) outfile = os.path.join(outfile_dir, '1.epub') outfile_sample = os.path.join(outfile_dir, '1.sample.epub') - epub.transform(provider, file_path=main_input, output_file=outfile) - epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25) + doc.save_output_file(epub.transform(doc), + output_path=outfile) + doc.save_output_file(epub.transform(doc, sample=25), + output_path=outfile_sample) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, diff --git a/librarian/parser.py b/librarian/parser.py index afc4f1a..2ece72f 100644 --- a/librarian/parser.py +++ b/librarian/parser.py @@ -3,7 +3,7 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from librarian import ValidationError, NoDublinCore, ParseError +from librarian import ValidationError, NoDublinCore, ParseError, NoProvider from librarian import RDFNS from librarian import dcparser @@ -11,14 +11,17 @@ from xml.parsers.expat import ExpatError from lxml import etree from lxml.etree import XMLSyntaxError, XSLTApplyError +import os import re from StringIO import StringIO class WLDocument(object): - LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE); + LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) + provider = None - def __init__(self, edoc, parse_dublincore=True): + def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False): self.edoc = edoc + self.provider = provider root_elem = edoc.getroot() @@ -33,7 +36,8 @@ class WLDocument(object): if self.rdf_elem is None: raise NoDublinCore('Document has no DublinCore - which is required.') - self.book_info = dcparser.BookInfo.from_element(self.rdf_elem) + self.book_info = dcparser.BookInfo.from_element( + self.rdf_elem, strict=strict) else: self.book_info = None @@ -42,7 +46,7 @@ class WLDocument(object): return cls.from_file(StringIO(xml), *args, **kwargs) @classmethod - def from_file(cls, xmlfile, swap_endlines=False, parse_dublincore=True): + def from_file(cls, xmlfile, parse_dublincore=True, provider=None): # first, prepare for parsing if isinstance(xmlfile, basestring): @@ -63,20 +67,17 @@ class WLDocument(object): parser = etree.XMLParser(remove_blank_text=False) tree = etree.parse(StringIO(data.encode('utf-8')), parser) - if swap_endlines: - cls.swap_endlines(tree) - - return cls(tree, parse_dublincore=parse_dublincore) + return cls(tree, parse_dublincore=parse_dublincore, provider=provider) except (ExpatError, XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) - @classmethod - def swap_endlines(cls, tree): + def swap_endlines(self): + """Converts line breaks in stanzas into
tags.""" # only swap inside stanzas - for elem in tree.iter('strofa'): + for elem in self.edoc.iter('strofa'): for child in list(elem): if child.tail: - chunks = cls.LINE_SWAP_EXPR.split(child.tail) + chunks = self.LINE_SWAP_EXPR.split(child.tail) ins_index = elem.index(child) + 1 while len(chunks) > 1: ins = etree.Element('br') @@ -84,13 +85,22 @@ class WLDocument(object): elem.insert(ins_index, ins) child.tail = chunks.pop(0) if elem.text: - chunks = cls.LINE_SWAP_EXPR.split(elem.text) + chunks = self.LINE_SWAP_EXPR.split(elem.text) while len(chunks) > 1: ins = etree.Element('br') ins.tail = chunks.pop() elem.insert(0, ins) elem.text = chunks.pop(0) + def parts(self): + if self.provider is None: + raise NoProvider('No document provider supplied.') + if self.book_info is None: + raise NoDublinCore('No Dublin Core in document.') + for part_uri in self.book_info.parts: + yield self.from_file(self.provider.by_uri(part_uri), + provider=self.provider) + def chunk(self, path): # convert the path to XPath expr = self.path_to_xpath(path) @@ -152,3 +162,41 @@ class WLDocument(object): node.clear() node.tag = 'span' node.tail = tail + + # Converters + + def as_html(self, *args, **kwargs): + from librarian import html + return html.transform(self, *args, **kwargs) + + def as_text(self, *args, **kwargs): + from librarian import text + return text.transform(self, *args, **kwargs) + + def as_epub(self, *args, **kwargs): + from librarian import epub + return epub.transform(self, *args, **kwargs) + + def as_pdf(self, *args, **kwargs): + from librarian import pdf + return pdf.transform(self, *args, **kwargs) + + def as_mobi(self, *args, **kwargs): + from librarian import mobi + return mobi.transform(self, *args, **kwargs) + + def save_output_file(self, output_file, output_path=None, + output_dir_path=None, make_author_dir=False, ext=None): + if output_dir_path: + save_path = output_dir_path + if make_author_dir: + save_path = os.path.join(save_path, + unicode(self.book_info.author).encode('utf-8')) + save_path = os.path.join(save_path, + self.book_info.uri.slug) + if ext: + save_path += '.%s' % ext + else: + save_path = output_path + + output_file.save_as(save_path) diff --git a/librarian/pdf.py b/librarian/pdf.py index 476fbee..bcf8d9a 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -8,21 +8,20 @@ import os import os.path import shutil from StringIO import StringIO -from tempfile import mkdtemp +from tempfile import mkdtemp, NamedTemporaryFile import re from copy import deepcopy from subprocess import call, PIPE -import sys - from Texml.processor import process from lxml import etree from lxml.etree import XMLSyntaxError, XSLTApplyError from librarian.dcparser import Person from librarian.parser import WLDocument -from librarian import ParseError, DCNS, get_resource +from librarian import ParseError, DCNS, get_resource, OutputFile from librarian import functions +from librarian.cover import WLCover functions.reg_substitute_entities() @@ -35,6 +34,13 @@ STYLESHEETS = { 'wl2tex': 'pdf/wl2tex.xslt', } +CUSTOMIZATIONS = [ + 'nofootnotes', + 'nothemes', + 'onehalfleading', + 'doubleleading', + 'nowlfont', + ] def insert_tags(doc, split_re, tagname, exclude=None): """ inserts for every occurence of `split_re' in text nodes in the `doc' tree @@ -152,7 +158,7 @@ def package_available(package, args='', verbose=False): fpath = os.path.join(tempdir, 'test.tex') f = open(fpath, 'w') f.write(r""" - \documentclass{book} + \documentclass{wl} \usepackage[%s]{%s} \begin{document} \end{document} @@ -166,38 +172,33 @@ def package_available(package, args='', verbose=False): return p == 0 -def transform(provider, slug=None, file_path=None, - output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None, morefloats=None, - cover=None, flags=None): +def transform(wldoc, verbose=False, save_tex=None, morefloats=None, + cover=None, flags=None, customizations=None): """ produces a PDF file with XeLaTeX - provider: a DocProvider - slug: slug of file to process, available by provider - file_path can be provided instead of a slug - output_file: file-like object or path to output file - output_dir: path to directory to save output file to; either this or output_file must be present - make_dir: writes output to //.pdf istead of /.pdf + wldoc: a WLDocument verbose: prints all output from LaTeX save_tex: path to save the intermediary LaTeX file to morefloats (old/new/none): force specific morefloats cover: a cover.Cover object flags: less-advertising, + customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class) """ # Parse XSLT try: - if file_path: - if slug: - raise ValueError('slug or file_path should be specified, not both') - document = load_including_children(provider, file_path=file_path) - else: - if not slug: - raise ValueError('either slug or file_path should be specified') - document = load_including_children(provider, slug=slug) + document = load_including_children(wldoc) if cover: + if cover is True: + cover = WLCover document.edoc.getroot().set('data-cover-width', str(cover.width)) document.edoc.getroot().set('data-cover-height', str(cover.height)) + if cover.uses_dc_cover: + if document.book_info.cover_by: + document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + if document.book_info.cover_source: + document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) if flags: for flag in flags: document.edoc.getroot().set('flag-' + flag, 'yes') @@ -208,6 +209,10 @@ def transform(provider, slug=None, file_path=None, elif package_available('morefloats', 'maxfloats=19'): document.edoc.getroot().set('morefloats', 'new') + # add customizations + if customizations is not None: + document.edoc.getroot().set('customizations', u','.join(customizations)) + # hack the tree move_motifs_inside(document.edoc) hack_motifs(document.edoc) @@ -215,21 +220,17 @@ def transform(provider, slug=None, file_path=None, substitute_hyphens(document.edoc) fix_hanging(document.edoc) - # find output dir - if make_dir and output_dir is not None: - author = unicode(document.book_info.author) - output_dir = os.path.join(output_dir, author) - # wl -> TeXML style_filename = get_stylesheet("wl2tex") style = etree.parse(style_filename) + texml = document.transform(style) # TeXML -> LaTeX temp = mkdtemp('-wl2pdf') if cover: - c = cover(document.book_info.author.readable(), document.book_info.title) + c = cover(document.book_info) with open(os.path.join(temp, 'cover.png'), 'w') as f: c.save(f) @@ -245,7 +246,7 @@ def transform(provider, slug=None, file_path=None, shutil.copy(tex_path, save_tex) # LaTeX -> PDF - shutil.copy(get_resource('pdf/wl.sty'), temp) + shutil.copy(get_resource('pdf/wl.cls'), temp) shutil.copy(get_resource('res/wl-logo.png'), temp) cwd = os.getcwd() @@ -260,56 +261,38 @@ def transform(provider, slug=None, file_path=None, os.chdir(cwd) - # save the PDF + output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False) pdf_path = os.path.join(temp, 'doc.pdf') - if output_dir is not None: - try: - os.makedirs(output_dir) - except OSError: - pass - if slug: - output_path = os.path.join(output_dir, '%s.pdf' % slug) - else: - output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf') - shutil.move(pdf_path, output_path) - else: - if hasattr(output_file, 'write'): - # file-like object - with open(pdf_path) as f: - output_file.write(f.read()) - output_file.close() - else: - # path to output file - shutil.copy(pdf_path, output_file) + shutil.move(pdf_path, output_file.name) shutil.rmtree(temp) + return OutputFile.from_filename(output_file.name) except (XMLSyntaxError, XSLTApplyError), e: raise ParseError(e) -def load_including_children(provider, slug=None, uri=None, file_path=None): - """ makes one big xml file with children inserted at end - either slug or uri must be provided +def load_including_children(wldoc=None, provider=None, uri=None): + """ Makes one big xml file with children inserted at end. + + Either wldoc or provider and URI must be provided. """ - if uri: + if uri and provider: f = provider.by_uri(uri) - elif slug: - f = provider[slug] - elif file_path: - f = open(file_path, 'r') + text = f.read().decode('utf-8') + f.close() + elif wldoc is not None: + text = etree.tostring(wldoc.edoc, encoding=unicode) + provider = wldoc.provider else: - raise ValueError('Neither slug, URI nor file path provided for a book.') + raise ValueError('Neither a WLDocument, nor provider and URI were provided.') - text = f.read().decode('utf-8') text = re.sub(ur"([\u0400-\u04ff]+)", ur"\1", text) - document = WLDocument.from_string(text, True, - parse_dublincore=True) + document = WLDocument.from_string(text, parse_dublincore=True) + document.swap_endlines() - f.close() for child_uri in document.book_info.parts: - print child_uri - child = load_including_children(provider, uri=child_uri) + child = load_including_children(provider=provider, uri=child_uri) document.edoc.getroot().append(child.edoc.getroot()) return document diff --git a/librarian/pdf/wl.cls b/librarian/pdf/wl.cls new file mode 100644 index 0000000..c9305ca --- /dev/null +++ b/librarian/pdf/wl.cls @@ -0,0 +1,490 @@ +% -*- coding: utf-8 -*- +\NeedsTeXFormat{LaTeX2e} +\ProvidesClass{wl}[2011/11/28 wolnelektury.pl book style] + +% PDF customizations +% +% nofootnotes - disable generation of footnotes +% nothemes - disable generation of themes +% onehalfleading - leading of 1.5 (interlinia) +% doubleleading - double leading (interlinia) +% a4paper,... - paper size as required by LaTeX +% nowlfont - don't use customized WL font + +\RequirePackage{setspace} +\RequirePackage{type1cm} +\DeclareOption{13pt}{% +\AtEndOfClass{% +% font size definitions, similar to ones in /usr/share/texmf-texlive/tex/latex/base/ +\renewcommand\normalsize{% + \@setfontsize\normalsize{13pt}{14.5pt}% + \abovedisplayskip 12\p@ \@plus3\p@ \@minus7\p@ + \abovedisplayshortskip \z@ \@plus3\p@ + \belowdisplayshortskip 6.5\p@ \@plus3.5\p@ \@minus3\p@ + \belowdisplayskip \abovedisplayskip + \let\@listi\@listI}\normalsize% +\renewcommand\footnotesize{% + \@setfontsize\footnotesize\@xpt\@xiipt + \abovedisplayskip 10\p@ \@plus2\p@ \@minus5\p@ + \abovedisplayshortskip \z@ \@plus3\p@ + \belowdisplayshortskip 6\p@ \@plus3\p@ \@minus3\p@ + \def\@listi{\leftmargin\leftmargini + \topsep 6\p@ \@plus2\p@ \@minus2\p@ + \parsep 3\p@ \@plus2\p@ \@minus\p@ + \itemsep \parsep}% + \belowdisplayskip \abovedisplayskip +}% +}% +} + +%% \DeclareOption{14pt}{\renewcommand{\normalsize}{\AtEndOfClass{\fontsize{14}{17}\selectfont}}} + +\DeclareOption{doubleleading}{\AtBeginDocument{\doublespacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}} +\DeclareOption{onehalfleading}{\AtBeginDocument{\onehalfspacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}} + +%% This does not really work, since dvipdfm(x) will use it's configuration in /etc/texmf/dvipdfm(x) and force a global paper size setting. +\DeclareOption{a5paper}{% + \setlength{\paperheight}{210mm}% + \setlength{\paperwidth}{148mm}} + + +\newif\ifshowfootnotes \showfootnotestrue +\DeclareOption{nofootnotes}{\showfootnotesfalse} + +\newif\ifshowthemes \showthemestrue +\DeclareOption{nothemes}{\showthemesfalse} + +\newif\ifenablewlfont \enablewlfonttrue +\DeclareOption{nowlfont}{\enablewlfontfalse} + +\DeclareOption*{\PassOptionsToClass{\CurrentOption}{book}} +\ProcessOptions\relax +\LoadClass[a4paper,oneside]{book} + + +\usepackage{trace} + +\usepackage[MeX]{polski} + +\usepackage[xetex]{graphicx} +\usepackage{fontspec} +\usepackage{xunicode} +\usepackage{xltxtra} + +\usepackage[overload]{textcase} +\usepackage{scalefnt} +\usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref} + +\ifenablewlfont +\setmainfont [ +%ExternalLocation, +UprightFont = JunicodeWL-Regular, +ItalicFont = JunicodeWL-Italic, +BoldFont = JunicodeWL-Regular, +BoldItalicFont = JunicodeWL-Italic, +SmallCapsFont = JunicodeWL-Regular, +SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}}, +Numbers=OldStyle, +Scale=1.04, +LetterSpace=-1.0 +] {JunicodeWL} + +\newfontfamily\alien[ +SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}}, +Numbers=OldStyle, +Scale=0.85, +LetterSpace=-1.0 +] {DejaVu Serif} + + +\defaultfontfeatures{ +SizeFeatures={ + {Size={-10.5}, FakeStretch=1.02, LetterSpace=2.0 }, + {Size={10.5-12}, FakeStretch=2.00, LetterSpace=0.0 }, + {Size={12-}, FakeStretch=0.98, LetterSpace=-2.0 } +} +} + +\renewcommand{\textsc}[1]{% +{\addfontfeature{ +SizeFeatures={ + {Size={-10.5}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 }, + {Size={10.5-12}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 }, + {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 } +}, +Letters={SmallCaps,UppercaseSmallCaps} +} +#1} +} +\fi% enablewlfont + +%{\Itshape JunicodeWL-Italic.ttf } +%{\bfseries Junicode-Bold.ttf } +%{\bfseries\itshape Junicode-BoldItalic.ttf } + +\pagestyle{plain} +\usepackage{fancyhdr} + +\makeatletter + +% bottom figure below footnotes +\usepackage{fnpos} +\makeFNabove + +\usepackage{color} +\definecolor{theme}{gray}{.3} + +\setlength{\marginparsep}{2em} +\setlength{\marginparwidth}{8.5em} +\setlength{\oddsidemargin}{0pt} +\setlength{\voffset}{0pt} +\setlength{\topmargin}{0pt} +\setlength{\headheight}{0pt} +\setlength{\headsep}{0pt} +\setlength{\textheight}{24cm} + +\pagestyle{fancy} +\fancyhf{} +\renewcommand{\headrulewidth}{0pt} +\renewcommand{\footrulewidth}{0pt} +\lfoot{{\footnotesize \textsc{\@author} \emph{\@title}}} +\cfoot{} +\rfoot{{\footnotesize \thepage}} + +\clubpenalty=100000 +\widowpenalty=100000 + + +% see http://osdir.com/ml/tex.xetex/2005-10/msg00003.html +\newsavebox{\ximagebox}\newlength{\ximageheight} +\newsavebox{\xglyphbox}\newlength{\xglyphheight} +\newcommand{\xbox}[1] +{\savebox{\ximagebox}{#1}\settoheight{\ximageheight}{\usebox {\ximagebox}}% +\savebox{\xglyphbox}{\char32}\settoheight{\xglyphheight}{\usebox {\xglyphbox}}% +\raisebox{\ximageheight}[0pt][0pt]{%\raisebox{-\xglyphheight}[0pt] [0pt]{% +\makebox[0pt][l]{\usebox{\xglyphbox}}}%}% +\usebox{\ximagebox}% +\raisebox{0pt}[0pt][0pt]{\makebox[0pt][r]{\usebox{\xglyphbox}}}} + +\newcommand{\makecover}[2]{ + \pdfpagewidth=#1 + \pdfpageheight=#2 + + \thispagestyle{empty} + \newlength{\PictHOffset} + \newlength{\PictVOffset} + \setlength{\PictHOffset}{1in} + \addtolength{\PictHOffset}{\hoffset} + \addtolength{\PictHOffset}{\oddsidemargin} + + \setlength{\PictVOffset}{1in} + \addtolength{\PictVOffset}{\voffset} + \addtolength{\PictVOffset}{\topmargin} + \addtolength{\PictVOffset}{\headheight} + \addtolength{\PictVOffset}{\headsep} + \addtolength{\PictVOffset}{\topskip} + \addtolength{\PictVOffset}{-\pdfpageheight} + + \noindent\hspace*{-\PictHOffset}% + \raisebox{\PictVOffset}[0pt][0pt]{\makebox[0pt][l]{% + \includegraphics[height=\pdfpageheight,width=\pdfpagewidth]{cover.png}}} + \clearpage + + \setlength{\pdfpagewidth}{210mm} + \setlength{\pdfpageheight}{297mm} +} + + +\renewcommand{\maketitle}{ + { + \thispagestyle{empty} + \footnotesize + \color{theme} + + \noindent \begin{minipage}[t]{.35\textwidth}\vspace{0pt} + \href{http://www.wolnelektury.pl}{\xbox{\includegraphics[width=\textwidth]{wl-logo.png}}} + \end{minipage} + \begin{minipage}[t]{.65\textwidth}\vspace{0pt} + + \ifflaglessadvertising + \else + \href{\bookurl}{Ta lektura}, podobnie jak tysiące innych, jest dostępna on-line na stronie + \href{http://www.wolnelektury.pl/}{wolnelektury.pl}. + \vspace{.5em} + \fi + + Utwór opracowany został w ramach projektu \href{http://www.wolnelektury.pl/}{Wolne Lektury} + przez \href{http://nowoczesnapolska.org.pl}{fundację Nowoczesna Polska}. + + \end{minipage} + \noindent \rule{\linewidth}{0.4pt} + + \vspace{.6em} + \color{black} + } +} + +\newcommand{\editorialsection}{ + \begin{figure}[b!] + { + \footnotesize + \color{theme} + \noindent \rule{\linewidth}{0.4pt} + + \rightsinfo + \vspace{.6em} + + Źródło: \href{\bookurl}{\bookurl} + + \vspace{.6em} + \sourceinfo + + \description + \vspace{.6em} + + \editors + + \vspace{.6em} + \coverby + + \color{black} + } + \end{figure} +} + + +\newcommand{\typosubsubsection}[1]{% +{\textsc{#1}} +} + +\newcommand{\typosubsection}[1]{% +{\addfontfeature{ +SizeFeatures={ + {Size={-10}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 }, + {Size={10.5-12}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 }, + {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 } +}, +Letters={Uppercase} +} +\MakeUppercase{#1}} +} + +\newcommand{\typosection}[1]{% +{\addfontfeature{FakeStretch=0.96, LetterSpace=-4.0}\emph{\scalefont{2}#1}} +%{\addfontfeature{Scale=2.0, FakeStretch=0.98, LetterSpace=-2.0}\emph{#1}} +} + + +\newcommand{\tytul}[1]{% +#1% +\vspace{1em}% +} + +\newcommand{\nazwapodutworu}[1]{% +\section*{\typosection{#1}}% +} + +\newcommand{\autorutworu}[1]{% +\subsection*{\typosubsection{#1}}% +} + +\newcommand{\dzielonadrzedne}[1]{% +\subsection*{\typosubsubsection{#1}}% +} + +\newcommand{\nazwautworu}[1]{% +\section*{\typosection{#1}}% +} + +\newcommand{\podtytul}[1]{% +\subsection*{\typosubsubsection{#1}}% +} + +\newcommand{\translator}[1]{% +\subsection*{\typosubsubsection{tłum. #1}}% +} + + +\newcommand{\powiesc}[1]{#1} +\newcommand{\opowiadanie}[1]{#1} +\newcommand{\lirykal}[1]{#1} +\newcommand{\lirykalp}[1]{#1} +\newcommand{\dramatwierszowanyl}[1]{#1} +\newcommand{\dramatwierszowanylp}[1]{#1} +\newcommand{\dramatwspolczesny}[1]{#1} + +\newcommand{\nota}[1]{% +\par{#1}% +} + +\newcommand{\dedykacja}[1]{% +\begin{em}% +\begin{flushright}% +#1% +\end{flushright}% +\end{em}% +} + +\newcommand{\dlugicytat}[1]{% +\begin{quotation}% +#1% +\end{quotation}% +} + +\newcommand{\poezjacyt}[1]{% +\begin{verse}% +#1% +\end{verse}% +} +\newcommand{\motto}[1]{% +\begin{em}% +#1% +\end{em}% +} +\newcommand{\listaosob}[2]{% +\par{#1}% +\begin{itemize}% +#2% +\end{itemize}% +} + +\newcommand{\nagloweklisty}[1]{% +\typosubsubsection{#1}% +} + +\newcommand{\listaosoba}[1]{% +\item{#1}% +} + +\newcommand{\kwestia}[1]{% +\par{#1}% +} + +\newcommand{\naglowekakt}[1]{% +\pagebreak +\subsection*{\typosubsection{#1}}% +} +\newcommand{\naglowekczesc}[1]{% +\pagebreak +\subsection*{\typosubsection{#1}}% +} +\newcommand{\srodtytul}[1]{% +\subsection*{\typosubsection{#1}}% +} + +\newcommand{\naglowekscena}[1]{% +\subsubsection*{\typosubsubsection{#1}}% +} +\newcommand{\naglowekrozdzial}[1]{% +\subsubsection*{\typosubsubsection{#1}}% +} + +\newcommand{\naglowekosoba}[1]{% +\par{\textsc{#1}}\nopagebreak% +} +\newcommand{\naglowekpodrozdzial}[1]{% +\par{#1}\nopagebreak% +} + +\newcommand{\miejsceczas}[1]{% +\par{\emph{#1}}% +} +\newcommand{\didaskalia}[1]{% +\par{\emph{#1}}% +} + +\newcommand{\akap}[1]{% +\par{#1}% +} +\newcommand{\akapdialog}[1]{% +\par{#1}% +} +\newcommand{\akapcd}[1]{% +\par{#1}% +} + +\newcommand{\mottopodpis}[1]{% +\begin{em}% +\begin{flushright}% +#1% +\end{flushright}% +\end{em}% +} + +\newcommand{\strofa}[1]{% +\par{\noindent{\ignorespaces#1\vspace{1em}}}% +} + +\newcommand{\wers}[1]{#1} + +\newcommand{\wersakap}[1]{% +\hspace*{1em}#1% +} +\newcommand{\werscd}[1]{% +\hspace*{8em}#1% +} +\newcommand{\werswciety}[2][1em]{% +\hspace*{#1}#2% +} + +\ifshowfootnotes + \newcommand{\pa}[1]{\NoCaseChange{\footnote{#1 [przypis autorski]}}} + \newcommand{\pe}[1]{\NoCaseChange{\footnote{#1}}} + \newcommand{\pr}[1]{\NoCaseChange{\footnote{#1}}} + \newcommand{\pt}[1]{\NoCaseChange{\footnote{#1}}} +\else + \newcommand{\pa}[1]{} + \newcommand{\pe}[1]{} + \newcommand{\pr}[1]{} + \newcommand{\pt}[1]{} +\fi + +\newcommand{\mat}[1]{$#1$} + +\newcommand{\didasktekst}[1]{% +\emph{#1}% +} +\newcommand{\slowoobce}[1]{% +\emph{#1}% +} +\newcommand{\tytuldziela}[1]{% +\emph{#1}% +} +\newcommand{\wyroznienie}[1]{% +\emph{#1}% +} + +\newcommand{\osoba}[1]{% +#1% +} + +\newcommand{\sekcjaswiatlo}{% +\vspace{30pt}% +} + +\newcommand{\sekcjaasterysk}{% +\vspace{10pt}% +\begin{center}% +\par{*}% +\end{center}% +} + +\newcommand{\separatorlinia}{% +\vspace{10pt}% +\hrule{}% +\vspace{10pt}% +} + +\newcommand{\motyw}[2][0]{% +\ifshowthemes +\mbox{}% +\marginpar{% +\vspace{-8pt}% +\vspace{-#1\baselineskip}% +\raggedright{\hspace{0pt}% +\footnotesize{\color{theme}{#2}}}% +\vspace{\baselineskip}% +}% +\fi +} + diff --git a/librarian/pdf/wl.sty b/librarian/pdf/wl.sty deleted file mode 100644 index 28a23a5..0000000 --- a/librarian/pdf/wl.sty +++ /dev/null @@ -1,413 +0,0 @@ -% -*- coding: utf-8 -*- - -\usepackage[MeX]{polski} - -\usepackage[xetex]{graphicx} -\usepackage{fontspec} -\usepackage{xunicode} -\usepackage{xltxtra} - -\usepackage[overload]{textcase} -\usepackage{scalefnt} -\usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref} - -\setmainfont [ -%ExternalLocation, -UprightFont = JunicodeWL-Regular, -ItalicFont = JunicodeWL-Italic, -BoldFont = JunicodeWL-Regular, -BoldItalicFont = JunicodeWL-Italic, -SmallCapsFont = JunicodeWL-Regular, -SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}}, -Numbers=OldStyle, -Scale=1.04, -LetterSpace=-1.0 -] {JunicodeWL} - -\newfontfamily\alien[ -SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}}, -Numbers=OldStyle, -Scale=0.85, -LetterSpace=-1.0 -] {DejaVu Serif} - - -\defaultfontfeatures{ -SizeFeatures={ - {Size={-10}, FakeStretch=1.02, LetterSpace=2.0 }, - {Size={10.5-12}, FakeStretch=2.00, LetterSpace=0.0 }, - {Size={12-}, FakeStretch=0.98, LetterSpace=-2.0 } -} -} - -\renewcommand{\textsc}[1]{% -{\addfontfeature{ -SizeFeatures={ - {Size={-10}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 }, - {Size={10.5-12}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 }, - {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 } -}, -Letters={SmallCaps,UppercaseSmallCaps} -} -#1} -} - -%{\itshape JunicodeWL-Italic.ttf } -%{\bfseries Junicode-Bold.ttf } -%{\bfseries\itshape Junicode-BoldItalic.ttf } - -\pagestyle{plain} -\usepackage{fancyhdr} - -\makeatletter - -% bottom figure below footnotes -\usepackage{fnpos} -\makeFNabove - -\usepackage{color} -\definecolor{theme}{gray}{.3} - -\setlength{\marginparsep}{2em} -\setlength{\marginparwidth}{8.5em} -\setlength{\oddsidemargin}{0pt} -\setlength{\voffset}{0pt} -\setlength{\topmargin}{0pt} -\setlength{\headheight}{0pt} -\setlength{\headsep}{0pt} -\setlength{\textheight}{24cm} - -\pagestyle{fancy} -\fancyhf{} -\renewcommand{\headrulewidth}{0pt} -\renewcommand{\footrulewidth}{0pt} -\lfoot{{\footnotesize \textsc{\@author} \emph{\@title}}} -\cfoot{} -\rfoot{{\footnotesize \thepage}} - -\clubpenalty=100000 -\widowpenalty=100000 - - -% see http://osdir.com/ml/tex.xetex/2005-10/msg00003.html -\newsavebox{\ximagebox}\newlength{\ximageheight} -\newsavebox{\xglyphbox}\newlength{\xglyphheight} -\newcommand{\xbox}[1] -{\savebox{\ximagebox}{#1}\settoheight{\ximageheight}{\usebox {\ximagebox}}% -\savebox{\xglyphbox}{\char32}\settoheight{\xglyphheight}{\usebox {\xglyphbox}}% -\raisebox{\ximageheight}[0pt][0pt]{%\raisebox{-\xglyphheight}[0pt] [0pt]{% -\makebox[0pt][l]{\usebox{\xglyphbox}}}%}% -\usebox{\ximagebox}% -\raisebox{0pt}[0pt][0pt]{\makebox[0pt][r]{\usebox{\xglyphbox}}}} - -\newcommand{\makecover}[2]{ - \pdfpagewidth=#1 - \pdfpageheight=#2 - - \thispagestyle{empty} - \newlength{\PictHOffset} - \newlength{\PictVOffset} - \setlength{\PictHOffset}{1in} - \addtolength{\PictHOffset}{\hoffset} - \addtolength{\PictHOffset}{\oddsidemargin} - - \setlength{\PictVOffset}{1in} - \addtolength{\PictVOffset}{\voffset} - \addtolength{\PictVOffset}{\topmargin} - \addtolength{\PictVOffset}{\headheight} - \addtolength{\PictVOffset}{\headsep} - \addtolength{\PictVOffset}{\topskip} - \addtolength{\PictVOffset}{-\pdfpageheight} - - \noindent\hspace*{-\PictHOffset}% - \raisebox{\PictVOffset}[0pt][0pt]{\makebox[0pt][l]{% - \includegraphics[height=\pdfpageheight,width=\pdfpagewidth]{cover.png}}} - \clearpage - - \setlength{\pdfpagewidth}{210mm} - \setlength{\pdfpageheight}{297mm} -} - - -\renewcommand{\maketitle}{ - { - \thispagestyle{empty} - \footnotesize - \color{theme} - - \noindent \begin{minipage}[t]{.35\textwidth}\vspace{0pt} - \href{http://www.wolnelektury.pl}{\xbox{\includegraphics[width=\textwidth]{wl-logo.png}}} - \end{minipage} - \begin{minipage}[t]{.65\textwidth}\vspace{0pt} - - \ifflaglessadvertising - \else - \href{\bookurl}{Ta lektura}, podobnie jak tysiące innych, jest dostępna on-line na stronie - \href{http://www.wolnelektury.pl/}{wolnelektury.pl}. - \vspace{.5em} - \fi - - Utwór opracowany został w ramach projektu \href{http://www.wolnelektury.pl/}{Wolne Lektury} - przez \href{http://nowoczesnapolska.org.pl}{fundację Nowoczesna Polska}. - - \end{minipage} - \noindent \rule{\linewidth}{0.4pt} - - \vspace{.6em} - \color{black} - } -} - -\newcommand{\editorialsection}{ - \begin{figure}[b!] - { - \footnotesize - \color{theme} - \noindent \rule{\linewidth}{0.4pt} - - \rightsinfo - \vspace{.6em} - - Źródło: \href{\bookurl}{\bookurl} - - \vspace{.6em} - \sourceinfo - - \description - \vspace{.6em} - - \editors - - \color{black} - } - \end{figure} -} - - -\newcommand{\typosubsubsection}[1]{% -{\textsc{#1}} -} - -\newcommand{\typosubsection}[1]{% -{\addfontfeature{ -SizeFeatures={ - {Size={-10}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 }, - {Size={10.5-12}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 }, - {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 } -}, -Letters={Uppercase} -} -\MakeUppercase{#1}} -} - -\newcommand{\typosection}[1]{% -{\addfontfeature{FakeStretch=0.96, LetterSpace=-4.0}\emph{\scalefont{2}#1}} -%{\addfontfeature{Scale=2.0, FakeStretch=0.98, LetterSpace=-2.0}\emph{#1}} -} - - -\newcommand{\tytul}[1]{% -#1% -\vspace{1em}% -} - -\newcommand{\nazwapodutworu}[1]{% -\section*{\typosection{#1}}% -} - -\newcommand{\autorutworu}[1]{% -\subsection*{\typosubsection{#1}}% -} - -\newcommand{\dzielonadrzedne}[1]{% -\subsection*{\typosubsubsection{#1}}% -} - -\newcommand{\nazwautworu}[1]{% -\section*{\typosection{#1}}% -} - -\newcommand{\podtytul}[1]{% -\subsection*{\typosubsubsection{#1}}% -} - -\newcommand{\translator}[1]{% -\subsection*{\typosubsubsection{tłum. #1}}% -} - - -\newcommand{\powiesc}[1]{#1} -\newcommand{\opowiadanie}[1]{#1} -\newcommand{\lirykal}[1]{#1} -\newcommand{\lirykalp}[1]{#1} -\newcommand{\dramatwierszowanyl}[1]{#1} -\newcommand{\dramatwierszowanylp}[1]{#1} -\newcommand{\dramatwspolczesny}[1]{#1} - -\newcommand{\nota}[1]{% -\par{#1}% -} - -\newcommand{\dedykacja}[1]{% -\begin{em}% -\begin{flushright}% -#1% -\end{flushright}% -\end{em}% -} - -\newcommand{\dlugicytat}[1]{% -\begin{quotation}% -#1% -\end{quotation}% -} - -\newcommand{\poezjacyt}[1]{% -\begin{verse}% -#1% -\end{verse}% -} -\newcommand{\motto}[1]{% -\begin{em}% -#1% -\end{em}% -} -\newcommand{\listaosob}[2]{% -\par{#1}% -\begin{itemize}% -#2% -\end{itemize}% -} - -\newcommand{\nagloweklisty}[1]{% -\typosubsubsection{#1}% -} - -\newcommand{\listaosoba}[1]{% -\item{#1}% -} - -\newcommand{\kwestia}[1]{% -\par{#1}% -} - -\newcommand{\naglowekakt}[1]{% -\pagebreak -\subsection*{\typosubsection{#1}}% -} -\newcommand{\naglowekczesc}[1]{% -\pagebreak -\subsection*{\typosubsection{#1}}% -} -\newcommand{\srodtytul}[1]{% -\subsection*{\typosubsection{#1}}% -} - -\newcommand{\naglowekscena}[1]{% -\subsubsection*{\typosubsubsection{#1}}% -} -\newcommand{\naglowekrozdzial}[1]{% -\subsubsection*{\typosubsubsection{#1}}% -} - -\newcommand{\naglowekosoba}[1]{% -\par{\textsc{#1}}\nopagebreak% -} -\newcommand{\naglowekpodrozdzial}[1]{% -\par{#1}\nopagebreak% -} - -\newcommand{\miejsceczas}[1]{% -\par{\emph{#1}}% -} -\newcommand{\didaskalia}[1]{% -\par{\emph{#1}}% -} - -\newcommand{\akap}[1]{% -\par{#1}% -} -\newcommand{\akapdialog}[1]{% -\par{#1}% -} -\newcommand{\akapcd}[1]{% -\par{#1}% -} - -\newcommand{\mottopodpis}[1]{% -\begin{em}% -\begin{flushright}% -#1% -\end{flushright}% -\end{em}% -} - -\newcommand{\strofa}[1]{% -\par{\noindent{\ignorespaces#1\vspace{1em}}}% -} - -\newcommand{\wers}[1]{#1} - -\newcommand{\wersakap}[1]{% -\hspace*{1em}#1% -} -\newcommand{\werscd}[1]{% -\hspace*{8em}#1% -} -\newcommand{\werswciety}[2][1em]{% -\hspace*{#1}#2% -} - - -\newcommand{\pa}[1]{\NoCaseChange{\footnote{#1 [przypis autorski]}}} -\newcommand{\pe}[1]{\NoCaseChange{\footnote{#1}}} -\newcommand{\pr}[1]{\NoCaseChange{\footnote{#1}}} -\newcommand{\pt}[1]{\NoCaseChange{\footnote{#1}}} - -\newcommand{\mat}[1]{$#1$} - -\newcommand{\didasktekst}[1]{% -\emph{#1}% -} -\newcommand{\slowoobce}[1]{% -\emph{#1}% -} -\newcommand{\tytuldziela}[1]{% -\emph{#1}% -} -\newcommand{\wyroznienie}[1]{% -\emph{#1}% -} - -\newcommand{\osoba}[1]{% -#1% -} - -\newcommand{\sekcjaswiatlo}{% -\vspace{30pt}% -} - -\newcommand{\sekcjaasterysk}{% -\vspace{10pt}% -\begin{center}% -\par{*}% -\end{center}% -} - -\newcommand{\separatorlinia}{% -\vspace{10pt}% -\hrule{}% -\vspace{10pt}% -} - -\newcommand{\motyw}[2][0]{% -\mbox{}% -\marginpar{% -\vspace{-8pt}% -\vspace{-#1\baselineskip}% -\raggedright{\hspace{0pt}% -\footnotesize{\color{theme}{#2}}}% -\vspace{\baselineskip}% -}% -} - diff --git a/librarian/pdf/wl2tex.xslt b/librarian/pdf/wl2tex.xslt index ec5e4d3..1a675ba 100644 --- a/librarian/pdf/wl2tex.xslt +++ b/librarian/pdf/wl2tex.xslt @@ -15,7 +15,7 @@ - \documentclass[a4paper, oneside, 11pt]{book} + \documentclass[]{wl} @@ -31,8 +31,6 @@ \def\{} - - \usepackage{wl} @@ -88,6 +86,22 @@ + + + \def\coverby{ + Okładka na podstawie: + + + \href{\datacoversource}{\datacoverby} + + + \datacoverby{} + + + + } + + diff --git a/librarian/picture.py b/librarian/picture.py new file mode 100644 index 0000000..ee3c61d --- /dev/null +++ b/librarian/picture.py @@ -0,0 +1,173 @@ + +from dcparser import (as_person, as_date, Field, WorkInfo, DCNS) +from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI) +from xml.parsers.expat import ExpatError +from os import path +from StringIO import StringIO +from lxml import etree +from lxml.etree import (XMLSyntaxError, XSLTApplyError) +import re + + +class WLPictureURI(WLURI): + _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/' + '(?P[-a-z0-9]+)/?$') + + @classmethod + def from_slug(cls, slug): + uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug + return cls(uri) + +def as_wlpictureuri_strict(text): + return WLPictureURI.strict(text) + + +class PictureInfo(WorkInfo): + """ + Dublin core metadata for a picture + """ + FIELDS = ( + Field(DCNS('language'), 'language', required=False), + Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True), + Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True), + + Field(DCNS('format.dimensions'), 'dimensions', required=False), + Field(DCNS('format.checksum.sha1'), 'sha1', required=True), + Field(DCNS('description.medium'), 'medium', required=False), + Field(DCNS('description.dimensions'), 'original_dimensions', required=False), + Field(DCNS('format'), 'mime_type', required=False), + Field(DCNS('identifier.url'), 'url', WLPictureURI, + strict=as_wlpictureuri_strict), + ) + + +class ImageStore(object): + EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp' + 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc', + 'aiff', 'wbmp', 'xbm'] + MIME = ['image/gif', 'image/jpeg', 'image/png', + 'application/x-shockwave-flash', 'image/psd', 'image/bmp', + 'image/tiff', 'image/tiff', 'application/octet-stream', + 'image/jp2', 'application/octet-stream', 'application/octet-stream', + 'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm'] + + def __init__(self, dir_): + self.dir = dir_ + return super(ImageStore, self).__init__() + + def path(self, slug, mime_type): + """ + Finds file by slug and mime type in our iamge store. + Returns a file objects (perhaps should return a filename?) + """ + try: + i = self.MIME.index(mime_type) + except ValueError: + err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type)) + err.slug = slug + err.mime_type = mime_type + raise err + ext = self.EXT[i] + # add some common extensions tiff->tif, jpeg->jpg + return path.join(self.dir, slug + '.' + ext) + + +class WLPicture(object): + def __init__(self, edoc, parse_dublincore=True, image_store=None): + self.edoc = edoc + self.image_store = image_store + + root_elem = edoc.getroot() + + dc_path = './/' + RDFNS('RDF') + + if root_elem.tag != 'picture': + raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag) + + if parse_dublincore: + self.rdf_elem = root_elem.find(dc_path) + + if self.rdf_elem is None: + raise NoDublinCore('Document has no DublinCore - which is required.') + + self.picture_info = PictureInfo.from_element(self.rdf_elem) + else: + self.picture_info = None + + @classmethod + def from_string(cls, xml, *args, **kwargs): + return cls.from_file(StringIO(xml), *args, **kwargs) + + @classmethod + def from_file(cls, xmlfile, parse_dublincore=True, image_store=None): + + # first, prepare for parsing + if isinstance(xmlfile, basestring): + file = open(xmlfile, 'rb') + try: + data = file.read() + finally: + file.close() + else: + data = xmlfile.read() + + if not isinstance(data, unicode): + data = data.decode('utf-8') + + data = data.replace(u'\ufeff', '') + + # assume images are in the same directory + if image_store is None and xmlfile.name is not None: + image_store = ImageStore(path.dirname(xmlfile.name)) + + try: + parser = etree.XMLParser(remove_blank_text=False) + tree = etree.parse(StringIO(data.encode('utf-8')), parser) + + return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store) + except (ExpatError, XMLSyntaxError, XSLTApplyError), e: + raise ParseError(e) + + @property + def mime_type(self): + if self.picture_info is None: + raise ValueError('DC is not loaded, hence we don\'t know the image type') + return self.picture_info.mime_type + + @property + def slug(self): + return self.picture_info.url.slug + + @property + def image_path(self): + if self.image_store is None: + raise ValueError("No image store associated with whis WLPicture.") + return self.image_store.path(self.slug, self.mime_type) + + def image_file(self, *args, **kwargs): + return open(self.image_path, *args, **kwargs) + + def partiter(self): + """ + Iterates the parts of this picture and returns them and their metadata + """ + for part in self.edoc.iter("div"): + pd = {} + pd['type'] = part.get('type') + if pd['type'] == 'area': + pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))), + (int(part.get('x2')), int(part.get('y2')))) + + pd['themes'] = [] + pd['object'] = None + parent = part + while True: + parent = parent.getparent() + if parent is None: + break + if parent.tag == 'sem': + if parent.get('type') == 'theme': + pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(',')) + elif parent.get('type') == 'object' and pd['object'] is None: + pd['object'] = parent.get('object') + yield pd diff --git a/librarian/res/jedenprocent.png b/librarian/res/jedenprocent.png new file mode 100644 index 0000000..1b88c54 Binary files /dev/null and b/librarian/res/jedenprocent.png differ diff --git a/librarian/text.py b/librarian/text.py index c23bcd6..d99e7cf 100644 --- a/librarian/text.py +++ b/librarian/text.py @@ -3,7 +3,8 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from librarian import dcparser, parser, functions +import copy +from librarian import functions, OutputFile from lxml import etree import os @@ -28,7 +29,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz %(description)s%(contributors)s """ -def transform(input_file, output_file, parse_dublincore=True, flags=None, **options): +def transform(wldoc, flags=None, **options): """ Transforms input_file in XML to output_file in TXT. possible flags: raw-text, @@ -37,7 +38,9 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt') style = etree.parse(style_filename) - document = parser.WLDocument.from_file(input_file, True, parse_dublincore=parse_dublincore) + document = copy.deepcopy(wldoc) + del wldoc + document.swap_endlines() if flags: for flag in flags: @@ -46,10 +49,10 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti result = document.transform(style, **options) if not flags or 'raw-text' not in flags: - if parse_dublincore: - parsed_dc = dcparser.BookInfo.from_element(document.edoc) + if document.book_info: + parsed_dc = document.book_info description = parsed_dc.description - url = parsed_dc.url + url = document.book_info.url license_description = parsed_dc.license_description license = parsed_dc.license @@ -75,7 +78,7 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti license_description = "" source = "" contributors = "" - output_file.write((TEMPLATE % { + return OutputFile.from_string((TEMPLATE % { 'description': description, 'url': url, 'license_description': license_description, @@ -84,5 +87,5 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti 'contributors': contributors, }).encode('utf-8')) else: - output_file.write(unicode(result).encode('utf-8')) + return OutputFile.from_string(unicode(result).encode('utf-8')) diff --git a/scripts/book2cover b/scripts/book2cover new file mode 100755 index 0000000..d2befc3 --- /dev/null +++ b/scripts/book2cover @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import os +import optparse + +from librarian import ParseError +from librarian.parser import WLDocument +from librarian.cover import WLCover + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Create cover images for SOURCE files.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + output_filename = os.path.splitext(input_filename)[0] + '.png' + + doc = WLDocument.from_file(input_filename) + WLCover(doc.book_info).save(output_filename) diff --git a/scripts/book2epub b/scripts/book2epub index 82aaa2b..ce8adb5 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -7,8 +7,9 @@ import os.path import optparse -from librarian import epub, DirDocProvider, ParseError +from librarian import DirDocProvider, ParseError from librarian.cover import ImageCover +from librarian.parser import WLDocument if __name__ == '__main__': @@ -20,6 +21,8 @@ if __name__ == '__main__': parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='print status messages to stdout') + parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, + help='create default cover') parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, help='create a directory for author and put the PDF in it') parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', @@ -32,7 +35,7 @@ if __name__ == '__main__': help='less advertising, for commercial purposes') parser.add_option('-W', '--not-wl', action='store_true', dest='not_wl', default=False, help='not a WolneLektury book') - parser.add_option('-c', '--cover', dest='cover', metavar='FILE', + parser.add_option('--cover', dest='cover', metavar='FILE', help='specifies the cover file') options, input_filenames = parser.parse_args() @@ -46,20 +49,20 @@ if __name__ == '__main__': for main_input in input_filenames: if options.verbose: print main_input + path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) - - output_dir = output_file = None - if options.output_dir: - output_dir = options.output_dir - elif options.output_file: - output_file = options.output_file + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.epub' else: - output_dir = path + output_file = None + + doc = WLDocument.from_file(main_input, provider=provider) - cover = None if options.cover: cover = ImageCover(options.cover) + else: + cover = options.with_cover flags = [] if options.images: @@ -69,8 +72,11 @@ if __name__ == '__main__': if options.not_wl: flags.append('not-wl') - epub.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir, - cover=cover, flags=flags) + epub = doc.as_epub(cover=cover, flags=flags) + + doc.save_output_file(epub, + output_file, options.output_dir, options.make_dir, 'epub') + except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': main_input, diff --git a/scripts/book2html b/scripts/book2html index d61b299..8adeb38 100755 --- a/scripts/book2html +++ b/scripts/book2html @@ -7,7 +7,8 @@ import os import optparse -from librarian import html, ParseError +from librarian import ParseError +from librarian.parser import WLDocument if __name__ == '__main__': @@ -21,6 +22,8 @@ if __name__ == '__main__': help='print status messages to stdout') parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True, help='don\'t try to parse dublin core metadata') + parser.add_option('-r', '--raw', action='store_false', dest='full_page', default=True, + help='outpu raw text for use in templates') options, input_filenames = parser.parse_args() @@ -35,7 +38,11 @@ if __name__ == '__main__': output_filename = os.path.splitext(input_filename)[0] + '.html' try: - html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore, flags=('full-page',)) + doc = WLDocument.from_file(input_filename, + parse_dublincore=options.parse_dublincore) + flags = ('full-page',) if options.full_page else None + html = doc.as_html(flags=flags) + doc.save_output_file(html, output_path=output_filename) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': input_filename, diff --git a/scripts/book2ihtml b/scripts/book2ihtml index 97d8ebd..779f245 100755 --- a/scripts/book2ihtml +++ b/scripts/book2ihtml @@ -7,7 +7,8 @@ import os import optparse -from librarian import html, ParseError +from librarian import ParseError +from librarian.parser import WLDocument if __name__ == '__main__': @@ -35,8 +36,10 @@ if __name__ == '__main__': output_filename = os.path.splitext(input_filename)[0] + '.html' try: - html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore,\ - stylesheet='partial') + doc = WLDocument.from_file(input_filename, + parse_dublincore=options.parse_dublincore) + html = doc.as_html(flags=('full-page',), stylesheet='partial') + doc.save_output_file(html, output_path=output_filename) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': input_filename, diff --git a/scripts/book2mobi b/scripts/book2mobi new file mode 100755 index 0000000..665dcfa --- /dev/null +++ b/scripts/book2mobi @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import os.path +import optparse + +from librarian import DirDocProvider, ParseError +from librarian.parser import WLDocument + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Convert SOURCE files to MOBI format.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, + help='create a directory for author and put the PDF in it') + parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE', + help='specifies the output file') + parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR', + help='specifies the directory for output') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + try: + for main_input in input_filenames: + path, fname = os.path.realpath(main_input).rsplit('/', 1) + provider = DirDocProvider(path) + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.mobi' + else: + output_file = None + + doc = WLDocument.from_file(main_input, provider=provider) + mobi = doc.as_mobi() + + doc.save_output_file(mobi, + output_file, options.output_dir, options.make_dir, 'mobi') + except ParseError, e: + print '%(file)s:%(name)s:%(message)s' % { + 'file': main_input, + 'name': e.__class__.__name__, + 'message': e + } diff --git a/scripts/book2pdf b/scripts/book2pdf index d10f400..258c20d 100755 --- a/scripts/book2pdf +++ b/scripts/book2pdf @@ -6,7 +6,10 @@ # import os.path from optparse import OptionParser -from librarian import pdf, DirDocProvider, ParseError + +from librarian import DirDocProvider, ParseError +from librarian.parser import WLDocument + if __name__ == '__main__': usage = """Usage: %prog [options] SOURCE [SOURCE...] @@ -15,6 +18,8 @@ if __name__ == '__main__': parser = OptionParser(usage) parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='make lots of noise and revert to default interaction in LaTeX') + parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False, + help='create default cover') parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False, help='create a directory for author and put the PDF in it') parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE', @@ -31,33 +36,26 @@ if __name__ == '__main__': parser.print_help() exit(1) - try: - if options.output_dir and options.output_file: - raise ValueError("Either --output-dir or --output file should be specified") + if options.output_dir and options.output_file: + raise ValueError("Either --output-dir or --output file should be specified") + try: for main_input in args: - if options.verbose: - print main_input path, fname = os.path.realpath(main_input).rsplit('/', 1) provider = DirDocProvider(path) - - output_file = output_dir = None - if options.output_dir: - output_dir = options.output_dir - elif options.output_file: - output_file = options.output_file + output_file, output_dir = options.output_file, options.output_dir + if not (options.output_file or options.output_dir): + output_file = os.path.splitext(main_input)[0] + '.pdf' else: - output_dir = path + output_file = None + + doc = WLDocument.from_file(main_input, provider=provider) + pdf = doc.as_pdf(save_tex=options.save_tex, + cover=options.with_cover, + morefloats=options.morefloats) - pdf.transform(provider, - file_path=main_input, - output_file=output_file, - output_dir=output_dir, - verbose=options.verbose, - make_dir=options.make_dir, - save_tex=options.save_tex, - morefloats=options.morefloats - ) + doc.save_output_file(pdf, + output_file, options.output_dir, options.make_dir, 'pdf') except ParseError, e: print '%(file)s:%(name)s:%(message)s; use -v to see more output' % { 'file': main_input, diff --git a/scripts/book2txt b/scripts/book2txt index d56d6ff..9cfdef2 100755 --- a/scripts/book2txt +++ b/scripts/book2txt @@ -7,8 +7,8 @@ import os import optparse -from librarian import text -from librarian import dcparser, ParseError +from librarian import ParseError +from librarian.parser import WLDocument if __name__ == '__main__': @@ -38,9 +38,10 @@ if __name__ == '__main__': output_filename = os.path.splitext(input_filename)[0] + '.txt' try: - output_file = open(output_filename, 'w') - text.transform(open(input_filename), output_file, parse_dublincore=options.parse_dublincore, - wrapping=str(options.wrapping)) + doc = WLDocument.from_file(input_filename, + parse_dublincore=options.parse_dublincore) + html = doc.as_text(wrapping=str(options.wrapping)) + doc.save_output_file(html, output_path=output_filename) except ParseError, e: print '%(file)s:%(name)s:%(message)s' % { 'file': input_filename, diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index d1db01b..b1ea926 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def whole_tree(prefix, path): setup( name='librarian', - version='1.3', + version='1.4.1', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', @@ -29,15 +29,17 @@ setup( maintainer_email='radek.czajka@gmail.com', url='http://github.com/fnp/librarian', packages=['librarian'], - package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'pdf/*', 'fonts/*', 'res/*'] + + package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fonts/*', 'res/*'] + whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')}, include_package_data=True, install_requires=['lxml>=2.2'], scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/book2epub', + 'scripts/book2mobi', 'scripts/book2pdf', 'scripts/book2partner', + 'scripts/book2cover', 'scripts/bookfragments', 'scripts/genslugs'], tests_require=['nose>=0.11', 'coverage>=3.0.1'], diff --git a/tests/files/dcparser/andersen_brzydkie_kaczatko.out b/tests/files/dcparser/andersen_brzydkie_kaczatko.out index fda83eb..c0fb00b 100644 --- a/tests/files/dcparser/andersen_brzydkie_kaczatko.out +++ b/tests/files/dcparser/andersen_brzydkie_kaczatko.out @@ -14,4 +14,6 @@ 'genre': u'Baśń', 'technical_editors': [u'Gałecki, Dariusz'], 'license_description': u'Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925', + 'audiences': [u'SP1'], + 'language': u'pol', } diff --git a/tests/files/dcparser/biedrzycki_akslop.out b/tests/files/dcparser/biedrzycki_akslop.out index 1111b9c..a7eeffe 100644 --- a/tests/files/dcparser/biedrzycki_akslop.out +++ b/tests/files/dcparser/biedrzycki_akslop.out @@ -1,17 +1,19 @@ { - 'editors': [u'Sekuła, Aleksandra'], - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop', - 'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993', - 'author': u'Biedrzycki, Miłosz', - 'url': u'http://wolnelektury.pl/katalog/lektura/akslop', - 'created_at': u'2009-06-04', - 'title': u'Akslop', - 'kind': u'Liryka', - 'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1', - 'epoch': u'Współczesność', - 'genre': u'Wiersz', - 'technical_editors': [u'Sutkowska, Olga'], - 'license': u'http://creativecommons.org/licenses/by-sa/3.0/', - 'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL' + 'editors': [u'Sekuła, Aleksandra'], + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop', + 'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993', + 'author': u'Biedrzycki, Miłosz', + 'url': u'http://wolnelektury.pl/katalog/lektura/akslop', + 'created_at': u'2009-06-04', + 'title': u'Akslop', + 'kind': u'Liryka', + 'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1', + 'epoch': u'Współczesność', + 'genre': u'Wiersz', + 'technical_editors': [u'Sutkowska, Olga'], + 'license': u'http://creativecommons.org/licenses/by-sa/3.0/', + 'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL', + 'audiences': [u'L'], + 'language': u'pol', } diff --git a/tests/files/dcparser/kochanowski_piesn7.out b/tests/files/dcparser/kochanowski_piesn7.out index 4c1dc8f..b3eba1e 100644 --- a/tests/files/dcparser/kochanowski_piesn7.out +++ b/tests/files/dcparser/kochanowski_piesn7.out @@ -1,18 +1,20 @@ { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)', - 'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976', - 'author': u'Kochanowski, Jan', - 'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr', - 'created_at': u'2007-08-31', - 'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)', - 'kind': u'Liryka', - 'source_url': u'http://www.polona.pl/Content/1499', - 'released_to_public_domain_at': u'1584-01-01', - 'epoch': u'Renesans', - 'genre': u'Pieśń', - 'technical_editors': [u'Gałecki, Dariusz'], - 'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ', - 'editors': [u'Sekuła, Aleksandra', u'Krzyżanowski, Julian', u'Otwinowska, Barbara'], + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)', + 'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976', + 'author': u'Kochanowski, Jan', + 'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr', + 'created_at': u'2007-08-31', + 'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)', + 'kind': u'Liryka', + 'source_url': u'http://www.polona.pl/Content/1499', + 'released_to_public_domain_at': u'1584-01-01', + 'epoch': u'Renesans', + 'genre': u'Pieśń', + 'technical_editors': [u'Gałecki, Dariusz'], + 'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ', + 'editors': [u'Sekuła, Aleksandra', u'Krzyżanowski, Julian', u'Otwinowska, Barbara'], + 'audiences': [u'L'], + 'language': u'pol', } diff --git a/tests/files/dcparser/mickiewicz_rybka.out b/tests/files/dcparser/mickiewicz_rybka.out index ff4bd98..a35f935 100644 --- a/tests/files/dcparser/mickiewicz_rybka.out +++ b/tests/files/dcparser/mickiewicz_rybka.out @@ -1,18 +1,20 @@ { - 'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'], - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka', - 'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922', - 'author': u'Mickiewicz, Adam', - 'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka', - 'created_at': u'2007-09-06', - 'title': u'Rybka', - 'kind': u'Liryka', - 'source_url': u'http://www.polona.pl/Content/2222', - 'released_to_public_domain_at': u'1855-01-01', - 'epoch': u'Romantyzm', - 'genre': u'Ballada', - 'technical_editors': [u'Sutkowska, Olga'], - 'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855', + 'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'], + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka', + 'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922', + 'author': u'Mickiewicz, Adam', + 'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka', + 'created_at': u'2007-09-06', + 'title': u'Rybka', + 'kind': u'Liryka', + 'source_url': u'http://www.polona.pl/Content/2222', + 'released_to_public_domain_at': u'1855-01-01', + 'epoch': u'Romantyzm', + 'genre': u'Ballada', + 'technical_editors': [u'Sutkowska, Olga'], + 'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855', + 'audiences': [u'SP2', u'G', u'L'], + 'language': u'pol', } diff --git a/tests/files/dcparser/sofokles_antygona.out b/tests/files/dcparser/sofokles_antygona.out index 0f2b4d0..d934602 100644 --- a/tests/files/dcparser/sofokles_antygona.out +++ b/tests/files/dcparser/sofokles_antygona.out @@ -1,19 +1,22 @@ { - 'editors': [u'Sekuła, Aleksandra'], - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona', - 'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939', - 'author': u'Sofokles', - 'url': u'http://wolnelektury.pl/katalog/lektura/antygona', - 'created_at': u'2007-08-30', - 'title': u'Antygona', - 'kind': u'Dramat', - 'source_url': u'http://www.polona.pl/Content/3768', - 'translators': [u'Morawski, Kazimierz'], - 'released_to_public_domain_at': u'1925-01-01', - 'epoch': u'Starożytność', - 'genre': u'Tragedia', - 'technical_editors': [u'Gałecki, Dariusz'], - 'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925', + 'editors': [u'Sekuła, Aleksandra'], + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona', + 'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939', + 'author': u'Sofokles', + 'url': u'http://wolnelektury.pl/katalog/lektura/antygona', + 'created_at': u'2007-08-30', + 'title': u'Antygona', + 'kind': u'Dramat', + 'source_url': u'http://www.polona.pl/Content/3768', + 'translators': [u'Morawski, Kazimierz'], + 'released_to_public_domain_at': u'1925-01-01', + 'epoch': u'Starożytność', + 'genre': u'Tragedia', + 'technical_editors': [u'Gałecki, Dariusz'], + 'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925', + 'language': u'pol', + 'audiences': [u'G'], + 'language': u'pol', } diff --git a/tests/files/picture/angelus-novus.png b/tests/files/picture/angelus-novus.png new file mode 100644 index 0000000..9925dad Binary files /dev/null and b/tests/files/picture/angelus-novus.png differ diff --git a/tests/files/picture/angelus-novus.xml b/tests/files/picture/angelus-novus.xml new file mode 100644 index 0000000..0f26730 --- /dev/null +++ b/tests/files/picture/angelus-novus.xml @@ -0,0 +1,42 @@ + + + + Klee, Paul + Angelus Novus + Fundacja Nowoczesna Polska + Sekuła, Aleksandra + Kwiatkowska, Katarzyna + Trzeciak, Weronika + Modernizm + Obraz + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + 31.8 × 24.2 cm + Akwarela na papierze + http://wolnelektury.pl/katalog/obraz/angelus-novus + http://katilifox.files.wordpress.com/2011/04/1190051611_angelus-novus.jpg + Muzeum Narodowe, inw. 00000000. + Domena publiczna - Paul Klee zm. 1940 + 1940 + Image + image/png + 1645 x 2000 px + d9ead48f3442ac4e1add602aacdffa4638ae8e21 + 1920 + lat + + + +
+ + +
+ + +
+ + +
+
+ + + diff --git a/tests/files/text/asnyk_miedzy_nami.xml b/tests/files/text/asnyk_miedzy_nami.xml deleted file mode 100644 index 36d8df6..0000000 --- a/tests/files/text/asnyk_miedzy_nami.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - -Asnyk, Adam -Między nami nic nie było - -Sekuła, Aleksandra -Sutkowska, Olga -Fundacja Nowoczesna Polska -Pozytywizm -Liryka -Wiersz -Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. -http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo -http://www.polona.pl/Content/5164 -(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 -Domena publiczna - Adam Asnyk zm. 1897 -1897 -xml -text -text -2007-09-06 -L -pol - - - - -Adam Asnyk - -Miłość platonicznaMiędzy nami nic nie było - - - -Między nami nic nie było!/ -Żadnych zwierzeń, wyznań żadnych!/ -Nic nas z sobą nie łączyło ---/ -Prócz wiosennych marzeń zdradnych; - - - -NaturaPrócz tych woni, barw i blasków,/ -Unoszących się w przestrzeni;/ -Prócz szumiących śpiewem lasków/ -I tej świeżej łąk zieleni; - - - -Prócz tych kaskad i potoków,/ -Zraszających każdy parów,/ -Prócz girlandy tęcz, obłoków,/ -Prócz natury słodkich czarów; - - - -Prócz tych wspólnych, jasnych zdrojów,/ -Z których serce zachwyt piło;/ -Prócz pierwiosnków i powojów,---/ -Między nami nic nie było! - - - diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml new file mode 100755 index 0000000..c585a8b --- /dev/null +++ b/tests/files/text/asnyk_zbior.xml @@ -0,0 +1,29 @@ + + + + + +Asnyk, Adam +Poezye +Fundacja Nowoczesna Polska +Pozytywizm +Liryka +Wiersz +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. +http://wolnelektury.pl/katalog/lektura/poezye +http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo +http://www.polona.pl/Content/5164 +(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 +Domena publiczna - Adam Asnyk zm. 1897 +1897 +xml +text +text +2007-09-06 +L +pol + + + + + diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml new file mode 100644 index 0000000..124940e --- /dev/null +++ b/tests/files/text/miedzy-nami-nic-nie-bylo.xml @@ -0,0 +1,65 @@ + + + + + + +Asnyk, Adam +Między nami nic nie było + +Sekuła, Aleksandra +Sutkowska, Olga +Fundacja Nowoczesna Polska +Pozytywizm +Liryka +Wiersz +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. +http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo +http://www.polona.pl/Content/5164 +(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3, Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898 +Domena publiczna - Adam Asnyk zm. 1897 +1897 +xml +text +text +2007-09-06 +L +pol + + + + +Adam Asnyk + +Miłość platonicznaMiędzy nami nic nie było + + + +Między nami nic nie było!/ +Żadnych zwierzeń, wyznań żadnych!/ +Nic nas z sobą nie łączyło ---/ +Prócz wiosennych marzeń zdradnych; + + + +NaturaPrócz tych woni, barw i blasków,/ +Unoszących się w przestrzeni;/ +Prócz szumiących śpiewem lasków/ +I tej świeżej łąk zieleni; + + + +Prócz tych kaskad i potoków,/ +Zraszających każdy parów,/ +Prócz girlandy tęcz, obłoków,/ +Prócz natury słodkich czarów; + + + +Prócz tych wspólnych, jasnych zdrojów,/ +Z których serce zachwyt piło;/ +Prócz pierwiosnków i powojów,---/ +Między nami nic nie było! + + + diff --git a/tests/test_epub.py b/tests/test_epub.py new file mode 100644 index 0000000..9fc5637 --- /dev/null +++ b/tests/test_epub.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from librarian import DirDocProvider +from librarian.parser import WLDocument +from nose.tools import * +from utils import get_fixture + + +def test_transform(): + WLDocument.from_file( + get_fixture('text', 'asnyk_zbior.xml'), + provider=DirDocProvider(get_fixture('text', '')) + ).as_epub(flags=['without_fonts']) diff --git a/tests/test_html.py b/tests/test_html.py index 5187e06..51d6acd 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -3,44 +3,38 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from librarian import html, NoDublinCore +from librarian import NoDublinCore +from librarian.parser import WLDocument from nose.tools import * -from utils import get_fixture, remove_output_file +from utils import get_fixture -def teardown_transform(): - remove_output_file('text', 'asnyk_miedzy_nami.html') - -@with_setup(None, teardown_transform) def test_transform(): - output_file_path = get_fixture('text', 'asnyk_miedzy_nami.html') expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html') - html.transform( - get_fixture('text', 'asnyk_miedzy_nami.xml'), - output_file_path, - ) + html = WLDocument.from_file( + get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') + ).as_html().get_string() - assert_equal(file(output_file_path).read(), file(expected_output_file_path).read()) + assert_equal(html, file(expected_output_file_path).read()) -@with_setup(None, teardown_transform) @raises(NoDublinCore) def test_no_dublincore(): - html.transform( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), - get_fixture('text', 'asnyk_miedzy_nami.html'), - ) + WLDocument.from_file( + get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') + ).as_html() -@with_setup(None, teardown_transform) def test_passing_parse_dublincore_to_transform(): """Passing parse_dublincore=False to transform omits DublinCore parsing.""" - html.transform( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), - get_fixture('text', 'asnyk_miedzy_nami.html'), - parse_dublincore=False, - ) + WLDocument.from_file( + get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), + parse_dublincore=False, + ).as_html() def test_empty(): - assert html.transform('', is_file=False, parse_dublincore=False).find('empty') + assert not WLDocument.from_string( + '', + parse_dublincore=False, + ).as_html() diff --git a/tests/test_picture.py b/tests/test_picture.py new file mode 100644 index 0000000..71a77dc --- /dev/null +++ b/tests/test_picture.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from librarian import picture, dcparser +from lxml import etree +from nose.tools import * +from os.path import splitext +from tests.utils import get_all_fixtures, get_fixture +import codecs +from os import path + +def test_wlpictureuri(): + uri = picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus') + +def check_load(xml_file): + pi = dcparser.parse(xml_file, picture.PictureInfo) + assert pi is not None + assert isinstance(pi, picture.PictureInfo) + + +def test_load(): + for fixture in get_all_fixtures('picture', '*.xml'): + yield check_load, fixture + + +def test_wlpicture(): + wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml'))) + pi = wlp.picture_info + + # from nose.tools import set_trace; set_trace() + assert pi.type[0] == u"Image" + assert pi.mime_type == u'image/png' == wlp.mime_type + assert wlp.slug == 'angelus-novus' + + assert path.exists(wlp.image_path) + + f = wlp.image_file('r') + f.close() + +def test_picture_parts(): + wlp = picture.WLPicture.from_file(open(get_fixture('picture', 'angelus-novus.xml'))) + parts = list(wlp.partiter()) + assert len(parts) == 5, "there should be %d parts of the picture" % 5 + motifs = set() + names = set() + + print parts + for p in parts: + for m in p['themes']: + motifs.add(m) + for p in parts: + if p['object']: + names.add(p['object']) + + assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs + assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names + + diff --git a/tests/test_text.py b/tests/test_text.py index 7ff94ca..70dfb60 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -3,42 +3,32 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from librarian import text, NoDublinCore +from librarian import NoDublinCore +from librarian.parser import WLDocument from nose.tools import * -from utils import get_fixture, remove_output_file +from utils import get_fixture -def teardown_transform(): - remove_output_file('text', 'asnyk_miedzy_nami.txt') - - -@with_setup(None, teardown_transform) def test_transform(): - output_file_path = get_fixture('text', 'asnyk_miedzy_nami.txt') expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt') - text.transform( - open(get_fixture('text', 'asnyk_miedzy_nami.xml')), - open(output_file_path, 'w'), - ) + text = WLDocument.from_file( + get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') + ).as_text().get_string() - assert_equal(file(output_file_path).read(), file(expected_output_file_path).read()) + assert_equal(text, file(expected_output_file_path).read()) -@with_setup(None, teardown_transform) @raises(NoDublinCore) def test_no_dublincore(): - text.transform( - open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')), - open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'), - ) + WLDocument.from_file( + get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') + ).as_text() -@with_setup(None, teardown_transform) def test_passing_parse_dublincore_to_transform(): - """Passing parse_dublincore=False to transform omits DublinCore parsing.""" - text.transform( - open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')), - open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'), - parse_dublincore=False, - ) + """Passing parse_dublincore=False to the constructor omits DublinCore parsing.""" + WLDocument.from_file( + get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), + parse_dublincore=False, + ).as_text() diff --git a/tests/utils.py b/tests/utils.py index b112066..3b1f4f5 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -21,10 +21,3 @@ def get_fixture(dir_name, file_name): def get_all_fixtures(dir_name, glob_pattern='*'): """Returns list of paths for fixtures in directory dir_name matching the glob_pattern.""" return [get_fixture(dir_name, file_name) for file_name in glob.glob(join(get_fixture_dir(dir_name), glob_pattern))] - - -def remove_output_file(dir_name, file_name): - try: - os.remove(get_fixture(dir_name, file_name)) - except: - pass