# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from __future__ import with_statement
-
import os
import re
-import shutil
import urllib
+from .utils import XMLNamespace
class UnicodeException(Exception):
class ValidationError(UnicodeException):
pass
-class NoDublinCore(ValidationError):
- """There's no DublinCore section, and it's required."""
- pass
-
-class NoProvider(UnicodeException):
- """There's no DocProvider specified, and it's needed."""
- pass
-
-class XMLNamespace(object):
- '''A handy structure to repsent names in an XML namespace.'''
-
- def __init__(self, uri):
- self.uri = uri
-
- def __call__(self, tag):
- return '{%s}%s' % (self.uri, tag)
-
- def __contains__(self, tag):
- return tag.startswith('{' + str(self) + '}')
-
- def __repr__(self):
- return 'XMLNamespace(%r)' % self.uri
-
- def __str__(self):
- return '%s' % self.uri
class EmptyNamespace(XMLNamespace):
def __init__(self):
NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/")
OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
-WLNS = EmptyNamespace()
+SSTNS = XMLNamespace('http://nowoczesnapolska.org.pl/sst#')
class WLURI(object):
return self.slug == other.slug
-class DocProvider(object):
- """Base class for a repository of XML files.
-
- Used for generating joined files, like EPUBs.
- """
-
- def by_slug(self, slug):
- """Should return a file-like object with a WL document XML."""
- raise NotImplementedError
-
- def by_uri(self, uri, wluri=WLURI):
- """Should return a file-like object with a WL document XML."""
- wluri = wluri(uri)
- return self.by_slug(wluri.slug)
-
-
-class DirDocProvider(DocProvider):
- """ Serve docs from a directory of files in form <slug>.xml """
-
- def __init__(self, dir_):
- self.dir = dir_
- self.files = {}
-
- def by_slug(self, slug):
- fname = slug + '.xml'
- return open(os.path.join(self.dir, fname))
-
-
-import lxml.etree as etree
-import dcparser
-
-DEFAULT_BOOKINFO = dcparser.BookInfo(
- { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
- { DCNS('creator'): [u'Some, Author'],
- DCNS('title'): [u'Some Title'],
- DCNS('subject.period'): [u'Unknown'],
- DCNS('subject.type'): [u'Unknown'],
- DCNS('subject.genre'): [u'Unknown'],
- DCNS('date'): ['1970-01-01'],
- DCNS('language'): [u'pol'],
- # DCNS('date'): [creation_date],
- DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
- DCNS('description'):
- [u"""Publikacja zrealizowana w ramach projektu
- Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
- wykonana przez Bibliotekę Narodową z egzemplarza
- pochodzącego ze zbiorów BN."""],
- DCNS('identifier.url'): [WLURI.example],
- DCNS('rights'):
- [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
-
-def xinclude_forURI(uri):
- e = etree.Element(XINS("include"))
- e.set("href", uri)
- return etree.tostring(e, encoding=unicode)
-
-def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
- """Wrap the text within the minimal XML structure with a DC template."""
- bookinfo.created_at = creation_date
-
- dcstring = etree.tostring(bookinfo.to_etree(), \
- method='xml', encoding=unicode, pretty_print=True)
-
- return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
- u'\n</plain-text>\n</utwor>'
-
-
-def serialize_raw(element):
- b = u'' + (element.text or '')
-
- for child in element.iterchildren():
- e = etree.tostring(child, method='xml', encoding=unicode,
- pretty_print=True)
- b += e
-
- return b
-
-SERIALIZERS = {
- 'raw': serialize_raw,
-}
-
-def serialize_children(element, format='raw'):
- return SERIALIZERS[format](element)
-
-def get_resource(path):
- return os.path.join(os.path.dirname(__file__), path)
-
-
-class OutputFile(object):
- """Represents a file returned by one of the converters."""
-
- _string = None
- _filename = None
-
- def __del__(self):
- if self._filename:
- os.unlink(self._filename)
-
- def __nonzero__(self):
- return self._string is not None or self._filename is not None
-
- @classmethod
- def from_string(cls, string):
- """Converter returns contents of a file as a string."""
-
- instance = cls()
- instance._string = string
- return instance
-
- @classmethod
- def from_filename(cls, filename):
- """Converter returns contents of a file as a named file."""
-
- instance = cls()
- instance._filename = filename
- return instance
-
- def get_string(self):
- """Get file's contents as a string."""
-
- if self._filename is not None:
- with open(self._filename) as f:
- return f.read()
- else:
- return self._string
-
- def get_file(self):
- """Get file as a file-like object."""
-
- if self._string is not None:
- from StringIO import StringIO
- return StringIO(self._string)
- elif self._filename is not None:
- return open(self._filename)
-
- def get_filename(self):
- """Get file as a fs path."""
-
- if self._filename is not None:
- return self._filename
- elif self._string is not None:
- from tempfile import NamedTemporaryFile
- temp = NamedTemporaryFile(prefix='librarian-', delete=False)
- temp.write(self._string)
- temp.close()
- self._filename = temp.name
- return self._filename
- else:
- return None
-
- def save_as(self, path):
- """Save file to a path. Create directories, if necessary."""
-
- dirname = os.path.dirname(os.path.abspath(path))
- if not os.path.isdir(dirname):
- os.makedirs(dirname)
- shutil.copy(self.get_filename(), path)
-
-
class URLOpener(urllib.FancyURLopener):
- version = 'FNP Librarian (http://github.com/fnp/librarian)'
+ version = 'FNP Librarian (http://git.nowoczesnapolska.org.pl/?p=librarian.git)'
urllib._urlopener = URLOpener()
+
import os.path
import optparse
-from librarian import DirDocProvider, ParseError
-from librarian.parser import WLDocument
-from librarian.cover import WLCover
+from librarian import ParseError
+from librarian.document import Document
class Option(object):
Subclass it for any format you want to convert to.
"""
- format_name = None # Set format name, like "PDF".
- ext = None # Set file extension, like "pdf".
- uses_cover = False # Can it add a cover?
- cover_optional = True # Only relevant if uses_cover
- uses_provider = False # Does it need a DocProvider?
- transform = None # Transform method. Uses WLDocument.as_{ext} by default.
- parser_options = [] # List of Option objects for additional parser args.
- transform_options = [] # List of Option objects for additional transform args.
- transform_flags = [] # List of Option objects for supported transform flags.
-
+ format_cls = None # A formats.Format subclass
+ document_options = [] # List of Option objects for document options.
+ format_options = [] # List of Option objects for format customization.
+ build_options = [] # List of Option objects for build options.
@classmethod
def run(cls):
# Parse commandline arguments
usage = """Usage: %%prog [options] SOURCE [SOURCE...]
- Convert SOURCE files to %s format.""" % cls.format_name
+ Convert SOURCE files to %s.""" % cls.format_cls.format_name
parser = optparse.OptionParser(usage=usage)
parser.add_option('-v', '--verbose',
action='store_true', dest='verbose', default=False,
help='print status messages to stdout')
- parser.add_option('-d', '--make-dir',
- action='store_true', dest='make_dir', default=False,
- help='create a directory for author and put the output file in it')
parser.add_option('-o', '--output-file',
dest='output_file', metavar='FILE',
help='specifies the output file')
- parser.add_option('-O', '--output-dir',
- dest='output_dir', metavar='DIR',
- help='specifies the directory for output')
- if cls.uses_cover:
- if cls.cover_optional:
- parser.add_option('-c', '--with-cover',
- action='store_true', dest='with_cover', default=False,
- help='create default cover')
- parser.add_option('-C', '--image-cache',
- dest='image_cache', metavar='URL',
- help='prefix for image download cache' +
- (' (implies --with-cover)' if cls.cover_optional else ''))
- for option in cls.parser_options + cls.transform_options + cls.transform_flags:
+ for option in cls.document_options + cls.format_options + cls.build_options:
option.add(parser)
options, input_filenames = parser.parse_args()
parser.print_help()
return(1)
- # Prepare additional args for parser.
- parser_args = {}
- for option in cls.parser_options:
- parser_args[option.name()] = option.value(options)
- # Prepare additional args for transform method.
- transform_args = {}
- for option in cls.transform_options:
- transform_args[option.name()] = option.value(options)
- # Add flags to transform_args, if any.
- transform_flags = [flag.name() for flag in cls.transform_flags
- if flag.value(options)]
- if transform_flags:
- transform_args['flags'] = transform_flags
- # Add cover support, if any.
- if cls.uses_cover:
- if options.image_cache:
- def cover_class(*args, **kwargs):
- return WLCover(image_cache=options.image_cache, *args, **kwargs)
- transform_args['cover'] = cover_class
- elif not cls.cover_optional or options.with_cover:
- transform_args['cover'] = WLCover
-
+ # Prepare additional args for document.
+ document_args = {}
+ for option in cls.document_options:
+ document_args[option.name()] = option.value(options)
+ # Prepare additional args for format.
+ format_args = {}
+ for option in cls.format_options:
+ format_args[option.name()] = option.value(options)
+ # Prepare additional args for build.
+ build_args = {}
+ for option in cls.build_options:
+ build_args[option.name()] = option.value(options)
# Do some real work
try:
if options.verbose:
print main_input
- # Where to find input?
- if cls.uses_provider:
- path, fname = os.path.realpath(main_input).rsplit('/', 1)
- provider = DirDocProvider(path)
- else:
- provider = None
+ # Do the transformation.
+ doc = Document.from_file(main_input, **document_args)
+ format_ = cls.format_cls(doc, **format_args)
# Where to write output?
- if not (options.output_file or options.output_dir):
- output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
+ if not options.output_file:
+ output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext
else:
output_file = None
-
- # Do the transformation.
- doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
- transform = cls.transform
- if transform is None:
- transform = getattr(WLDocument, 'as_%s' % cls.ext)
- output = transform(doc, **transform_args)
-
- doc.save_output_file(output,
- output_file, options.output_dir, options.make_dir, cls.ext)
+
+ output = format_.build(**build_args)
+ output.save_as(output_file)
except ParseError, e:
print '%(file)s:%(name)s:%(message)s' % {
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from librarian import SSTNS
+from .meta import Metadata
+
+
+class TextElement(etree.ElementBase):
+    """Base lxml element class shared by the SST element classes below."""
+
+    @property
+    def meta(self):
+        """Return the metadata for this element.
+
+        A ``metadata`` child in the SST namespace is returned when one is
+        found; otherwise the result of ``Metadata.about(self)`` is
+        returned instead.
+        """
+        found = self.find(SSTNS('metadata'))
+        return Metadata.about(self) if found is None else found
+
+
+class Span(TextElement):
+    # Adds nothing to TextElement; it only provides a distinct class.
+    # Presumably bound to the SST "span" element by the parser -- TODO
+    # confirm against librarian.parser.
+    pass
+
+
+class Div(TextElement):
+    # Adds nothing to TextElement; it only provides a distinct class.
+    # Presumably bound to the SST "div" element by the parser -- TODO
+    # confirm against librarian.parser.
+    pass
+
+
+class Section(TextElement):
+    # Adds nothing to TextElement; it only provides a distinct class.
+    # librarian.document.Document checks that the root element of a parsed
+    # tree is an instance of this class (and is tagged SSTNS('section')).
+    pass
+
+
+class Header(TextElement):
+    # Adds nothing to TextElement; it only provides a distinct class.
+    # Presumably bound to the SST "header" element by the parser -- TODO
+    # confirm against librarian.parser.
+    pass
+
+
+class Aside(TextElement):
+    # Adds nothing to TextElement; it only provides a distinct class.
+    # Presumably bound to the SST "aside" element by the parser -- TODO
+    # confirm against librarian.parser.
+    pass
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import re
-import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
-from StringIO import StringIO
-from librarian import get_resource, OutputFile, URLOpener
-
-
-class Metric(object):
- """Gets metrics from an object, scaling it by a factor."""
- def __init__(self, obj, scale):
- self._obj = obj
- self._scale = float(scale)
-
- def __getattr__(self, name):
- src = getattr(self._obj, name)
- if src and self._scale:
- src = type(src)(self._scale * src)
- return src
-
-
-class TextBox(object):
- """Creates an Image with a series of centered strings."""
-
- SHADOW_X = 3
- SHADOW_Y = 3
- SHADOW_BLUR = 3
-
- def __init__(self, max_width, max_height, padding_x=None, padding_y=None):
- if padding_x is None:
- padding_x = self.SHADOW_X + self.SHADOW_BLUR
- if padding_y is None:
- padding_y = self.SHADOW_Y + self.SHADOW_BLUR
-
- self.max_width = max_width
- self.max_text_width = max_width - 2 * padding_x
- self.padding_y = padding_y
- self.height = padding_y
- self.img = Image.new('RGBA', (max_width, max_height))
- self.draw = ImageDraw.Draw(self.img)
- self.shadow_img = None
- self.shadow_draw = None
-
- def skip(self, height):
- """Skips some vertical space."""
- self.height += height
-
- def text(self, text, color='#000', font=None, line_height=20,
- shadow_color=None):
- """Writes some centered text."""
- text = re.sub(r'\s+', ' ', text)
- if shadow_color:
- if not self.shadow_img:
- self.shadow_img = Image.new('RGBA', self.img.size)
- self.shadow_draw = ImageDraw.Draw(self.shadow_img)
- while text:
- line = text
- line_width = self.draw.textsize(line, font=font)[0]
- while line_width > self.max_text_width:
- parts = line.rsplit(' ', 1)
- if len(parts) == 1:
- line_width = self.max_text_width
- break
- line = parts[0]
- line_width = self.draw.textsize(line, font=font)[0]
- line = line.strip() + ' '
-
- pos_x = (self.max_width - line_width) / 2
-
- if shadow_color:
- self.shadow_draw.text(
- (pos_x + self.SHADOW_X, self.height + self.SHADOW_Y),
- line, font=font, fill=shadow_color
- )
-
- self.draw.text((pos_x, self.height), line, font=font, fill=color)
- self.height += line_height
- # go to next line
- text = text[len(line):]
-
- def image(self):
- """Creates the actual Image object."""
- image = Image.new('RGBA', (self.max_width,
- self.height + self.padding_y))
- if self.shadow_img:
- shadow = self.shadow_img.filter(ImageFilter.BLUR)
- image.paste(shadow, (0, 0), shadow)
- image.paste(self.img, (0, 0), self.img)
- else:
- image.paste(self.img, (0, 0))
- return image
-
-
-class Cover(object):
- """Abstract base class for cover images generator."""
- width = 600
- height = 800
- background_color = '#fff'
- background_img = None
-
- author_top = 100
- author_margin_left = 20
- author_margin_right = 20
- author_lineskip = 40
- author_color = '#000'
- author_shadow = None
- author_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
- author_font_size = 30
-
- title_top = 100
- title_margin_left = 20
- title_margin_right = 20
- title_lineskip = 54
- title_color = '#000'
- title_shadow = None
- title_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
- title_font_size = 40
-
- logo_bottom = None
- logo_width = None
- uses_dc_cover = False
-
- format = 'JPEG'
- scale = 1
-
- exts = {
- 'JPEG': 'jpg',
- 'PNG': 'png',
- }
-
- mime_types = {
- 'JPEG': 'image/jpeg',
- 'PNG': 'image/png',
- }
-
- def __init__(self, book_info, format=None, width=None, height=None):
- self.author = ", ".join(auth.readable() for auth in book_info.authors)
- self.title = book_info.title
- if format is not None:
- self.format = format
- scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
- if scale:
- self.scale = scale
-
- def pretty_author(self):
- """Allows for decorating author's name."""
- return self.author
-
- def pretty_title(self):
- """Allows for decorating title."""
- return self.title
-
- def image(self):
- metr = Metric(self, self.scale)
- img = Image.new('RGB', (metr.width, metr.height), self.background_color)
-
- if self.background_img:
- background = Image.open(self.background_img)
- img.paste(background, None, background)
- del background
-
- # WL logo
- if metr.logo_width:
- logo = Image.open(get_resource('res/wl-logo.png'))
- logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
- img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
-
- top = metr.author_top
- tbox = TextBox(
- metr.width - metr.author_margin_left - metr.author_margin_right,
- metr.height - top,
- )
-
- author_font = ImageFont.truetype(
- self.author_font_ttf, metr.author_font_size)
- tbox.text(self.pretty_author(), self.author_color, author_font,
- metr.author_lineskip, self.author_shadow)
- text_img = tbox.image()
- img.paste(text_img, (metr.author_margin_left, top), text_img)
-
- top += text_img.size[1] + metr.title_top
- tbox = TextBox(
- metr.width - metr.title_margin_left - metr.title_margin_right,
- metr.height - top,
- )
- title_font = ImageFont.truetype(
- self.title_font_ttf, metr.title_font_size)
- tbox.text(self.pretty_title(), self.title_color, title_font,
- metr.title_lineskip, self.title_shadow)
- text_img = tbox.image()
- img.paste(text_img, (metr.title_margin_left, top), text_img)
-
- return img
-
- def mime_type(self):
- return self.mime_types[self.format]
-
- def ext(self):
- return self.exts[self.format]
-
- def save(self, *args, **kwargs):
- return self.image().save(format=self.format, quality=95, *args, **kwargs)
-
- def output_file(self, *args, **kwargs):
- imgstr = StringIO()
- self.save(imgstr, *args, **kwargs)
- return OutputFile.from_string(imgstr.getvalue())
-
-
-class WLCover(Cover):
- """Default Wolne Lektury cover generator."""
- width = 600
- height = 833
- uses_dc_cover = True
- author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
- author_font_size = 20
- author_lineskip = 30
- title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf')
- title_font_size = 30
- title_lineskip = 40
- title_box_width = 350
-
- box_top_margin = 100
- box_bottom_margin = 100
- box_padding_y = 20
- box_above_line = 10
- box_below_line = 15
- box_line_left = 75
- box_line_right = 275
- box_line_width = 2
-
- logo_top = 15
- logo_width = 140
-
- bar_width = 35
- background_color = '#444'
- author_color = '#444'
- default_background = get_resource('res/cover.png')
- format = 'JPEG'
-
- epoch_colors = {
- u'Starożytność': '#9e3610',
- u'Średniowiecze': '#564c09',
- u'Renesans': '#8ca629',
- u'Barok': '#a6820a',
- u'Oświecenie': '#f2802e',
- u'Romantyzm': '#db4b16',
- u'Pozytywizm': '#961060',
- u'Modernizm': '#7784e0',
- u'Dwudziestolecie międzywojenne': '#3044cf',
- u'Współczesność': '#06393d',
- }
-
- def __init__(self, book_info, format=None, width=None, height=None, with_logo=False):
- super(WLCover, self).__init__(book_info, format=format, width=width, height=height)
- self.kind = book_info.kind
- self.epoch = book_info.epoch
- self.with_logo = with_logo
- if book_info.cover_url:
- url = book_info.cover_url
- bg_src = None
- if bg_src is None:
- bg_src = URLOpener().open(url)
- self.background_img = StringIO(bg_src.read())
- bg_src.close()
- else:
- self.background_img = self.default_background
-
- def pretty_author(self):
- return self.author.upper()
-
- def image(self):
- metr = Metric(self, self.scale)
- img = Image.new('RGB', (metr.width, metr.height), self.background_color)
- draw = ImageDraw.Draw(img)
-
- if self.epoch in self.epoch_colors:
- epoch_color = self.epoch_colors[self.epoch]
- else:
- epoch_color = '#000'
- draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color)
-
- if self.background_img:
- src = Image.open(self.background_img)
- trg_size = (metr.width - metr.bar_width, metr.height)
- if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
- resized = (
- trg_size[0],
- src.size[1] * trg_size[0] / src.size[0]
- )
- cut = (resized[1] - trg_size[1]) / 2
- src = src.resize(resized, Image.ANTIALIAS)
- src = src.crop((0, cut, src.size[0], src.size[1] - cut))
- else:
- resized = (
- src.size[0] * trg_size[1] / src.size[1],
- trg_size[1],
- )
- cut = (resized[0] - trg_size[0]) / 2
- src = src.resize(resized, Image.ANTIALIAS)
- src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
-
- img.paste(src, (metr.bar_width, 0))
- del src
-
- box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
- author_font = ImageFont.truetype(
- self.author_font_ttf, metr.author_font_size)
- box.text(self.pretty_author(),
- font=author_font,
- line_height=metr.author_lineskip,
- color=self.author_color,
- shadow_color=self.author_shadow,
- )
-
- box.skip(metr.box_above_line)
- box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
- fill=self.author_color, width=metr.box_line_width)
- box.skip(metr.box_below_line)
-
- title_font = ImageFont.truetype(
- self.title_font_ttf, metr.title_font_size)
- box.text(self.pretty_title(),
- line_height=metr.title_lineskip,
- font=title_font,
- color=epoch_color,
- shadow_color=self.title_shadow,
- )
-
- if self.with_logo:
- logo = Image.open(get_resource('res/wl-logo-mono.png'))
- logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS)
- alpha = logo.split()[3]
- alpha = ImageEnhance.Brightness(alpha).enhance(.75)
- logo.putalpha(alpha)
- box.skip(metr.logo_top + logo.size[1])
-
- box_img = box.image()
-
- if self.kind == 'Liryka':
- # top
- box_top = metr.box_top_margin
- elif self.kind == 'Epika':
- # bottom
- box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
- else:
- # center
- box_top = (metr.height - box_img.size[1]) / 2
-
- box_left = metr.bar_width + (metr.width - metr.bar_width -
- box_img.size[0]) / 2
- draw.rectangle((box_left, box_top,
- box_left + box_img.size[0], box_top + box_img.size[1]),
- fill='#fff')
- img.paste(box_img, (box_left, box_top), box_img)
-
- if self.with_logo:
- img.paste(logo,
- (box_left + (box_img.size[0] - logo.size[0]) / 2,
- box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo)
-
- return img
-
-
-class VirtualoCover(Cover):
- width = 600
- height = 730
- author_top = 73
- title_top = 73
- logo_bottom = 25
- logo_width = 250
-
-
-class PrestigioCover(Cover):
- width = 580
- height = 783
- background_img = get_resource('res/cover-prestigio.png')
-
- author_top = 446
- author_margin_left = 118
- author_margin_right = 62
- author_lineskip = 60
- author_color = '#fff'
- author_shadow = '#000'
- author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
- author_font_size = 50
-
- title_top = 0
- title_margin_left = 118
- title_margin_right = 62
- title_lineskip = 60
- title_color = '#fff'
- title_shadow = '#000'
- title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
- title_font_size = 50
-
- def pretty_title(self):
- return u"„%s”" % self.title
-
-
-class BookotekaCover(Cover):
- width = 2140
- height = 2733
- background_img = get_resource('res/cover-bookoteka.png')
-
- author_top = 480
- author_margin_left = 307
- author_margin_right = 233
- author_lineskip = 156
- author_color = '#d9d919'
- author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
- author_font_size = 130
-
- title_top = 400
- title_margin_left = 307
- title_margin_right = 233
- title_lineskip = 168
- title_color = '#d9d919'
- title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
- title_font_size = 140
-
- format = 'PNG'
-
-
-class GandalfCover(Cover):
- width = 600
- height = 730
- background_img = get_resource('res/cover-gandalf.png')
- author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
- author_font_size = 30
- title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
- title_font_size = 40
- logo_bottom = 25
- logo_width = 250
- format = 'PNG'
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from StringIO import StringIO
+from lxml import etree
+from . import SSTNS
+from . import ValidationError
+from .core import Section
+from .parser import SSTParser
+
+
+class Document(object):
+    """A parsed SST document wrapped around an lxml element tree.
+
+    The tree's root must be a ``Section`` element class instance, which
+    is what parsing with ``SSTParser`` is expected to produce.
+    """
+
+    def __init__(self, edoc, meta_context=None):
+        """Wrap an already parsed tree.
+
+        :param edoc: element tree; only ``getroot()`` is used here.
+        :param meta_context: optional value stored on the root element as
+            ``meta_context`` when it is not None. Nothing in this class
+            reads it back.
+        :raises ValidationError: when the root element is not a
+            ``Section`` instance, either because the tag is wrong or
+            because the tree was built without the SST element classes.
+        """
+        self.edoc = edoc
+
+        root_elem = edoc.getroot()
+        if meta_context is not None:
+            root_elem.meta_context = meta_context
+
+        if not isinstance(root_elem, Section):
+            # Tell a wrong root tag apart from a correct tag that was
+            # parsed into a plain element class.
+            if root_elem.tag != SSTNS('section'):
+                raise ValidationError("Invalid root element. Found '%s', should be '%s'" % (
+                    root_elem.tag, SSTNS('section')))
+            raise ValidationError("Invalid class of root element. "
+                "Use librarian.parser.SSTParser.")
+
+    @classmethod
+    def from_string(cls, xml, *args, **kwargs):
+        """Build a document from XML held in a string.
+
+        Extra arguments are passed on to ``from_file``.
+        """
+        return cls.from_file(StringIO(xml), *args, **kwargs)
+
+    @classmethod
+    def from_file(cls, xmlfile, *args, **kwargs):
+        """Build a document from a path or a file-like object.
+
+        :param xmlfile: a path (any ``basestring``), opened in binary
+            mode, or an object with a ``read()`` method.
+        :returns: ``cls(tree, *args, **kwargs)`` for the parsed tree.
+
+        Byte input is decoded as UTF-8, every U+FEFF character is
+        removed, and the text is re-encoded to UTF-8 before being parsed
+        with ``SSTParser``. ``xinclude()`` is then run on the tree.
+        """
+        if isinstance(xmlfile, basestring):
+            # The context manager closes the handle even if read() fails.
+            with open(xmlfile, 'rb') as source:
+                data = source.read()
+        else:
+            data = xmlfile.read()
+
+        if not isinstance(data, unicode):
+            data = data.decode('utf-8')
+
+        data = data.replace(u'\ufeff', '')
+
+        parser = SSTParser()
+        tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+        tree.xinclude()
+        return cls(tree, *args, **kwargs)
+
+    @property
+    def meta(self):
+        """ Document's metadata is root's metadata. """
+        return self.edoc.getroot().meta
+++ /dev/null
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
- <head>
- <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
- <title>Okładka</title>
- <style type="text/css"> img { max-width: 100%; } </style>
- </head>
- <body style="oeb-column-number: 1;">
- <div id="cover-image">
- <img alt="Okładka" />
- </div>
- </body>
-</html>
\ No newline at end of file
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+class Format(object):
+    """Common base class for output formats built from a document."""
+
+    def __init__(self, doc):
+        """Keep ``doc`` as the source document for this format."""
+        self.doc = doc
+
+    def build(self):
+        """Produce the output; concrete formats have to override this."""
+        raise NotImplementedError()
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import re
+from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
+from StringIO import StringIO
+from librarian import DCNS, URLOpener
+from librarian.output import OutputFile
+from librarian.utils import get_resource
+from librarian.formats import Format
+
+
+class Metric(object):
+    """Attribute proxy that multiplies the wrapped object's values by a factor."""
+
+    def __init__(self, obj, scale):
+        self._scale = float(scale)
+        self._obj = obj
+
+    def __getattr__(self, name):
+        """Return ``obj.<name>``, scaled and cast back to its own type.
+
+        Falsy values (None, 0, ...) are returned unchanged, and so is
+        every value when the scale itself is zero.
+        """
+        value = getattr(self._obj, name)
+        if not (value and self._scale):
+            return value
+        return type(value)(self._scale * value)
+
+
+class TextBox(object):
+    """Creates an Image with a series of centered strings.
+
+    Text is drawn top to bottom on a transparent RGBA canvas;
+    ``self.height`` tracks the vertical position of the next line.
+    """
+
+    # Offset of the shadow copy relative to the text. SHADOW_BLUR only
+    # contributes to the default padding computed in __init__.
+    SHADOW_X = 3
+    SHADOW_Y = 3
+    SHADOW_BLUR = 3
+
+    def __init__(self, max_width, max_height, padding_x=None, padding_y=None):
+        """Allocate a ``max_width`` x ``max_height`` drawing canvas.
+
+        Paddings left as None default to shadow offset plus blur.
+        """
+        if padding_x is None:
+            padding_x = self.SHADOW_X + self.SHADOW_BLUR
+        if padding_y is None:
+            padding_y = self.SHADOW_Y + self.SHADOW_BLUR
+
+        self.max_width = max_width
+        self.max_text_width = max_width - 2 * padding_x
+        self.padding_y = padding_y
+        # Drawing starts below the top padding.
+        self.height = padding_y
+        self.img = Image.new('RGBA', (max_width, max_height))
+        self.draw = ImageDraw.Draw(self.img)
+        # The shadow layer is created by text() the first time it is needed.
+        self.shadow_img = None
+        self.shadow_draw = None
+
+    def skip(self, height):
+        """Skips some vertical space."""
+        self.height += height
+
+    def text(self, text, color='#000', font=None, line_height=20,
+             shadow_color=None):
+        """Writes some centered text.
+
+        The text is wrapped at spaces so that each line fits within
+        ``max_text_width``; every line advances ``self.height`` by
+        ``line_height``. When ``shadow_color`` is given, each line is also
+        drawn on a separate shadow layer, offset by SHADOW_X / SHADOW_Y.
+        """
+        # Collapse every run of whitespace (newlines included) to one space.
+        text = re.sub(r'\s+', ' ', text)
+        if shadow_color:
+            if not self.shadow_img:
+                self.shadow_img = Image.new('RGBA', self.img.size)
+                self.shadow_draw = ImageDraw.Draw(self.shadow_img)
+        while text:
+            line = text
+            line_width = self.draw.textsize(line, font=font)[0]
+            # Drop trailing words until the line fits.
+            while line_width > self.max_text_width:
+                parts = line.rsplit(' ', 1)
+                if len(parts) == 1:
+                    # A single word wider than the box: keep it whole and
+                    # treat it as exactly max_text_width wide for centering.
+                    line_width = self.max_text_width
+                    break
+                line = parts[0]
+                line_width = self.draw.textsize(line, font=font)[0]
+            # NOTE(review): the trailing space presumably makes len(line)
+            # also cover the separator when the line is sliced off below.
+            line = line.strip() + ' '
+
+            # Integer division under Python 2 when both operands are ints.
+            pos_x = (self.max_width - line_width) / 2
+
+            if shadow_color:
+                self.shadow_draw.text(
+                    (pos_x + self.SHADOW_X, self.height + self.SHADOW_Y),
+                    line, font=font, fill=shadow_color
+                )
+
+            self.draw.text((pos_x, self.height), line, font=font, fill=color)
+            self.height += line_height
+            # go to next line
+            text = text[len(line):]
+
+    def image(self):
+        """Creates the actual Image object.
+
+        The result is ``max_width`` wide and as tall as the space used so
+        far plus the bottom padding.
+        """
+        image = Image.new('RGBA', (self.max_width,
+                                   self.height + self.padding_y))
+        if self.shadow_img:
+            # Blurred shadow goes underneath; each layer is pasted using
+            # itself as the mask.
+            shadow = self.shadow_img.filter(ImageFilter.BLUR)
+            image.paste(shadow, (0, 0), shadow)
+            image.paste(self.img, (0, 0), self.img)
+        else:
+            image.paste(self.img, (0, 0))
+        return image
+
+
+class Cover(Format):
+    """Base class for cover images generator.
+
+    The layout is driven entirely by class attributes, so a subclass can
+    restyle the cover by overriding them. ``image()`` renders the picture
+    and ``build()`` wraps the encoded picture in an ``OutputFile``.
+    """
+    format_name = u"cover image"
+
+    # Canvas size before scaling, and what is painted behind the text.
+    width = 600
+    height = 800
+    background_color = '#fff'
+    background_img = None
+
+    # Author text box: offset from the top, side margins, line height,
+    # colours and font.
+    author_top = 100
+    author_margin_left = 20
+    author_margin_right = 20
+    author_lineskip = 40
+    author_color = '#000'
+    author_shadow = None
+    author_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
+    author_font_size = 30
+
+    # Title text box; title_top is measured from the bottom of the author box.
+    title_top = 100
+    title_margin_left = 20
+    title_margin_right = 20
+    title_lineskip = 54
+    title_color = '#000'
+    title_shadow = None
+    title_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
+    title_font_size = 40
+
+    # The logo is drawn only when logo_width is set (see image()).
+    logo_bottom = None
+    logo_width = None
+    uses_dc_cover = False
+
+    # Image format name handed to the image's save(), and the scale
+    # factor applied to the layout metrics.
+    format = 'JPEG'
+    scale = 1
+
+    exts = {
+        'JPEG': 'jpg',
+        'PNG': 'png',
+    }
+
+    mime_types = {
+        'JPEG': 'image/jpeg',
+        'PNG': 'image/png',
+    }
+
+    def __init__(self, doc, format=None, width=None, height=None):
+        """Read author and title from the document metadata.
+
+        :param doc: document whose ``meta`` provides the DC creators and
+            the title.
+        :param format: image format name overriding the class default.
+        :param width: requested output width, used to derive the scale.
+        :param height: requested output height, used to derive the scale.
+
+        The scale is the larger of ``width / self.width`` and
+        ``height / self.height``; when neither size is given the class
+        default is kept.
+        """
+        # Let the base class store the document, as other formats do.
+        super(Cover, self).__init__(doc)
+        self.author = ", ".join(doc.meta.get(DCNS('creator')))
+        self.title = doc.meta.title()
+        if format is not None:
+            self.format = format
+        scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
+        if scale:
+            self.scale = scale
+
+    def pretty_author(self):
+        """Allows for decorating author's name."""
+        return self.author
+
+    def pretty_title(self):
+        """Allows for decorating title."""
+        return self.title
+
+    def image(self):
+        """Render the cover and return it as an image object.
+
+        Layers are painted in order: background colour, optional
+        background image, optional logo, author box, title box.
+        """
+        # All sizes below are read through Metric so they follow self.scale.
+        metr = Metric(self, self.scale)
+        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+
+        if self.background_img:
+            background = Image.open(self.background_img)
+            img.paste(background, None, background)
+            del background
+
+        # WL logo
+        if metr.logo_width:
+            logo = Image.open(get_resource('res/wl-logo.png'))
+            # Resize to logo_width, keeping the aspect ratio.
+            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
+            img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
+
+        top = metr.author_top
+        tbox = TextBox(
+            metr.width - metr.author_margin_left - metr.author_margin_right,
+            metr.height - top,
+        )
+
+        author_font = ImageFont.truetype(
+            self.author_font_ttf, metr.author_font_size)
+        tbox.text(self.pretty_author(), self.author_color, author_font,
+                  metr.author_lineskip, self.author_shadow)
+        text_img = tbox.image()
+        img.paste(text_img, (metr.author_margin_left, top), text_img)
+
+        # The title starts title_top below the bottom of the author box.
+        top += text_img.size[1] + metr.title_top
+        tbox = TextBox(
+            metr.width - metr.title_margin_left - metr.title_margin_right,
+            metr.height - top,
+        )
+        title_font = ImageFont.truetype(
+            self.title_font_ttf, metr.title_font_size)
+        tbox.text(self.pretty_title(), self.title_color, title_font,
+                  metr.title_lineskip, self.title_shadow)
+        text_img = tbox.image()
+        img.paste(text_img, (metr.title_margin_left, top), text_img)
+
+        return img
+
+    def mime_type(self):
+        """Return the MIME type matching ``self.format``."""
+        return self.mime_types[self.format]
+
+    @property
+    def format_ext(self):
+        """File extension matching ``self.format``."""
+        return self.exts[self.format]
+
+    def save(self, *args, **kwargs):
+        """Render the cover and save it in ``self.format`` at quality 95.
+
+        Positional and keyword arguments are forwarded to the image's
+        ``save`` method.
+        """
+        return self.image().save(format=self.format, quality=95, *args, **kwargs)
+
+    def build(self, *args, **kwargs):
+        """Render the cover into memory and return it as an OutputFile."""
+        imgstr = StringIO()
+        self.save(imgstr, *args, **kwargs)
+        return OutputFile.from_string(imgstr.getvalue())
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from librarian.utils import get_resource
+from .. import Cover
+
+
+class VirtualoCover(Cover):
+    # Cover preset labelled "Virtualo": overrides the canvas size and the
+    # author/title offsets, and sets logo_width and logo_bottom.
+    format_name = u"Virtualo cover image"
+
+    width = 600
+    height = 730
+    author_top = 73
+    title_top = 73
+    logo_bottom = 25
+    logo_width = 250
+
+
+class PrestigioCover(Cover):
+    """Cover settings for Prestigio, with its own background image and fonts."""
+    format_name = u"Prestigio cover image"
+
+    # Canvas size and background picture.
+    width, height = 580, 783
+    background_img = get_resource('res/cover-prestigio.png')
+
+    # Author text.
+    author_top = 446
+    author_margin_left, author_margin_right = 118, 62
+    author_lineskip = 60
+    author_color, author_shadow = '#fff', '#000'
+    author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
+    author_font_size = 50
+
+    # Title text; a zero title_top adds no extra offset before the title.
+    title_top = 0
+    title_margin_left, title_margin_right = 118, 62
+    title_lineskip = 60
+    title_color, title_shadow = '#fff', '#000'
+    title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
+    title_font_size = 50
+
+    def pretty_title(self):
+        """Return the title wrapped in Polish-style quotation marks."""
+        quoted = u"„%s”" % self.title
+        return quoted
+
+
+class BookotekaCover(Cover):
+    """Cover settings for Bookoteka: large PNG canvas with its own background."""
+    format_name = u"Bookoteka cover image"
+
+    # Output format, canvas size and background picture.
+    format = 'PNG'
+    width, height = 2140, 2733
+    background_img = get_resource('res/cover-bookoteka.png')
+
+    # Author text.
+    author_top = 480
+    author_margin_left, author_margin_right = 307, 233
+    author_lineskip = 156
+    author_color = '#d9d919'
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 130
+
+    # Title text.
+    title_top = 400
+    title_margin_left, title_margin_right = 307, 233
+    title_lineskip = 168
+    title_color = '#d9d919'
+    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    title_font_size = 140
+
+
+class GandalfCover(Cover):
+    """Cover settings for Gandalf: PNG output with its own background and logo."""
+    format_name = u"Gandalf cover image"
+
+    # Output format, canvas size and background picture.
+    format = 'PNG'
+    width, height = 600, 730
+    background_img = get_resource('res/cover-gandalf.png')
+
+    # Fonts for the author and title text.
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 30
+    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    title_font_size = 40
+
+    # Logo width and its distance from the bottom edge.
+    logo_width, logo_bottom = 250, 25
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from PIL import Image, ImageFont, ImageDraw
+from librarian.utils import get_resource
+from .. import Cover, Metric, TextBox
+
+
+class WLCover(Cover):
+    """Default Wolne Lektury cover generator.
+
+    The cover consists of a vertical bar coloured by literary epoch, an
+    optional background picture scaled and cropped to fill the remaining
+    area, and a white box holding the author, a separator line, the title
+    and (optionally) the WL logo.
+    """
+    format_name = u"WL-style cover image"
+
+    width = 600
+    height = 833
+    uses_dc_cover = True
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 20
+    author_lineskip = 30
+    title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf')
+    title_font_size = 30
+    title_lineskip = 40
+    title_box_width = 350
+
+    box_top_margin = 100
+    box_bottom_margin = 100
+    box_padding_y = 20
+    box_above_line = 10
+    box_below_line = 15
+    box_line_left = 75
+    box_line_right = 275
+    box_line_width = 2
+
+    logo_top = 15
+    logo_width = 140
+
+    bar_width = 35
+    background_color = '#444'
+    author_color = '#444'
+    default_background = get_resource('res/cover.png')
+    format = 'JPEG'
+
+    # Bar and title colour for each epoch name found in the metadata.
+    epoch_colors = {
+        u'Starożytność': '#9e3610',
+        u'Średniowiecze': '#564c09',
+        u'Renesans': '#8ca629',
+        u'Barok': '#a6820a',
+        u'Oświecenie': '#f2802e',
+        u'Romantyzm': '#db4b16',
+        u'Pozytywizm': '#961060',
+        u'Modernizm': '#7784e0',
+        u'Dwudziestolecie międzywojenne': '#3044cf',
+        u'Współczesność': '#06393d',
+    }
+
+    def __init__(self, doc, format=None, width=None, height=None, with_logo=False):
+        """Read cover metadata from ``doc`` and choose the background image.
+
+        When the metadata has a ``cover_url`` entry, its first value is
+        downloaded and kept in memory; otherwise ``default_background`` is
+        used. ``with_logo`` controls whether ``image()`` draws the WL logo.
+        """
+        # Imported locally: this module has no in-memory buffer class in scope.
+        from io import BytesIO
+
+        super(WLCover, self).__init__(doc, format=format, width=width, height=height)
+        self.kind = doc.meta.get_one('kind')
+        self.epoch = doc.meta.get_one('epoch')
+        self.with_logo = with_logo
+
+        cover_urls = doc.meta.get('cover_url')
+        if cover_urls:
+            # NOTE(review): URLOpener is not imported in this module, so this
+            # branch raises NameError until it is -- confirm where it lives.
+            bg_src = URLOpener().open(cover_urls[0])
+            try:
+                self.background_img = BytesIO(bg_src.read())
+            finally:
+                # Release the connection even if reading fails.
+                bg_src.close()
+        else:
+            self.background_img = self.default_background
+
+    def pretty_author(self):
+        """Return the author name in upper case."""
+        return self.author.upper()
+
+    def image(self):
+        """Render the cover and return it as a PIL image."""
+        metr = Metric(self, self.scale)
+        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+        draw = ImageDraw.Draw(img)
+
+        # Epochs missing from the palette fall back to black.
+        epoch_color = self.epoch_colors.get(self.epoch, '#000')
+        draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color)
+
+        if self.background_img:
+            src = Image.open(self.background_img)
+            trg_size = (metr.width - metr.bar_width, metr.height)
+            if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
+                # Source is relatively taller than the target: match the
+                # width, then trim the surplus height from top and bottom.
+                resized = (
+                    trg_size[0],
+                    src.size[1] * trg_size[0] / src.size[0]
+                )
+                cut = (resized[1] - trg_size[1]) / 2
+                src = src.resize(resized, Image.ANTIALIAS)
+                src = src.crop((0, cut, src.size[0], src.size[1] - cut))
+            else:
+                # Otherwise match the height and trim left and right.
+                resized = (
+                    src.size[0] * trg_size[1] / src.size[1],
+                    trg_size[1],
+                )
+                cut = (resized[0] - trg_size[0]) / 2
+                src = src.resize(resized, Image.ANTIALIAS)
+                src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
+
+            img.paste(src, (metr.bar_width, 0))
+            del src
+
+        # Text box: author, separator line, title.
+        box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
+        author_font = ImageFont.truetype(
+            self.author_font_ttf, metr.author_font_size)
+        box.text(self.pretty_author(),
+                 font=author_font,
+                 line_height=metr.author_lineskip,
+                 color=self.author_color,
+                 shadow_color=self.author_shadow,
+                 )
+
+        box.skip(metr.box_above_line)
+        box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
+                      fill=self.author_color, width=metr.box_line_width)
+        box.skip(metr.box_below_line)
+
+        title_font = ImageFont.truetype(
+            self.title_font_ttf, metr.title_font_size)
+        box.text(self.pretty_title(),
+                 line_height=metr.title_lineskip,
+                 font=title_font,
+                 color=epoch_color,
+                 shadow_color=self.title_shadow,
+                 )
+
+        if self.with_logo:
+            # Imported locally: the module-level PIL import omits ImageEnhance.
+            from PIL import ImageEnhance
+
+            logo = Image.open(get_resource('res/wl-logo-mono.png'))
+            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS)
+            # Scale the alpha channel to 75% so the logo is semi-transparent.
+            alpha = logo.split()[3]
+            alpha = ImageEnhance.Brightness(alpha).enhance(.75)
+            logo.putalpha(alpha)
+            # Reserve room for the logo at the bottom of the box.
+            box.skip(metr.logo_top + logo.size[1])
+
+        box_img = box.image()
+
+        # Vertical placement of the box depends on the literary kind.
+        if self.kind == 'Liryka':
+            # top
+            box_top = metr.box_top_margin
+        elif self.kind == 'Epika':
+            # bottom
+            box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
+        else:
+            # center
+            box_top = (metr.height - box_img.size[1]) / 2
+
+        # Horizontally the box is centred in the area right of the bar.
+        box_left = metr.bar_width + (metr.width - metr.bar_width -
+                                     box_img.size[0]) / 2
+        draw.rectangle((box_left, box_top,
+                        box_left + box_img.size[0], box_top + box_img.size[1]),
+                       fill='#fff')
+        img.paste(box_img, (box_left, box_top), box_img)
+
+        if self.with_logo:
+            img.paste(logo,
+                      (box_left + (box_img.size[0] - logo.size[0]) / 2,
+                       box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo)
+
+        return img
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+from copy import deepcopy
+from tempfile import NamedTemporaryFile
+import zipfile
+from lxml import etree
+from librarian import OPFNS, NCXNS, XHTMLNS
+from librarian import core
+from librarian.formats import Format
+from librarian.formats.cover.wolnelektury import WLCover
+from librarian.output import OutputFile
+from librarian.renderers import Register, TreeRenderer, UnknownElement
+from librarian.utils import Context, get_resource, extend_element
+
+
+class EpubFormat(Format):
+    """Builds an EPUB archive (OPF, NCX, cover, chapter parts, footnotes)."""
+    format_name = 'EPUB'
+    format_ext = 'epub'
+
+    cover = WLCover
+    renderers = Register()
+
+    def __init__(self, doc, cover=None, with_fonts=True):
+        """Store the build options.
+
+        ``cover`` replaces the default cover class when it is not None.
+        ``with_fonts`` is only stored; ``build()`` does not read it.
+        """
+        super(EpubFormat, self).__init__(doc)
+        self.with_fonts = with_fonts
+        if cover is not None:
+            self.cover = cover
+
+    def build(self):
+        """Write the EPUB to a temporary file and return it as an OutputFile.
+
+        The temporary file is created with ``delete=False``, so it stays on
+        disk after this method returns.
+        """
+        opf = etree.parse(get_resource('formats/epub/res/content.opf'))
+        manifest = opf.find(OPFNS('manifest'))
+        guide = opf.find(OPFNS('guide'))
+        spine = opf.find(OPFNS('spine'))
+        # NOTE(review): the OPF template appears to list style.css in its
+        # manifest, but nothing below adds that file to the archive -- confirm.
+
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
+        zip_file = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+
+        # The mimetype entry is written first and stored uncompressed.
+        mime = zipfile.ZipInfo()
+        mime.filename = 'mimetype'
+        mime.compress_type = zipfile.ZIP_STORED
+        mime.extra = ''
+        zip_file.writestr(mime, 'application/epub+zip')
+        zip_file.writestr(
+            'META-INF/container.xml',
+            '<?xml version="1.0" ?><container version="1.0" '
+            'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+            '<rootfiles><rootfile full-path="OPS/content.opf" '
+            'media-type="application/oebps-package+xml" />'
+            '</rootfiles></container>')
+
+        toc_file = etree.fromstring(
+            '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+            '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+            '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+            'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+            '</navMap></ncx>')
+        # <navMap> is the last child of <ncx>; the TOC is rendered into it.
+        nav_map = toc_file[-1]
+
+        if self.cover is not None:
+            cover = self.cover(self.doc)
+            cover_output = cover.build()
+            cover_name = 'cover.%s' % cover.format_ext
+            zip_file.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
+            del cover_output
+
+            cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
+            cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
+            zip_file.writestr('OPS/cover.html', etree.tostring(
+                cover_tree, method="html", pretty_print=True))
+
+            if cover.uses_dc_cover:
+                # Copy cover credits from the metadata onto the document root.
+                doc_root = self.doc.edoc.getroot()
+                cover_by = self.doc.meta.get_one('cover_by')
+                if cover_by:
+                    doc_root.set('data-cover-by', cover_by)
+                cover_source = self.doc.meta.get_one('cover_source')
+                if cover_source:
+                    doc_root.set('data-cover-source', cover_source)
+
+            manifest.append(etree.fromstring(
+                '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
+            manifest.append(etree.fromstring(
+                '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
+            spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
+            opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
+            guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
+
+        ctx = Context(format=self)
+        ctx.toc = TOC()
+        ctx.toc_level = 0
+        ctx.footnotes = Footnotes()
+        ctx.part_no = 0
+
+        wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
+        for e in self.render(self.doc.edoc.getroot(), ctx):
+            # Skip parts with no children and no text; text may be None.
+            if not len(e) and not (e.text or '').strip():
+                continue
+            wrap = deepcopy(wrap_tmpl)
+            extend_element(wrap.find('//*[@id="book-text"]'), e)
+
+            partstr = 'part%d' % int(e.get('part_no'))
+            manifest.append(manifest.makeelement(OPFNS('item'), attrib={
+                'id': partstr,
+                'href': partstr + ".html",
+                'media-type': 'application/xhtml+xml',
+            }))
+            spine.append(spine.makeelement(OPFNS('itemref'), attrib={
+                'idref': partstr,
+            }))
+            zip_file.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
+
+        if len(ctx.footnotes.output):
+            ctx.toc.add("Przypisy", "footnotes.html")
+            manifest.append(etree.Element(
+                OPFNS('item'),
+                id='footnotes', href='footnotes.html',
+                **{'media-type': "application/xhtml+xml"}))
+            # Namespaced like the per-part itemref entries above.
+            spine.append(etree.Element(OPFNS('itemref'), idref='footnotes'))
+            wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
+            extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
+
+            zip_file.writestr('OPS/footnotes.html', etree.tostring(
+                wrap, method="html", pretty_print=True))
+
+        zip_file.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
+        ctx.toc.render(nav_map)
+        zip_file.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
+        zip_file.close()
+        # ZipFile does not close a file object it was handed; do it here.
+        output_file.close()
+        return OutputFile.from_filename(output_file.name)
+
+    def render(self, element, ctx):
+        """Render ``element`` with the renderer registered for it."""
+        return self.renderers.get_for(element).render(element, ctx)
+
+
+# Helpers
+
+class EpubRenderer(TreeRenderer):
+    """ Renders insides as XML in a <_/> container. """
+    def container(self, ctx):
+        """Create a container and stamp its root with the current part number."""
+        root, inner = super(EpubRenderer, self).container()
+        root.set("part_no", str(ctx.part_no))
+        return root, inner
+
+    def render(self, element, ctx):
+        """Yield one or more containers with the rendered content of ``element``.
+
+        This is a generator: whenever a child starts a separate part, the
+        container filled so far is yielded and a fresh one is started.
+        """
+        subctx = self.subcontext(element, ctx)
+        wrapper, inside = self.container(ctx)
+        if element.text:
+            extend_element(inside, self.render_text(element.text, ctx))
+        for child in element:
+            try:
+                child_renderer = ctx.format.renderers.get_for(child)
+            except UnknownElement:
+                # No renderer: the child is dropped, but the ``finally``
+                # clause below still keeps its tail text.
+                continue
+            else:
+                if getattr(child_renderer, 'epub_separate', False):
+                    # The child starts a new part: emit what we have, pass the
+                    # child's own parts through, then start a new container.
+                    yield wrapper
+                    ctx.part_no += 1
+                    for child_part in child_renderer.render(child, subctx):
+                        yield child_part
+                    wrapper, inside = self.container(ctx)
+                else:
+                    child_parts = list(child_renderer.render(child, subctx))
+                    # The first part continues the current container.
+                    extend_element(inside, child_parts[0])
+                    if len(child_parts) > 1:
+                        # The child was split: emit the current container and
+                        # the middle parts; the last part opens the next one.
+                        yield wrapper
+                        for child_part in child_parts[1:-1]:
+                            yield child_part
+                        wrapper, inside = self.container(ctx)
+                        extend_element(inside, child_parts[-1])
+            finally:
+                # Tail text goes into whichever container is current now.
+                if child.tail:
+                    extend_element(inside, self.render_text(child.tail, ctx))
+        yield wrapper
+
+
+class Footnotes(object):
+    """Numbers footnotes and collects their content for footnotes.html."""
+
+    def __init__(self):
+        self.output = etree.Element("_")
+        self.counter = 0
+
+    def append(self, items):
+        """Add a footnote made of ``items`` and return the anchor linking to it.
+
+        The back-link points into the part named by ``part_no`` on the first
+        item; the returned anchor points at the entry in footnotes.html.
+        """
+        self.counter += 1
+        number = self.counter
+        part_no = int(items[0].get('part_no'))
+
+        backlink = etree.Element(
+            "a",
+            href="part%d.html#footnote-anchor-%d" % (part_no, number),
+            id="footnote-%d" % number,
+            style="float:left;margin-right:1em")
+        backlink.text = "[%d]" % number
+        backlink.tail = " "
+        self.output.append(backlink)
+
+        for item in items:
+            extend_element(self.output, item)
+
+        anchor = etree.Element(
+            "a",
+            id="footnote-anchor-%d" % number,
+            href="footnotes.html#footnote-%d" % number)
+        anchor.text = "[%d]" % number
+        return anchor
+
+
+class TOC(object):
+    """Tree of table-of-contents entries numbered by a counter on the root."""
+
+    def __init__(self, title=None, href="", root=None):
+        """Create an entry; without ``root`` the entry becomes the tree root.
+
+        ``href`` may contain a ``{counter}`` placeholder, filled with the
+        number assigned to this entry.
+        """
+        if root is None:
+            # The root owns the counter shared by the whole tree.
+            self.counter = 0
+            root = self
+        self.root = root
+        self.children = []
+        self.title = title
+        number = root.counter
+        self.href = href.format(counter=number)
+        self.number = number
+        root.counter = number + 1
+
+    def add(self, title, href):
+        """Append a child entry and return it."""
+        entry = type(self)(title, href, root=self.root)
+        self.children.append(entry)
+        return entry
+
+    def render(self, nav_map):
+        """Append a navPoint for every descendant entry to ``nav_map``.
+
+        Descendants of a child are appended to the same ``nav_map`` element,
+        right after that child's navPoint.
+        """
+        for entry in self.children:
+            label_text = etree.Element(NCXNS('text'))
+            label_text.text = entry.title
+            nav_label = etree.Element(NCXNS('navLabel'))
+            nav_label.append(label_text)
+
+            target = etree.Element(NCXNS('content'))
+            target.set('src', entry.href)
+
+            nav_point = etree.Element(NCXNS('navPoint'))
+            nav_point.set('id', 'NavPoint-%d' % entry.number)
+            nav_point.set('playOrder', str(entry.number))
+            nav_point.append(nav_label)
+            nav_point.append(target)
+
+            nav_map.append(nav_point)
+            entry.render(nav_map)
+
+
+# Renderers
+
+class AsideR(EpubRenderer):
+    """Hands the rendered aside to ctx.footnotes and yields only its anchor."""
+
+    def render(self, element, ctx):
+        rendered_parts = list(super(AsideR, self).render(element, ctx))
+        link = ctx.footnotes.append(rendered_parts)
+        wrapper, inside = self.text_container()
+        inside.append(link)
+        yield wrapper
+EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
+
+
+class DivR(EpubRenderer):
+    """Div renderer that becomes a block-styled <span> in inline contexts."""
+
+    def container(self, ctx):
+        root, inner = super(DivR, self).container(ctx)
+        if not getattr(ctx, 'inline', False):
+            return root, inner
+        # Inline context (ctx.inline is true): use a span displayed as a block.
+        inner.tag = 'span'
+        inner.set('style', 'display: block;')
+        return root, inner
+EpubFormat.renderers.register(core.Div, None, DivR('div'))
+
+
+class HeaderR(EpubRenderer):
+    """Header renderer; its children are rendered with ``inline=True``."""
+
+    def subcontext(self, element, ctx):
+        inline_ctx = Context(ctx, inline=True)
+        return inline_ctx
+EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
+
+
+class SectionR(EpubRenderer):
+    """Section renderer; each section starts a separate EPUB part."""
+    epub_separate = True
+
+    def render(self, element, ctx):
+        # Add 'poczatek'?
+        has_parent = element.getparent() is not None
+        if has_parent:
+            # Sections with a parent get a TOC entry; it becomes the TOC
+            # for everything rendered inside this section.
+            entry = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
+            ctx = Context(ctx, toc=entry)
+        return super(SectionR, self).render(element, ctx)
+EpubFormat.renderers.register(core.Section, None, SectionR())
+
+
+class SpanR(EpubRenderer):
+    """Span renderer; adds nothing to EpubRenderer."""
+EpubFormat.renderers.register(core.Span, None, SpanR('span'))
+
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"></meta>
+ <title>
+ WolneLektury.pl
+ </title>
+ </head>
+ <body>
+ <div id="book-text"></div>
+ </body>
+</html>
--- /dev/null
+<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId" version="2.0">
+ <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf">
+ <dc:title></dc:title>
+ <dc:language xsi:type="dcterms:RFC3066"></dc:language>
+ <dc:identifier id="BookId" opf:scheme="URI"></dc:identifier>
+ <dc:subject></dc:subject>
+ <dc:creator></dc:creator>
+ <dc:publisher></dc:publisher>
+ <dc:date xsi:type="dcterms:W3CDTF"></dc:date>
+ </metadata>
+ <manifest>
+ <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
+ <item id="style" href="style.css" media-type="text/css" />
+ <!--item id="titlePage" href="title.html" media-type="application/xhtml+xml" />
+ <item id="logo_wolnelektury" href="logo_wolnelektury.png" media-type="image/png" />
+ <item id="jedenprocent" href="jedenprocent.png" media-type="image/png" /-->
+ </manifest>
+ <spine toc="toc">
+ <!--itemref idref="titlePage" /-->
+ </spine>
+ <guide>
+ <!--reference type="text" title="Początek" href="part1.html" /-->
+ </guide>
+</package>
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+ <title>Okładka</title>
+ <style type="text/css"> img { max-width: 100%; } </style>
+ </head>
+ <body style="oeb-column-number: 1;">
+ <div id="cover-image">
+ <img alt="Okładka" />
+ </div>
+ </body>
+</html>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"></meta>
+ <title>
+ Przypisy
+ </title>
+ </head>
+ <body>
+ <div id="book-text">
+ <h1>
+ Przypisy:
+ </h1>
+ <div id="footnotes"></div>
+ </div>
+ </body>
+</html>
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import re
+from lxml import etree
+from librarian.formats import Format
+from librarian.output import OutputFile
+from librarian.renderers import Register, TreeRenderer
+from librarian.utils import Context, get_resource
+from librarian import core
+
+
+class HtmlFormat(Format):
+    """Builds a single HTML output from a document, using an HTML template."""
+    format_name = 'HTML'
+    format_ext = 'html'
+
+    renderers = Register()
+
+    def __init__(self, doc, standalone=False):
+        """``standalone`` selects the full-page template and fills its title."""
+        super(HtmlFormat, self).__init__(doc)
+        self.standalone = standalone
+
+    def build(self):
+        """Fill the template with content, TOC and footnotes; return an OutputFile."""
+        if self.standalone:
+            template_path = get_resource("formats/html/res/html_standalone.html")
+        else:
+            template_path = get_resource("formats/html/res/html.html")
+        tree = etree.parse(template_path)
+
+        ctx = Context(format=self)
+        ctx.toc = TOC()
+        ctx.toc_level = 0
+        ctx.footnotes = Footnotes()
+
+        if self.standalone:
+            page_title = u"%s (%s)" % (self.doc.meta.title(), self.doc.meta.author())
+            tree.find('head/title').text = page_title
+
+        content_div = tree.find('.//div[@id="content"]')
+        content_div.extend(self.render(self.doc.edoc.getroot(), ctx))
+        toc_div = tree.find('.//div[@id="toc"]')
+        toc_div.append(ctx.toc.render())
+        footnotes_div = tree.find('.//div[@id="footnotes"]')
+        footnotes_div.extend(ctx.footnotes.output)
+
+        serialized = etree.tostring(tree, encoding='utf-8', method="html")
+        return OutputFile.from_string(serialized)
+
+    def render(self, element, ctx):
+        """Render ``element`` with the renderer registered for it."""
+        renderer = self.renderers.get_for(element)
+        return renderer.render(element, ctx)
+
+
+# Helpers
+
+class NaturalText(TreeRenderer):
+    """Renderer that ties one-character words to the following word."""
+
+    # A space preceded by whitespace and exactly one word character.
+    # Raw string, compiled once instead of on every render_text() call.
+    _short_word_space = re.compile(r'(?<=\s\w) ')
+
+    def render_text(self, text, ctx):
+        """Return a text container where such spaces are ``&nbsp;`` entities."""
+        root, inner = self.text_container()
+        chunks = self._short_word_space.split(text)
+        inner.text = chunks[0]
+        for chunk in chunks[1:]:
+            # Each split point becomes an entity followed by the next chunk.
+            nbsp = etree.Entity("nbsp")
+            nbsp.tail = chunk
+            inner.append(nbsp)
+        return root
+
+
+class LiteralText(TreeRenderer):
+    """Renderer that adds nothing to TreeRenderer (no text rewriting here)."""
+
+
+class Footnotes(object):
+    """Numbers footnotes and collects their content for the footnotes div."""
+
+    def __init__(self):
+        self.output = etree.Element("_")
+        self.counter = 0
+
+    def append(self, item):
+        """Add a footnote and return the in-text anchor that links to it.
+
+        A numbered back-link is stored first, followed by the children of
+        ``item``.
+        """
+        self.counter += 1
+        number = self.counter
+
+        backlink = etree.Element(
+            "a",
+            href="#footnote-anchor-%d" % number,
+            id="footnote-%d" % number,
+            style="float:left;margin-right:1em")
+        backlink.text = "[%d]" % number
+        backlink.tail = " "
+        self.output.append(backlink)
+        self.output.extend(item)
+
+        anchor = etree.Element(
+            "a",
+            id="footnote-anchor-%d" % number,
+            href="#footnote-%d" % number)
+        anchor.text = "[%d]" % number
+        return anchor
+
+
+class TOC(object):
+    """Flat list of (level, title, number) entries rendered as nested <ul>s."""
+
+    def __init__(self):
+        self.counter = 0
+        self.items = []
+
+    def add(self, title, level=0):
+        """Record an entry and return its number (starting from 1)."""
+        self.counter += 1
+        number = self.counter
+        self.items.append((level, title, number))
+        return number
+
+    def render(self):
+        """Return a ``<ul id="toc">`` element linking to ``#sect<number>``."""
+        out = etree.Element("ul", id="toc")
+        cursor = out
+        depth = 0
+        for level, title, number in self.items:
+            # Open nested lists until the entry's level is reached...
+            while depth < level:
+                nested = etree.Element("ul")
+                cursor.append(nested)
+                cursor = nested
+                depth += 1
+            # ...or climb back up when the entry is shallower.
+            while depth > level:
+                cursor = cursor.getparent()
+                depth -= 1
+            link = etree.Element("a", href="#sect%d" % number)
+            link.text = title
+            entry = etree.Element("li")
+            entry.append(link)
+            cursor.append(entry)
+        return out
+
+
+# Renderers
+
+# Default renderer for core.Aside (registered with None): an <aside> element.
+HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside'))
+
+class AsideFootnote(NaturalText):
+    """Moves the rendered aside into ctx.footnotes and returns its anchor."""
+
+    def render(self, element, ctx):
+        rendered = super(AsideFootnote, self).render(element, ctx)
+        link = ctx.footnotes.append(rendered)
+        root, inner = self.container()
+        inner.append(link)
+        return root
+HtmlFormat.renderers.register(core.Aside, 'footnote', AsideFootnote())
+
+
+# core.Header is rendered as <h1>.
+HtmlFormat.renderers.register(core.Header, None, NaturalText('h1'))
+
+
+# core.Div: a default (None) entry plus entries keyed 'item', 'list' and 'p',
+# each mapped to its own HTML tag.
+HtmlFormat.renderers.register(core.Div, None, NaturalText('div'))
+HtmlFormat.renderers.register(core.Div, 'item', NaturalText('li'))
+HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
+HtmlFormat.renderers.register(core.Div, 'p', NaturalText('p'))
+
+
+class Section(NaturalText):
+    """Section renderer that records a TOC entry and sets the matching id."""
+
+    def subcontext(self, element, ctx):
+        # Children are rendered one TOC level deeper.
+        deeper = ctx.toc_level + 1
+        return Context(ctx, toc_level=deeper)
+
+    def render(self, element, ctx):
+        section_no = ctx.toc.add(element.meta.title(), ctx.toc_level)
+        root = super(Section, self).render(element, ctx)
+        # The id matches the "#sect<number>" link produced by the TOC.
+        root[0].set("id", "sect%d" % section_no)
+        return root
+HtmlFormat.renderers.register(core.Section, None, Section('section'))
+
+
+# core.Span: a default (None) entry plus entries keyed 'cite', 'cite.code'
+# and 'emph'; 'cite.code' uses LiteralText, the others NaturalText.
+HtmlFormat.renderers.register(core.Span, None, NaturalText('span'))
+HtmlFormat.renderers.register(core.Span, 'cite', NaturalText('cite'))
+HtmlFormat.renderers.register(core.Span, 'cite.code', LiteralText('code'))
+HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em'))
+
+class SpanUri(LiteralText):
+    """Renders a span as a link whose href is the element's own text."""
+
+    def render(self, element, ctx):
+        root = super(SpanUri, self).render(element, ctx)
+        link = root[0]
+        link.attrib['href'] = element.text
+        return root
+HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a'))
--- /dev/null
+<div>
+ <div id="toc">
+ </div>
+ <div id="content">
+ </div>
+ <div id="footnotes">
+ </div>
+</div>
--- /dev/null
+<!DOCTYPE html>
+<html>
+<head>
+ <title></title>
+ <meta charset="UTF-8" />
+</head>
+<body>
+ <div id="toc">
+ </div>
+ <div id="content">
+ </div>
+ <div id="footnotes">
+ </div>
+</body>
+</html>
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from lxml import etree
-import re
-
-from librarian.dcparser import Person
-
-def _register_function(f):
- """ Register extension function with lxml """
- ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
- ns[f.__name__] = f
-
-
-def reg_substitute_entities():
- ENTITY_SUBSTITUTIONS = [
- (u'---', u'—'),
- (u'--', u'–'),
- (u'...', u'…'),
- (u',,', u'„'),
- (u'"', u'”'),
- ]
-
- def substitute_entities(context, text):
- """XPath extension function converting all entites in passed text."""
- if isinstance(text, list):
- text = ''.join(text)
- for entity, substitutution in ENTITY_SUBSTITUTIONS:
- text = text.replace(entity, substitutution)
- return text
-
- _register_function(substitute_entities)
-
-
-def reg_strip():
- def strip(context, text):
- """Remove unneeded whitespace from beginning and end"""
- if isinstance(text, list):
- text = ''.join(text)
- return re.sub(r'\s+', ' ', text).strip()
- _register_function(strip)
-
-
-def reg_starts_white():
- def starts_white(context, text):
- if isinstance(text, list):
- text = ''.join(text)
- if not text:
- return False
- return text[0].isspace()
- _register_function(starts_white)
-
-
-def reg_ends_white():
- def ends_white(context, text):
- if isinstance(text, list):
- text = ''.join(text)
- if not text:
- return False
- return text[-1].isspace()
- _register_function(ends_white)
-
-
-def reg_wrap_words():
- def wrap_words(context, text, wrapping):
- """XPath extension function automatically wrapping words in passed text"""
- if isinstance(text, list):
- text = ''.join(text)
- if not wrapping:
- return text
-
- words = re.split(r'\s', text)
-
- line_length = 0
- lines = [[]]
- for word in words:
- line_length += len(word) + 1
- if line_length > wrapping:
- # Max line length was exceeded. We create new line
- lines.append([])
- line_length = len(word)
- lines[-1].append(word)
- return '\n'.join(' '.join(line) for line in lines)
- _register_function(wrap_words)
-
-
-def reg_person_name():
- def person_name(context, text):
- """ Converts "Name, Forename" to "Forename Name" """
- if isinstance(text, list):
- text = ''.join(text)
- return Person.from_text(text).readable()
- _register_function(person_name)
-
-
-def reg_texcommand():
- def texcommand(context, text):
- """Remove non-letters"""
- if isinstance(text, list):
- text = ''.join(text)
- return re.sub(r'[^a-zA-Z]', '', text).strip()
- _register_function(texcommand)
-
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import os
-import cStringIO
-import copy
-
-from lxml import etree
-from librarian import XHTMLNS, ParseError, OutputFile
-from librarian import functions
-
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-
-functions.reg_substitute_entities()
-functions.reg_person_name()
-
-STYLESHEETS = {
- 'legacy': 'xslt/book2html.xslt',
- 'full': 'xslt/wl2html_full.xslt',
- 'partial': 'xslt/wl2html_partial.xslt'
-}
-
-def get_stylesheet(name):
- return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
-
-def html_has_content(text):
- return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
-
-def transform(wldoc, stylesheet='legacy', options=None, flags=None):
- """Transforms the WL document to XHTML.
-
- If output_filename is None, returns an XML,
- otherwise returns True if file has been written,False if it hasn't.
- File won't be written if it has no content.
- """
- # Parse XSLT
- try:
- style_filename = get_stylesheet(stylesheet)
- style = etree.parse(style_filename)
-
- document = copy.deepcopy(wldoc)
- del wldoc
- document.swap_endlines()
-
- if flags:
- for flag in flags:
- document.edoc.getroot().set(flag, 'yes')
-
- document.clean_ed_note()
-
- if not options:
- options = {}
- result = document.transform(style, **options)
- del document # no longer needed large object :)
-
- if html_has_content(result):
- add_anchors(result.getroot())
- add_table_of_contents(result.getroot())
-
- return OutputFile.from_string(etree.tostring(result, method='html',
- xml_declaration=False, pretty_print=True, encoding='utf-8'))
- else:
- return None
- except KeyError:
- raise ValueError("'%s' is not a valid stylesheet.")
- except (XMLSyntaxError, XSLTApplyError), e:
- raise ParseError(e)
-
-class Fragment(object):
- def __init__(self, id, themes):
- super(Fragment, self).__init__()
- self.id = id
- self.themes = themes
- self.events = []
-
- def append(self, event, element):
- self.events.append((event, element))
-
- def closed_events(self):
- stack = []
- for event, element in self.events:
- if event == 'start':
- stack.append(('end', element))
- elif event == 'end':
- try:
- stack.pop()
- except IndexError:
- print 'CLOSED NON-OPEN TAG:', element
-
- stack.reverse()
- return self.events + stack
-
- def to_string(self):
- result = []
- for event, element in self.closed_events():
- if event == 'start':
- result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
- if element.text:
- result.append(element.text)
- elif event == 'end':
- result.append(u'</%s>' % element.tag)
- if element.tail:
- result.append(element.tail)
- else:
- result.append(element)
-
- return ''.join(result)
-
- def __unicode__(self):
- return self.to_string()
-
-
-def extract_fragments(input_filename):
- """Extracts theme fragments from input_filename."""
- open_fragments = {}
- closed_fragments = {}
-
- # iterparse would die on a HTML document
- parser = etree.HTMLParser(encoding='utf-8')
- buf = cStringIO.StringIO()
- buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
- buf.seek(0)
-
- for event, element in etree.iterparse(buf, events=('start', 'end')):
- # Process begin and end elements
- if element.get('class', '') in ('theme-begin', 'theme-end'):
- if not event == 'end': continue # Process elements only once, on end event
-
- # Open new fragment
- if element.get('class', '') == 'theme-begin':
- fragment = Fragment(id=element.get('fid'), themes=element.text)
-
- # Append parents
- if element.getparent().get('id', None) != 'book-text':
- parents = [element.getparent()]
- while parents[-1].getparent().get('id', None) != 'book-text':
- parents.append(parents[-1].getparent())
-
- parents.reverse()
- for parent in parents:
- fragment.append('start', parent)
-
- open_fragments[fragment.id] = fragment
-
- # Close existing fragment
- else:
- try:
- fragment = open_fragments[element.get('fid')]
- except KeyError:
- print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
- else:
- closed_fragments[fragment.id] = fragment
- del open_fragments[fragment.id]
-
- # Append element tail to lost_text (we don't want to lose any text)
- if element.tail:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append('text', element.tail)
-
-
- # Process all elements except begin and end
- else:
- # Omit annotation tags
- if (len(element.get('name', '')) or
- element.get('class', '') in ('annotation', 'anchor')):
- if event == 'end' and element.tail:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append('text', element.tail)
- else:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append(event, copy.copy(element))
-
- return closed_fragments, open_fragments
-
-
-def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
- if with_link:
- if link_text is None:
- link_text = prefix
- anchor = etree.Element('a', href='#%s' % prefix)
- anchor.set('class', 'anchor')
- anchor.text = unicode(link_text)
- if element.text:
- anchor.tail = element.text
- element.text = u''
- element.insert(0, anchor)
-
- if with_target:
- anchor_target = etree.Element('a', name='%s' % prefix)
- anchor_target.set('class', 'target')
- anchor_target.text = u' '
- if element.text:
- anchor_target.tail = element.text
- element.text = u''
- element.insert(0, anchor_target)
-
-
-def any_ancestor(element, test):
- for ancestor in element.iterancestors():
- if test(ancestor):
- return True
- return False
-
-
-def add_anchors(root):
- counter = 1
- for element in root.iterdescendants():
- if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
- or e.get('id') == 'nota_red'
- or e.tag == 'blockquote'):
- continue
-
- if element.tag == 'p' and 'verse' in element.get('class', ''):
- if counter == 1 or counter % 5 == 0:
- add_anchor(element, "f%d" % counter, link_text=counter)
- counter += 1
- elif 'paragraph' in element.get('class', ''):
- add_anchor(element, "f%d" % counter, link_text=counter)
- counter += 1
-
-
-def raw_printable_text(element):
- working = copy.deepcopy(element)
- for e in working.findall('a'):
- if e.get('class') == 'annotation':
- e.text = ''
- return etree.tostring(working, method='text', encoding=unicode).strip()
-
-
-def add_table_of_contents(root):
- sections = []
- counter = 1
- for element in root.iterdescendants():
- if element.tag in ('h2', 'h3'):
- if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
- continue
-
- element_text = raw_printable_text(element)
- if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
- sections[-1][3].append((counter, element.tag, element_text, []))
- else:
- sections.append((counter, element.tag, element_text, []))
- add_anchor(element, "s%d" % counter, with_link=False)
- counter += 1
-
- toc = etree.Element('div')
- toc.set('id', 'toc')
- toc_header = etree.SubElement(toc, 'h2')
- toc_header.text = u'Spis treści'
- toc_list = etree.SubElement(toc, 'ol')
-
- for n, section, text, subsections in sections:
- section_element = etree.SubElement(toc_list, 'li')
- add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
-
- if len(subsections):
- subsection_list = etree.SubElement(section_element, 'ol')
- for n, subsection, text, _ in subsections:
- subsection_element = etree.SubElement(subsection_list, 'li')
- add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
-
- root.insert(0, toc)
-
-
-def extract_annotations(html_path):
- """For each annotation, yields a tuple: anchor, text, html."""
- parser = etree.HTMLParser(encoding='utf-8')
- tree = etree.parse(html_path, parser)
- footnotes = tree.find('//*[@id="footnotes"]')
- if footnotes is not None:
- for footnote in footnotes.findall('div'):
- anchor = footnote.find('a[@name]').get('name')
- del footnote[:2]
- text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip()
- html_str = etree.tostring(footnote, method='html', encoding='utf-8')
- yield anchor, text_str, html_str
-
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from librarian import DCNS, SSTNS
+
+
+def text_value(meta):
+    """ Returns the textual value of a metadata element.
+
+    An element defining its own `text_value` method is asked for it;
+    for any other element its raw `text` is returned.
+
+    >>> p = Person()
+    >>> p.text = u"Czajka, Radek"
+    >>> print text_value(p)
+    Radek Czajka
+
+    """
+    if not hasattr(meta, 'text_value'):
+        return meta.text
+    return meta.text_value()
+
+
+class Metadata(etree.ElementBase):
+    """ Metadata container element.
+
+    It describes its parent element, unless it was created detached
+    with `about` -- then it describes the element given there.
+    """
+    @classmethod
+    def about(cls, element):
+        """ Creates a detached metadata object describing `element`. """
+        meta = cls()
+        meta._about = element
+        return meta
+
+    def get_about(self):
+        """ Returns the described element: the explicit one or the parent. """
+        if hasattr(self, '_about'):
+            return self._about
+        else:
+            return self.getparent()
+
+    def get(self, key, inherit=True):
+        """ Finds metadata by its element name.
+
+        Returns a list of text values (see `text_value`) of the child
+        elements named `key`.  If there are none and `inherit` is set,
+        the lookup falls back to the metadata of the described element's
+        parent, and then to the described element's `meta_context`.
+        Returns an empty list if nothing is found.
+
+        NOTE: this replaces lxml's attribute getter of the same name.
+        """
+        values = self.findall(key)
+        if values:
+            return [text_value(v) for v in values]
+        elif inherit and self.get_about().getparent() is not None:
+            return self.get_about().getparent().meta.get(key)
+        elif inherit and hasattr(self.get_about(), 'meta_context'):
+            return self.get_about().meta_context.get(key)
+        else:
+            return []
+
+    def get_one(self, *args, **kwargs):
+        """ Returns the first value found by `get`, or None. """
+        values = self.get(*args, **kwargs)
+        if values:
+            return values[0]
+        else:
+            return None
+
+
+    # Specials.
+
+    def author(self):
+        """ Returns the first dc:creator as unicode, or an empty string. """
+        try:
+            return unicode(self.get(DCNS('creator'))[0])
+        except IndexError:
+            return u""
+
+    def slug(self):
+        """ Returns the slug taken from the first dc:identifier, or None. """
+        try:
+            identifier = self.get(DCNS('identifier'))[0]
+        except IndexError:
+            return None
+        # A value which knows its own slug is simply asked for it.
+        if hasattr(identifier, 'slug'):
+            return identifier.slug()
+        if not identifier:
+            return None
+        # `get` returns plain text values rather than elements, so
+        # the last path segment has to be cut out of the text here.
+        return identifier.rstrip('/').rsplit('/', 1)[-1]
+
+    def title(self):
+        """ Returns own dc:title (not inherited) or the header's text.
+
+        Returns an empty string if the described element has neither.
+        """
+        dc_title = self.get(DCNS('title'), inherit=False)
+        if dc_title:
+            return unicode(dc_title[0])
+        else:
+            header = self.get_about().find(SSTNS('header'))
+            if header is not None:
+                # FIXME: This should be a simple text representation
+                return header.text
+            else:
+                return u""
+
+
+class MetaItem(etree.ElementBase):
+    """ Base class for elements being single metadata items. """
+    pass
+
+
+class Person(MetaItem):
+    """ Metadata item holding a person's name. """
+    def text_value(self):
+        """ Returns the name with the part after the last comma put first. """
+        parts = self.text.rsplit(u',', 1)
+        parts.reverse()
+        return u" ".join(part.strip() for part in parts)
+
+
+class Identifier(MetaItem):
+    """ Metadata item holding an identifier in a form of a path or URI. """
+    def slug(self):
+        """ Returns the last path segment, ignoring trailing slashes. """
+        trimmed = self.text.rstrip('/')
+        return trimmed.rsplit('/', 1)[-1]
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+import shutil
+
+
+class OutputFile(object):
+    """Represents a file returned by one of the converters.
+
+    The contents are held either in memory, as a string, or on disk,
+    under a file name.  A file the object knows by name is removed
+    when the object is destroyed.
+    """
+
+    _string = None
+    _filename = None
+
+    def __del__(self):
+        # Removes the file, whether it was handed in with from_filename
+        # or created as a temporary one by get_filename.
+        if self._filename:
+            os.unlink(self._filename)
+
+    def __nonzero__(self):
+        return self._string is not None or self._filename is not None
+
+    @classmethod
+    def from_string(cls, string):
+        """Converter returns contents of a file as a string."""
+
+        instance = cls()
+        instance._string = string
+        return instance
+
+    @classmethod
+    def from_filename(cls, filename):
+        """Converter returns contents of a file as a named file."""
+
+        instance = cls()
+        instance._filename = filename
+        return instance
+
+    def get_string(self):
+        """Get file's contents as a string."""
+
+        if self._filename is not None:
+            # Binary mode: get_filename writes the data out in binary
+            # mode, and text mode could alter line endings on the way back.
+            with open(self._filename, 'rb') as f:
+                return f.read()
+        else:
+            return self._string
+
+    def get_file(self):
+        """Get file as a file-like object.
+
+        Returns None if the object holds no contents at all.
+        """
+
+        if self._string is not None:
+            from StringIO import StringIO
+            return StringIO(self._string)
+        elif self._filename is not None:
+            # Binary mode, for the same reason as in get_string.
+            return open(self._filename, 'rb')
+
+    def get_filename(self):
+        """Get file as a fs path.
+
+        In-memory contents are written out to a temporary file first;
+        its name is remembered and reused.  Returns None if the object
+        holds no contents at all.
+        """
+
+        if self._filename is not None:
+            return self._filename
+        elif self._string is not None:
+            from tempfile import NamedTemporaryFile
+            # delete=False: the file must outlive this handle;
+            # it is removed in __del__ instead.
+            temp = NamedTemporaryFile(prefix='librarian-', delete=False)
+            temp.write(self._string)
+            temp.close()
+            self._filename = temp.name
+            return self._filename
+        else:
+            return None
+
+    def save_as(self, path):
+        """Save file to a path. Create directories, if necessary."""
+
+        dirname = os.path.dirname(os.path.abspath(path))
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        shutil.copy(self.get_filename(), path)
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from librarian import ValidationError, NoDublinCore, ParseError, NoProvider
-from librarian import RDFNS
-from librarian.cover import WLCover
-from librarian import dcparser
-
-from xml.parsers.expat import ExpatError
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-
-import os
-import re
-from StringIO import StringIO
-
-class WLDocument(object):
- LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
- provider = None
-
- def __init__(self, edoc, parse_dublincore=True, provider=None,
- strict=False, meta_fallbacks=None):
- self.edoc = edoc
- self.provider = provider
-
- root_elem = edoc.getroot()
-
- dc_path = './/' + RDFNS('RDF')
-
- if root_elem.tag != 'utwor':
- raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
-
- if parse_dublincore:
- self.rdf_elem = root_elem.find(dc_path)
-
- if self.rdf_elem is None:
- raise NoDublinCore('Document has no DublinCore - which is required.')
-
- self.book_info = dcparser.BookInfo.from_element(
- self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
- else:
- self.book_info = None
-
- @classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
-
- @classmethod
- def from_file(cls, xmlfile, *args, **kwargs):
-
- # first, prepare for parsing
- if isinstance(xmlfile, basestring):
- file = open(xmlfile, 'rb')
- try:
- data = file.read()
- finally:
- file.close()
- else:
- data = xmlfile.read()
-
- if not isinstance(data, unicode):
- data = data.decode('utf-8')
-
- data = data.replace(u'\ufeff', '')
-
- try:
- parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-
- return cls(tree, *args, **kwargs)
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
- raise ParseError(e)
-
- def swap_endlines(self):
- """Converts line breaks in stanzas into <br/> tags."""
- # only swap inside stanzas
- for elem in self.edoc.iter('strofa'):
- for child in list(elem):
- if child.tail:
- chunks = self.LINE_SWAP_EXPR.split(child.tail)
- ins_index = elem.index(child) + 1
- while len(chunks) > 1:
- ins = etree.Element('br')
- ins.tail = chunks.pop()
- elem.insert(ins_index, ins)
- child.tail = chunks.pop(0)
- if elem.text:
- chunks = self.LINE_SWAP_EXPR.split(elem.text)
- while len(chunks) > 1:
- ins = etree.Element('br')
- ins.tail = chunks.pop()
- elem.insert(0, ins)
- elem.text = chunks.pop(0)
-
- def parts(self):
- if self.provider is None:
- raise NoProvider('No document provider supplied.')
- if self.book_info is None:
- raise NoDublinCore('No Dublin Core in document.')
- for part_uri in self.book_info.parts:
- yield self.from_file(self.provider.by_uri(part_uri),
- provider=self.provider)
-
- def chunk(self, path):
- # convert the path to XPath
- expr = self.path_to_xpath(path)
- elems = self.edoc.xpath(expr)
-
- if len(elems) == 0:
- return None
- else:
- return elems[0]
-
- def path_to_xpath(self, path):
- parts = []
-
- for part in path.split('/'):
- match = re.match(r'([^\[]+)\[(\d+)\]', part)
- if not match:
- parts.append(part)
- else:
- tag, n = match.groups()
- parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) )
-
- if parts[0] == '.':
- parts[0] = ''
-
- return '/'.join(parts)
-
- def transform(self, stylesheet, **options):
- return self.edoc.xslt(stylesheet, **options)
-
- def update_dc(self):
- if self.book_info:
- parent = self.rdf_elem.getparent()
- parent.replace( self.rdf_elem, self.book_info.to_etree(parent) )
-
- def serialize(self):
- self.update_dc()
- return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
-
- def merge_chunks(self, chunk_dict):
- unmerged = []
-
- for key, data in chunk_dict.iteritems():
- try:
- xpath = self.path_to_xpath(key)
- node = self.edoc.xpath(xpath)[0]
- repl = etree.fromstring(u"<%s>%s</%s>" %(node.tag, data, node.tag) )
- node.getparent().replace(node, repl)
- except Exception, e:
- unmerged.append( repr( (key, xpath, e) ) )
-
- return unmerged
-
- def clean_ed_note(self):
- """ deletes forbidden tags from nota_red """
-
- for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in
- ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
- tail = node.tail
- node.clear()
- node.tag = 'span'
- node.tail = tail
-
- def editors(self):
- """Returns a set of all editors for book and its children.
-
- :returns: set of dcparser.Person objects
- """
- if self.book_info is None:
- raise NoDublinCore('No Dublin Core in document.')
- persons = set(self.book_info.editors +
- self.book_info.technical_editors)
- for child in self.parts():
- persons.update(child.editors())
- if None in persons:
- persons.remove(None)
- return persons
-
- # Converters
-
- def as_html(self, *args, **kwargs):
- from librarian import html
- return html.transform(self, *args, **kwargs)
-
- def as_text(self, *args, **kwargs):
- from librarian import text
- return text.transform(self, *args, **kwargs)
-
- def as_epub(self, *args, **kwargs):
- from librarian import epub
- return epub.transform(self, *args, **kwargs)
-
- def as_pdf(self, *args, **kwargs):
- from librarian import pdf
- return pdf.transform(self, *args, **kwargs)
-
- def as_mobi(self, *args, **kwargs):
- from librarian import mobi
- return mobi.transform(self, *args, **kwargs)
-
- def as_fb2(self, *args, **kwargs):
- from librarian import fb2
- return fb2.transform(self, *args, **kwargs)
-
- def as_cover(self, cover_class=None, *args, **kwargs):
- if cover_class is None:
- cover_class = WLCover
- return cover_class(self.book_info, *args, **kwargs).output_file()
-
- def save_output_file(self, output_file, output_path=None,
- output_dir_path=None, make_author_dir=False, ext=None):
- if output_dir_path:
- save_path = output_dir_path
- if make_author_dir:
- save_path = os.path.join(save_path,
- unicode(self.book_info.author).encode('utf-8'))
- save_path = os.path.join(save_path,
- self.book_info.uri.slug)
- if ext:
- save_path += '.%s' % ext
- else:
- save_path = output_path
-
- output_file.save_as(save_path)
+from . import DCNS, SSTNS
+from . import core, meta
+
+
+class SSTParser(etree.XMLParser):
+    """ XML parser building documents out of librarian's element classes. """
+    def __init__(self):
+        super(SSTParser, self).__init__(remove_blank_text=False)
+        lookup = etree.ElementNamespaceClassLookup()
+        self.set_element_class_lookup(lookup)
+
+        # Element classes by namespace: core language tags first,
+        # then any special metadata.
+        element_classes = (
+            (SSTNS.uri, (
+                ('aside', core.Aside),
+                ('div', core.Div),
+                ('header', core.Header),
+                ('section', core.Section),
+                ('span', core.Span),
+                ('metadata', meta.Metadata),
+            )),
+            (DCNS.uri, (
+                ('creator', meta.Person),
+                ('identifier', meta.Identifier),
+            )),
+        )
+        for uri, classes in element_classes:
+            namespace = lookup.get_namespace(uri)
+            for tag, element_class in classes:
+                namespace[tag] = element_class
+++ /dev/null
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
-from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
-from xml.parsers.expat import ExpatError
-from os import path
-from StringIO import StringIO
-from lxml import etree
-from lxml.etree import (XMLSyntaxError, XSLTApplyError)
-import re
-
-
-class WLPictureURI(WLURI):
- _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
- '(?P<slug>[-a-z0-9]+)/?$')
-
- @classmethod
- def from_slug(cls, slug):
- uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
- return cls(uri)
-
-def as_wlpictureuri_strict(text):
- return WLPictureURI.strict(text)
-
-
-class PictureInfo(WorkInfo):
- """
- Dublin core metadata for a picture
- """
- FIELDS = (
- Field(DCNS('language'), 'language', required=False),
- Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
- Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
-
- Field(DCNS('format.dimensions'), 'dimensions', required=False),
- Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
- Field(DCNS('description.medium'), 'medium', required=False),
- Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
- Field(DCNS('format'), 'mime_type', required=False),
- Field(DCNS('identifier.url'), 'url', WLPictureURI,
- strict=as_wlpictureuri_strict),
- )
-
-
-class ImageStore(object):
- EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
- 'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
- 'aiff', 'wbmp', 'xbm']
- MIME = ['image/gif', 'image/jpeg', 'image/png',
- 'application/x-shockwave-flash', 'image/psd', 'image/bmp',
- 'image/tiff', 'image/tiff', 'application/octet-stream',
- 'image/jp2', 'application/octet-stream', 'application/octet-stream',
- 'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
-
- def __init__(self, dir_):
- self.dir = dir_
- return super(ImageStore, self).__init__()
-
- def path(self, slug, mime_type):
- """
- Finds file by slug and mime type in our iamge store.
- Returns a file objects (perhaps should return a filename?)
- """
- try:
- i = self.MIME.index(mime_type)
- except ValueError:
- err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type))
- err.slug = slug
- err.mime_type = mime_type
- raise err
- ext = self.EXT[i]
- # add some common extensions tiff->tif, jpeg->jpg
- return path.join(self.dir, slug + '.' + ext)
-
-
-class WLPicture(object):
- def __init__(self, edoc, parse_dublincore=True, image_store=None):
- self.edoc = edoc
- self.image_store = image_store
-
- root_elem = edoc.getroot()
-
- dc_path = './/' + RDFNS('RDF')
-
- if root_elem.tag != 'picture':
- raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag)
-
- if parse_dublincore:
- self.rdf_elem = root_elem.find(dc_path)
-
- if self.rdf_elem is None:
- raise NoDublinCore('Document has no DublinCore - which is required.')
-
- self.picture_info = PictureInfo.from_element(self.rdf_elem)
- else:
- self.picture_info = None
-
- @classmethod
- def from_string(cls, xml, *args, **kwargs):
- return cls.from_file(StringIO(xml), *args, **kwargs)
-
- @classmethod
- def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
-
- # first, prepare for parsing
- if isinstance(xmlfile, basestring):
- file = open(xmlfile, 'rb')
- try:
- data = file.read()
- finally:
- file.close()
- else:
- data = xmlfile.read()
-
- if not isinstance(data, unicode):
- data = data.decode('utf-8')
-
- data = data.replace(u'\ufeff', '')
-
- # assume images are in the same directory
- if image_store is None and xmlfile.name is not None:
- image_store = ImageStore(path.dirname(xmlfile.name))
-
- try:
- parser = etree.XMLParser(remove_blank_text=False)
- tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-
- return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
- except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
- raise ParseError(e)
-
- @property
- def mime_type(self):
- if self.picture_info is None:
- raise ValueError('DC is not loaded, hence we don\'t know the image type')
- return self.picture_info.mime_type
-
- @property
- def slug(self):
- return self.picture_info.url.slug
-
- @property
- def image_path(self):
- if self.image_store is None:
- raise ValueError("No image store associated with whis WLPicture.")
- return self.image_store.path(self.slug, self.mime_type)
-
- def image_file(self, *args, **kwargs):
- return open(self.image_path, *args, **kwargs)
-
- def partiter(self):
- """
- Iterates the parts of this picture and returns them and their metadata
- """
- for part in self.edoc.iter("div"):
- pd = {}
- pd['type'] = part.get('type')
- if pd['type'] == 'area':
- pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
- (int(part.get('x2')), int(part.get('y2'))))
-
- pd['themes'] = []
- pd['object'] = None
- parent = part
- while True:
- parent = parent.getparent()
- if parent is None:
- break
- if parent.tag == 'sem':
- if parent.get('type') == 'theme':
- pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
- elif parent.get('type') == 'object' and pd['object'] is None:
- pd['object'] = parent.get('object')
- yield pd
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from . import UnicodeException
+from .utils import extend_element
+
+
+class UnknownElement(UnicodeException):
+    """ Raised when no renderer is registered for an element. """
+    pass
+
+
+class Renderer(object):
+    """ Renders an element in a context to some kind of container.
+
+    This is an abstract base: both methods need to be overridden.
+    """
+    def render(self, element, ctx):
+        """ Renders the element in the context. """
+        # NotImplemented is a comparison sentinel, not an exception;
+        # raising it would only result in a TypeError.
+        raise NotImplementedError
+
+    def render_text(self, text, ctx):
+        """ Renders the text in the context. """
+        raise NotImplementedError
+
+
+class TreeRenderer(Renderer):
+    """ Renders contents as an XML tree inside a throwaway root element.
+
+    The root is named with `root_name` (<_/>).  If a tag name was given,
+    the contents land in an element of that name, nested in the root.
+    """
+    root_name = "_"
+
+    def __init__(self, tag_name=None, attrib=None):
+        self.tag_name = tag_name
+        self.attrib = attrib if attrib else {}
+
+    def container(self):
+        """ Returns a pair: the root and the element to put contents in. """
+        root = etree.Element(self.root_name)
+        if not self.tag_name:
+            return root, root
+        inner = etree.Element(self.tag_name, **self.attrib)
+        root.append(inner)
+        return root, inner
+
+    def text_container(self):
+        """ Returns a (root, inner) pair for text; it's just a bare root. """
+        root = etree.Element(self.root_name)
+        return root, root
+
+    def subcontext(self, element, ctx):
+        """ Returns the context for rendering children; the same one here. """
+        return ctx
+
+    def get_insides(self, element, ctx):
+        """ Yields rendered pieces of the element's contents, in order.
+
+        Children without a known renderer are left out, but the text
+        following them is still rendered.
+        """
+        subctx = self.subcontext(element, ctx)
+        leading = element.text
+        if leading:
+            yield self.render_text(leading, ctx)
+        for child in element:
+            try:
+                rendered = ctx.format.render(child, subctx)
+            except UnknownElement:
+                pass
+            else:
+                yield rendered
+            trailing = child.tail
+            if trailing:
+                yield self.render_text(trailing, ctx)
+
+    def render(self, element, ctx):
+        """ Renders the element; returns the root of the container. """
+        root, inner = self.container()
+        for piece in self.get_insides(element, ctx):
+            extend_element(inner, piece)
+        return root
+
+    def render_text(self, text, ctx):
+        """ Renders the text; returns a root element holding it. """
+        root, inner = self.text_container()
+        inner.text = text
+        return root
+
+
+
+class Register(object):
+    """ Register of renderers, keyed by element type and class name.
+
+    Class names are dotted; a renderer registered for a name serves
+    all the more specific names too.
+
+    >>> from librarian.core import Div
+    >>> renderer = Renderer()
+    >>> reg = Register()
+    >>> reg.register(Div, 'a.b', renderer)
+    >>> reg.get(Div, 'a.b.c') is renderer
+    True
+
+    """
+    def __init__(self):
+        self.classes = {}
+
+    def register(self, tag, klass, renderer):
+        """ Registers the renderer for the tag and the class name. """
+        self.classes[tag, klass] = renderer
+
+    def get(self, tag, klass=None):
+        """ Finds the renderer for the tag and the class name.
+
+        Tries the class name, then the name with trailing dotted parts
+        cut off one by one, and finally the entry registered for the tag
+        with no class name.  Raises UnknownElement if all of that fails.
+        """
+        candidate = klass
+        while candidate:
+            if (tag, candidate) in self.classes:
+                return self.classes[tag, candidate]
+            parent, dot, _ = candidate.rpartition('.')
+            candidate = parent if dot else None
+        if (tag, None) in self.classes:
+            return self.classes[tag, None]
+        raise UnknownElement(tag)
+
+    def get_for(self, element):
+        """ Finds the renderer by the element's type and `class` attribute. """
+        return self.get(type(element), element.get('class'))
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+
+
+class Context(object):
+    """ Processing context.
+
+    Attributes are looked up in the context itself first, then up the
+    chain of parent contexts.  Setting an attribute updates it where
+    it's already defined (here or up the chain); a name not defined
+    anywhere is stored in this context.
+
+    >>> ctx = Context(a=1)
+    >>> subctx = Context(ctx, a=2)
+    >>> ctx.b = 3
+    >>> print subctx.a, subctx.b
+    2 3
+
+    """
+    def __init__(self, _upctx=None, **initial):
+        # Go around our own __setattr__, which would put these in _data.
+        object.__setattr__(self, '_upctx', _upctx)
+        object.__setattr__(self, '_data', initial or {})
+
+    def __getattr__(self, name):
+        # This is only called when the normal lookup fails.  If it fails
+        # for our own storage, the object isn't initialized (as happens
+        # while copying or unpickling) and reading self._data below
+        # would recurse endlessly.
+        if name in ('_data', '_upctx'):
+            raise AttributeError(name)
+        if name in self._data:
+            return self._data[name]
+        elif self._upctx is not None:
+            return getattr(self._upctx, name)
+        else:
+            raise AttributeError(name)
+
+    def __setattr__(self, name, value):
+        try:
+            self.try_setattr(name, value)
+        except ValueError:
+            # Not defined anywhere in the chain: define it here.
+            self._data[name] = value
+
+    def try_setattr(self, name, value):
+        """ Updates an already defined attribute, here or up the chain.
+
+        Raises ValueError if the attribute is not defined anywhere.
+        """
+        if name in self._data:
+            self._data[name] = value
+        elif self._upctx is not None:
+            self._upctx.try_setattr(name, value)
+        else:
+            raise ValueError
+
+
+class XMLNamespace(object):
+    '''A handy structure to represent names in an XML namespace.'''
+    def __init__(self, uri):
+        self.uri = uri
+
+    def __call__(self, tag):
+        '''Returns the tag name qualified with the namespace: {uri}tag.'''
+        return '{%s}%s' % (self.uri, tag)
+
+    def __contains__(self, tag):
+        '''Tells if the qualified tag name belongs to the namespace.'''
+        prefix = '{%s}' % str(self)
+        return tag.startswith(prefix)
+
+    def __repr__(self):
+        return 'XMLNamespace(%r)' % self.uri
+
+    def __str__(self):
+        return '%s' % self.uri
+
+
+def extend_element(container, element=None, text=None):
+    """ Appends the given text and another element's contents to container.
+
+    Unlike etree.Element.extend, this carries the text over as well:
+    `text` and then the element's leading text are added after the
+    container's last child, or to the container's own text if it has
+    no children.  The element's children are appended afterwards.
+
+    >>> from lxml import etree
+    >>> container = etree.fromstring("<A><B/></A>")
+    >>> element = etree.fromstring("<_>a<b/>c</_>")
+    >>> extend_element(container, element)
+    >>> print etree.tostring(container)
+    <A><B/>a<b/>c</A>
+
+    """
+    given_text = text or ""
+    leading_text = ""
+    if element is not None:
+        leading_text = element.text or ""
+    add_text = given_text + leading_text
+
+    if add_text:
+        if not len(container):
+            container.text = (container.text or "") + add_text
+        else:
+            last = container[-1]
+            last.tail = (last.tail or "") + add_text
+
+    if element is not None:
+        container.extend(element)
+
+
+def get_resource(path):
+    """ Returns the path joined onto the directory of this module. """
+    here = os.path.dirname(__file__)
+    return os.path.join(here, path)
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from StringIO import StringIO
-from librarian import OutputFile
from librarian.book2anything import Book2Anything, Option
+from librarian.formats.cover.wolnelektury import WLCover
class Book2Cover(Book2Anything):
- format_name = "JPEG"
- ext = "jpg"
- uses_cover = True
- cover_optional = False
+ format_cls = WLCover
- transform_options = [
+ format_options = [
Option('-W', '--width', action='store', type='int', dest='width', default=None,
help='Set width.'),
Option('-H', '--height', action='store', type='int', dest='height', default=None,
help='Add WL logo in white box.'),
]
- @staticmethod
- def transform(wldoc, cover, *args, **kwargs):
- return wldoc.as_cover(cover_class=cover, *args, **kwargs)
-
if __name__ == '__main__':
Book2Cover.run()
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from librarian.book2anything import Book2Anything, Option
+from librarian.formats.epub import EpubFormat
class Book2Epub(Book2Anything):
- format_name = "EPUB"
- ext = "epub"
- uses_cover = True
- uses_provider = True
- transform_flags = [
- Option('-w', '--working-copy', dest='working-copy',
- action='store_true', default=False,
- help='mark the output as a working copy')
- ]
-
+ format_cls = EpubFormat
if __name__ == '__main__':
Book2Epub.run()
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from librarian.book2anything import Book2Anything, Option
+from librarian.formats.html import HtmlFormat
class Book2Html(Book2Anything):
- format_name = "HTML"
- ext = "html"
- uses_cover = False
- uses_provider = False
- transform_flags = [
- Option('-r', '--raw', dest='full-page',
+ format_cls = HtmlFormat
+
+ format_options = [
+ Option('-r', '--raw', dest='standalone',
action='store_false', default=True,
help='output raw text for use in templates')
]
- parser_args = [
- Option('-i', '--ignore-dublin-core', dest='parse_dublincore',
- action='store_false', default=True,
- help='don\'t try to parse dublin core metadata')
- ]
if __name__ == '__main__':
#
import os
import os.path
-from distutils.core import setup
+from setuptools import setup, find_packages
def whole_tree(prefix, path):
files = []
setup(
name='librarian',
- version='1.5.1',
+ version='2.0a',
description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
author="Marek Stępniowski",
author_email='marek@stepniowski.com',
maintainer='Radek Czajka',
maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
url='http://github.com/fnp/librarian',
- packages=['librarian'],
- package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] +
- whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
+ packages=find_packages(),
+ package_data={
+ 'librarian': ['xslt/*.xslt', 'epub/*', 'html/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] +
+ whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer'),
+ 'librarian.formats.html': ['res/*'],
+ 'librarian.formats.epub': ['res/*'],
+ },
include_package_data=True,
- install_requires=['lxml>=2.2'],
+ install_requires=[
+ 'lxml>=2.2',
+ 'pillow',
+ 'Texml',
+ ],
scripts=['scripts/book2html',
'scripts/book2txt',
'scripts/book2epub',