Some experiments with the language: html, epub, covers.

author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>

Thu, 2 May 2013 10:17:09 +0000 (12:17 +0200)

committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>

Thu, 2 May 2013 10:17:27 +0000 (12:17 +0200)
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 2 May 2013 10:17:09 +0000 (12:17 +0200)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 2 May 2013 10:17:27 +0000 (12:17 +0200)
diff --git a/librarian/__init__.py b/librarian/__init__.py

index c46d5d1..0616f23 100644 (file)
--- a/librarian/__init__.py
+++ b/librarian/__init__.py
@@ -3,12 +3,10 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from __future__ import with_statement
-
  import os
  import re
-import shutil
  import urllib
+from .utils import XMLNamespace
  
  
  class UnicodeException(Exception):
@@ -31,31 +29,6 @@ class ParseError(UnicodeException):
  class ValidationError(UnicodeException):
      pass
  
-class NoDublinCore(ValidationError):
-    """There's no DublinCore section, and it's required."""
-    pass
-
-class NoProvider(UnicodeException):
-    """There's no DocProvider specified, and it's needed."""
-    pass
-
-class XMLNamespace(object):
-    '''A handy structure to repsent names in an XML namespace.'''
-
-    def __init__(self, uri):
-        self.uri = uri
-
-    def __call__(self, tag):
-        return '{%s}%s' % (self.uri, tag)
-
-    def __contains__(self, tag):
-        return tag.startswith('{' + str(self) + '}')
-
-    def __repr__(self):
-        return 'XMLNamespace(%r)' % self.uri
-
-    def __str__(self):
-        return '%s' % self.uri
  
  class EmptyNamespace(XMLNamespace):
      def __init__(self):
@@ -72,7 +45,7 @@ XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
  NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/")
  OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
  
-WLNS = EmptyNamespace()
+SSTNS = XMLNamespace('http://nowoczesnapolska.org.pl/sst#')
  
  
  class WLURI(object):
@@ -117,165 +90,7 @@ class WLURI(object):
          return self.slug == other.slug
  
  
-class DocProvider(object):
-    """Base class for a repository of XML files.
-
-    Used for generating joined files, like EPUBs.
-    """
-
-    def by_slug(self, slug):
-        """Should return a file-like object with a WL document XML."""
-        raise NotImplementedError
-
-    def by_uri(self, uri, wluri=WLURI):
-        """Should return a file-like object with a WL document XML."""
-        wluri = wluri(uri)
-        return self.by_slug(wluri.slug)
-
-
-class DirDocProvider(DocProvider):
-    """ Serve docs from a directory of files in form <slug>.xml """
-
-    def __init__(self, dir_):
-        self.dir = dir_
-        self.files = {}
-
-    def by_slug(self, slug):
-        fname = slug + '.xml'
-        return open(os.path.join(self.dir, fname))
-
-
-import lxml.etree as etree
-import dcparser
-
-DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
-        { DCNS('creator'): [u'Some, Author'],
-          DCNS('title'): [u'Some Title'],
-          DCNS('subject.period'): [u'Unknown'],
-          DCNS('subject.type'): [u'Unknown'],
-          DCNS('subject.genre'): [u'Unknown'],
-          DCNS('date'): ['1970-01-01'],
-          DCNS('language'): [u'pol'],
-          # DCNS('date'): [creation_date],
-          DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
-          DCNS('description'):
-          [u"""Publikacja zrealizowana w ramach projektu
-             Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
-             wykonana przez Bibliotekę Narodową z egzemplarza
-             pochodzącego ze zbiorów BN."""],
-          DCNS('identifier.url'): [WLURI.example],
-          DCNS('rights'):
-            [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
-
-def xinclude_forURI(uri):
-    e = etree.Element(XINS("include"))
-    e.set("href", uri)
-    return etree.tostring(e, encoding=unicode)
-
-def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
-    """Wrap the text within the minimal XML structure with a DC template."""
-    bookinfo.created_at = creation_date
-
-    dcstring = etree.tostring(bookinfo.to_etree(), \
-        method='xml', encoding=unicode, pretty_print=True)
-
-    return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
-        u'\n</plain-text>\n</utwor>'
-
-
-def serialize_raw(element):
-    b = u'' + (element.text or '')
-
-    for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode,
-                pretty_print=True)
-        b += e
-
-    return b
-
-SERIALIZERS = {
-    'raw': serialize_raw,
-}
-
-def serialize_children(element, format='raw'):
-    return SERIALIZERS[format](element)
-
-def get_resource(path):
-    return os.path.join(os.path.dirname(__file__), path)
-
-
-class OutputFile(object):
-    """Represents a file returned by one of the converters."""
-
-    _string = None
-    _filename = None
-
-    def __del__(self):
-        if self._filename:
-            os.unlink(self._filename)
-
-    def __nonzero__(self):
-        return self._string is not None or self._filename is not None
-
-    @classmethod
-    def from_string(cls, string):
-        """Converter returns contents of a file as a string."""
-
-        instance = cls()
-        instance._string = string
-        return instance
-
-    @classmethod
-    def from_filename(cls, filename):
-        """Converter returns contents of a file as a named file."""
-
-        instance = cls()
-        instance._filename = filename
-        return instance
-
-    def get_string(self):
-        """Get file's contents as a string."""
-
-        if self._filename is not None:
-            with open(self._filename) as f:
-                return f.read()
-        else:
-            return self._string
-
-    def get_file(self):
-        """Get file as a file-like object."""
-
-        if self._string is not None:
-            from StringIO import StringIO
-            return StringIO(self._string)
-        elif self._filename is not None:
-            return open(self._filename)
-
-    def get_filename(self):
-        """Get file as a fs path."""
-
-        if self._filename is not None:
-            return self._filename
-        elif self._string is not None:
-            from tempfile import NamedTemporaryFile
-            temp = NamedTemporaryFile(prefix='librarian-', delete=False)
-            temp.write(self._string)
-            temp.close()
-            self._filename = temp.name
-            return self._filename
-        else:
-            return None
-
-    def save_as(self, path):
-        """Save file to a path. Create directories, if necessary."""
-
-        dirname = os.path.dirname(os.path.abspath(path))
-        if not os.path.isdir(dirname):
-            os.makedirs(dirname)
-        shutil.copy(self.get_filename(), path)
-
-
  class URLOpener(urllib.FancyURLopener):
-    version = 'FNP Librarian (http://github.com/fnp/librarian)'
+    version = 'FNP Librarian (http://git.nowoczesnapolska.org.pl/?p=librarian.git)'
  urllib._urlopener = URLOpener()
+
diff --git a/librarian/book2anything.py b/librarian/book2anything.py

index b8b8d27..b50cb1c 100755 (executable)
--- a/librarian/book2anything.py
+++ b/librarian/book2anything.py
@@ -8,9 +8,8 @@ from collections import namedtuple
  import os.path
  import optparse
  
-from librarian import DirDocProvider, ParseError
-from librarian.parser import WLDocument
-from librarian.cover import WLCover
+from librarian import ParseError
+from librarian.document import Document
  
  
  class Option(object):
@@ -34,47 +33,26 @@ class Book2Anything(object):
      
      Subclass it for any format you want to convert to.
      """
-    format_name = None # Set format name, like "PDF".
-    ext = None # Set file extension, like "pdf".
-    uses_cover = False # Can it add a cover?
-    cover_optional = True # Only relevant if uses_cover
-    uses_provider = False # Does it need a DocProvider?
-    transform = None # Transform method. Uses WLDocument.as_{ext} by default.
-    parser_options = [] # List of Option objects for additional parser args.
-    transform_options = [] # List of Option objects for additional transform args.
-    transform_flags = [] # List of Option objects for supported transform flags.
-
+    format_cls = None # A formats.Format subclass
+    document_options = [] # List of Option objects for document options.
+    format_options = [] # List of Option objects for format customization.
+    build_options = [] # List of Option objects for build options.
  
      @classmethod
      def run(cls):
          # Parse commandline arguments
          usage = """Usage: %%prog [options] SOURCE [SOURCE...]
-        Convert SOURCE files to %s format.""" % cls.format_name
+        Convert SOURCE files to %s.""" % cls.format_cls.format_name
  
          parser = optparse.OptionParser(usage=usage)
  
          parser.add_option('-v', '--verbose', 
                  action='store_true', dest='verbose', default=False,
                  help='print status messages to stdout')
-        parser.add_option('-d', '--make-dir',
-                action='store_true', dest='make_dir', default=False,
-                help='create a directory for author and put the output file in it')
          parser.add_option('-o', '--output-file',
                  dest='output_file', metavar='FILE',
                  help='specifies the output file')
-        parser.add_option('-O', '--output-dir',
-                dest='output_dir', metavar='DIR',
-                help='specifies the directory for output')
-        if cls.uses_cover:
-            if cls.cover_optional:
-                parser.add_option('-c', '--with-cover', 
-                        action='store_true', dest='with_cover', default=False,
-                        help='create default cover')
-            parser.add_option('-C', '--image-cache',
-                    dest='image_cache', metavar='URL',
-                    help='prefix for image download cache' +
-                        (' (implies --with-cover)' if cls.cover_optional else ''))
-        for option in cls.parser_options + cls.transform_options + cls.transform_flags:
+        for option in cls.document_options + cls.format_options + cls.build_options:
              option.add(parser)
  
          options, input_filenames = parser.parse_args()
@@ -83,28 +61,18 @@ class Book2Anything(object):
              parser.print_help()
              return(1)
  
-        # Prepare additional args for parser.
-        parser_args = {}
-        for option in cls.parser_options:
-            parser_args[option.name()] = option.value(options)
-        # Prepare additional args for transform method.
-        transform_args = {}
-        for option in cls.transform_options:
-            transform_args[option.name()] = option.value(options)
-        # Add flags to transform_args, if any.
-        transform_flags = [flag.name() for flag in cls.transform_flags
-                    if flag.value(options)]
-        if transform_flags:
-            transform_args['flags'] = transform_flags
-        # Add cover support, if any.
-        if cls.uses_cover:
-            if options.image_cache:
-                def cover_class(*args, **kwargs):
-                    return WLCover(image_cache=options.image_cache, *args, **kwargs)
-                transform_args['cover'] = cover_class
-            elif not cls.cover_optional or options.with_cover:
-                transform_args['cover'] = WLCover
-
+        # Prepare additional args for document.
+        document_args = {}
+        for option in cls.document_options:
+            document_args[option.name()] = option.value(options)
+        # Prepare additional args for format.
+        format_args = {}
+        for option in cls.format_options:
+            format_args[option.name()] = option.value(options)
+        # Prepare additional args for build.
+        build_args = {}
+        for option in cls.build_options:
+            build_args[option.name()] = option.value(options)
  
          # Do some real work
          try:
@@ -112,28 +80,18 @@ class Book2Anything(object):
                  if options.verbose:
                      print main_input
  
-            # Where to find input?
-            if cls.uses_provider:
-                path, fname = os.path.realpath(main_input).rsplit('/', 1)
-                provider = DirDocProvider(path)
-            else:
-                provider = None
+            # Do the transformation.
+            doc = Document.from_file(main_input, **document_args)
+            format_ = cls.format_cls(doc, **format_args)
  
              # Where to write output?
-            if not (options.output_file or options.output_dir):
-                output_file = os.path.splitext(main_input)[0] + '.' + cls.ext
+            if not options.output_file:
+                output_file = os.path.splitext(main_input)[0] + '.' + format_.format_ext
              else:
                  output_file = None
-
-            # Do the transformation.
-            doc = WLDocument.from_file(main_input, provider=provider, **parser_args)
-            transform = cls.transform
-            if transform is None:
-                transform = getattr(WLDocument, 'as_%s' % cls.ext)
-            output = transform(doc, **transform_args)
-
-            doc.save_output_file(output,
-                output_file, options.output_dir, options.make_dir, cls.ext)
+            
+            output = format_.build(**build_args)
+            output.save_as(output_file)
  
          except ParseError, e:
              print '%(file)s:%(name)s:%(message)s' % {
diff --git a/librarian/core.py b/librarian/core.py

new file mode 100755 (executable)

index 0000000..0b90a2e
--- /dev/null
+++ b/librarian/core.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from librarian import SSTNS
+from .meta import Metadata
+
+
+class TextElement(etree.ElementBase):
+    @property
+    def meta(self):
+        m = self.find(SSTNS('metadata'))
+        if m is None:
+            return Metadata.about(self)
+        return m
+
+
+class Span(TextElement):
+    pass
+
+
+class Div(TextElement):
+    pass
+
+
+class Section(TextElement):
+    pass
+
+
+class Header(TextElement):
+    pass
+
+
+class Aside(TextElement):
+    pass
diff --git a/librarian/cover.py b/librarian/cover.py

deleted file mode 100644 (file)

index a37b911..0000000
--- a/librarian/cover.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import re
-import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
-from StringIO import StringIO
-from librarian import get_resource, OutputFile, URLOpener
-
-
-class Metric(object):
-    """Gets metrics from an object, scaling it by a factor."""
-    def __init__(self, obj, scale):
-        self._obj = obj
-        self._scale = float(scale)
-
-    def __getattr__(self, name):
-        src = getattr(self._obj, name)
-        if src and self._scale:
-            src = type(src)(self._scale * src)
-        return src
-
-
-class TextBox(object):
-    """Creates an Image with a series of centered strings."""
-
-    SHADOW_X = 3
-    SHADOW_Y = 3
-    SHADOW_BLUR = 3
-
-    def __init__(self, max_width, max_height, padding_x=None, padding_y=None):
-        if padding_x is None:
-            padding_x = self.SHADOW_X + self.SHADOW_BLUR
-        if padding_y is None:
-            padding_y = self.SHADOW_Y + self.SHADOW_BLUR
-
-        self.max_width = max_width
-        self.max_text_width = max_width - 2 * padding_x
-        self.padding_y = padding_y
-        self.height = padding_y
-        self.img = Image.new('RGBA', (max_width, max_height))
-        self.draw = ImageDraw.Draw(self.img)
-        self.shadow_img = None
-        self.shadow_draw = None
-
-    def skip(self, height):
-        """Skips some vertical space."""
-        self.height += height
-
-    def text(self, text, color='#000', font=None, line_height=20,
-             shadow_color=None):
-        """Writes some centered text."""
-        text = re.sub(r'\s+', ' ', text)
-        if shadow_color:
-            if not self.shadow_img:
-                self.shadow_img = Image.new('RGBA', self.img.size)
-                self.shadow_draw = ImageDraw.Draw(self.shadow_img)
-        while text:
-            line = text
-            line_width = self.draw.textsize(line, font=font)[0]
-            while line_width > self.max_text_width:
-                parts = line.rsplit(' ', 1)
-                if len(parts) == 1:
-                    line_width = self.max_text_width
-                    break
-                line = parts[0]
-                line_width = self.draw.textsize(line, font=font)[0]
-            line = line.strip() + ' '
-
-            pos_x = (self.max_width - line_width) / 2
-
-            if shadow_color:
-                self.shadow_draw.text(
-                        (pos_x + self.SHADOW_X, self.height + self.SHADOW_Y),
-                        line, font=font, fill=shadow_color
-                )
-
-            self.draw.text((pos_x, self.height), line, font=font, fill=color)
-            self.height += line_height
-            # go to next line
-            text = text[len(line):]
-
-    def image(self):
-        """Creates the actual Image object."""
-        image = Image.new('RGBA', (self.max_width,
-                                   self.height + self.padding_y))
-        if self.shadow_img:
-            shadow = self.shadow_img.filter(ImageFilter.BLUR)
-            image.paste(shadow, (0, 0), shadow)
-            image.paste(self.img, (0, 0), self.img)
-        else:
-            image.paste(self.img, (0, 0))
-        return image
-
-
-class Cover(object):
-    """Abstract base class for cover images generator."""
-    width = 600
-    height = 800
-    background_color = '#fff'
-    background_img = None
-
-    author_top = 100
-    author_margin_left = 20
-    author_margin_right = 20
-    author_lineskip = 40
-    author_color = '#000'
-    author_shadow = None
-    author_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
-    author_font_size = 30
-
-    title_top = 100
-    title_margin_left = 20
-    title_margin_right = 20
-    title_lineskip = 54
-    title_color = '#000'
-    title_shadow = None
-    title_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
-    title_font_size = 40
-
-    logo_bottom = None
-    logo_width = None
-    uses_dc_cover = False
-
-    format = 'JPEG'
-    scale = 1
-
-    exts = {
-        'JPEG': 'jpg',
-        'PNG': 'png',
-        }
-
-    mime_types = {
-        'JPEG': 'image/jpeg',
-        'PNG': 'image/png',
-        }
-
-    def __init__(self, book_info, format=None, width=None, height=None):
-        self.author = ", ".join(auth.readable() for auth in book_info.authors)
-        self.title = book_info.title
-        if format is not None:
-            self.format = format
-        scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
-        if scale:
-            self.scale = scale
-
-    def pretty_author(self):
-        """Allows for decorating author's name."""
-        return self.author
-
-    def pretty_title(self):
-        """Allows for decorating title."""
-        return self.title
-
-    def image(self):
-        metr = Metric(self, self.scale)
-        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
-
-        if self.background_img:
-            background = Image.open(self.background_img)
-            img.paste(background, None, background)
-            del background
-
-        # WL logo
-        if metr.logo_width:
-            logo = Image.open(get_resource('res/wl-logo.png'))
-            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
-            img.paste(logo, ((metr.width - metr.logo_width) / 2, img.size[1] - logo.size[1] - metr.logo_bottom))
-
-        top = metr.author_top
-        tbox = TextBox(
-            metr.width - metr.author_margin_left - metr.author_margin_right,
-            metr.height - top,
-            )
-            
-        author_font = ImageFont.truetype(
-            self.author_font_ttf, metr.author_font_size)
-        tbox.text(self.pretty_author(), self.author_color, author_font,
-            metr.author_lineskip, self.author_shadow)
-        text_img = tbox.image()
-        img.paste(text_img, (metr.author_margin_left, top), text_img)
-
-        top += text_img.size[1] + metr.title_top
-        tbox = TextBox(
-            metr.width - metr.title_margin_left - metr.title_margin_right,
-            metr.height - top,
-            )
-        title_font = ImageFont.truetype(
-            self.title_font_ttf, metr.title_font_size)
-        tbox.text(self.pretty_title(), self.title_color, title_font,
-            metr.title_lineskip, self.title_shadow)
-        text_img = tbox.image()
-        img.paste(text_img, (metr.title_margin_left, top), text_img)
-
-        return img
-
-    def mime_type(self):
-        return self.mime_types[self.format]
-
-    def ext(self):
-        return self.exts[self.format]
-
-    def save(self, *args, **kwargs):
-        return self.image().save(format=self.format, quality=95, *args, **kwargs)
-
-    def output_file(self, *args, **kwargs):
-        imgstr = StringIO()
-        self.save(imgstr, *args, **kwargs)
-        return OutputFile.from_string(imgstr.getvalue())
-
-
-class WLCover(Cover):
-    """Default Wolne Lektury cover generator."""
-    width = 600
-    height = 833
-    uses_dc_cover = True
-    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
-    author_font_size = 20
-    author_lineskip = 30
-    title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf')
-    title_font_size = 30
-    title_lineskip = 40
-    title_box_width = 350
-    
-    box_top_margin = 100
-    box_bottom_margin = 100
-    box_padding_y = 20
-    box_above_line = 10
-    box_below_line = 15
-    box_line_left = 75
-    box_line_right = 275
-    box_line_width = 2
-
-    logo_top = 15
-    logo_width = 140
-
-    bar_width = 35
-    background_color = '#444'
-    author_color = '#444'
-    default_background = get_resource('res/cover.png')
-    format = 'JPEG'
-
-    epoch_colors = {
-        u'Starożytność': '#9e3610',
-        u'Średniowiecze': '#564c09',
-        u'Renesans': '#8ca629',
-        u'Barok': '#a6820a',
-        u'Oświecenie': '#f2802e',
-        u'Romantyzm': '#db4b16',
-        u'Pozytywizm': '#961060',
-        u'Modernizm': '#7784e0',
-        u'Dwudziestolecie międzywojenne': '#3044cf',
-        u'Współczesność': '#06393d',
-    }
-
-    def __init__(self, book_info, format=None, width=None, height=None, with_logo=False):
-        super(WLCover, self).__init__(book_info, format=format, width=width, height=height)
-        self.kind = book_info.kind
-        self.epoch = book_info.epoch
-        self.with_logo = with_logo
-        if book_info.cover_url:
-            url = book_info.cover_url
-            bg_src = None
-            if bg_src is None:
-                bg_src = URLOpener().open(url)
-            self.background_img = StringIO(bg_src.read())
-            bg_src.close()
-        else:
-            self.background_img = self.default_background
-
-    def pretty_author(self):
-        return self.author.upper()
-
-    def image(self):
-        metr = Metric(self, self.scale)
-        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
-        draw = ImageDraw.Draw(img)
-
-        if self.epoch in self.epoch_colors:
-            epoch_color = self.epoch_colors[self.epoch]
-        else:
-            epoch_color = '#000'
-        draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color)
-
-        if self.background_img:
-            src = Image.open(self.background_img)
-            trg_size = (metr.width - metr.bar_width, metr.height)
-            if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
-                resized = (
-                    trg_size[0],
-                    src.size[1] * trg_size[0] / src.size[0]
-                )
-                cut = (resized[1] - trg_size[1]) / 2
-                src = src.resize(resized, Image.ANTIALIAS)
-                src = src.crop((0, cut, src.size[0], src.size[1] - cut))
-            else:
-                resized = (
-                    src.size[0] * trg_size[1] / src.size[1],
-                    trg_size[1],
-                )
-                cut = (resized[0] - trg_size[0]) / 2
-                src = src.resize(resized, Image.ANTIALIAS)
-                src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
-
-            img.paste(src, (metr.bar_width, 0))
-            del src
-
-        box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
-        author_font = ImageFont.truetype(
-            self.author_font_ttf, metr.author_font_size)
-        box.text(self.pretty_author(),
-                 font=author_font,
-                 line_height=metr.author_lineskip,
-                 color=self.author_color,
-                 shadow_color=self.author_shadow,
-                )
-
-        box.skip(metr.box_above_line)
-        box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
-                fill=self.author_color, width=metr.box_line_width)
-        box.skip(metr.box_below_line)
-
-        title_font = ImageFont.truetype(
-            self.title_font_ttf, metr.title_font_size)
-        box.text(self.pretty_title(),
-                 line_height=metr.title_lineskip,
-                 font=title_font,
-                 color=epoch_color,
-                 shadow_color=self.title_shadow,
-                )
-
-        if self.with_logo:
-            logo = Image.open(get_resource('res/wl-logo-mono.png'))
-            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS)
-            alpha = logo.split()[3]
-            alpha = ImageEnhance.Brightness(alpha).enhance(.75)
-            logo.putalpha(alpha)
-            box.skip(metr.logo_top + logo.size[1])
-
-        box_img = box.image()
-
-        if self.kind == 'Liryka':
-            # top
-            box_top = metr.box_top_margin
-        elif self.kind == 'Epika':
-            # bottom
-            box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
-        else:
-            # center
-            box_top = (metr.height - box_img.size[1]) / 2
-
-        box_left = metr.bar_width + (metr.width - metr.bar_width -
-                        box_img.size[0]) / 2
-        draw.rectangle((box_left, box_top,
-            box_left + box_img.size[0], box_top + box_img.size[1]),
-            fill='#fff')
-        img.paste(box_img, (box_left, box_top), box_img)
-
-        if self.with_logo:
-            img.paste(logo, 
-                (box_left + (box_img.size[0] - logo.size[0]) / 2,
-                    box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo)
-
-        return img
-
-
-class VirtualoCover(Cover):
-    width = 600
-    height = 730
-    author_top = 73
-    title_top = 73
-    logo_bottom = 25
-    logo_width = 250
-
-
-class PrestigioCover(Cover):
-    width = 580
-    height = 783
-    background_img = get_resource('res/cover-prestigio.png')
-
-    author_top = 446
-    author_margin_left = 118
-    author_margin_right = 62
-    author_lineskip = 60
-    author_color = '#fff'
-    author_shadow = '#000'
-    author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
-    author_font_size = 50
-
-    title_top = 0
-    title_margin_left = 118
-    title_margin_right = 62
-    title_lineskip = 60
-    title_color = '#fff'
-    title_shadow = '#000'
-    title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
-    title_font_size = 50
-
-    def pretty_title(self):
-        return u"„%s”" % self.title
-
-
-class BookotekaCover(Cover):
-    width = 2140
-    height = 2733
-    background_img = get_resource('res/cover-bookoteka.png')
-
-    author_top = 480
-    author_margin_left = 307
-    author_margin_right = 233
-    author_lineskip = 156
-    author_color = '#d9d919'
-    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
-    author_font_size = 130
-
-    title_top = 400
-    title_margin_left = 307
-    title_margin_right = 233
-    title_lineskip = 168
-    title_color = '#d9d919'
-    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
-    title_font_size = 140
-
-    format = 'PNG'
-
-
-class GandalfCover(Cover):
-    width = 600
-    height = 730
-    background_img = get_resource('res/cover-gandalf.png')
-    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
-    author_font_size = 30
-    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
-    title_font_size = 40
-    logo_bottom = 25
-    logo_width = 250
-    format = 'PNG'
diff --git a/librarian/document.py b/librarian/document.py

new file mode 100755 (executable)

index 0000000..32148e3
--- /dev/null
+++ b/librarian/document.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from StringIO import StringIO
+from lxml import etree
+from . import SSTNS
+from .core import Section
+from .parser import SSTParser
+
+
+class Document(object):
+    # Do I use meta_context?
+    def __init__(self, edoc, meta_context=None):
+        self.edoc = edoc
+
+        root_elem = edoc.getroot()
+        if meta_context is not None:
+            root_elem.meta_context = meta_context
+
+        if not isinstance(root_elem, Section):
+            if root_elem.tag != SSTNS('section'):
+                raise ValidationError("Invalid root element. Found '%s', should be '%s'" % (
+                    root_elem.tag, SSTNS('section')))
+            else:
+                raise ValidationError("Invalid class of root element. "
+                    "Use librarian.parser.SSTParser.")
+
+    @classmethod
+    def from_string(cls, xml, *args, **kwargs):
+        return cls.from_file(StringIO(xml), *args, **kwargs)
+
+    @classmethod
+    def from_file(cls, xmlfile, *args, **kwargs):
+        # first, prepare for parsing
+        if isinstance(xmlfile, basestring):
+            file = open(xmlfile, 'rb')
+            try:
+                data = file.read()
+            finally:
+                file.close()
+        else:
+            data = xmlfile.read()
+
+        if not isinstance(data, unicode):
+            data = data.decode('utf-8')
+
+        data = data.replace(u'\ufeff', '')
+
+        parser = SSTParser()
+        tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+        tree.xinclude()
+        return cls(tree, *args, **kwargs)
+
+    @property
+    def meta(self):
+        """ Document's metadata is root's metadata. """
+        return self.edoc.getroot().meta
diff --git a/librarian/epub/cover.html b/librarian/epub/cover.html

deleted file mode 100644 (file)

index 784067c..0000000
--- a/librarian/epub/cover.html
+++ /dev/null
@@ -1,13 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-  <head>
-    <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
-    <title>Okładka</title>
-    <style type="text/css"> img { max-width: 100%; } </style>
-  </head>
-  <body style="oeb-column-number: 1;">
-    <div id="cover-image">
-      <img alt="Okładka" />
-    </div>
-  </body>
-</html>
-\ No newline at end of file
diff --git a/librarian/formats/__init__.py b/librarian/formats/__init__.py

new file mode 100644 (file)

index 0000000..cfe4fc2
--- /dev/null
+++ b/librarian/formats/__init__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+class Format(object):
+    """Common base of output formats.
+
+    Keeps the source document in ``self.doc``; build() has to be
+    provided by subclasses.
+    """
+
+    def __init__(self, doc):
+        """Remember the document this format instance works on."""
+        self.doc = doc
+
+    def build(self):
+        """Produce the output; not implemented in the base class."""
+        raise NotImplementedError()
diff --git a/librarian/formats/cover/__init__.py b/librarian/formats/cover/__init__.py

new file mode 100644 (file)

index 0000000..7a787e8
--- /dev/null
+++ b/librarian/formats/cover/__init__.py
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import re
+from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageEnhance
+from StringIO import StringIO
+from librarian import DCNS, URLOpener
+from librarian.output import OutputFile
+from librarian.utils import get_resource
+from librarian.formats import Format
+
+
+class Metric(object):
+    """Proxy which reads attributes of an object, scaled by a factor."""
+
+    def __init__(self, obj, scale):
+        self._obj = obj
+        self._scale = float(scale)
+
+    def __getattr__(self, name):
+        """Return attribute `name` of the wrapped object, scaled.
+
+        Falsy values (None, 0) are passed through untouched, and so
+        is everything when the scale is zero. Any other value is
+        multiplied by the scale and converted back to its own type.
+        """
+        value = getattr(self._obj, name)
+        if not value or not self._scale:
+            return value
+        return type(value)(self._scale * value)
+
+
+class TextBox(object):
+    """Builds an Image out of a series of horizontally centered strings.
+
+    Text is drawn top-down on an internal canvas; ``height`` tracks
+    the current vertical position. Shadows, if requested, go to
+    a separate layer which is blurred when the image is composed.
+    """
+
+    SHADOW_X = 3
+    SHADOW_Y = 3
+    SHADOW_BLUR = 3
+
+    def __init__(self, max_width, max_height, padding_x=None, padding_y=None):
+        """Prepare an empty canvas of the given maximum size.
+
+        Paddings left as None default to the shadow offset plus the
+        shadow blur in the respective direction.
+        """
+        if padding_x is None:
+            padding_x = self.SHADOW_X + self.SHADOW_BLUR
+        if padding_y is None:
+            padding_y = self.SHADOW_Y + self.SHADOW_BLUR
+
+        self.max_width = max_width
+        self.max_text_width = max_width - 2 * padding_x
+        self.padding_y = padding_y
+        # Writing starts just below the top padding.
+        self.height = padding_y
+
+        self.img = Image.new('RGBA', (max_width, max_height))
+        self.draw = ImageDraw.Draw(self.img)
+        # The shadow layer is created on first use in text().
+        self.shadow_img = None
+        self.shadow_draw = None
+
+    def skip(self, height):
+        """Moves the current position down by `height`."""
+        self.height += height
+
+    def text(self, text, color='#000', font=None, line_height=20,
+             shadow_color=None):
+        """Writes centered text, wrapping it on spaces to fit the box."""
+        text = re.sub(r'\s+', ' ', text)
+        if shadow_color and not self.shadow_img:
+            self.shadow_img = Image.new('RGBA', self.img.size)
+            self.shadow_draw = ImageDraw.Draw(self.shadow_img)
+
+        while text:
+            # Start with everything that is left and cut off trailing
+            # words until the line fits.
+            line = text
+            line_width = self.draw.textsize(line, font=font)[0]
+            while line_width > self.max_text_width:
+                pieces = line.rsplit(' ', 1)
+                if len(pieces) == 1:
+                    # A single word which is too wide: keep it as is
+                    # and position it as if it filled the whole width.
+                    line_width = self.max_text_width
+                    break
+                line = pieces[0]
+                line_width = self.draw.textsize(line, font=font)[0]
+            line = line.strip() + ' '
+
+            left = (self.max_width - line_width) / 2
+            top = self.height
+
+            if shadow_color:
+                self.shadow_draw.text(
+                    (left + self.SHADOW_X, top + self.SHADOW_Y),
+                    line, font=font, fill=shadow_color)
+
+            self.draw.text((left, top), line, font=font, fill=color)
+            self.height += line_height
+            # Whatever has been written is removed from the queue.
+            text = text[len(line):]
+
+    def image(self):
+        """Composes and returns the final Image, cut to the used height."""
+        canvas = Image.new(
+            'RGBA', (self.max_width, self.height + self.padding_y))
+        if not self.shadow_img:
+            canvas.paste(self.img, (0, 0))
+            return canvas
+
+        # Blurred shadow first, the text itself on top of it.
+        blurred = self.shadow_img.filter(ImageFilter.BLUR)
+        canvas.paste(blurred, (0, 0), blurred)
+        canvas.paste(self.img, (0, 0), self.img)
+        return canvas
+
+
+class Cover(Format):
+    """Base class for cover images generator.
+
+    The layout is configured with class attributes which subclasses
+    override. All sizes are given for the nominal `width` x `height`
+    canvas and are multiplied by `scale` when the image is drawn.
+    """
+    format_name = u"cover image"
+
+    # Nominal canvas.
+    width = 600
+    height = 800
+    background_color = '#fff'
+    background_img = None
+
+    # Author box: distance from the top, side margins, line height.
+    author_top = 100
+    author_margin_left = 20
+    author_margin_right = 20
+    author_lineskip = 40
+    author_color = '#000'
+    author_shadow = None
+    author_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
+    author_font_size = 30
+
+    # Title box; title_top is counted from the bottom of the author box.
+    title_top = 100
+    title_margin_left = 20
+    title_margin_right = 20
+    title_lineskip = 54
+    title_color = '#000'
+    title_shadow = None
+    title_font_ttf = get_resource('fonts/DejaVuSerif.ttf')
+    title_font_size = 40
+
+    # The logo is drawn only when logo_width is set.
+    logo_bottom = None
+    logo_width = None
+    uses_dc_cover = False
+
+    format = 'JPEG'
+    scale = 1
+
+    exts = {
+        'JPEG': 'jpg',
+        'PNG': 'png',
+        }
+
+    mime_types = {
+        'JPEG': 'image/jpeg',
+        'PNG': 'image/png',
+        }
+
+    def __init__(self, doc, format=None, width=None, height=None):
+        """Read author and title from the document's metadata.
+
+        format -- overrides the class' image format when given.
+        width, height -- requested size; the larger of the two ratios
+            to the nominal size becomes the scale. With neither given
+            the class' default scale is kept.
+        """
+        # Keeps self.doc available, as in every other Format.
+        super(Cover, self).__init__(doc)
+        self.author = ", ".join(doc.meta.get(DCNS('creator')))
+        self.title = doc.meta.title()
+        if format is not None:
+            self.format = format
+        scale = max(float(width or 0) / self.width, float(height or 0) / self.height)
+        if scale:
+            self.scale = scale
+
+    def pretty_author(self):
+        """Allows for decorating author's name."""
+        return self.author
+
+    def pretty_title(self):
+        """Allows for decorating title."""
+        return self.title
+
+    def image(self):
+        """Draws the cover and returns it as a PIL Image."""
+        metr = Metric(self, self.scale)
+        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+
+        if self.background_img:
+            background = Image.open(self.background_img)
+            # The background is used as its own mask.
+            img.paste(background, None, background)
+            del background
+
+        # WL logo
+        if metr.logo_width:
+            logo = Image.open(get_resource('res/wl-logo.png'))
+            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]))
+            # logo_bottom may be left at None; treat it as no margin
+            # instead of failing on the subtraction.
+            logo_bottom = metr.logo_bottom or 0
+            img.paste(logo, ((metr.width - metr.logo_width) / 2,
+                             img.size[1] - logo.size[1] - logo_bottom))
+
+        top = metr.author_top
+        tbox = TextBox(
+            metr.width - metr.author_margin_left - metr.author_margin_right,
+            metr.height - top,
+            )
+
+        author_font = ImageFont.truetype(
+            self.author_font_ttf, metr.author_font_size)
+        tbox.text(self.pretty_author(), self.author_color, author_font,
+            metr.author_lineskip, self.author_shadow)
+        text_img = tbox.image()
+        img.paste(text_img, (metr.author_margin_left, top), text_img)
+
+        # The title goes below the author box.
+        top += text_img.size[1] + metr.title_top
+        tbox = TextBox(
+            metr.width - metr.title_margin_left - metr.title_margin_right,
+            metr.height - top,
+            )
+        title_font = ImageFont.truetype(
+            self.title_font_ttf, metr.title_font_size)
+        tbox.text(self.pretty_title(), self.title_color, title_font,
+            metr.title_lineskip, self.title_shadow)
+        text_img = tbox.image()
+        img.paste(text_img, (metr.title_margin_left, top), text_img)
+
+        return img
+
+    def mime_type(self):
+        """Returns the MIME type matching the image format."""
+        return self.mime_types[self.format]
+
+    @property
+    def format_ext(self):
+        """File extension matching the image format."""
+        return self.exts[self.format]
+
+    def save(self, *args, **kwargs):
+        """Draws the cover and saves it; arguments go to Image.save()."""
+        return self.image().save(format=self.format, quality=95, *args, **kwargs)
+
+    def build(self, *args, **kwargs):
+        """Returns the cover as an OutputFile built from the image data."""
+        imgstr = StringIO()
+        self.save(imgstr, *args, **kwargs)
+        return OutputFile.from_string(imgstr.getvalue())
diff --git a/librarian/formats/cover/partners/__init__.py b/librarian/formats/cover/partners/__init__.py

new file mode 100644 (file)

index 0000000..2d8a663
--- /dev/null
+++ b/librarian/formats/cover/partners/__init__.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from librarian.utils import get_resource
+from .. import Cover
+
+
+class VirtualoCover(Cover):
+    """Cover preset for Virtualo."""
+    format_name = u"Virtualo cover image"
+
+    # Canvas.
+    width = 600
+    height = 730
+
+    # Logo.
+    logo_width = 250
+    logo_bottom = 25
+
+    # Text boxes.
+    author_top = 73
+    title_top = 73
+
+
+class PrestigioCover(Cover):
+    """Cover preset for Prestigio: white text with black shadow
+    over a background image, title put in quotation marks."""
+    format_name = u"Prestigio cover image"
+
+    # Canvas.
+    background_img = get_resource('res/cover-prestigio.png')
+    width = 580
+    height = 783
+
+    # Author box.
+    author_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
+    author_font_size = 50
+    author_lineskip = 60
+    author_color = '#fff'
+    author_shadow = '#000'
+    author_top = 446
+    author_margin_left = 118
+    author_margin_right = 62
+
+    # Title box, directly below the author.
+    title_font_ttf = get_resource('fonts/JunicodeWL-Italic.ttf')
+    title_font_size = 50
+    title_lineskip = 60
+    title_color = '#fff'
+    title_shadow = '#000'
+    title_top = 0
+    title_margin_left = 118
+    title_margin_right = 62
+
+    def pretty_title(self):
+        """Returns the title in Polish quotation marks."""
+        return u"„%s”" % self.title
+
+
+class BookotekaCover(Cover):
+    """Cover preset for Bookoteka: a large PNG over a background image."""
+    format_name = u"Bookoteka cover image"
+    format = 'PNG'
+
+    # Canvas.
+    background_img = get_resource('res/cover-bookoteka.png')
+    width = 2140
+    height = 2733
+
+    # Author box.
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 130
+    author_lineskip = 156
+    author_color = '#d9d919'
+    author_top = 480
+    author_margin_left = 307
+    author_margin_right = 233
+
+    # Title box.
+    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    title_font_size = 140
+    title_lineskip = 168
+    title_color = '#d9d919'
+    title_top = 400
+    title_margin_left = 307
+    title_margin_right = 233
+
+
+class GandalfCover(Cover):
+    """Cover preset for Gandalf: a PNG with background image and logo."""
+    format_name = u"Gandalf cover image"
+    format = 'PNG'
+
+    # Canvas.
+    background_img = get_resource('res/cover-gandalf.png')
+    width = 600
+    height = 730
+
+    # Fonts.
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 30
+    title_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    title_font_size = 40
+
+    # Logo.
+    logo_width = 250
+    logo_bottom = 25
diff --git a/librarian/formats/cover/wolnelektury/__init__.py b/librarian/formats/cover/wolnelektury/__init__.py

new file mode 100644 (file)

index 0000000..4218770
--- /dev/null
+++ b/librarian/formats/cover/wolnelektury/__init__.py
@@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from PIL import Image, ImageFont, ImageDraw
+from librarian.utils import get_resource
+from .. import Cover, Metric, TextBox
+
+
+class WLCover(Cover):
+    """Default Wolne Lektury cover generator.
+
+    Draws a bar in the colour of the book's epoch on the left,
+    a background picture next to it, and a white box with the author
+    and the title, placed depending on the book's kind.
+    """
+    format_name = u"WL-style cover image"
+
+    width = 600
+    height = 833
+    uses_dc_cover = True
+    author_font_ttf = get_resource('fonts/JunicodeWL-Regular.ttf')
+    author_font_size = 20
+    author_lineskip = 30
+    title_font_ttf = get_resource('fonts/DejaVuSerif-Bold.ttf')
+    title_font_size = 30
+    title_lineskip = 40
+    title_box_width = 350
+
+    # Geometry of the text box and of the line between author and title.
+    box_top_margin = 100
+    box_bottom_margin = 100
+    box_padding_y = 20
+    box_above_line = 10
+    box_below_line = 15
+    box_line_left = 75
+    box_line_right = 275
+    box_line_width = 2
+
+    logo_top = 15
+    logo_width = 140
+
+    bar_width = 35
+    background_color = '#444'
+    author_color = '#444'
+    default_background = get_resource('res/cover.png')
+    format = 'JPEG'
+
+    epoch_colors = {
+        u'Starożytność': '#9e3610',
+        u'Średniowiecze': '#564c09',
+        u'Renesans': '#8ca629',
+        u'Barok': '#a6820a',
+        u'Oświecenie': '#f2802e',
+        u'Romantyzm': '#db4b16',
+        u'Pozytywizm': '#961060',
+        u'Modernizm': '#7784e0',
+        u'Dwudziestolecie międzywojenne': '#3044cf',
+        u'Współczesność': '#06393d',
+    }
+
+    def __init__(self, doc, format=None, width=None, height=None, with_logo=False):
+        """Read kind, epoch and the cover picture from the metadata.
+
+        with_logo -- whether the logo is to be put in the text box.
+        The remaining arguments are as in Cover.
+        """
+        # This module imports neither of these names at the top.
+        from StringIO import StringIO
+        from librarian import URLOpener
+
+        super(WLCover, self).__init__(doc, format=format, width=width, height=height)
+        self.kind = doc.meta.get_one('kind')
+        self.epoch = doc.meta.get_one('epoch')
+        self.with_logo = with_logo
+        # TODO
+        cover_urls = doc.meta.get('cover_url')
+        if cover_urls:
+            bg_src = URLOpener().open(cover_urls[0])
+            try:
+                # Keep the picture in memory, it is opened in image().
+                self.background_img = StringIO(bg_src.read())
+            finally:
+                bg_src.close()
+        else:
+            self.background_img = self.default_background
+
+    def pretty_author(self):
+        """Returns the author's name in upper case."""
+        return self.author.upper()
+
+    def image(self):
+        """Draws the cover and returns it as a PIL Image."""
+        metr = Metric(self, self.scale)
+        img = Image.new('RGB', (metr.width, metr.height), self.background_color)
+        draw = ImageDraw.Draw(img)
+
+        # Unknown epochs get a black bar.
+        epoch_color = self.epoch_colors.get(self.epoch, '#000')
+        draw.rectangle((0, 0, metr.bar_width, metr.height), fill=epoch_color)
+
+        if self.background_img:
+            src = Image.open(self.background_img)
+            trg_size = (metr.width - metr.bar_width, metr.height)
+            # Scale the picture to cover the whole target area, then
+            # crop the surplus evenly from both sides.
+            if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
+                resized = (
+                    trg_size[0],
+                    src.size[1] * trg_size[0] / src.size[0]
+                )
+                cut = (resized[1] - trg_size[1]) / 2
+                src = src.resize(resized, Image.ANTIALIAS)
+                src = src.crop((0, cut, src.size[0], src.size[1] - cut))
+            else:
+                resized = (
+                    src.size[0] * trg_size[1] / src.size[1],
+                    trg_size[1],
+                )
+                cut = (resized[0] - trg_size[0]) / 2
+                src = src.resize(resized, Image.ANTIALIAS)
+                src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
+
+            img.paste(src, (metr.bar_width, 0))
+            del src
+
+        box = TextBox(metr.title_box_width, metr.height, padding_y=metr.box_padding_y)
+        author_font = ImageFont.truetype(
+            self.author_font_ttf, metr.author_font_size)
+        box.text(self.pretty_author(),
+                 font=author_font,
+                 line_height=metr.author_lineskip,
+                 color=self.author_color,
+                 shadow_color=self.author_shadow,
+                )
+
+        # A line between the author and the title.
+        box.skip(metr.box_above_line)
+        box.draw.line((metr.box_line_left, box.height, metr.box_line_right, box.height),
+                fill=self.author_color, width=metr.box_line_width)
+        box.skip(metr.box_below_line)
+
+        title_font = ImageFont.truetype(
+            self.title_font_ttf, metr.title_font_size)
+        box.text(self.pretty_title(),
+                 line_height=metr.title_lineskip,
+                 font=title_font,
+                 color=epoch_color,
+                 shadow_color=self.title_shadow,
+                )
+
+        if self.with_logo:
+            # Not imported at the top of this module.
+            from PIL import ImageEnhance
+
+            logo = Image.open(get_resource('res/wl-logo-mono.png'))
+            logo = logo.resize((metr.logo_width, logo.size[1] * metr.logo_width / logo.size[0]), Image.ANTIALIAS)
+            # Dim the logo's alpha channel.
+            alpha = logo.split()[3]
+            alpha = ImageEnhance.Brightness(alpha).enhance(.75)
+            logo.putalpha(alpha)
+            # Reserve room for the logo at the bottom of the box.
+            box.skip(metr.logo_top + logo.size[1])
+
+        box_img = box.image()
+
+        if self.kind == 'Liryka':
+            # top
+            box_top = metr.box_top_margin
+        elif self.kind == 'Epika':
+            # bottom
+            box_top = metr.height - metr.box_bottom_margin - box_img.size[1]
+        else:
+            # center
+            box_top = (metr.height - box_img.size[1]) / 2
+
+        box_left = metr.bar_width + (metr.width - metr.bar_width -
+                        box_img.size[0]) / 2
+        draw.rectangle((box_left, box_top,
+            box_left + box_img.size[0], box_top + box_img.size[1]),
+            fill='#fff')
+        img.paste(box_img, (box_left, box_top), box_img)
+
+        if self.with_logo:
+            img.paste(logo,
+                (box_left + (box_img.size[0] - logo.size[0]) / 2,
+                    box_top + box_img.size[1] - metr.box_padding_y - logo.size[1]), mask=logo)
+
+        return img
diff --git a/librarian/formats/epub/__init__.py b/librarian/formats/epub/__init__.py

new file mode 100644 (file)

index 0000000..f9f7565
--- /dev/null
+++ b/librarian/formats/epub/__init__.py
@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+from copy import deepcopy
+from tempfile import NamedTemporaryFile
+import zipfile
+from lxml import etree
+from librarian import OPFNS, NCXNS, XHTMLNS
+from librarian import core
+from librarian.formats import Format
+from librarian.formats.cover.wolnelektury import WLCover
+from librarian.output import OutputFile
+from librarian.renderers import Register, TreeRenderer, UnknownElement
+from librarian.utils import Context, get_resource, extend_element
+
+
+class EpubFormat(Format):
+    """Builds an EPUB file out of a document.
+
+    The document is rendered with the renderers registered in
+    `renderers`; each rendered part becomes a separate HTML file.
+    """
+    format_name = 'EPUB'
+    format_ext = 'epub'
+
+    cover = WLCover
+    renderers = Register()
+
+    def __init__(self, doc, cover=None, with_fonts=True):
+        """Set up the format.
+
+        cover -- a cover class to use instead of the default one.
+        with_fonts -- stored on the instance; not used by build().
+        """
+        super(EpubFormat, self).__init__(doc)
+        self.with_fonts = with_fonts
+        if cover is not None:
+            self.cover = cover
+
+    def build(self):
+        """Writes the EPUB to a temporary file, returns it as OutputFile."""
+        opf = etree.parse(get_resource('formats/epub/res/content.opf'))
+        manifest = opf.find(OPFNS('manifest'))
+        guide = opf.find(OPFNS('guide'))
+        spine = opf.find(OPFNS('spine'))
+
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
+        epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+
+        # The mimetype entry is added first and stored uncompressed.
+        mime = zipfile.ZipInfo()
+        mime.filename = 'mimetype'
+        mime.compress_type = zipfile.ZIP_STORED
+        mime.extra = ''
+        epub_zip.writestr(mime, 'application/epub+zip')
+        epub_zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
+                       'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+                       '<rootfiles><rootfile full-path="OPS/content.opf" '
+                       'media-type="application/oebps-package+xml" />'
+                       '</rootfiles></container>')
+
+        toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+                               '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+                               '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+                               'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+                               '</navMap></ncx>')
+        nav_map = toc_file[-1]
+
+        if self.cover is not None:
+            cover = self.cover(self.doc)
+            cover_output = cover.build()
+            cover_name = 'cover.%s' % cover.format_ext
+            epub_zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
+            del cover_output
+
+            cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
+            cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
+            epub_zip.writestr('OPS/cover.html', etree.tostring(
+                            cover_tree, method="html", pretty_print=True))
+
+            if cover.uses_dc_cover:
+                # Put the cover's credits on the root element of the
+                # document (`self.doc`; there is no `document` here).
+                root_elem = self.doc.edoc.getroot()
+                if self.doc.meta.get_one('cover_by'):
+                    root_elem.set('data-cover-by', self.doc.meta.get_one('cover_by'))
+                if self.doc.meta.get_one('cover_source'):
+                    root_elem.set('data-cover-source', self.doc.meta.get_one('cover_source'))
+
+            manifest.append(etree.fromstring(
+                '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
+            manifest.append(etree.fromstring(
+                '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
+            spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
+            opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
+            guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
+
+        ctx = Context(format=self)
+        ctx.toc = TOC()
+        ctx.toc_level = 0
+        ctx.footnotes = Footnotes()
+        ctx.part_no = 0
+
+        wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
+        for e in self.render(self.doc.edoc.getroot(), ctx):
+            # Skip empty parts; text of an empty element is None.
+            if not len(e) and not (e.text or '').strip():
+                continue
+            wrap = deepcopy(wrap_tmpl)
+            extend_element(wrap.find('//*[@id="book-text"]'), e)
+
+            partstr = 'part%d' % int(e.get('part_no'))
+            manifest.append(manifest.makeelement(OPFNS('item'), attrib={
+                                 'id': partstr,
+                                 'href': partstr + ".html",
+                                 'media-type': 'application/xhtml+xml',
+                             }))
+            spine.append(spine.makeelement(OPFNS('itemref'), attrib={
+                        'idref': partstr,
+                    }))
+            epub_zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
+
+        if len(ctx.footnotes.output):
+            ctx.toc.add("Przypisy", "footnotes.html")
+            manifest.append(etree.Element(OPFNS('item'),
+                    id='footnotes', href='footnotes.html',
+                    **{'media-type': "application/xhtml+xml"}))
+            # Namespaced the same way as the itemrefs of the parts.
+            spine.append(spine.makeelement(OPFNS('itemref'), attrib={
+                        'idref': 'footnotes',
+                    }))
+            wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
+            extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
+
+            epub_zip.writestr('OPS/footnotes.html', etree.tostring(
+                                wrap, method="html", pretty_print=True))
+
+        epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
+        ctx.toc.render(nav_map)
+        epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
+        epub_zip.close()
+        # ZipFile does not close a file object it was given; the file
+        # itself stays on disk (delete=False) for the OutputFile.
+        output_file.close()
+        return OutputFile.from_filename(output_file.name)
+
+    def render(self, element, ctx):
+        """Renders an element with the renderer registered for it."""
+        return self.renderers.get_for(element).render(element, ctx)
+
+
+# Helpers
+
+class EpubRenderer(TreeRenderer):
+    """ Renders insides as XML in a <_/> container.
+
+    render() is a generator: one element may come out as several
+    containers, each marked with the number of the part it belongs to.
+    """
+    def container(self, ctx):
+        """Returns the parent's (root, inner) pair, with the current
+        part number stored in the root's "part_no" attribute."""
+        # NOTE(review): the parent's container() is called without ctx,
+        # while subclasses call this method with it -- confirm that
+        # TreeRenderer.container() takes no context.
+        root, inner = super(EpubRenderer, self).container()
+        root.set("part_no", str(ctx.part_no))
+        return root, inner
+
+    def render(self, element, ctx):
+        """Yields containers with the rendered element.
+
+        Children are rendered with the context returned by
+        self.subcontext(); containers are created with `ctx` itself.
+        """
+        subctx = self.subcontext(element, ctx)
+        wrapper, inside = self.container(ctx)
+        if element.text:
+            extend_element(inside, self.render_text(element.text, ctx))
+        for child in element:
+            try:
+                child_renderer = ctx.format.renderers.get_for(child)
+            except UnknownElement:
+                # No renderer: the child is skipped, but its tail is
+                # still added in the finally clause below.
+                continue
+            else:
+                if getattr(child_renderer, 'epub_separate', False):
+                    # The child starts a new part: emit what has been
+                    # collected so far, bump the part number, pass the
+                    # child's parts through and open a new container.
+                    yield wrapper
+                    ctx.part_no += 1
+                    for child_part in child_renderer.render(child, subctx):
+                        yield child_part
+                    wrapper, inside = self.container(ctx)
+                else:
+                    child_parts = list(child_renderer.render(child, subctx))
+                    # The first part goes into the current container.
+                    extend_element(inside, child_parts[0])
+                    if len(child_parts) > 1:
+                        # The child got split: emit the current
+                        # container, pass the middle parts through and
+                        # put the last one in a new container.
+                        yield wrapper
+                        for child_part in child_parts[1:-1]:
+                            yield child_part
+                        wrapper, inside = self.container(ctx)
+                        extend_element(inside, child_parts[-1])
+            finally:
+                # The tail goes to whichever container is current now.
+                if child.tail:
+                    extend_element(inside, self.render_text(child.tail, ctx))
+        yield wrapper
+
+
+class Footnotes(object):
+    """Collects footnotes in one element and hands out anchors to them."""
+
+    def __init__(self):
+        self.counter = 0
+        self.output = etree.Element("_")
+
+    def append(self, items):
+        """Adds a footnote made of `items`, returns an anchor for the text.
+
+        The footnote is numbered with the next counter value. A link
+        back to the anchor is put before the footnote's content; it
+        points to the part given by "part_no" of the first item.
+        """
+        self.counter += 1
+        number = self.counter
+        label = "[%d]" % number
+
+        # Link from the footnote back to the place it was used in.
+        backlink = etree.Element("a",
+            href="part%d.html#footnote-anchor-%d" % (
+                int(items[0].get('part_no')), number),
+            id="footnote-%d" % number,
+            style="float:left;margin-right:1em")
+        backlink.text = label
+        backlink.tail = " "
+        self.output.append(backlink)
+
+        for item in items:
+            extend_element(self.output, item)
+
+        # Link from the text to the footnote.
+        anchor = etree.Element("a",
+            id="footnote-anchor-%d" % number,
+            href="footnotes.html#footnote-%d" % number)
+        anchor.text = label
+        return anchor
+
+
+class TOC(object):
+    """A node of the table of contents.
+
+    All nodes of a tree share the counter kept on the root node;
+    every node, the root included, takes the next number from it.
+    """
+
+    def __init__(self, title=None, href="", root=None):
+        """Create a node; without `root` it becomes the root itself.
+
+        `href` may use the "{counter}" placeholder, which is replaced
+        with the node's number.
+        """
+        self.children = []
+        self.title = title
+        if root is None:
+            self.counter = 0
+            root = self
+        self.root = root
+        self.href = href.format(counter=root.counter)
+        self.number = root.counter
+        root.counter += 1
+
+    def add(self, title, href):
+        """Appends a child node to this one and returns it."""
+        child = type(self)(title, href, root=self.root)
+        self.children.append(child)
+        return child
+
+    def render(self, nav_map):
+        """Appends navPoint elements for all descendants to `nav_map`.
+
+        Descendants of every level are added directly to `nav_map`.
+        """
+        for entry in self.children:
+            label_text = etree.Element(NCXNS('text'))
+            label_text.text = entry.title
+            label = etree.Element(NCXNS('navLabel'))
+            label.append(label_text)
+
+            target = etree.Element(NCXNS('content'))
+            target.set('src', entry.href)
+
+            point = etree.Element(NCXNS('navPoint'))
+            point.set('id', 'NavPoint-%d' % entry.number)
+            point.set('playOrder', str(entry.number))
+            point.append(label)
+            point.append(target)
+
+            nav_map.append(point)
+            entry.render(nav_map)
+
+
+# Renderers
+
+class AsideR(EpubRenderer):
+    """Moves an aside to the footnotes, leaving only an anchor in place."""
+
+    def render(self, element, ctx):
+        rendered = list(super(AsideR, self).render(element, ctx))
+        anchor = ctx.footnotes.append(rendered)
+        # The anchor is all that remains in the text.
+        wrapper, inside = self.text_container()
+        inside.append(anchor)
+        yield wrapper
+
+EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
+
+
+class DivR(EpubRenderer):
+    """Renders a div; in an inline context it becomes a block span."""
+
+    def container(self, ctx):
+        root, inner = super(DivR, self).container(ctx)
+        if not getattr(ctx, 'inline', False):
+            return root, inner
+        inner.tag = 'span'
+        inner.set('style', 'display: block;')
+        return root, inner
+
+EpubFormat.renderers.register(core.Div, None, DivR('div'))
+
+
+class HeaderR(EpubRenderer):
+    """Renders a header; its children get an inline context."""
+
+    def subcontext(self, element, ctx):
+        inline_ctx = Context(ctx, inline=True)
+        return inline_ctx
+
+EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
+
+
+class SectionR(EpubRenderer):
+    """Renders a section as a separate part of the book.
+
+    Every section but the root one gets an entry in the table of
+    contents; its subsections are then added below that entry.
+    """
+    epub_separate = True
+
+    def render(self, element, ctx):
+        # Add 'poczatek'?
+        is_root = element.getparent() is None
+        if not is_root:
+            title = element.meta.title()
+            href = 'part%d.html' % ctx.part_no
+            ctx = Context(ctx, toc=ctx.toc.add(title, href))
+        return super(SectionR, self).render(element, ctx)
+
+EpubFormat.renderers.register(core.Section, None, SectionR())
+
+
+class SpanR(EpubRenderer):
+    """Renders a span; nothing is changed in the base renderer."""
+
+EpubFormat.renderers.register(core.Span, None, SpanR('span'))
+
diff --git a/librarian/formats/epub/res/chapter.html b/librarian/formats/epub/res/chapter.html

new file mode 100644 (file)

index 0000000..342d5df
--- /dev/null
+++ b/librarian/formats/epub/res/chapter.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"></meta>
+        <title>
+          WolneLektury.pl
+        </title>
+    </head>
+    <body>
+        <div id="book-text"></div>
+    </body>
+</html>
diff --git a/librarian/formats/epub/res/content.opf b/librarian/formats/epub/res/content.opf

new file mode 100644 (file)

index 0000000..df95a3a
--- /dev/null
+++ b/librarian/formats/epub/res/content.opf
@@ -0,0 +1,24 @@
+<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId" version="2.0">
+    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf">
+        <dc:title></dc:title>
+        <dc:language xsi:type="dcterms:RFC3066"></dc:language>
+        <dc:identifier id="BookId" opf:scheme="URI"></dc:identifier>
+        <dc:subject></dc:subject>
+        <dc:creator></dc:creator>
+        <dc:publisher></dc:publisher>
+        <dc:date xsi:type="dcterms:W3CDTF"></dc:date>
+    </metadata>
+    <manifest>
+        <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
+        <item id="style" href="style.css" media-type="text/css" />
+        <!--item id="titlePage" href="title.html" media-type="application/xhtml+xml" />
+        <item id="logo_wolnelektury" href="logo_wolnelektury.png" media-type="image/png" />
+        <item id="jedenprocent" href="jedenprocent.png" media-type="image/png" /-->
+    </manifest>
+    <spine toc="toc">
+        <!--itemref idref="titlePage" /-->
+    </spine>
+    <guide>
+        <!--reference type="text" title="Początek" href="part1.html" /-->
+    </guide>
+</package>
diff --git a/librarian/formats/epub/res/cover.html b/librarian/formats/epub/res/cover.html

new file mode 100644 (file)

index 0000000..784067c
--- /dev/null
+++ b/librarian/formats/epub/res/cover.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+    <title>Okładka</title>
+    <style type="text/css"> img { max-width: 100%; } </style>
+  </head>
+  <body style="oeb-column-number: 1;">
+    <div id="cover-image">
+      <img alt="Okładka" />
+    </div>
+  </body>
+</html>
+\ No newline at end of file
diff --git a/librarian/formats/epub/res/footnotes.html b/librarian/formats/epub/res/footnotes.html

new file mode 100644 (file)

index 0000000..b3b868c
--- /dev/null
+++ b/librarian/formats/epub/res/footnotes.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"></meta>
+        <title>
+          Przypisy
+        </title>
+    </head>
+    <body>
+        <div id="book-text">
+            <h1>
+              Przypisy:
+            </h1>
+          <div id="footnotes"></div>
+        </div>
+    </body>
+</html>
diff --git a/librarian/formats/html/__init__.py b/librarian/formats/html/__init__.py

new file mode 100644 (file)

index 0000000..ddf2c78
--- /dev/null
+++ b/librarian/formats/html/__init__.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import re
+from lxml import etree
+from librarian.formats import Format
+from librarian.output import OutputFile
+from librarian.renderers import Register, TreeRenderer
+from librarian.utils import Context, get_resource
+from librarian import core
+
+
+class HtmlFormat(Format):
+    """ HTML output format: a bare fragment or, optionally, a standalone page. """
+    format_name = 'HTML'
+    format_ext = 'html'
+
+    renderers = Register()
+
+    def __init__(self, doc, standalone=False):
+        super(HtmlFormat, self).__init__(doc)
+        self.standalone = standalone
+
+    def build(self):
+        """ Fills the HTML template with the rendered document.
+
+        Returns an OutputFile holding the UTF-8 encoded HTML.
+        """
+        if self.standalone:
+            template = get_resource("formats/html/res/html_standalone.html")
+        else:
+            template = get_resource("formats/html/res/html.html")
+        tree = etree.parse(template)
+
+        ctx = Context(format=self)
+        ctx.toc = TOC()
+        ctx.toc_level = 0
+        ctx.footnotes = Footnotes()
+
+        if self.standalone:
+            page_title = u"%s (%s)" % (self.doc.meta.title(), self.doc.meta.author())
+            tree.find('head/title').text = page_title
+
+        # The content has to be rendered first: the renderers are what
+        # fills ctx.toc and ctx.footnotes along the way.
+        content_box = tree.find('.//div[@id="content"]')
+        content_box.extend(self.render(self.doc.edoc.getroot(), ctx))
+        toc_box = tree.find('.//div[@id="toc"]')
+        toc_box.append(ctx.toc.render())
+        footnotes_box = tree.find('.//div[@id="footnotes"]')
+        footnotes_box.extend(ctx.footnotes.output)
+
+        serialized = etree.tostring(tree, encoding='utf-8', method="html")
+        return OutputFile.from_string(serialized)
+
+    def render(self, element, ctx):
+        """ Renders `element` with the renderer registered for it. """
+        renderer = self.renderers.get_for(element)
+        return renderer.render(element, ctx)
+
+
+# Helpers
+
+class NaturalText(TreeRenderer):
+    """ Renderer for running text; glues one-character words to what follows. """
+
+    def render_text(self, text, ctx):
+        """ Renders a piece of text into the container from text_container().
+
+        Every space that follows a single word character which is itself
+        preceded by whitespace is replaced with a non-breaking space
+        entity. Returns the container's root element.
+
+        NOTE(review): a one-character word at the very start of `text`
+        has no whitespace before it and is left untouched -- confirm
+        whether that is intended.
+        """
+        root, inner = self.text_container()
+        # Raw string: \s and \w are regex classes, not string escapes.
+        chunks = re.split(r'(?<=\s\w) ', text)
+        inner.text = chunks[0]
+        for chunk in chunks[1:]:
+            # Each split point becomes an &nbsp; followed by the next chunk.
+            nbsp = etree.Entity("nbsp")
+            nbsp.tail = chunk
+            inner.append(nbsp)
+        return root
+
+
+class LiteralText(TreeRenderer):
+    """ Plain TreeRenderer under its own name; adds no behaviour of its own. """
+
+
+class Footnotes(object):
+    """ Collects footnote bodies and hands out numbered anchors for them. """
+    def __init__(self):
+        self.counter = 0
+        # Throwaway wrapper element; only its children are used by the caller.
+        self.output = etree.Element("_")
+
+    def append(self, item):
+        """ Adds the children of `item` as the next footnote.
+
+        Returns the in-text anchor element linking to the footnote.
+        """
+        self.counter += 1
+        number = self.counter
+        label = "[%d]" % number
+
+        # Back-link placed in front of the footnote body.
+        backlink = etree.Element("a",
+            href="#footnote-anchor-%d" % number,
+            id="footnote-%d" % number,
+            style="float:left;margin-right:1em")
+        backlink.text = label
+        backlink.tail = " "
+        self.output.append(backlink)
+        self.output.extend(item)
+
+        # Forward link to be left in the text flow.
+        anchor = etree.Element("a",
+            id="footnote-anchor-%d" % number,
+            href="#footnote-%d" % number)
+        anchor.text = label
+        return anchor
+
+
+class TOC(object):
+    """ Table of contents: collects numbered entries and renders nested lists. """
+    def __init__(self):
+        self.items = []
+        self.counter = 0
+
+    def add(self, title, level=0):
+        """ Records an entry at the given nesting level.
+
+        Returns the entry's number, as used in the "#sect%d" link targets.
+        """
+        self.counter += 1
+        self.items.append((level, title, self.counter))
+        return self.counter
+
+    def render(self):
+        """ Builds the <ul id="toc"> element with one linked <li> per entry.
+
+        Deeper levels become <ul> lists nested inside the preceding <li>
+        (a <ul> may only have <li> children in HTML). If a level starts
+        without a preceding entry, an empty <li> is created to hold it.
+        """
+        out = etree.Element("ul", id="toc")
+        # lists[n] is the currently open <ul> for nesting level n.
+        lists = [out]
+        for level, title, counter in self.items:
+            while level >= len(lists):
+                current = lists[-1]
+                if len(current):
+                    holder = current[-1]
+                else:
+                    holder = etree.SubElement(current, "li")
+                lists.append(etree.SubElement(holder, "ul"))
+            # Close lists deeper than this entry; never drop the root list.
+            while level < len(lists) - 1 and len(lists) > 1:
+                lists.pop()
+            entry = etree.SubElement(lists[-1], "li")
+            link = etree.SubElement(entry, "a", href="#sect%d" % counter)
+            link.text = title
+        return out
+
+
+# Renderers
+
+HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside'))
+
+class AsideFootnote(NaturalText):
+    """ Sends the aside's rendered content to the footnotes, leaving an anchor. """
+    def render(self, element, ctx):
+        # Render as usual, then hand the result over to the footnote collector.
+        rendered = super(AsideFootnote, self).render(element, ctx)
+        marker = ctx.footnotes.append(rendered)
+        # Only the anchor returned by the collector stays in the text flow.
+        wrapper, slot = self.container()
+        slot.append(marker)
+        return wrapper
+HtmlFormat.renderers.register(core.Aside, 'footnote', AsideFootnote())
+
+       
+# Headers are rendered with a NaturalText('h1') renderer.
+HtmlFormat.renderers.register(core.Header, None, NaturalText('h1'))
+
+
+# Div renderers. The second argument is None for the generic renderer and
+# a name for the specialised ones -- presumably matched against the
+# element's class by Register.get_for(); confirm in librarian.renderers.
+HtmlFormat.renderers.register(core.Div, None, NaturalText('div'))
+HtmlFormat.renderers.register(core.Div, 'item', NaturalText('li'))
+HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
+HtmlFormat.renderers.register(core.Div, 'p', NaturalText('p'))
+
+
+class Section(NaturalText):
+    """ Section renderer: lists the section in the TOC and gives it an id. """
+    def subcontext(self, element, ctx):
+        # Derived context with the TOC level one higher than the current one.
+        deeper = ctx.toc_level + 1
+        return Context(ctx, toc_level=deeper)
+
+    def render(self, element, ctx):
+        # The number handed out by the TOC doubles as the link target.
+        number = ctx.toc.add(element.meta.title(), ctx.toc_level)
+        rendered = super(Section, self).render(element, ctx)
+        rendered[0].set("id", "sect%d" % number)
+        return rendered
+HtmlFormat.renderers.register(core.Section, None, Section('section'))
+
+
+HtmlFormat.renderers.register(core.Span, None, NaturalText('span'))
+HtmlFormat.renderers.register(core.Span, 'cite', NaturalText('cite'))
+HtmlFormat.renderers.register(core.Span, 'cite.code', LiteralText('code'))
+HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em'))
+
+class SpanUri(LiteralText):
+    """ Sets href of the first rendered element to the span's own text. """
+    def render(self, element, ctx):
+        rendered = super(SpanUri, self).render(element, ctx)
+        link = rendered[0]
+        link.attrib['href'] = element.text
+        return rendered
+HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a'))
diff --git a/librarian/formats/html/res/html.html b/librarian/formats/html/res/html.html

new file mode 100644 (file)

index 0000000..a6e6314
--- /dev/null
+++ b/librarian/formats/html/res/html.html
@@ -0,0 +1,8 @@
+<div>
+    <div id="toc">
+    </div>
+    <div id="content">
+    </div>
+    <div id="footnotes">
+    </div>
+</div>
diff --git a/librarian/formats/html/res/html_standalone.html b/librarian/formats/html/res/html_standalone.html

new file mode 100644 (file)

index 0000000..a6b6213
--- /dev/null
+++ b/librarian/formats/html/res/html_standalone.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title></title>
+    <meta charset="UTF-8" />
+</head>
+<body>
+    <div id="toc">
+    </div>
+    <div id="content">
+    </div>
+    <div id="footnotes">
+    </div>
+</body>
+</html>
diff --git a/librarian/functions.py b/librarian/functions.py

deleted file mode 100644 (file)

index 523b3d5..0000000
--- a/librarian/functions.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-from lxml import etree
-import re
-
-from librarian.dcparser import Person
-
-def _register_function(f):
-    """ Register extension function with lxml """
-    ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
-    ns[f.__name__] = f
-
-
-def reg_substitute_entities():
-    ENTITY_SUBSTITUTIONS = [
-        (u'---', u'—'),
-        (u'--', u'–'),
-        (u'...', u'…'),
-        (u',,', u'„'),
-        (u'"', u'”'),
-    ]
-
-    def substitute_entities(context, text):
-        """XPath extension function converting all entites in passed text."""
-        if isinstance(text, list):
-            text = ''.join(text)
-        for entity, substitutution in ENTITY_SUBSTITUTIONS:
-            text = text.replace(entity, substitutution)
-        return text
-
-    _register_function(substitute_entities)
-
-
-def reg_strip():
-    def strip(context, text):
-        """Remove unneeded whitespace from beginning and end"""
-        if isinstance(text, list):
-            text = ''.join(text)
-        return re.sub(r'\s+', ' ', text).strip()
-    _register_function(strip)
-
-
-def reg_starts_white():
-    def starts_white(context, text):
-        if isinstance(text, list):
-            text = ''.join(text)
-        if not text:
-            return False
-        return text[0].isspace()
-    _register_function(starts_white)
-
-
-def reg_ends_white():
-    def ends_white(context, text):
-        if isinstance(text, list):
-            text = ''.join(text)
-        if not text:
-            return False
-        return text[-1].isspace()
-    _register_function(ends_white)
-
-
-def reg_wrap_words():
-    def wrap_words(context, text, wrapping):
-        """XPath extension function automatically wrapping words in passed text"""
-        if isinstance(text, list):
-            text = ''.join(text)
-        if not wrapping:
-            return text
-
-        words = re.split(r'\s', text)
-
-        line_length = 0
-        lines = [[]]
-        for word in words:
-            line_length += len(word) + 1
-            if line_length > wrapping:
-                # Max line length was exceeded. We create new line
-                lines.append([])
-                line_length = len(word)
-            lines[-1].append(word)
-        return '\n'.join(' '.join(line) for line in lines)
-    _register_function(wrap_words)
-
-
-def reg_person_name():
-    def person_name(context, text):
-        """ Converts "Name, Forename" to "Forename Name" """
-        if isinstance(text, list):
-            text = ''.join(text)
-        return Person.from_text(text).readable()
-    _register_function(person_name)
-
-
-def reg_texcommand():
-    def texcommand(context, text):
-        """Remove non-letters"""
-        if isinstance(text, list):
-            text = ''.join(text)
-        return re.sub(r'[^a-zA-Z]', '', text).strip()
-    _register_function(texcommand)
-
-
diff --git a/librarian/html.py b/librarian/html.py

deleted file mode 100644 (file)

index c1a5e5b..0000000
--- a/librarian/html.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import os
-import cStringIO
-import copy
-
-from lxml import etree
-from librarian import XHTMLNS, ParseError, OutputFile
-from librarian import functions
-
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-
-functions.reg_substitute_entities()
-functions.reg_person_name()
-
-STYLESHEETS = {
-    'legacy': 'xslt/book2html.xslt',
-    'full': 'xslt/wl2html_full.xslt',
-    'partial': 'xslt/wl2html_partial.xslt'
-}
-
-def get_stylesheet(name):
-    return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
-
-def html_has_content(text):
-    return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
-
-def transform(wldoc, stylesheet='legacy', options=None, flags=None):
-    """Transforms the WL document to XHTML.
-
-    If output_filename is None, returns an XML,
-    otherwise returns True if file has been written,False if it hasn't.
-    File won't be written if it has no content.
-    """
-    # Parse XSLT
-    try:
-        style_filename = get_stylesheet(stylesheet)
-        style = etree.parse(style_filename)
-
-        document = copy.deepcopy(wldoc)
-        del wldoc
-        document.swap_endlines()
-
-        if flags:
-            for flag in flags:
-                document.edoc.getroot().set(flag, 'yes')
-
-        document.clean_ed_note()
-
-        if not options:
-            options = {}
-        result = document.transform(style, **options)
-        del document # no longer needed large object :)
-
-        if html_has_content(result):
-            add_anchors(result.getroot())
-            add_table_of_contents(result.getroot())
-
-            return OutputFile.from_string(etree.tostring(result, method='html',
-                xml_declaration=False, pretty_print=True, encoding='utf-8'))
-        else:
-            return None
-    except KeyError:
-        raise ValueError("'%s' is not a valid stylesheet.")
-    except (XMLSyntaxError, XSLTApplyError), e:
-        raise ParseError(e)
-
-class Fragment(object):
-    def __init__(self, id, themes):
-        super(Fragment, self).__init__()
-        self.id = id
-        self.themes = themes
-        self.events = []
-
-    def append(self, event, element):
-        self.events.append((event, element))
-
-    def closed_events(self):
-        stack = []
-        for event, element in self.events:
-            if event == 'start':
-                stack.append(('end', element))
-            elif event == 'end':
-                try:
-                    stack.pop()
-                except IndexError:
-                    print 'CLOSED NON-OPEN TAG:', element
-
-        stack.reverse()
-        return self.events + stack
-
-    def to_string(self):
-        result = []
-        for event, element in self.closed_events():
-            if event == 'start':
-                result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
-                if element.text:
-                    result.append(element.text)
-            elif event == 'end':
-                result.append(u'</%s>' % element.tag)
-                if element.tail:
-                    result.append(element.tail)
-            else:
-                result.append(element)
-
-        return ''.join(result)
-
-    def __unicode__(self):
-        return self.to_string()
-
-
-def extract_fragments(input_filename):
-    """Extracts theme fragments from input_filename."""
-    open_fragments = {}
-    closed_fragments = {}
-
-    # iterparse would die on a HTML document
-    parser = etree.HTMLParser(encoding='utf-8')
-    buf = cStringIO.StringIO()
-    buf.write(etree.tostring(etree.parse(input_filename, parser).getroot()[0][0], encoding='utf-8'))
-    buf.seek(0)
-
-    for event, element in etree.iterparse(buf, events=('start', 'end')):
-        # Process begin and end elements
-        if element.get('class', '') in ('theme-begin', 'theme-end'):
-            if not event == 'end': continue # Process elements only once, on end event
-
-            # Open new fragment
-            if element.get('class', '') == 'theme-begin':
-                fragment = Fragment(id=element.get('fid'), themes=element.text)
-
-                # Append parents
-                if element.getparent().get('id', None) != 'book-text':
-                    parents = [element.getparent()]
-                    while parents[-1].getparent().get('id', None) != 'book-text':
-                        parents.append(parents[-1].getparent())
-
-                    parents.reverse()
-                    for parent in parents:
-                        fragment.append('start', parent)
-
-                open_fragments[fragment.id] = fragment
-
-            # Close existing fragment
-            else:
-                try:
-                    fragment = open_fragments[element.get('fid')]
-                except KeyError:
-                    print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
-                else:
-                    closed_fragments[fragment.id] = fragment
-                    del open_fragments[fragment.id]
-
-            # Append element tail to lost_text (we don't want to lose any text)
-            if element.tail:
-                for fragment_id in open_fragments:
-                    open_fragments[fragment_id].append('text', element.tail)
-
-
-        # Process all elements except begin and end
-        else:
-            # Omit annotation tags
-            if (len(element.get('name', '')) or 
-                    element.get('class', '') in ('annotation', 'anchor')):
-                if event == 'end' and element.tail:
-                    for fragment_id in open_fragments:
-                        open_fragments[fragment_id].append('text', element.tail)
-            else:
-                for fragment_id in open_fragments:
-                    open_fragments[fragment_id].append(event, copy.copy(element))
-
-    return closed_fragments, open_fragments
-
-
-def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
-    if with_link:
-        if link_text is None:
-            link_text = prefix
-        anchor = etree.Element('a', href='#%s' % prefix)
-        anchor.set('class', 'anchor')
-        anchor.text = unicode(link_text)
-        if element.text:
-            anchor.tail = element.text
-            element.text = u''
-        element.insert(0, anchor)
-
-    if with_target:
-        anchor_target = etree.Element('a', name='%s' % prefix)
-        anchor_target.set('class', 'target')
-        anchor_target.text = u' '
-        if element.text:
-            anchor_target.tail = element.text
-            element.text = u''
-        element.insert(0, anchor_target)
-
-
-def any_ancestor(element, test):
-    for ancestor in element.iterancestors():
-        if test(ancestor):
-            return True
-    return False
-
-
-def add_anchors(root):
-    counter = 1
-    for element in root.iterdescendants():
-        if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
-        or e.get('id') == 'nota_red'
-        or e.tag == 'blockquote'):
-            continue
-
-        if element.tag == 'p' and 'verse' in element.get('class', ''):
-            if counter == 1 or counter % 5 == 0:
-                add_anchor(element, "f%d" % counter, link_text=counter)
-            counter += 1
-        elif 'paragraph' in element.get('class', ''):
-            add_anchor(element, "f%d" % counter, link_text=counter)
-            counter += 1
-
-
-def raw_printable_text(element):
-    working = copy.deepcopy(element)
-    for e in working.findall('a'):
-        if e.get('class') == 'annotation':
-            e.text = ''
-    return etree.tostring(working, method='text', encoding=unicode).strip()
-
-
-def add_table_of_contents(root):
-    sections = []
-    counter = 1
-    for element in root.iterdescendants():
-        if element.tag in ('h2', 'h3'):
-            if any_ancestor(element, lambda e: e.get('id') in ('footnotes', 'nota_red') or e.get('class') in ('person-list',)):
-                continue
-
-            element_text = raw_printable_text(element)
-            if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
-                sections[-1][3].append((counter, element.tag, element_text, []))
-            else:
-                sections.append((counter, element.tag, element_text, []))
-            add_anchor(element, "s%d" % counter, with_link=False)
-            counter += 1
-
-    toc = etree.Element('div')
-    toc.set('id', 'toc')
-    toc_header = etree.SubElement(toc, 'h2')
-    toc_header.text = u'Spis treści'
-    toc_list = etree.SubElement(toc, 'ol')
-
-    for n, section, text, subsections in sections:
-        section_element = etree.SubElement(toc_list, 'li')
-        add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
-
-        if len(subsections):
-            subsection_list = etree.SubElement(section_element, 'ol')
-            for n, subsection, text, _ in subsections:
-                subsection_element = etree.SubElement(subsection_list, 'li')
-                add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
-
-    root.insert(0, toc)
-
-
-def extract_annotations(html_path):
-    """For each annotation, yields a tuple: anchor, text, html."""
-    parser = etree.HTMLParser(encoding='utf-8')
-    tree = etree.parse(html_path, parser)
-    footnotes = tree.find('//*[@id="footnotes"]')
-    if footnotes is not None:
-        for footnote in footnotes.findall('div'):
-            anchor = footnote.find('a[@name]').get('name')
-            del footnote[:2]
-            text_str = etree.tostring(footnote, method='text', encoding='utf-8').strip()
-            html_str = etree.tostring(footnote, method='html', encoding='utf-8')
-            yield anchor, text_str, html_str
-
diff --git a/librarian/meta.py b/librarian/meta.py

new file mode 100755 (executable)

index 0000000..5b50d92
--- /dev/null
+++ b/librarian/meta.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from librarian import DCNS, SSTNS
+
+
+def text_value(meta):
+    """ Returns the text representation of a metadata element.
+
+    Elements providing their own text_value() method are asked for it;
+    for all others the plain element text is returned.
+
+    >>> p = Person()
+    >>> p.text = u"Czajka, Radek"
+    >>> print text_value(p)
+    Radek Czajka
+
+    """
+    if not hasattr(meta, 'text_value'):
+        return meta.text
+    return meta.text_value()
+
+
+class Metadata(etree.ElementBase):
+    """ Metadata element; describes its parent or an element bound with about(). """
+
+    @classmethod
+    def about(cls, element):
+        """ Creates a free-standing metadata object describing `element`. """
+        meta = cls()
+        meta._about = element
+        return meta
+
+    def get_about(self):
+        """ Returns the element this metadata describes.
+
+        That is the element bound by about(), or else the parent element.
+        """
+        if hasattr(self, '_about'):
+            return self._about
+        else:
+            return self.getparent()
+
+    def get(self, key, inherit=True):
+        """ Finds metadata by its element name.
+
+        Returns a list with the text values of all children named `key`.
+        If there are none and `inherit` is true, the lookup goes on in the
+        metadata of the described element's parent or, when there is no
+        parent, in the described element's `meta_context` (if it has one).
+        Returns an empty list if nothing is found.
+
+        NOTE: this takes the place of lxml's attribute getter, Element.get().
+        """
+        values = self.findall(key)
+        if values:
+            return [text_value(v) for v in values]
+        elif inherit and self.get_about().getparent() is not None:
+            return self.get_about().getparent().meta.get(key)
+        elif inherit and hasattr(self.get_about(), 'meta_context'):
+            return self.get_about().meta_context.get(key)
+        else:
+            return []
+
+    def get_one(self, *args, **kwargs):
+        """ Like get(), but returns the first value only, or None. """
+        values = self.get(*args, **kwargs)
+        if values:
+            return values[0]
+        else:
+            return None
+
+    # Specials.
+
+    def author(self):
+        """ Returns the first dc:creator as unicode, or u"" if there is none. """
+        try:
+            return unicode(self.get(DCNS('creator'))[0])
+        except IndexError:
+            return u""
+
+    def slug(self):
+        """ Returns the last path segment of the first dc:identifier, or None. """
+        try:
+            identifier = self.get(DCNS('identifier'))[0]
+        except IndexError:
+            return None
+        if identifier is None:
+            # Identifier element without any text.
+            return None
+        if hasattr(identifier, 'slug'):
+            # Values coming from a meta_context may still know their slug.
+            return identifier.slug()
+        # get() returns text values, i.e. plain strings for Identifier
+        # elements, so the slug is cut out of the text right here.
+        return identifier.rstrip('/').rsplit('/', 1)[-1]
+
+    def title(self):
+        """ Returns own dc:title or, failing that, the header text, or u"". """
+        dc_title = self.get(DCNS('title'), inherit=False)
+        if dc_title:
+            return unicode(dc_title[0])
+        else:
+            header = self.get_about().find(SSTNS('header'))
+            if header is not None:
+                # FIXME: This should be a simple text representation
+                # A header starting with a child element has no .text;
+                # return an empty string then, like the branch below.
+                return header.text or u""
+            else:
+                return u""
+
+
+class MetaItem(etree.ElementBase):
+    """ Base class for the specialised metadata item elements below. """
+
+
+class Person(MetaItem):
+    """ Person's name, stored as "Surname, Forenames". """
+    def text_value(self):
+        # Split on the last comma only, then put the trailing part first.
+        parts = self.text.rsplit(u',', 1)
+        parts.reverse()
+        return u" ".join(part.strip() for part in parts)
+
+
+class Identifier(MetaItem):
+    """ Identifier whose text is a slash-separated path or URI. """
+    def slug(self):
+        # Last path segment, ignoring any trailing slashes.
+        trimmed = self.text.rstrip('/')
+        return trimmed.rpartition('/')[2]
diff --git a/librarian/output.py b/librarian/output.py

new file mode 100755 (executable)

index 0000000..a11f697
--- /dev/null
+++ b/librarian/output.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+import shutil
+
+
+class OutputFile(object):
+    """Represents a file returned by one of the converters.
+
+    The contents are held either in memory (from_string) or in a file
+    on disk (from_filename); the getters convert between the two.
+    """
+
+    _string = None
+    _filename = None
+
+    def __del__(self):
+        # The backing file goes away together with the object. Note that
+        # this also removes a file handed in through from_filename().
+        if self._filename:
+            os.unlink(self._filename)
+
+    def __nonzero__(self):
+        return self._string is not None or self._filename is not None
+
+    @classmethod
+    def from_string(cls, string):
+        """Converter returns contents of a file as a string."""
+
+        instance = cls()
+        instance._string = string
+        return instance
+
+    @classmethod
+    def from_filename(cls, filename):
+        """Converter returns contents of a file as a named file."""
+
+        instance = cls()
+        instance._filename = filename
+        return instance
+
+    def get_string(self):
+        """Get file's contents as a string.
+
+        Returns None if the object holds neither a string nor a file.
+        """
+
+        if self._filename is not None:
+            # Binary mode: converter output (epub, pdf...) must come back
+            # byte for byte, with no newline translation.
+            with open(self._filename, 'rb') as f:
+                return f.read()
+        else:
+            return self._string
+
+    def get_file(self):
+        """Get file as a file-like object.
+
+        Returns None if the object holds neither a string nor a file.
+        The caller is responsible for closing the returned file.
+        """
+
+        if self._string is not None:
+            from StringIO import StringIO
+            return StringIO(self._string)
+        elif self._filename is not None:
+            # Binary mode, for the same reason as in get_string().
+            return open(self._filename, 'rb')
+
+    def get_filename(self):
+        """Get file as a fs path.
+
+        In-memory contents are written out to a temporary file first;
+        that file is then kept as the backing file of this object.
+        Returns None if the object holds neither a string nor a file.
+        """
+
+        if self._filename is not None:
+            return self._filename
+        elif self._string is not None:
+            from tempfile import NamedTemporaryFile
+            # delete=False: the file has to outlive close(); __del__ removes it.
+            temp = NamedTemporaryFile(prefix='librarian-', delete=False)
+            temp.write(self._string)
+            temp.close()
+            self._filename = temp.name
+            return self._filename
+        else:
+            return None
+
+    def save_as(self, path):
+        """Save file to a path. Create directories, if necessary."""
+
+        dirname = os.path.dirname(os.path.abspath(path))
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        shutil.copy(self.get_filename(), path)
diff --git a/librarian/parser.py b/librarian/parser.py

index a9e8c65..a0b8a7f 100644 (file)
--- a/librarian/parser.py
+++ b/librarian/parser.py
@@ -3,226 +3,28 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
-from librarian import RDFNS
-from librarian.cover import WLCover
-from librarian import dcparser
-
-from xml.parsers.expat import ExpatError
  from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-
-import os
-import re
-from StringIO import StringIO
-
-class WLDocument(object):
-    LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
-    provider = None
-
-    def __init__(self, edoc, parse_dublincore=True, provider=None, 
-                    strict=False, meta_fallbacks=None):
-        self.edoc = edoc
-        self.provider = provider
-
-        root_elem = edoc.getroot()
-
-        dc_path = './/' + RDFNS('RDF')
-
-        if root_elem.tag != 'utwor':
-            raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag)
-
-        if parse_dublincore:
-            self.rdf_elem = root_elem.find(dc_path)
-
-            if self.rdf_elem is None:
-                raise NoDublinCore('Document has no DublinCore - which is required.')
-
-            self.book_info = dcparser.BookInfo.from_element(
-                    self.rdf_elem, fallbacks=meta_fallbacks, strict=strict)
-        else:
-            self.book_info = None
-
-    @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
-
-    @classmethod
-    def from_file(cls, xmlfile, *args, **kwargs):
-
-        # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
-            file = open(xmlfile, 'rb')
-            try:
-                data = file.read()
-            finally:
-                file.close()
-        else:
-            data = xmlfile.read()
-
-        if not isinstance(data, unicode):
-            data = data.decode('utf-8')
-
-        data = data.replace(u'\ufeff', '')
-
-        try:
-            parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-
-            return cls(tree, *args, **kwargs)
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
-            raise ParseError(e)
-
-    def swap_endlines(self):
-        """Converts line breaks in stanzas into <br/> tags."""
-        # only swap inside stanzas
-        for elem in self.edoc.iter('strofa'):
-            for child in list(elem):
-                if child.tail:
-                    chunks = self.LINE_SWAP_EXPR.split(child.tail)
-                    ins_index = elem.index(child) + 1
-                    while len(chunks) > 1:
-                        ins = etree.Element('br')
-                        ins.tail = chunks.pop()
-                        elem.insert(ins_index, ins)
-                    child.tail = chunks.pop(0)
-            if elem.text:
-                chunks = self.LINE_SWAP_EXPR.split(elem.text)
-                while len(chunks) > 1:
-                    ins = etree.Element('br')
-                    ins.tail = chunks.pop()
-                    elem.insert(0, ins)
-                elem.text = chunks.pop(0)
-
-    def parts(self):
-        if self.provider is None:
-            raise NoProvider('No document provider supplied.')
-        if self.book_info is None:
-            raise NoDublinCore('No Dublin Core in document.')
-        for part_uri in self.book_info.parts:
-            yield self.from_file(self.provider.by_uri(part_uri),
-                    provider=self.provider)
-
-    def chunk(self, path):
-        # convert the path to XPath
-        expr = self.path_to_xpath(path)
-        elems = self.edoc.xpath(expr)
-
-        if len(elems) == 0:
-            return None
-        else:
-            return elems[0]
-
-    def path_to_xpath(self, path):
-        parts = []
-
-        for part in path.split('/'):
-            match = re.match(r'([^\[]+)\[(\d+)\]', part)
-            if not match:
-                parts.append(part)
-            else:
-                tag, n = match.groups()
-                parts.append("*[%d][name() = '%s']" % (int(n)+1, tag) )
-
-        if parts[0] == '.':
-            parts[0] = ''
-
-        return '/'.join(parts)
-
-    def transform(self, stylesheet, **options):
-        return self.edoc.xslt(stylesheet, **options)
-
-    def update_dc(self):
-        if self.book_info:
-            parent = self.rdf_elem.getparent()
-            parent.replace( self.rdf_elem, self.book_info.to_etree(parent) )
-
-    def serialize(self):
-        self.update_dc()
-        return etree.tostring(self.edoc, encoding=unicode, pretty_print=True)
-
-    def merge_chunks(self, chunk_dict):
-        unmerged = []
-
-        for key, data in chunk_dict.iteritems():
-            try:
-                xpath = self.path_to_xpath(key)
-                node = self.edoc.xpath(xpath)[0]
-                repl = etree.fromstring(u"<%s>%s</%s>" %(node.tag, data, node.tag) )
-                node.getparent().replace(node, repl)
-            except Exception, e:
-                unmerged.append( repr( (key, xpath, e) ) )
-
-        return unmerged
-
-    def clean_ed_note(self):
-        """ deletes forbidden tags from nota_red """
-
-        for node in self.edoc.xpath('|'.join('//nota_red//%s' % tag for tag in
-                    ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))):
-            tail = node.tail
-            node.clear()
-            node.tag = 'span'
-            node.tail = tail
-
-    def editors(self):
-        """Returns a set of all editors for book and its children.
-
-        :returns: set of dcparser.Person objects
-        """
-        if self.book_info is None:
-            raise NoDublinCore('No Dublin Core in document.')
-        persons = set(self.book_info.editors +
-                        self.book_info.technical_editors)
-        for child in self.parts():
-            persons.update(child.editors())
-        if None in persons:
-            persons.remove(None)
-        return persons
-
-    # Converters
-
-    def as_html(self, *args, **kwargs):
-        from librarian import html
-        return html.transform(self, *args, **kwargs)
-
-    def as_text(self, *args, **kwargs):
-        from librarian import text
-        return text.transform(self, *args, **kwargs)
-
-    def as_epub(self, *args, **kwargs):
-        from librarian import epub
-        return epub.transform(self, *args, **kwargs)
-
-    def as_pdf(self, *args, **kwargs):
-        from librarian import pdf
-        return pdf.transform(self, *args, **kwargs)
-
-    def as_mobi(self, *args, **kwargs):
-        from librarian import mobi
-        return mobi.transform(self, *args, **kwargs)
-
-    def as_fb2(self, *args, **kwargs):
-        from librarian import fb2
-        return fb2.transform(self, *args, **kwargs)
-
-    def as_cover(self, cover_class=None, *args, **kwargs):
-        if cover_class is None:
-            cover_class = WLCover
-        return cover_class(self.book_info, *args, **kwargs).output_file()
-
-    def save_output_file(self, output_file, output_path=None,
-            output_dir_path=None, make_author_dir=False, ext=None):
-        if output_dir_path:
-            save_path = output_dir_path
-            if make_author_dir:
-                save_path = os.path.join(save_path,
-                        unicode(self.book_info.author).encode('utf-8'))
-            save_path = os.path.join(save_path,
-                                self.book_info.uri.slug)
-            if ext:
-                save_path += '.%s' % ext
-        else:
-            save_path = output_path
-
-        output_file.save_as(save_path)
+from . import DCNS, SSTNS
+from . import core, meta
+
+
+class SSTParser(etree.XMLParser):
+    """ XML parser building SST and Dublin Core tags as their own classes. """
+    def __init__(self):
+        super(SSTParser, self).__init__(remove_blank_text=False)
+        lookup = etree.ElementNamespaceClassLookup()
+        self.set_element_class_lookup(lookup)
+
+        # Core language tags, plus the metadata container.
+        sst_classes = (
+            ('aside', core.Aside),
+            ('div', core.Div),
+            ('header', core.Header),
+            ('section', core.Section),
+            ('span', core.Span),
+            ('metadata', meta.Metadata),
+        )
+        sst_ns = lookup.get_namespace(SSTNS.uri)
+        for tag, element_class in sst_classes:
+            sst_ns[tag] = element_class
+
+        # Dublin Core items with behaviour of their own.
+        dc_classes = (
+            ('creator', meta.Person),
+            ('identifier', meta.Identifier),
+        )
+        dc_ns = lookup.get_namespace(DCNS.uri)
+        for tag, element_class in dc_classes:
+            dc_ns[tag] = element_class
diff --git a/librarian/picture.py b/librarian/picture.py

deleted file mode 100644 (file)

index ee3c61d..0000000
--- a/librarian/picture.py
+++ /dev/null
@@ -1,173 +0,0 @@
-
-from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
-from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
-from xml.parsers.expat import ExpatError
-from os import path
-from StringIO import StringIO
-from lxml import etree
-from lxml.etree import (XMLSyntaxError, XSLTApplyError)
-import re
-
-
-class WLPictureURI(WLURI):
-    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
-            '(?P<slug>[-a-z0-9]+)/?$')
-
-    @classmethod
-    def from_slug(cls, slug):
-        uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
-        return cls(uri)
-
-def as_wlpictureuri_strict(text):
-    return WLPictureURI.strict(text)
-
-
-class PictureInfo(WorkInfo):
-    """
-    Dublin core metadata for a picture
-    """
-    FIELDS = (
-        Field(DCNS('language'), 'language', required=False),
-        Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
-        Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
-
-        Field(DCNS('format.dimensions'), 'dimensions', required=False),
-        Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
-        Field(DCNS('description.medium'), 'medium', required=False),
-        Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
-        Field(DCNS('format'), 'mime_type', required=False),
-        Field(DCNS('identifier.url'), 'url', WLPictureURI,
-            strict=as_wlpictureuri_strict),
-        )
-
-
-class ImageStore(object):
-    EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
-            'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
-            'aiff', 'wbmp', 'xbm']
-    MIME = ['image/gif', 'image/jpeg', 'image/png',
-            'application/x-shockwave-flash', 'image/psd', 'image/bmp',
-            'image/tiff', 'image/tiff', 'application/octet-stream',
-            'image/jp2', 'application/octet-stream', 'application/octet-stream',
-            'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
-
-    def __init__(self, dir_):
-        self.dir = dir_
-        return super(ImageStore, self).__init__()
-
-    def path(self, slug, mime_type):
-        """
-        Finds file by slug and mime type in our iamge store.
-        Returns a file objects (perhaps should return a filename?)
-        """
-        try:
-            i = self.MIME.index(mime_type)
-        except ValueError:
-            err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type))
-            err.slug = slug
-            err.mime_type = mime_type
-            raise err
-        ext = self.EXT[i]
-        # add some common extensions tiff->tif, jpeg->jpg
-        return path.join(self.dir, slug + '.' + ext)
-
-
-class WLPicture(object):
-    def __init__(self, edoc, parse_dublincore=True, image_store=None):
-        self.edoc = edoc
-        self.image_store = image_store
-
-        root_elem = edoc.getroot()
-
-        dc_path = './/' + RDFNS('RDF')
-
-        if root_elem.tag != 'picture':
-            raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag)
-
-        if parse_dublincore:
-            self.rdf_elem = root_elem.find(dc_path)
-
-            if self.rdf_elem is None:
-                raise NoDublinCore('Document has no DublinCore - which is required.')
-
-            self.picture_info = PictureInfo.from_element(self.rdf_elem)
-        else:
-            self.picture_info = None
-
-    @classmethod
-    def from_string(cls, xml, *args, **kwargs):
-        return cls.from_file(StringIO(xml), *args, **kwargs)
-
-    @classmethod
-    def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
-
-        # first, prepare for parsing
-        if isinstance(xmlfile, basestring):
-            file = open(xmlfile, 'rb')
-            try:
-                data = file.read()
-            finally:
-                file.close()
-        else:
-            data = xmlfile.read()
-
-        if not isinstance(data, unicode):
-            data = data.decode('utf-8')
-
-        data = data.replace(u'\ufeff', '')
-
-        # assume images are in the same directory
-        if image_store is None and xmlfile.name is not None:
-            image_store = ImageStore(path.dirname(xmlfile.name))
-
-        try:
-            parser = etree.XMLParser(remove_blank_text=False)
-            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
-
-            return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
-        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
-            raise ParseError(e)
-
-    @property
-    def mime_type(self):
-        if self.picture_info is None:
-            raise ValueError('DC is not loaded, hence we don\'t know the image type')
-        return self.picture_info.mime_type
-
-    @property
-    def slug(self):
-        return self.picture_info.url.slug
-
-    @property
-    def image_path(self):
-        if self.image_store is None:
-            raise ValueError("No image store associated with whis WLPicture.")
-        return self.image_store.path(self.slug, self.mime_type)
-
-    def image_file(self, *args, **kwargs):
-        return open(self.image_path, *args, **kwargs)
-
-    def partiter(self):
-        """
-        Iterates the parts of this picture and returns them and their metadata
-        """
-        for part in self.edoc.iter("div"):
-            pd = {}
-            pd['type'] = part.get('type')
-            if pd['type'] == 'area':
-                pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
-                                (int(part.get('x2')), int(part.get('y2'))))
-
-            pd['themes'] = []
-            pd['object'] = None
-            parent = part
-            while True:
-                parent = parent.getparent()
-                if parent is None:
-                    break
-                if parent.tag == 'sem':
-                    if parent.get('type') == 'theme':
-                        pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
-                    elif parent.get('type') == 'object' and pd['object'] is None:
-                        pd['object'] = parent.get('object')
-            yield pd
diff --git a/librarian/renderers.py b/librarian/renderers.py

new file mode 100755 (executable)

index 0000000..59ed8a4
--- /dev/null
+++ b/librarian/renderers.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from lxml import etree
+from . import UnicodeException
+from .utils import extend_element
+
+
+class UnknownElement(UnicodeException):
+    """ Raised by Register.get when no renderer matches the tag. """
+
+
+class Renderer(object):
+    """ Renders an element in a context to some kind of container. """
+    def render(self, element, ctx):
+        """ Renders the element in the context; subclasses must override. """
+        raise NotImplementedError  # NotImplemented is not an exception.
+
+    def render_text(self, text, ctx):
+        """ Renders the text in the context; subclasses must override. """
+        raise NotImplementedError  # NotImplemented is not an exception.
+
+
+class TreeRenderer(Renderer):
+    """ Renders element's insides as XML in a <_/> container. """
+    root_name = "_"
+
+    def __init__(self, tag_name=None, attrib=None):
+        self.attrib = attrib or {}
+        self.tag_name = tag_name
+
+    def container(self):
+        """ Returns (root, target); they differ only if tag_name is set. """
+        outer = etree.Element(self.root_name)
+        if not self.tag_name:
+            return outer, outer
+        wrapped = etree.Element(self.tag_name, **self.attrib)
+        outer.append(wrapped)
+        return outer, wrapped
+
+    def text_container(self):
+        outer = etree.Element(self.root_name)
+        return outer, outer
+
+    def subcontext(self, element, ctx):
+        return ctx
+
+    def get_insides(self, element, ctx):
+        child_ctx = self.subcontext(element, ctx)
+        if element.text:
+            yield self.render_text(element.text, ctx)
+        for child in element:
+            try:
+                yield ctx.format.render(child, child_ctx)
+            except UnknownElement:
+                pass  # Children of unknown kind are left out.
+            if child.tail:
+                yield self.render_text(child.tail, ctx)
+
+    def render(self, element, ctx):
+        outer, target = self.container()
+        for piece in self.get_insides(element, ctx):
+            extend_element(target, piece)
+        return outer
+
+    def render_text(self, text, ctx):
+        outer, target = self.text_container()
+        target.text = text
+        return outer
+
+
+
+class Register(object):
+    """ Register of renderers for element types and their classes.
+
+    >>> from librarian.core import Div
+    >>> renderer = Renderer()
+    >>> reg = Register()
+    >>> reg.register(Div, 'a.b', renderer)
+    >>> reg.get(Div, 'a.b.c') is renderer
+    True
+
+    """
+    def __init__(self):
+        self.classes = {}
+
+    def register(self, tag, klass, renderer):
+        """ Registers the renderer for the tag and the dotted class. """
+        self.classes[tag, klass] = renderer
+
+    def get(self, tag, klass=None):
+        """ Finds the renderer for the tag and the closest class prefix. """
+        while klass:
+            if (tag, klass) in self.classes:
+                return self.classes[tag, klass]
+            # Cut the last dotted part off; an empty rest ends the loop.
+            klass = klass.rpartition('.')[0]
+        # No class matched; use the renderer registered with no class.
+        if (tag, None) not in self.classes:
+            raise UnknownElement(tag)
+        return self.classes[tag, None]
+
+    def get_for(self, element):
+        """ Finds the renderer for the element's type and 'class' attribute. """
+        return self.get(type(element), element.get('class'))
diff --git a/librarian/utils.py b/librarian/utils.py

new file mode 100755 (executable)

index 0000000..25936bf
--- /dev/null
+++ b/librarian/utils.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+
+
+class Context(object):
+    """ Processing context, falling back to an optional parent context.
+
+    >>> ctx = Context(a=1)
+    >>> subctx = Context(ctx, a=2)
+    >>> ctx.b = 3
+    >>> print subctx.a, subctx.b
+    2 3
+
+    """
+    def __init__(self, _upctx=None, **initial):
+        # Internals are set on object directly, bypassing our __setattr__.
+        object.__setattr__(self, '_upctx', _upctx)
+        object.__setattr__(self, '_data', initial or {})
+    def __getattr__(self, name):
+        if name in self._data:
+            return self._data[name]
+        elif self._upctx is not None:
+            return getattr(self._upctx, name)
+        else:
+            raise AttributeError(name)  # Say which name was not found.
+
+    def __setattr__(self, name, value):
+        try:
+            self.try_setattr(name, value)
+        except ValueError:
+            self._data[name] = value  # Defined nowhere yet: define it here.
+
+    def try_setattr(self, name, value):
+        # Sets the name in the nearest context which already defines it.
+        if name in self._data:
+            self._data[name] = value
+        elif self._upctx is not None:
+            self._upctx.try_setattr(name, value)
+        else:
+            raise ValueError
+
+
+class XMLNamespace(object):
+    '''A handy structure to represent names in an XML namespace.'''
+    def __init__(self, uri):
+        self.uri = uri
+
+    def __call__(self, tag):
+        return '{%s}%s' % (self.uri, tag)  # tag qualified as '{uri}tag'
+
+    def __contains__(self, tag):
+        return tag.startswith('{' + str(self) + '}')  # qualified with our uri?
+
+    def __repr__(self):
+        return 'XMLNamespace(%r)' % self.uri
+
+    def __str__(self):
+        return '%s' % self.uri
+
+
+def extend_element(container, element=None, text=None):
+    """ Appends given text and other element's contents to the container.
+
+    Unlike etree.Element.extend, it carries the leading text over too.
+
+    >>> from lxml import etree
+    >>> container = etree.fromstring("<A><B/></A>")
+    >>> element = etree.fromstring("<_>a<b/>c</_>")
+    >>> extend_element(container, element)
+    >>> print etree.tostring(container)
+    <A><B/>a<b/>c</A>
+
+    """
+    has_element = element is not None
+    add_text = (text or "") + ((element.text or "") if has_element else "")
+    if add_text and len(container):
+        last_child = container[-1]
+        last_child.tail = (last_child.tail or "") + add_text
+    elif add_text:
+        container.text = (container.text or "") + add_text
+    if has_element:
+        container.extend(element)
+
+def get_resource(path):
+    return os.path.join(os.path.split(__file__)[0], path)
diff --git a/scripts/book2cover b/scripts/book2cover

index 758ab0e..977096e 100755 (executable)
--- a/scripts/book2cover
+++ b/scripts/book2cover
@@ -4,18 +4,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
-from StringIO import StringIO
-from librarian import OutputFile
  from librarian.book2anything import Book2Anything, Option
+from librarian.formats.cover.wolnelektury import WLCover
  
  
  class Book2Cover(Book2Anything):
-    format_name = "JPEG"
-    ext = "jpg"
-    uses_cover = True
-    cover_optional = False
+    format_cls = WLCover
  
-    transform_options = [
+    format_options = [
          Option('-W', '--width', action='store', type='int', dest='width', default=None,
                  help='Set width.'),
          Option('-H', '--height', action='store', type='int', dest='height', default=None,
@@ -25,10 +21,6 @@ class Book2Cover(Book2Anything):
                  help='Add WL logo in white box.'),
      ]
  
-    @staticmethod
-    def transform(wldoc, cover, *args, **kwargs):
-        return wldoc.as_cover(cover_class=cover, *args, **kwargs)
-
  
  if __name__ == '__main__':
      Book2Cover.run()
diff --git a/scripts/book2epub b/scripts/book2epub

index 01ca79a..4d061f0 100755 (executable)
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -5,19 +5,11 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from librarian.book2anything import Book2Anything, Option
+from librarian.formats.epub import EpubFormat
  
  
  class Book2Epub(Book2Anything):
-    format_name = "EPUB"
-    ext = "epub"
-    uses_cover = True
-    uses_provider = True
-    transform_flags = [
-        Option('-w', '--working-copy', dest='working-copy',
-                action='store_true', default=False,
-                help='mark the output as a working copy')
-        ]
-
+    format_cls = EpubFormat
  
  if __name__ == '__main__':
      Book2Epub.run()
diff --git a/scripts/book2html b/scripts/book2html

index 5d48eec..6c1e1c6 100755 (executable)
--- a/scripts/book2html
+++ b/scripts/book2html
@@ -5,23 +5,17 @@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from librarian.book2anything import Book2Anything, Option
+from librarian.formats.html import HtmlFormat
  
  
  class Book2Html(Book2Anything):
-    format_name = "HTML"
-    ext = "html"
-    uses_cover = False
-    uses_provider = False
-    transform_flags = [
-        Option('-r', '--raw', dest='full-page',
+    format_cls = HtmlFormat
+
+    format_options = [
+        Option('-r', '--raw', dest='standalone',
                  action='store_false', default=True,
                  help='output raw text for use in templates')
      ]
-    parser_args = [
-        Option('-i', '--ignore-dublin-core', dest='parse_dublincore', 
-                action='store_false', default=True,
-                help='don\'t try to parse dublin core metadata')
-    ]
  
  
  if __name__ == '__main__':
diff --git a/setup.py b/setup.py

index 51003ef..a0e4e53 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
  #
  import os
  import os.path
-from distutils.core import setup
+from setuptools import setup, find_packages
  
  def whole_tree(prefix, path):
      files = []
@@ -21,18 +21,26 @@ def whole_tree(prefix, path):
  
  setup(
      name='librarian',
-    version='1.5.1',
+    version='2.0a',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
      maintainer='Radek Czajka',
      maintainer_email='radoslaw.czajka@nowoczesnapolska.org.pl',
      url='http://github.com/fnp/librarian',
-    packages=['librarian'],
-    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] +
-                                whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
+    packages=find_packages(),
+    package_data={
+            'librarian': ['xslt/*.xslt', 'epub/*', 'html/*', 'mobi/*', 'pdf/*', 'fb2/*', 'fonts/*', 'res/*'] +
+                        whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer'),
+            'librarian.formats.html': ['res/*'],
+            'librarian.formats.epub': ['res/*'],
+        },
      include_package_data=True,
-    install_requires=['lxml>=2.2'],
+    install_requires=[
+            'lxml>=2.2',
+            'pillow',
+            'Texml',
+        ],
      scripts=['scripts/book2html',
               'scripts/book2txt',
               'scripts/book2epub',
author	Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
	Thu, 2 May 2013 10:17:09 +0000 (12:17 +0200)
committer	Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
	Thu, 2 May 2013 10:17:27 +0000 (12:17 +0200)
librarian/__init__.py		patch \| blob \| history
librarian/book2anything.py		patch \| blob \| history
librarian/core.py	[new file with mode: 0755]	patch \| blob
librarian/cover.py	[deleted file]	patch \| blob \| history
librarian/document.py	[new file with mode: 0755]	patch \| blob
librarian/epub/cover.html	[deleted file]	patch \| blob \| history
librarian/formats/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/cover/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/cover/partners/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/cover/wolnelektury/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/epub/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/epub/res/chapter.html	[new file with mode: 0644]	patch \| blob
librarian/formats/epub/res/content.opf	[new file with mode: 0644]	patch \| blob
librarian/formats/epub/res/cover.html	[new file with mode: 0644]	patch \| blob
librarian/formats/epub/res/footnotes.html	[new file with mode: 0644]	patch \| blob
librarian/formats/html/__init__.py	[new file with mode: 0644]	patch \| blob
librarian/formats/html/res/html.html	[new file with mode: 0644]	patch \| blob
librarian/formats/html/res/html_standalone.html	[new file with mode: 0644]	patch \| blob
librarian/functions.py	[deleted file]	patch \| blob \| history
librarian/html.py	[deleted file]	patch \| blob \| history
librarian/meta.py	[new file with mode: 0755]	patch \| blob
librarian/output.py	[new file with mode: 0755]	patch \| blob
librarian/parser.py		patch \| blob \| history
librarian/picture.py	[deleted file]	patch \| blob \| history
librarian/renderers.py	[new file with mode: 0755]	patch \| blob
librarian/utils.py	[new file with mode: 0755]	patch \| blob
scripts/book2cover		patch \| blob \| history
scripts/book2epub		patch \| blob \| history
scripts/book2html		patch \| blob \| history
setup.py		patch \| blob \| history