Merge branch 'pretty' into commerce
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:35:59 +0000 (16:35 +0100)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Thu, 19 Jan 2012 15:35:59 +0000 (16:35 +0100)
Conflicts:
librarian/cover.py
librarian/epub.py
librarian/epub/xsltContent.xsl
librarian/epub/xsltScheme.xsl
scripts/book2epub

45 files changed:
librarian/__init__.py
librarian/cover.py
librarian/dcparser.py
librarian/epub.py
librarian/epub/style.css
librarian/epub/toc.html [new file with mode: 0755]
librarian/epub/xsltAnnotations.xsl
librarian/epub/xsltContent.xsl
librarian/epub/xsltLast.xsl
librarian/epub/xsltScheme.xsl
librarian/html.py
librarian/mobi.py [new file with mode: 0755]
librarian/mobi/style.css [new file with mode: 0755]
librarian/packagers.py
librarian/parser.py
librarian/pdf.py
librarian/pdf/wl.cls [new file with mode: 0644]
librarian/pdf/wl.sty [deleted file]
librarian/pdf/wl2tex.xslt
librarian/picture.py [new file with mode: 0644]
librarian/res/jedenprocent.png [new file with mode: 0644]
librarian/text.py
scripts/book2cover [new file with mode: 0755]
scripts/book2epub
scripts/book2html
scripts/book2ihtml
scripts/book2mobi [new file with mode: 0755]
scripts/book2pdf
scripts/book2txt
setup.py [changed mode: 0644->0755]
tests/files/dcparser/andersen_brzydkie_kaczatko.out
tests/files/dcparser/biedrzycki_akslop.out
tests/files/dcparser/kochanowski_piesn7.out
tests/files/dcparser/mickiewicz_rybka.out
tests/files/dcparser/sofokles_antygona.out
tests/files/picture/angelus-novus.png [new file with mode: 0644]
tests/files/picture/angelus-novus.xml [new file with mode: 0644]
tests/files/text/asnyk_miedzy_nami.xml [deleted file]
tests/files/text/asnyk_zbior.xml [new file with mode: 0755]
tests/files/text/miedzy-nami-nic-nie-bylo.xml [new file with mode: 0644]
tests/test_epub.py [new file with mode: 0644]
tests/test_html.py
tests/test_picture.py [new file with mode: 0644]
tests/test_text.py
tests/utils.py

diff --git a/librarian/__init__.py b/librarian/__init__.py
index 5b6981d..dd09ce4 100644 (file)
@@ -3,21 +3,30 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+from __future__ import with_statement
+
 import os
+import re
+import shutil
 
 class ParseError(Exception):
     def __str__(self):
         """ Dirty workaround for Python Unicode handling problems. """
-        return self.message.message
+        return self.message
 
     def __unicode__(self):
         """ Dirty workaround for Python Unicode handling problems. """
-        return self.message.message
+        return self.message
 
 class ValidationError(Exception):
     pass
 
 class NoDublinCore(ValidationError):
+    """There's no DublinCore section, and it's required."""
+    pass
+
+class NoProvider(Exception):
+    """There's no DocProvider specified, and it's needed."""
     pass
 
 class XMLNamespace(object):
@@ -56,43 +65,88 @@ OPFNS = XMLNamespace("http://www.idpf.org/2007/opf")
 WLNS = EmptyNamespace()
 
 
+class WLURI(object):
+    """Represents a WL URI. Extracts slug from it."""
+    slug = None
+
+    example = 'http://wolnelektury.pl/katalog/lektura/template/'
+    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/lektura/'
+            '(?P<slug>[-a-z0-9]+)/?$')
+
+    def __init__(self, uri):
+        uri = unicode(uri)
+        self.uri = uri
+        self.slug = uri.rstrip('/').rsplit('/', 1)[-1]
+
+    @classmethod
+    def strict(cls, uri):
+        match = cls._re_wl_uri.match(uri)
+        if not match:
+            raise ValueError('Supplied URI (%s) does not match '
+                'the template: %s.' % (uri, cls._re_wl_uri))
+        return cls(uri)
+
+    @classmethod
+    def from_slug(cls, slug):
+        """Contructs an URI from slug.
+
+        >>> WLURI.from_slug('a-slug').uri
+        u'http://wolnelektury.pl/katalog/lektura/a-slug/'
+
+        """
+        uri = 'http://wolnelektury.pl/katalog/lektura/%s/' % slug
+        return cls(uri)
+
+    def __unicode__(self):
+        return self.uri
+
+    def __str__(self):
+        return self.uri
+
+    def __eq__(self, other):
+        return self.slug == other.slug
+
+
 class DocProvider(object):
-    """ Base class for a repository of XML files.
-        Used for generating joined files, like EPUBs
+    """Base class for a repository of XML files.
+
+    Used for generating joined files, like EPUBs.
     """
 
     def by_slug(self, slug):
-        raise NotImplemented
-
-    def __getitem__(self, slug):
-        return self.by_slug(slug)
+        """Should return a file-like object with a WL document XML."""
+        raise NotImplementedError
 
-    def by_uri(self, uri):
-        return self.by_slug(uri.rsplit('/', 1)[1])
+    def by_uri(self, uri, wluri=WLURI):
+        """Should return a file-like object with a WL document XML."""
+        wluri = wluri(uri)
+        return self.by_slug(wluri.slug)
 
 
 class DirDocProvider(DocProvider):
     """ Serve docs from a directory of files in form <slug>.xml """
 
-    def __init__(self, dir):
-        self.dir = dir
+    def __init__(self, dir_):
+        self.dir = dir_
         self.files = {}
 
     def by_slug(self, slug):
-        return open(os.path.join(self.dir, '%s.xml' % slug))
+        fname = slug + '.xml'
+        return open(os.path.join(self.dir, fname))
 
 
 import lxml.etree as etree
 import dcparser
 
 DEFAULT_BOOKINFO = dcparser.BookInfo(
-        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'}, \
+        { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'},
         { DCNS('creator'): [u'Some, Author'],
           DCNS('title'): [u'Some Title'],
           DCNS('subject.period'): [u'Unknown'],
           DCNS('subject.type'): [u'Unknown'],
           DCNS('subject.genre'): [u'Unknown'],
           DCNS('date'): ['1970-01-01'],
+          DCNS('language'): [u'pol'],
           # DCNS('date'): [creation_date],
           DCNS('publisher'): [u"Fundacja Nowoczesna Polska"],
           DCNS('description'):
@@ -100,8 +154,7 @@ DEFAULT_BOOKINFO = dcparser.BookInfo(
              Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa
              wykonana przez Bibliotekę Narodową z egzemplarza
              pochodzącego ze zbiorów BN."""],
-          DCNS('identifier.url'):
-            [u"http://wolnelektury.pl/katalog/lektura/template"],
+          DCNS('identifier.url'): [WLURI.example],
           DCNS('rights'):
             [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] })
 
@@ -118,14 +171,15 @@ def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO):
         method='xml', encoding=unicode, pretty_print=True)
 
     return u'<utwor>\n' + dcstring + u'\n<plain-text>\n' + ocrtext + \
-        u'\n</plain-text>\n</utwor>';
+        u'\n</plain-text>\n</utwor>'
 
 
 def serialize_raw(element):
     b = u'' + (element.text or '')
 
     for child in element.iterchildren():
-        e = etree.tostring(child, method='xml', encoding=unicode, pretty_print=True)
+        e = etree.tostring(child, method='xml', encoding=unicode,
+                pretty_print=True)
         b += e
 
     return b
@@ -140,3 +194,73 @@ def serialize_children(element, format='raw'):
 def get_resource(path):
     return os.path.join(os.path.dirname(__file__), path)
 
+
+class OutputFile(object):
+    """Represents a file returned by one of the converters."""
+
+    _string = None
+    _filename = None
+
+    def __del__(self):
+        if self._filename:
+            os.unlink(self._filename)
+
+    def __nonzero__(self):
+        return self._string is not None or self._filename is not None
+
+    @classmethod
+    def from_string(cls, string):
+        """Converter returns contents of a file as a string."""
+
+        instance = cls()
+        instance._string = string
+        return instance
+
+    @classmethod
+    def from_filename(cls, filename):
+        """Converter returns contents of a file as a named file."""
+
+        instance = cls()
+        instance._filename = filename
+        return instance
+
+    def get_string(self):
+        """Get file's contents as a string."""
+
+        if self._filename is not None:
+            with open(self._filename) as f:
+                return f.read()
+        else:
+            return self._string
+
+    def get_file(self):
+        """Get file as a file-like object."""
+
+        if self._string is not None:
+            from StringIO import StringIO
+            return StringIO(self._string)
+        elif self._filename is not None:
+            return open(self._filename)
+
+    def get_filename(self):
+        """Get file as a fs path."""
+
+        if self._filename is not None:
+            return self._filename
+        elif self._string is not None:
+            from tempfile import NamedTemporaryFile
+            temp = NamedTemporaryFile(prefix='librarian-', delete=False)
+            temp.write(self._string)
+            temp.close()
+            self._filename = temp.name
+            return self._filename
+        else:
+            return None
+
+    def save_as(self, path):
+        """Save file to a path. Create directories, if necessary."""
+
+        dirname = os.path.dirname(os.path.abspath(path))
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        shutil.copy(self.get_filename(), path)
diff --git a/librarian/cover.py b/librarian/cover.py
index 8e61fe3..e7a8e1b 100644 (file)
@@ -7,7 +7,110 @@ import Image, ImageFont, ImageDraw, ImageFilter
 from librarian import get_resource
 
 
+class TextBox(object):
+    """Creates an Image with a series of centered strings."""
+
+    SHADOW_X = 3
+    SHADOW_Y = 3
+    SHADOW_BLUR = 3
+
+    def __init__(self, max_width, max_height, padding_x=None, padding_y=None):
+        if padding_x is None:
+            padding_x = self.SHADOW_X + self.SHADOW_BLUR
+        if padding_y is None:
+            padding_y = self.SHADOW_Y + self.SHADOW_BLUR
+
+        self.max_width = max_width
+        self.max_text_width = max_width - 2 * padding_x
+        self.padding_y = padding_y
+        self.height = padding_y
+        self.img = Image.new('RGBA', (max_width, max_height))
+        self.draw = ImageDraw.Draw(self.img)
+        self.shadow_img = None
+        self.shadow_draw = None
+
+    def skip(self, height):
+        """Skips some vertical space."""
+        self.height += height
+
+    def text(self, text, color='#000', font=None, line_height=20, 
+             shadow_color=None, shortener=None):
+        """Writes some centered text."""
+        if shadow_color:
+            if not self.shadow_img:
+                self.shadow_img = Image.new('RGBA', self.img.size)
+                self.shadow_draw = ImageDraw.Draw(self.shadow_img)
+        while text:
+            if shortener:
+                for line in shortener(text):
+                    if text_draw.textsize(line, font=font)[0] <= self.max_text_width:
+                        break
+                text = ''
+            else:
+                line = text
+                line_width = self.draw.textsize(line, font=font)[0]
+                while line_width > self.max_text_width:
+                    parts = line.rsplit(' ', 1)
+                    if len(parts) == 1:
+                        line_width = self.max_text_width
+                        break
+                    line = parts[0]
+                    line_width = self.draw.textsize(line, font=font)[0]
+
+            line = line.strip() + ' '
+
+            pos_x = (self.max_width - line_width) / 2
+
+            if shadow_color:
+                self.shadow_draw.text(
+                        (pos_x + self.SHADOW_X, self.height + self.SHADOW_Y),
+                        line, font=font, fill=shadow_color
+                )
+
+            self.draw.text((pos_x, self.height), line, font=font, fill=color)
+            self.height += line_height
+            # go to next line
+            text = text[len(line):]
+
+    @staticmethod
+    def person_shortener(text):
+        yield text
+        chunks = text.split()
+        n_chunks = len(chunks)
+        # make initials from given names, starting from last
+        for i in range(n_chunks - 2, -1, -1):
+            chunks[i] = chunks[i][0] + '.'
+            yield " ".join(chunks)
+        # remove given names initials, starting from last
+        while len(chunks) > 2:
+            del chunks[1]
+            yield " ".join(chunks)
+
+    @staticmethod
+    def title_shortener(text):
+        yield text
+        chunks = text.split()
+        n_chunks = len(chunks)
+        # remove words, starting from last one
+        while len(chunks) > 1:
+            del chunks[-1]
+            yield " ".join(chunks) + u'…'
+
+    def image(self):
+        """Creates the actual Image object."""
+        image = Image.new('RGBA', (self.max_width,
+                                   self.height + self.padding_y))
+        if self.shadow_img:
+            shadow = self.shadow_img.filter(ImageFilter.BLUR)
+            image.paste(shadow, (0, 0), shadow)
+            image.paste(self.img, (0, 0), self.img)
+        else:
+            image.paste(self.img, (0, 0))
+        return image
+
+
 class Cover(object):
+    """Abstract base class for cover images generator."""
     width = 600
     height = 800
     background_color = '#fff'
@@ -35,10 +138,10 @@ class Cover(object):
 
     logo_bottom = None
     logo_width = None
+    uses_dc_cover = False
 
     format = 'JPEG'
 
-
     exts = {
         'JPEG': 'jpg',
         'PNG': 'png',
@@ -49,75 +152,16 @@ class Cover(object):
         'PNG': 'image/png',
         }
 
-    @staticmethod
-    def person_shortener(text):
-        yield text
-        chunks = text.split()
-        n_chunks = len(chunks)
-        # make initials from given names, starting from last
-        for i in range(n_chunks - 2, -1, -1):
-            chunks[i] = chunks[i][0] + '.'
-            yield " ".join(chunks)
-        # remove given names initials, starting from last
-        while len(chunks) > 2:
-            del chunks[1]
-            yield " ".join(chunks)
-
-    @staticmethod
-    def title_shortener(text):
-        yield text
-        chunks = text.split()
-        n_chunks = len(chunks)
-        # remove words, starting from last one
-        while len(chunks) > 1:
-            del chunks[-1]
-            yield " ".join(chunks) + u'…'
-
-    @staticmethod
-    def draw_text(text, img, font, align, shortener, margin_left, width, pos_y, lineskip, color, shadow_color):
-        if shadow_color:
-            shadow_img = Image.new('RGBA', img.size)
-            shadow_draw = ImageDraw.Draw(shadow_img)
-        text_img = Image.new('RGBA', img.size)
-        text_draw = ImageDraw.Draw(text_img)
-        while text:
-            if shortener:
-                for line in shortener(text):
-                    if text_draw.textsize(line, font=font)[0] <= width:
-                        break
-                text = ''
-            else:
-                line = text
-                while text_draw.textsize(line, font=font)[0] > width:
-                    try:
-                        line, ext = line.rsplit(' ', 1)
-                    except:
-                        break
-                text = text[len(line)+1:]
-            pos_x = margin_left
-            if align == 'c':
-                pos_x += (width - text_draw.textsize(line, font=font)[0]) / 2
-            elif align == 'r':
-                pos_x += (width - text_draw.textsize(line, font=font)[0])
-            if shadow_color:
-                shadow_draw.text((pos_x + 3, pos_y + 3), line, font=font, fill=shadow_color)
-            text_draw.text((pos_x, pos_y), line, font=font, fill=color)
-            pos_y += lineskip
-        if shadow_color:
-            shadow_img = shadow_img.filter(ImageFilter.BLUR)
-            img.paste(shadow_img, None, shadow_img)
-        img.paste(text_img, None, text_img)
-        return pos_y
-
-
-    def __init__(self, author='', title=''):
-        self.author = author
-        self.title = title
+    def __init__(self, book_info):
+        self.author = ", ".join(auth.readable() for auth in book_info.authors)
+        self.title = book_info.title
 
     def pretty_author(self):
+        """Allows for decorating author's name."""
         return self.author
 
     def pretty_title(self):
+        """Allows for decorating title."""
         return self.title
 
     def image(self):
@@ -137,16 +181,31 @@ class Cover(object):
             logo = logo.resize((self.logo_width, logo.size[1] * self.logo_width / logo.size[0]))
             img.paste(logo, ((self.width - self.logo_width) / 2, img.size[1] - logo.size[1] - self.logo_bottom))
 
-        author_font = self.author_font or ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), 30)
-        author_shortener = None if self.author_wrap else self.person_shortener 
-        title_y = self.draw_text(self.pretty_author(), img, author_font, self.author_align, author_shortener,
-                    self.author_margin_left, self.width - self.author_margin_left - self.author_margin_right, self.author_top,
-                    self.author_lineskip, self.author_color, self.author_shadow) + self.title_top
-        title_shortener = None if self.title_wrap else self.title_shortener 
-        title_font = self.title_font or ImageFont.truetype(get_resource('fonts/DejaVuSerif.ttf'), 40)
-        self.draw_text(self.pretty_title(), img, title_font, self.title_align, title_shortener,
-                    self.title_margin_left, self.width - self.title_margin_left - self.title_margin_right, title_y,
-                    self.title_lineskip, self.title_color, self.title_shadow)
+        top = self.author_top
+        tbox = TextBox(
+            self.width - self.author_margin_left - self.author_margin_right,
+            self.height - top,
+            )
+        author_font = self.author_font or ImageFont.truetype(
+            get_resource('fonts/DejaVuSerif.ttf'), 30)
+        author_shortener = None if self.author_wrap else TextBox.person_shortener 
+        tbox.text(self.pretty_author(), self.author_color, author_font,
+            self.author_lineskip, self.author_shadow, author_shortener)
+        text_img = tbox.image()
+        img.paste(text_img, (self.author_margin_left, top), text_img)
+        
+        top += text_img.size[1] + self.title_top
+        tbox = TextBox(
+            self.width - self.title_margin_left - self.title_margin_right,
+            self.height - top,
+            )
+        title_font = self.author_font or ImageFont.truetype(
+            get_resource('fonts/DejaVuSerif.ttf'), 40)
+        title_shortener = None if self.title_wrap else TextBox.title_shortener 
+        tbox.text(self.pretty_title(), self.title_color, title_font,
+            self.title_lineskip, self.title_shadow, title_shortener)
+        text_img = tbox.image()
+        img.paste(text_img, (self.title_margin_left, top), text_img)
 
         return img
 
@@ -160,6 +219,125 @@ class Cover(object):
         return self.image().save(format=self.format, *args, **kwargs)
 
 
+class WLCover(Cover):
+    """Default Wolne Lektury cover generator."""
+    uses_dc_cover = True
+    author_font = ImageFont.truetype(
+        get_resource('fonts/JunicodeWL-Regular.ttf'), 20)
+    author_lineskip = 30
+    title_font = ImageFont.truetype(
+        get_resource('fonts/DejaVuSerif-Bold.ttf'), 30)
+    title_lineskip = 40
+    title_box_width = 350
+    bar_width = 35
+    background_color = '#444'
+    author_color = '#444'
+
+    epochs = {
+        u'Starożytność': 0,
+        u'Średniowiecze': 30,
+        u'Renesans': 60,
+        u'Barok': 90,
+        u'Oświecenie': 120,
+        u'Romantyzm': 150,
+        u'Pozytywizm': 180,
+        u'Modernizm': 210,
+        u'Dwudziestolecie międzywojenne': 240,
+        u'Współczesność': 270,
+    }
+
+    def __init__(self, book_info):
+        super(WLCover, self).__init__(book_info)
+        self.kind = book_info.kind
+        self.epoch = book_info.epoch
+        if book_info.cover_url:
+            from urllib2 import urlopen
+            from StringIO import StringIO
+
+            bg_src = urlopen(book_info.cover_url)
+            self.background_img = StringIO(bg_src.read())
+            bg_src.close()
+
+    def pretty_author(self):
+        return self.author.upper()
+
+    def image(self):
+        from colorsys import hsv_to_rgb
+
+        img = Image.new('RGB', (self.width, self.height), self.background_color)
+        draw = ImageDraw.Draw(img)
+
+        if self.epoch in self.epochs:
+            epoch_color = tuple(int(255 * c) for c in hsv_to_rgb(
+                    float(self.epochs[self.epoch]) / 360, .7, .7))
+        else:
+            epoch_color = '#000'
+        draw.rectangle((0, 0, self.bar_width, self.height), fill=epoch_color)
+
+        if self.background_img:
+            src = Image.open(self.background_img)
+            trg_size = (self.width - self.bar_width, self.height)
+            if src.size[0] * trg_size[1] < src.size[1] * trg_size[0]:
+                resized = (
+                    trg_size[0],
+                    src.size[1] * trg_size[0] / src.size[0]
+                )
+                cut = (resized[1] - trg_size[1]) / 2
+                src = src.resize(resized)
+                src = src.crop((0, cut, src.size[0], src.size[1] - cut))
+            else:
+                resized = (
+                    src.size[0] * trg_size[1] / src.size[1],
+                    trg_size[1],
+                )
+                cut = (resized[0] - trg_size[0]) / 2
+                src = src.resize(resized)
+                src = src.crop((cut, 0, src.size[0] - cut, src.size[1]))
+            
+            img.paste(src, (self.bar_width, 0))
+            del src
+
+        box = TextBox(self.title_box_width, self.height, padding_y=20)
+        box.text(self.pretty_author(), 
+                 font=self.author_font,
+                 line_height=self.author_lineskip,
+                 color=self.author_color,
+                 shadow_color=self.author_shadow,
+                )
+
+        box.skip(10)
+        box.draw.line((75, box.height, 275, box.height), 
+                fill=self.author_color, width=2)
+        box.skip(15)
+
+        box.text(self.pretty_title(),
+                 line_height=self.title_lineskip,
+                 font=self.title_font,
+                 color=epoch_color,
+                 shadow_color=self.title_shadow,
+                )
+        box_img = box.image()
+
+        if self.kind == 'Liryka':
+            # top
+            box_top = 100
+        elif self.kind == 'Epika':
+            # bottom
+            box_top = self.height - 100 - box_img.size[1]
+        else:
+            # center
+            box_top = (self.height - box_img.size[1]) / 2
+
+        box_left = self.bar_width + (self.width - self.bar_width - 
+                        box_img.size[0]) / 2
+        draw.rectangle((box_left, box_top, 
+            box_left + box_img.size[0], box_top + box_img.size[1]),
+            fill='#fff')
+        img.paste(box_img, (box_left, box_top), box_img)
+
+        return img
+
+
 
 class VirtualoCover(Cover):
     width = 600
diff --git a/librarian/dcparser.py b/librarian/dcparser.py
index 9faffe8..f64317a 100644 (file)
@@ -7,7 +7,8 @@ from xml.parsers.expat import ExpatError
 from datetime import date
 import time
 
-from librarian import ValidationError, NoDublinCore, ParseError, DCNS, RDFNS
+from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS,
+                       WLURI)
 
 import lxml.etree as etree # ElementTree API using libxml2
 from lxml.etree import XMLSyntaxError
@@ -78,35 +79,43 @@ def as_unicode(text):
     else:
         return text.decode('utf-8')
 
+def as_wluri_strict(text):
+    return WLURI.strict(text)
+
 class Field(object):
-    def __init__(self, uri, attr_name, type=as_unicode, multiple=False, salias=None, **kwargs):
+    def __init__(self, uri, attr_name, validator=as_unicode, strict=None, multiple=False, salias=None, **kwargs):
         self.uri = uri
         self.name = attr_name
-        self.validator = type
+        self.validator = validator
+        self.strict = strict
         self.multiple = multiple
         self.salias = salias
 
         self.required = kwargs.get('required', True) and not kwargs.has_key('default')
         self.default = kwargs.get('default', [] if multiple else [None])
 
-    def validate_value(self, val):
+    def validate_value(self, val, strict=False):
+        if strict and self.strict is not None:
+            validator = self.strict
+        else:
+            validator = self.validator
         try:
             if self.multiple:
-                if self.validator is None:
+                if validator is None:
                     return val
-                return [ self.validator(v) if v is not None else v for v in val ]
+                return [ validator(v) if v is not None else v for v in val ]
             elif len(val) > 1:
                 raise ValidationError("Multiple values not allowed for field '%s'" % self.uri)
             elif len(val) == 0:
                 raise ValidationError("Field %s has no value to assign. Check your defaults." % self.uri)
             else:
-                if self.validator is None or val[0] is None:
+                if validator is None or val[0] is None:
                     return val[0]
-                return self.validator(val[0])
+                return validator(val[0])
         except ValueError, e:
             raise ValidationError("Field '%s' - invald value: %s" % (self.uri, e.message))
 
-    def validate(self, fdict):
+    def validate(self, fdict, strict=False):
         if not fdict.has_key(self.uri):
             if not self.required:
                 f = self.default
@@ -115,47 +124,64 @@ class Field(object):
         else:
             f = fdict[self.uri]
 
-        return self.validate_value(f)
+        return self.validate_value(f, strict=strict)
+
+    def __eq__(self, other):
+        if isinstance(other, Field) and other.name == self.name:
+            return True
+        return False
+
 
+class DCInfo(type):
+    def __new__(meta, classname, bases, class_dict):
+        fields = list(class_dict['FIELDS'])
 
+        for base in bases[::-1]:
+            if hasattr(base, 'FIELDS'):
+                for field in base.FIELDS[::-1]:
+                    try:
+                        fields.index(field)
+                    except ValueError:
+                        fields.insert(0, field)
 
+        class_dict['FIELDS'] = tuple(fields)
+        return super(DCInfo, meta).__new__(meta, classname, bases, class_dict)
+
+
+class WorkInfo(object):
+    __metaclass__ = DCInfo
 
-class BookInfo(object):
     FIELDS = (
-        Field( DCNS('creator'), 'author', as_person),
+        Field( DCNS('creator'), 'authors', as_person, salias='author', multiple=True),
         Field( DCNS('title'), 'title'),
-        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
-        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
-        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True),
-        Field( DCNS('date'), 'created_at', as_date),
-        Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
+        Field( DCNS('type'), 'type', required=False, multiple=True),
+
         Field( DCNS('contributor.editor'), 'editors', \
             as_person, salias='editor', multiple=True, default=[]),
-        Field( DCNS('contributor.translator'), 'translators', \
-            as_person,  salias='translator', multiple=True, default=[]),
         Field( DCNS('contributor.technical_editor'), 'technical_editors',
             as_person, salias='technical_editor', multiple=True, default=[]),
+
+        Field( DCNS('date'), 'created_at', as_date),
+        Field( DCNS('date.pd'), 'released_to_public_domain_at', as_date, required=False),
         Field( DCNS('publisher'), 'publisher'),
+
+        Field( DCNS('language'), 'language'),
+        Field( DCNS('description'), 'description', required=False),
+
         Field( DCNS('source'), 'source_name', required=False),
         Field( DCNS('source.URL'), 'source_url', required=False),
-        Field( DCNS('identifier.url'), 'url'),
-        Field( DCNS('relation.hasPart'), 'parts', multiple=True, required=False),
+        Field( DCNS('identifier.url'), 'url', WLURI, strict=as_wluri_strict),
         Field( DCNS('rights.license'), 'license', required=False),
         Field( DCNS('rights'), 'license_description'),
-        Field( DCNS('description'), 'description', required=False),
     )
 
-    @property
-    def slug(self):
-        return self.url.rsplit('/', 1)[1]
-
     @classmethod
-    def from_string(cls, xml):
+    def from_string(cls, xml, *args, **kwargs):
         from StringIO import StringIO
-        return cls.from_file(StringIO(xml))
+        return cls.from_file(StringIO(xml), *args, **kwargs)
 
     @classmethod
-    def from_file(cls, xmlfile):
+    def from_file(cls, xmlfile, *args, **kwargs):
         desc_tag = None
         try:
             iter = etree.iterparse(xmlfile, ['start', 'end'])
@@ -176,17 +202,17 @@ class BookInfo(object):
             # if there is no end, Expat should yell at us with an ExpatError
 
             # extract data from the element and make the info
-            return cls.from_element(desc_tag)
+            return cls.from_element(desc_tag, *args, **kwargs)
         except XMLSyntaxError, e:
             raise ParseError(e)
         except ExpatError, e:
             raise ParseError(e)
 
     @classmethod
-    def from_element(cls, rdf_tag):
+    def from_element(cls, rdf_tag, *args, **kwargs):
         # the tree is already parsed, so we don't need to worry about Expat errors
         field_dict = {}
-        desc = rdf_tag.find(".//" + RDFNS('Description') )
+        desc = rdf_tag.find(".//" + RDFNS('Description'))
 
         if desc is None:
             raise NoDublinCore("No DublinCore section found.")
@@ -196,9 +222,9 @@ class BookInfo(object):
             fv.append(e.text)
             field_dict[e.tag] = fv
 
-        return cls( desc.attrib, field_dict )
+        return cls(desc.attrib, field_dict, *args, **kwargs)
 
-    def __init__(self, rdf_attrs, dc_fields):
+    def __init__(self, rdf_attrs, dc_fields, strict=False):
         """rdf_attrs should be a dictionary-like object with any attributes of the RDF:Description.
         dc_fields - dictionary mapping DC fields (with namespace) to list of text values for the
         given field. """
@@ -207,7 +233,7 @@ class BookInfo(object):
         self.fmap = {}
 
         for field in self.FIELDS:
-            value = field.validate( dc_fields )
+            value = field.validate(dc_fields, strict=strict)
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias: self.fmap[field.salias] = field
@@ -278,7 +304,6 @@ class BookInfo(object):
 
         return root
 
-
     def serialize(self):
         rdf = {}
         rdf['about'] = { 'uri': RDFNS('about'), 'value': self.about }
@@ -316,5 +341,31 @@ class BookInfo(object):
 
         return result
 
-def parse(file_name):
-    return BookInfo.from_file(file_name)
+
+class BookInfo(WorkInfo):
+    FIELDS = (
+        Field( DCNS('audience'), 'audiences', salias='audience', multiple=True,
+                required=False),
+
+        Field( DCNS('subject.period'), 'epochs', salias='epoch', multiple=True,
+                required=False),
+        Field( DCNS('subject.type'), 'kinds', salias='kind', multiple=True,
+                required=False),
+        Field( DCNS('subject.genre'), 'genres', salias='genre', multiple=True,
+                required=False),
+                
+        Field( DCNS('contributor.translator'), 'translators', \
+            as_person,  salias='translator', multiple=True, default=[]),
+        Field( DCNS('relation.hasPart'), 'parts', 
+            WLURI, strict=as_wluri_strict, multiple=True, required=False),
+        Field( DCNS('relation.isVariantOf'), 'variant_of', 
+            WLURI, strict=as_wluri_strict, required=False),
+
+        Field( DCNS('relation.coverImage.url'), 'cover_url', required=False),
+        Field( DCNS('relation.coverImage.attribution'), 'cover_by', required=False),
+        Field( DCNS('relation.coverImage.source'), 'cover_source', required=False),
+    )
+
+
+def parse(file_name, cls=BookInfo):
+    return cls.from_file(file_name)
diff --git a/librarian/epub.py b/librarian/epub.py
index 348df0c..48bb2f2 100644 (file)
@@ -13,14 +13,11 @@ from StringIO import StringIO
 from copy import deepcopy
 from lxml import etree
 import zipfile
-from tempfile import mkdtemp
+from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
-import sys
-
-from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, XHTMLNS, NoDublinCore
-from librarian.dcparser import BookInfo
-from librarian.cover import ImageCover
+from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile
+from librarian.cover import WLCover
 
 from librarian import functions, get_resource
 
@@ -160,19 +157,23 @@ def add_to_spine(spine, partno):
 
 
 class TOC(object):
-    def __init__(self, name=None, part_number=None):
+    def __init__(self, name=None, part_href=None):
         self.children = []
         self.name = name
-        self.part_number = part_number
+        self.part_href = part_href
         self.sub_number = None
 
-    def add(self, name, part_number, level=0, is_part=True):
+    def add(self, name, part_href, level=0, is_part=True, index=None):
+        assert level == 0 or index is None
         if level > 0 and self.children:
-            return self.children[-1].add(name, part_number, level-1, is_part)
+            return self.children[-1].add(name, part_href, level-1, is_part)
         else:
             t = TOC(name)
-            t.part_number = part_number
-            self.children.append(t)
+            t.part_href = part_href
+            if index is not None:
+                self.children.insert(index, t)
+            else:
+                self.children.append(t)
             if not is_part:
                 t.sub_number = len(self.children) + 1
                 return t.sub_number
@@ -189,7 +190,13 @@ class TOC(object):
         else:
             return 0
 
-    def write_to_xml(self, nav_map, counter):
+    def href(self):
+        """Return the link target of this TOC entry.
+
+        This is part_href, followed by a '#sub<N>' fragment when the entry
+        has a sub_number.
+        """
+        if self.sub_number is None:
+            return self.part_href
+        return self.part_href + '#sub%d' % self.sub_number
+
+    def write_to_xml(self, nav_map, counter=1):
         for child in self.children:
             nav_point = nav_map.makeelement(NCXNS('navPoint'))
             nav_point.set('id', 'NavPoint-%d' % counter)
@@ -202,15 +209,26 @@ class TOC(object):
             nav_point.append(nav_label)
 
             content = nav_map.makeelement(NCXNS('content'))
-            src = 'part%d.html' % child.part_number
-            if child.sub_number is not None:
-                src += '#sub%d' % child.sub_number
-            content.set('src', src)
+            content.set('src', child.href())
             nav_point.append(content)
             nav_map.append(nav_point)
             counter = child.write_to_xml(nav_point, counter + 1)
         return counter
 
+    def html_part(self, depth=0):
+        """Render the entries below this node as HTML link lines.
+
+        Every child becomes a <div>, indented by `depth` em, holding a link
+        to child.href() labelled with child.name; it is followed by the
+        child's own subtree rendered one level deeper.  Names and link
+        targets are escaped, so characters such as '&', '<' or "'" cannot
+        break the markup of the generated page.
+
+        Returns the rendered lines joined with newlines.
+        """
+        # Imported locally so the method does not depend on module imports.
+        from xml.sax.saxutils import escape
+
+        texts = []
+        for child in self.children:
+            # '%s' % value keeps the previous rendering of non-string
+            # values; the href sits in a single-quoted attribute, hence
+            # the additional apostrophe entity.
+            href = escape('%s' % (child.href(),), {"'": "&#39;"})
+            name = escape('%s' % (child.name,))
+            texts.append(
+                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
+                (depth, href, name))
+            texts.append(child.html_part(depth+1))
+        return "\n".join(texts)
+
+    def html(self):
+        """Return the HTML table-of-contents page as a unicode string.
+
+        The 'epub/toc.html' resource is read as UTF-8 and its single
+        placeholder is filled with the output of html_part().
+        """
+        template_path = get_resource('epub/toc.html')
+        with open(template_path) as template_file:
+            template = unicode(template_file.read(), 'utf-8')
+        return template % self.html_part()
+
 
 def used_chars(element):
     """ Lists characters used in an ETree Element """
@@ -250,9 +268,9 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
     toc = TOC()
     for element in chunk_xml[0]:
         if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"):
-            toc.add(node_name(element), chunk_no)
+            toc.add(node_name(element), "part%d.html" % chunk_no)
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
-            subnumber = toc.add(node_name(element), chunk_no, level=1, is_part=False)
+            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
             element.set('sub', str(subnumber))
     if empty:
         if not _empty_html_static:
@@ -268,44 +286,40 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
     return output_html, toc, chars
 
 
-def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False,
+def transform(wldoc, verbose=False,
+              style=None, html_toc=False,
               sample=None, cover=None, flags=None):
     """ produces a EPUB file
 
-    provider: a DocProvider
-    slug: slug of file to process, available by provider
-    output_file: file-like object or path to output file
-    output_dir: path to directory to save output file to; either this or output_file must be present
-    make_dir: writes output to <output_dir>/<author>/<slug>.epub instead of <output_dir>/<slug>.epub
     sample=n: generate sample e-book (with at least n paragraphs)
-    cover: a cover.Cover object
-    flags: less-advertising, images, not-wl
+    cover: a cover.Cover object or True for default
+    flags: less-advertising, without-fonts, images, not-wl
     """
 
-    def transform_file(input_xml, chunk_counter=1, first=True, sample=None):
+    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
         """ processes one input file and proceeds to its children """
 
-        replace_characters(input_xml.getroot())
-
-        children = [child.text for child in input_xml.findall('.//'+DCNS('relation.hasPart'))]
+        replace_characters(wldoc.edoc.getroot())
 
         # every input file will have a TOC entry,
         # pointing to starting chunk
-        toc = TOC(node_name(input_xml.find('.//'+DCNS('title'))), chunk_counter)
+        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(input_xml, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
             chars = used_chars(html_tree.getroot())
             zip.writestr('OPS/title.html',
                  etree.tostring(html_tree, method="html", pretty_print=True))
-        elif children:
+            # add a title page TOC entry
+            toc.add(u"Strona tytułowa", "title.html")
+        elif wldoc.book_info.parts:
             # write title page for every parent
             if sample is not None and sample <= 0:
                 chars = set()
                 html_string = open(get_resource('epub/emptyChunk.html')).read()
             else:
-                html_tree = xslt(input_xml, get_resource('epub/xsltChunkTitle.xsl'))
+                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                 chars = used_chars(html_tree.getroot())
                 html_string = etree.tostring(html_tree, method="html", pretty_print=True)
             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
@@ -313,12 +327,12 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
 
-        if len(input_xml.getroot()) > 1:
+        if len(wldoc.edoc.getroot()) > 1:
             # rdf before style master
-            main_text = input_xml.getroot()[1]
+            main_text = wldoc.edoc.getroot()[1]
         else:
             # rdf in style master
-            main_text = input_xml.getroot()[0]
+            main_text = wldoc.edoc.getroot()[0]
             if main_text.tag == RDFNS('RDF'):
                 main_text = None
 
@@ -339,55 +353,29 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
 
-        if children:
-            for child in children:
-                child_xml = etree.parse(provider.by_uri(child))
-                child_toc, chunk_counter, chunk_chars, sample = transform_file(child_xml, chunk_counter, first=False, sample=sample)
-                toc.append(child_toc)
-                chars = chars.union(chunk_chars)
+        for child in wldoc.parts():
+            child_toc, chunk_counter, chunk_chars, sample = transform_file(
+                child, chunk_counter, first=False, sample=sample)
+            toc.append(child_toc)
+            chars = chars.union(chunk_chars)
 
         return toc, chunk_counter, chars, sample
 
-    # read metadata from the first file
-    if file_path:
-        if slug:
-            raise ValueError('slug or file_path should be specified, not both')
-        f = open(file_path, 'r')
-        input_xml = etree.parse(f)
-        f.close()
-    else:
-        if not slug:
-            raise ValueError('either slug or file_path should be specified')
-        input_xml = etree.parse(provider[slug])
+
+    document = deepcopy(wldoc)
+    del wldoc
 
     if flags:
         for flag in flags:
-            input_xml.getroot().set(flag, 'yes')
-
-    metadata = input_xml.find('.//'+RDFNS('Description'))
-    if metadata is None:
-        raise NoDublinCore('Document has no DublinCore - which is required.')
-    book_info = BookInfo.from_element(input_xml)
-    metadata = etree.ElementTree(metadata)
-
-    # if output to dir, create the file
-    if output_dir is not None:
-        if make_dir:
-            author = unicode(book_info.author)
-            output_dir = os.path.join(output_dir, author)
-            try:
-                os.makedirs(output_dir)
-            except OSError:
-                pass
-        if slug:
-            output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w')
-        else:
-            output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w')
+            document.edoc.getroot().set(flag, 'yes')
 
-    opf = xslt(metadata, get_resource('epub/xsltContent.xsl'))
+    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
     manifest = opf.find('.//' + OPFNS('manifest'))
+    guide = opf.find('.//' + OPFNS('guide'))
     spine = opf.find('.//' + OPFNS('spine'))
 
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
+
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
     # write static elements
@@ -401,15 +389,29 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
                        '<rootfiles><rootfile full-path="OPS/content.opf" ' \
                        'media-type="application/oebps-package+xml" />' \
                        '</rootfiles></container>')
-    zip.write(get_resource('epub/style.css'), os.path.join('OPS', 'style.css'))
     if not flags or 'not-wl' not in flags:
         manifest.append(etree.fromstring(
             '<item id="logo_wolnelektury" href="logo_wolnelektury.png" media-type="image/png" />'))
+        manifest.append(etree.fromstring(
+            '<item id="jedenprocent" href="jedenprocent.png" media-type="image/png" />'))
         zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
+        zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
+
+    if not style:
+        style = get_resource('epub/style.css')
+    zip.write(style, os.path.join('OPS', 'style.css'))
 
     if cover:
+        if cover is True:
+            cover = WLCover
+        if cover.uses_dc_cover:
+            if document.book_info.cover_by:
+                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+            if document.book_info.cover_source:
+                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
+
         cover_file = StringIO()
-        c = cover(book_info.author.readable(), book_info.title)
+        c = cover(document.book_info)
         c.save(cover_file)
         c_name = 'cover.%s' % c.ext()
         zip.writestr(os.path.join('OPS', c_name), cover_file.getvalue())
@@ -424,12 +426,12 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
         manifest.append(etree.fromstring(
             '<item id="cover-image" href="%s" media-type="%s" />' % (c_name, c.mime_type())))
-        spine.insert(0, etree.fromstring('<itemref idref="cover" />'))
+        spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
-        opf.getroot().append(etree.fromstring('<guide><reference href="cover.html" type="cover" title="Okładka"/></guide>'))
+        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 
     if flags and 'images' in flags:
-        for ilustr in input_xml.findall('//ilustr'):
+        for ilustr in document.edoc.findall('//ilustr'):
             src = ilustr.get('src')
             mime = ImageCover(src)().mime_type()
             zip.write(src, os.path.join('OPS', src))
@@ -446,7 +448,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
                 moved.tail = None
                 after.addnext(moved)
     else:
-        for ilustr in input_xml.findall('//ilustr'):
+        for ilustr in document.edoc.findall('//ilustr'):
             ilustr.tag = 'extra'
 
     annotations = etree.Element('annotations')
@@ -455,23 +457,24 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
                                '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
                                '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
                                'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
-                               '<navPoint id="NavPoint-1" playOrder="1"><navLabel>' \
-                               '<text>Strona tytułowa</text></navLabel><content src="title.html" />' \
-                               '</navPoint></navMap></ncx>')
+                               '</navMap></ncx>')
     nav_map = toc_file[-1]
 
-    toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample)
+    if html_toc:
+        manifest.append(etree.fromstring(
+            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
+        spine.append(etree.fromstring(
+            '<itemref idref="html_toc" />'))
+        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
+
+    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 
-    if not toc.children:
-        toc.add(u"Początek utworu", 1)
-    toc_counter = toc.write_to_xml(nav_map, 2)
+    if len(toc.children) < 2:
+        toc.add(u"Początek utworu", "part1.html")
 
     # Last modifications in container files and EPUB creation
     if len(annotations) > 0:
-        nav_map.append(etree.fromstring(
-            '<navPoint id="NavPoint-%(i)d" playOrder="%(i)d" ><navLabel><text>Przypisy</text>'\
-            '</navLabel><content src="annotations.html" /></navPoint>' % {'i': toc_counter}))
-        toc_counter += 1
+        toc.add("Przypisy", "annotations.html")
         manifest.append(etree.fromstring(
             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
         spine.append(etree.fromstring(
@@ -482,44 +485,44 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
         zip.writestr('OPS/annotations.html', etree.tostring(
                             html_tree, method="html", pretty_print=True))
 
-    nav_map.append(etree.fromstring(
-        '<navPoint id="NavPoint-%(i)d" playOrder="%(i)d" ><navLabel><text>Strona redakcyjna</text>'\
-        '</navLabel><content src="last.html" /></navPoint>' % {'i': toc_counter}))
+    toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
-    stopka = input_xml.find('//stopka')
+    stopka = document.edoc.find('//stopka')
     if stopka is not None:
         stopka.tag = 'stopka_'
         replace_by_verse(stopka)
         html_tree = xslt(stopka, get_resource('epub/xsltScheme.xsl'))
     else:
-        html_tree = xslt(input_xml, get_resource('epub/xsltLast.xsl'))
+        html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
     chars.update(used_chars(html_tree.getroot()))
     zip.writestr('OPS/last.html', etree.tostring(
                         html_tree, method="html", pretty_print=True))
 
-    # strip fonts
-    tmpdir = mkdtemp('-librarian-epub')
-    cwd = os.getcwd()
-
-    os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
-    for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
-        optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
-                          get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
-        if verbose:
-            print "Running font-optimizer"
-            subprocess.check_call(optimizer_call)
-        else:
-            subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
-    rmtree(tmpdir)
-    os.chdir(cwd)
+    if not flags or not 'without-fonts' in flags:
+        # strip fonts
+        tmpdir = mkdtemp('-librarian-epub')
+        cwd = os.getcwd()
+
+        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
+        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
+            optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
+                              get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
+            if verbose:
+                print "Running font-optimizer"
+                subprocess.check_call(optimizer_call)
+            else:
+                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="font/ttf" />' % (fname, fname)))
+        rmtree(tmpdir)
+        os.chdir(cwd)
 
     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
-    contents = []
-    title = node_name(etree.ETXPath('.//'+DCNS('title'))(input_xml)[0])
+    title = document.book_info.title
     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
     for st in attributes:
         meta = toc_file.makeelement(NCXNS('meta'))
@@ -529,5 +532,13 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir=
     toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl')))
     toc_file[0][1].set('content', str(toc.depth()))
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
+
+    # write TOC
+    if html_toc:
+        toc.add(u"Spis treści", "toc.html", index=1)
+        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
+    toc.write_to_xml(nav_map)
     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
     zip.close()
+
+    return OutputFile.from_filename(output_file.name)
index 7fb53a3..a4c61c8 100644 (file)
@@ -108,28 +108,24 @@ p
        text-align: left;
 }
 
-.annotation
+.annotation-anchor
 {
        font-style: normal;
        font-weight: normal;
        font-size: 0.875em;
-}
-
-#footnotes .annotation
-{
        display: block;
        float: left;
        width: 2.5em;
        clear: both;
 }
 
-#footnotes div
+.annotation
 {
     margin: 0;
     margin-top: 1.5em;
 }
 
-#footnotes p
+.annotation-body
 {
        margin-left: 2.5em;
        font-size: 0.875em;
@@ -352,13 +348,17 @@ em.author-emphasis
        text-transform: uppercase;
 }
 
-p.info
+.info
 {
        text-align: center;
        margin-bottom: 1em;
 }
+.info div
+{
+    text-align: center;
+}
 
-p.info img
+.info img
 {
        margin: 0;
        margin-left: 2em;
diff --git a/librarian/epub/toc.html b/librarian/epub/toc.html
new file mode 100755 (executable)
index 0000000..69d8724
--- /dev/null
@@ -0,0 +1,11 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <title>WolneLektury.pl</title>
+    </head>
+    <body>
+        <h1>Spis treści</h1>
+        %s
+    </body>
+</html>
index c66730a..f3e6443 100644 (file)
   </xsl:template>
 
   <xsl:template match="pa|pe|pr|pt" mode="przypis">
-    <div xmlns="http://www.w3.org/1999/xhtml">
+    <div xmlns="http://www.w3.org/1999/xhtml" class="annotation">
       <p id="annotation-{@number}" xmlns="http://www.w3.org/1999/xhtml"></p>
-      <a class="annotation" href="part{@part}.html#anchor-{@number}" xmlns="http://www.w3.org/1999/xhtml">
+      <a class="annotation-anchor" href="part{@part}.html#anchor-{@number}" xmlns="http://www.w3.org/1999/xhtml">
         [<xsl:value-of select="@number" />]
       </a>
-      <p xmlns="http://www.w3.org/1999/xhtml">
+      <p class="annotation-body" xmlns="http://www.w3.org/1999/xhtml">
         <xsl:apply-templates />
         <xsl:if test="name()='pa'"> [przypis autorski]</xsl:if>
       </p>
index 65bf808..ef7ae74 100644 (file)
         <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
         <item id="style" href="style.css" media-type="text/css" />
         <item id="titlePage" href="title.html" media-type="application/xhtml+xml" />
-        <item id="DejaVuSerif.ttf" href="DejaVuSerif.ttf" media-type="font/ttf" />
-        <item id="DejaVuSerif-Bold.ttf" href="DejaVuSerif-Bold.ttf" media-type="font/ttf" />
-        <item id="DejaVuSerif-BoldItalic.ttf" href="DejaVuSerif-BoldItalic.ttf" media-type="font/ttf" />
-        <item id="DejaVuSerif-Italic.ttf" href="DejaVuSerif-Italic.ttf" media-type="font/ttf" />
       </manifest>
       <spine toc="toc">
         <itemref idref="titlePage" />
       </spine>
+      <guide>
+        <reference type="text" title="Początek" href="part1.html" />
+      </guide>
     </package>
   </xsl:template>
 
index dd44a30..751f97a 100644 (file)
@@ -9,7 +9,7 @@
   <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
   <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
 
-  <xsl:template match="/">
+  <xsl:template match="utwor">
     <html>
       <head>
         <link rel="stylesheet" href="style.css" type="text/css" />
 
           <xsl:call-template name="editors" />
 
+          <xsl:if test="@data-cover-by">
+            <p class="info">Okładka na podstawie: 
+            <xsl:choose>
+            <xsl:when test="@data-cover-source">
+                <a>
+                <xsl:attribute name="href">
+                  <xsl:value-of select="@data-cover-source" />
+                </xsl:attribute>
+                <xsl:value-of select="@data-cover-by" />
+                </a>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:value-of select="@data-cover-by" />
+            </xsl:otherwise>
+            </xsl:choose>
+            </p>
+          </xsl:if>
+
+          <div class="info">
+          <img src="jedenprocent.png" alt="Logo 1%" />
+          <div>Przekaż 1% podatku na rozwój Wolnych Lektur.</div>
+          <div>Nazwa organizacji: Fundacja Nowoczesna Polska</div>
+          <div>KRS 0000070056</div>
+          </div>
+
           <p class="info">&#160;</p>
           <p class="minor info">
               Plik wygenerowany dnia <span id="file_date"><xsl:value-of select="substring(date:date(), 1, 10)" /></span>.
index 3065cac..395e950 100644 (file)
   <xsl:template match="strofa">
     <div class="stanza" xmlns="http://www.w3.org/1999/xhtml">
       <xsl:apply-templates />
-    </div><div xmlns="http://www.w3.org/1999/xhtml">&#160;</div>
+    </div><div class='stanza-spacer' xmlns="http://www.w3.org/1999/xhtml">&#160;</div>
   </xsl:template>
 
   <xsl:template match="wers_normalny">
   <!--===========================================================-->
 
   <xsl:template match="text()"  >
-    <xsl:value-of select="."/>
+    <xsl:value-of select="." />
   </xsl:template>
 
   <xsl:template match="text()" >
-    <xsl:value-of select="."/>
+    <xsl:value-of select="." />
   </xsl:template>
 
 </xsl:stylesheet>
index 5f832e3..39e5a01 100644 (file)
@@ -5,12 +5,10 @@
 #
 import os
 import cStringIO
-import re
 import copy
 
 from lxml import etree
-from librarian.parser import WLDocument
-from librarian import XHTMLNS, ParseError
+from librarian import XHTMLNS, ParseError, OutputFile
 from librarian import functions
 
 from lxml.etree import XMLSyntaxError, XSLTApplyError
@@ -30,9 +28,8 @@ def get_stylesheet(name):
 def html_has_content(text):
     return etree.ETXPath('//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % {'ns': str(XHTMLNS)})(text)
 
-def transform(input, output_filename=None, is_file=True, \
-    parse_dublincore=True, stylesheet='legacy', options={}, flags=None):
-    """Transforms file input_filename in XML to output_filename in XHTML.
+def transform(wldoc, stylesheet='legacy', options=None, flags=None):
+    """Transforms the WL document to XHTML.
 
     If output_filename is None, returns an XML,
     otherwise returns True if file has been written,False if it hasn't.
@@ -43,12 +40,9 @@ def transform(input, output_filename=None, is_file=True, \
         style_filename = get_stylesheet(stylesheet)
         style = etree.parse(style_filename)
 
-        if is_file:
-            document = WLDocument.from_file(input, True, \
-                parse_dublincore=parse_dublincore)
-        else:
-            document = WLDocument.from_string(input, True, \
-                parse_dublincore=parse_dublincore)
+        document = copy.deepcopy(wldoc)
+        del wldoc
+        document.swap_endlines()
 
         if flags:
             for flag in flags:
@@ -56,6 +50,8 @@ def transform(input, output_filename=None, is_file=True, \
 
         document.clean_ed_note()
 
+        if not options:
+            options = {}
         result = document.transform(style, **options)
         del document # no longer needed large object :)
 
@@ -63,16 +59,10 @@ def transform(input, output_filename=None, is_file=True, \
             add_anchors(result.getroot())
             add_table_of_contents(result.getroot())
 
-            if output_filename is not None:
-                result.write(output_filename, method='html', xml_declaration=False, pretty_print=True, encoding='utf-8')
-            else:
-                return result
-            return True
+            return OutputFile.from_string(etree.tostring(result, method='html',
+                xml_declaration=False, pretty_print=True, encoding='utf-8'))
         else:
-            if output_filename is not None:
-                return False
-            else:
-                return "<empty />"
+            return None
     except KeyError:
         raise ValueError("'%s' is not a valid stylesheet.")
     except (XMLSyntaxError, XSLTApplyError), e:
@@ -238,10 +228,12 @@ def add_table_of_contents(root):
             if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
                 continue
 
+            element_text = etree.tostring(element, method='text',
+                    encoding=unicode).strip()
             if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
-                sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), []))
+                sections[-1][3].append((counter, element.tag, element_text, []))
             else:
-                sections.append((counter, element.tag, ''.join(element.xpath('text()')), []))
+                sections.append((counter, element.tag, element_text, []))
             add_anchor(element, "s%d" % counter, with_link=False)
             counter += 1
 
@@ -263,3 +255,17 @@ def add_table_of_contents(root):
 
     root.insert(0, toc)
 
+
+def extract_annotations(html_path):
+    """Yield an (anchor, text, html) tuple for each footnote in an HTML file.
+
+    The file is parsed as UTF-8 HTML.  Every direct <div> child of the
+    element with id "footnotes" is one footnote: `anchor` is the `name`
+    attribute of its named <a> child, `text` and `html` are UTF-8
+    serializations of what remains once the first two child elements are
+    removed (`text` is also stripped).  Nothing is yielded when the
+    document has no "footnotes" element.
+    """
+    html_parser = etree.HTMLParser(encoding='utf-8')
+    document = etree.parse(html_path, html_parser)
+    container = document.find('//*[@id="footnotes"]')
+    if container is None:
+        return
+    for note in container.findall('div'):
+        # The anchor must be read first: it may be among the children
+        # dropped on the next line.
+        anchor_name = note.find('a[@name]').get('name')
+        del note[:2]
+        as_text = etree.tostring(note, method='text', encoding='utf-8')
+        as_html = etree.tostring(note, method='html', encoding='utf-8')
+        yield anchor_name, as_text.strip(), as_html
+
diff --git a/librarian/mobi.py b/librarian/mobi.py
new file mode 100755 (executable)
index 0000000..1e7569b
--- /dev/null
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from copy import deepcopy
+import os
+import subprocess
+from tempfile import NamedTemporaryFile
+
+from librarian import OutputFile
+from librarian.cover import WLCover
+from librarian import get_resource
+
+
+def transform(wldoc, verbose=False,
+              sample=None, cover=None, flags=None):
+    """ produces a MOBI file
+
+    wldoc: a WLDocument
+    verbose: if true, ebook-convert output is not silenced
+    sample=n: generate sample e-book (with at least n paragraphs)
+    cover: a cover class, called with the book info; WLCover if not given
+    flags: less-advertising ('without-fonts' is always appended)
+
+    The document is first rendered to EPUB and then converted with the
+    external `ebook-convert` program.  Returns an OutputFile created from
+    the resulting temporary .mobi file; subprocess.CalledProcessError is
+    raised if the conversion fails.
+    """
+
+    document = deepcopy(wldoc)
+    del wldoc
+    book_info = document.book_info
+
+    # provide a cover by default
+    if not cover:
+        cover = WLCover
+
+    cover_file = NamedTemporaryFile(suffix='.png', delete=False)
+    try:
+        c = cover(book_info)
+        c.save(cover_file)
+        # ebook-convert reopens the cover by name, so everything has to be
+        # flushed to disk before it runs.
+        cover_file.close()
+
+        if cover.uses_dc_cover:
+            if document.book_info.cover_by:
+                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+            if document.book_info.cover_source:
+                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
+
+        # Copy the flags so the caller's sequence is never modified.
+        flags = list(flags or []) + ['without-fonts']
+        # NOTE(review): the intermediate EPUB file is not removed here;
+        # confirm whether OutputFile or the caller is expected to do it.
+        epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
+                flags=flags, style=get_resource('mobi/style.css'))
+
+        # Only the name is needed: ebook-convert writes the file itself.
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi', delete=False)
+        output_file.close()
+
+        command = ['ebook-convert', epub.get_filename(), output_file.name,
+                '--no-inline-toc', '--cover=%s' % cover_file.name]
+        if verbose:
+            subprocess.check_call(command)
+        else:
+            # os.devnull is portable and the handle is closed afterwards.
+            with open(os.devnull, 'w') as devnull:
+                subprocess.check_call(command, stdout=devnull, stderr=devnull)
+
+        return OutputFile.from_filename(output_file.name)
+    finally:
+        # Remove the temporary cover even if a step above failed.
+        cover_file.close()
+        os.unlink(cover_file.name)
\ No newline at end of file
diff --git a/librarian/mobi/style.css b/librarian/mobi/style.css
new file mode 100755 (executable)
index 0000000..99792e5
--- /dev/null
@@ -0,0 +1,306 @@
+/* =================================================== */
+/* = Common elements: headings, paragraphs and lines = */
+/* =================================================== */
+
+
+.h2
+{
+       font-size: 2em;
+       margin: 0;
+       margin-top: 1.5em;
+       font-weight: bold;
+       line-height: 1.5em;
+}
+
+.h3
+{
+    text-align:left;
+    font-size: 1.5em;
+    margin-top: 1.5em;
+    font-weight: normal;
+    line-height: 1.5em;
+}
+
+.h4
+{
+       font-size: 1em;
+    margin: 0;
+    margin-top: 1em;
+       line-height: 1.5em;
+}
+
+.paragraph
+{
+    margin-top: 0;
+}
+
+/* ======================== */
+/* = Footnotes and themes = */
+/* ======================== */
+
+.annotation-anchor
+{
+       font-style: normal;
+       font-weight: normal;
+       font-size: 0.875em;
+       display: block;
+       float: left;
+       width: 2.5em;
+       clear: both;
+}
+
+.annotation
+{
+    margin: 0;
+    margin-top: 1.5em;
+}
+
+.annotation-body
+{
+       margin-left: 2.5em;
+       font-size: 0.875em;
+}
+
+.block
+{
+       font-size: 0.875em;
+       padding: 1em;
+}
+
+/* ============= */
+/* = Numbering = */
+/* ============= */
+
+.anchor
+{
+       margin: -0.25em -0.5em;
+       color: #777;
+       font-size: 0.875em;
+       width: 2em;
+       text-align: center;
+       padding: 0.25em 0.5em;
+       line-height: 1.5em;
+}
+
+/* =================== */
+/* = Custom elements = */
+/* =================== */
+
+.title-page
+{
+    margin-top: 1.5em;
+}
+
+.title
+{
+    font-size: 3em;
+    text-align: center;
+    line-height: 1.5em;
+    font-weight: bold;
+}
+
+.author
+{
+    margin: 0;
+    text-align: center;
+    font-weight: bold;
+
+    font-size: 1.5em;
+    line-height: 1.5em;
+    margin-bottom: 0.25em;
+}
+
+.intitle
+{
+    margin: 0;
+    text-align: center;
+    font-weight: bold;
+
+    font-size: 1.5em;
+    line-height: 1.5em;
+    margin-bottom: 0.25em;
+}
+
+.insubtitle
+{
+    margin: 0;
+    text-align: center;
+    font-weight: bold;
+
+    font-size: 1em;
+    line-height: 1.5em;
+    margin-bottom: 0.25em;
+}
+
+.collection
+{
+    margin: 0;
+    text-align: center;
+    font-weight: bold;
+
+       font-size: 1.125em;
+       line-height: 1.5em;
+       margin-bottom: -0.25em;
+}
+
+.subtitle
+{
+    margin: 0;
+    text-align: center;
+    font-weight: bold;
+
+       font-size: 1.5em;
+       line-height: 1.5em;
+       margin-top: -0.25em;
+}
+
+div.didaskalia
+{
+       font-style: italic;
+       margin-top: 0.5em;
+       margin-left: 1.5em;
+}
+
+div.kwestia
+{
+       margin-top: 0.5em;
+}
+
+.stanza
+{
+    margin-bottom: 1em;
+}
+
+.stanza-spacer
+{
+    display: none;
+}
+
+.motto
+{
+       text-align: justify;
+       font-style: italic;
+       margin-top: 1.5em;
+}
+
+.motto_podpis
+{
+       font-size: 0.875em;
+       text-align: right;
+}
+
+div.fragment
+{
+       border-bottom: 0.1em solid #999;
+       padding-bottom: 1.5em;
+}
+
+div.note
+{
+       text-align: right;
+       font-style: italic;
+}
+div.note div.paragraph
+{
+    text-align: right;
+    font-style: italic;
+}
+div.dedication
+{
+    text-align: right;
+    font-style: italic;
+}
+div.dedication div.paragraph /* paragraphs nested in a dedication */
+{
+    text-align: right;
+    font-style: italic;
+}
+
+
+hr.spacer
+{
+       height: 3em;
+       visibility: hidden;
+}
+
+hr.spacer-line
+{
+       margin: 0;
+       margin-top: 1.5em;
+       margin-bottom: 1.5em;
+       border: none;
+       border-bottom: 0.1em solid #000;
+}
+
+.spacer-asterisk
+{
+       padding: 0;
+    margin: 0;
+    margin-top: 1.5em;
+    margin-bottom: 1.5em;
+       text-align: center;
+}
+
+div.person-list ol
+{
+       list-style: none;
+       padding: 0;
+       padding-left: 1.5em;
+}
+
+.place-and-time
+{
+       font-style: italic;
+}
+
+em.math
+{
+       font-style: italic;
+}
+em.foreign-word
+{
+    font-style: italic;
+}
+em.book-title
+{
+    font-style: italic;
+}
+em.didaskalia
+{
+    font-style: italic;
+}
+
+em.author-emphasis
+{
+       letter-spacing: 0.1em;
+}
+
+.person-list em.person
+{
+       font-style: normal;
+       text-transform: uppercase;
+}
+
+.info
+{
+       text-align: center;
+       margin-bottom: 1em;
+}
+.info div
+{
+    text-align: center;
+}
+
+.info img
+{
+       margin: 0;
+       margin-left: 2em;
+       margin-right: 2em;
+}
+
+p.minor {
+    font-size: 0.75em;
+}
+p.footer {
+    margin-top: 2em;
+}
index 2c543da..9a93e56 100644 (file)
@@ -6,8 +6,8 @@
 import os
 from copy import deepcopy
 from lxml import etree
-from librarian import epub, pdf, DirDocProvider, ParseError, cover
-from librarian.dcparser import BookInfo
+from librarian import pdf, epub, DirDocProvider, ParseError, cover
+from librarian.parser import WLDocument
 
 
 class Packager(object):
@@ -26,8 +26,11 @@ class Packager(object):
             except:
                 pass
         outfile = os.path.join(output_dir, slug + '.' + cls.ext)
-        cls.converter.transform(provider, file_path=main_input, output_file=outfile,
+
+        doc = WLDocument.from_file(main_input, provider=provider)
+        output_file = cls.converter.transform(doc,
                 cover=cls.cover, flags=cls.flags)
+        doc.save_output_file(output_file, output_path=outfile)
 
 
     @classmethod
@@ -84,7 +87,6 @@ class VirtualoEpubPackager(Packager):
         """ truncates text to at most `limit' bytes in utf-8 """
         if text is None:
             return text
-        orig_text = text
         if len(text.encode('utf-8')) > limit:
             newlimit = limit - 3
             while len(text.encode('utf-8')) > newlimit:
@@ -122,7 +124,8 @@ class VirtualoEpubPackager(Packager):
                 outfile_dir = os.path.join(output_dir, slug)
                 os.makedirs(os.path.join(output_dir, slug))
 
-                info = BookInfo.from_file(main_input)
+                doc = WLDocument.from_file(main_input, provider=provider)
+                info = doc.book_info
 
                 product_elem = deepcopy(product)
                 product_elem[0].text = cls.utf_trunc(slug, 100)
@@ -133,14 +136,13 @@ class VirtualoEpubPackager(Packager):
                 product_elem[4][0][1].text = cls.utf_trunc(info.author.last_name, 100)
                 xml.append(product_elem)
 
-                cover.VirtualoCover(
-                    u' '.join(info.author.first_names + (info.author.last_name,)),
-                    info.title
-                    ).save(os.path.join(outfile_dir, slug+'.jpg'))
+                cover.VirtualoCover(info).save(os.path.join(outfile_dir, slug+'.jpg'))
                 outfile = os.path.join(outfile_dir, '1.epub')
                 outfile_sample = os.path.join(outfile_dir, '1.sample.epub')
-                epub.transform(provider, file_path=main_input, output_file=outfile)
-                epub.transform(provider, file_path=main_input, output_file=outfile_sample, sample=25)
+                doc.save_output_file(epub.transform(doc),
+                        output_path=outfile)
+                doc.save_output_file(epub.transform(doc, sample=25), 
+                        output_path=outfile_sample)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': main_input,
index afc4f1a..2ece72f 100644 (file)
@@ -3,7 +3,7 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from librarian import ValidationError, NoDublinCore,  ParseError
+from librarian import ValidationError, NoDublinCore,  ParseError, NoProvider
 from librarian import RDFNS
 from librarian import dcparser
 
@@ -11,14 +11,17 @@ from xml.parsers.expat import ExpatError
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
 
+import os
 import re
 from StringIO import StringIO
 
 class WLDocument(object):
-    LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
+    LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
+    provider = None
 
-    def __init__(self, edoc, parse_dublincore=True):
+    def __init__(self, edoc, parse_dublincore=True, provider=None, strict=False):
         self.edoc = edoc
+        self.provider = provider
 
         root_elem = edoc.getroot()
 
@@ -33,7 +36,8 @@ class WLDocument(object):
             if self.rdf_elem is None:
                 raise NoDublinCore('Document has no DublinCore - which is required.')
 
-            self.book_info = dcparser.BookInfo.from_element(self.rdf_elem)
+            self.book_info = dcparser.BookInfo.from_element(
+                    self.rdf_elem, strict=strict)
         else:
             self.book_info = None
 
@@ -42,7 +46,7 @@ class WLDocument(object):
         return cls.from_file(StringIO(xml), *args, **kwargs)
 
     @classmethod
-    def from_file(cls, xmlfile, swap_endlines=False, parse_dublincore=True):
+    def from_file(cls, xmlfile, parse_dublincore=True, provider=None):
 
         # first, prepare for parsing
         if isinstance(xmlfile, basestring):
@@ -63,20 +67,17 @@ class WLDocument(object):
             parser = etree.XMLParser(remove_blank_text=False)
             tree = etree.parse(StringIO(data.encode('utf-8')), parser)
 
-            if swap_endlines:
-                cls.swap_endlines(tree)
-
-            return cls(tree, parse_dublincore=parse_dublincore)
+            return cls(tree, parse_dublincore=parse_dublincore, provider=provider)
         except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
             raise ParseError(e)
 
-    @classmethod
-    def swap_endlines(cls, tree):
+    def swap_endlines(self):
+        """Converts line breaks in stanzas into <br/> tags."""
         # only swap inside stanzas
-        for elem in tree.iter('strofa'):
+        for elem in self.edoc.iter('strofa'):
             for child in list(elem):
                 if child.tail:
-                    chunks = cls.LINE_SWAP_EXPR.split(child.tail)
+                    chunks = self.LINE_SWAP_EXPR.split(child.tail)
                     ins_index = elem.index(child) + 1
                     while len(chunks) > 1:
                         ins = etree.Element('br')
@@ -84,13 +85,22 @@ class WLDocument(object):
                         elem.insert(ins_index, ins)
                     child.tail = chunks.pop(0)
             if elem.text:
-                chunks = cls.LINE_SWAP_EXPR.split(elem.text)
+                chunks = self.LINE_SWAP_EXPR.split(elem.text)
                 while len(chunks) > 1:
                     ins = etree.Element('br')
                     ins.tail = chunks.pop()
                     elem.insert(0, ins)
                 elem.text = chunks.pop(0)
 
+    def parts(self):
+        """Yields a document for each part URI listed in book_info.parts."""
+        if self.provider is None:
+            raise NoProvider('No document provider supplied.')
+        if self.book_info is None:
+            raise NoDublinCore('No Dublin Core in document.')
+        for uri in self.book_info.parts:
+            yield self.from_file(self.provider.by_uri(uri), provider=self.provider)
+
     def chunk(self, path):
         # convert the path to XPath
         expr = self.path_to_xpath(path)
@@ -152,3 +162,41 @@ class WLDocument(object):
             node.clear()
             node.tag = 'span'
             node.tail = tail
+
+    # Converters: thin wrappers around each format module's transform().
+    # Imports are local -- presumably to avoid circular imports; confirm.
+    def as_html(self, *args, **kwargs):
+        from librarian import html
+        return html.transform(self, *args, **kwargs)
+
+    def as_text(self, *args, **kwargs):
+        from librarian import text
+        return text.transform(self, *args, **kwargs)
+
+    def as_epub(self, *args, **kwargs):
+        from librarian import epub
+        return epub.transform(self, *args, **kwargs)
+
+    def as_pdf(self, *args, **kwargs):
+        from librarian import pdf
+        return pdf.transform(self, *args, **kwargs)
+
+    def as_mobi(self, *args, **kwargs):
+        from librarian import mobi
+        return mobi.transform(self, *args, **kwargs)
+
+    def save_output_file(self, output_file, output_path=None,
+            output_dir_path=None, make_author_dir=False, ext=None):
+        """Saves output_file at output_path or, when output_dir_path is given,
+        as <output_dir_path>/[<author>/]<slug>[.<ext>]."""
+        save_path = output_path
+        if output_dir_path:
+            components = [output_dir_path]
+            if make_author_dir:
+                components.append(unicode(self.book_info.author).encode('utf-8'))
+            components.append(self.book_info.uri.slug)
+            save_path = os.path.join(*components)
+            if ext:
+                save_path += '.%s' % ext
+
+        output_file.save_as(save_path)
index 476fbee..bcf8d9a 100644 (file)
@@ -8,21 +8,20 @@ import os
 import os.path
 import shutil
 from StringIO import StringIO
-from tempfile import mkdtemp
+from tempfile import mkdtemp, NamedTemporaryFile
 import re
 from copy import deepcopy
 from subprocess import call, PIPE
 
-import sys
-
 from Texml.processor import process
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
 
 from librarian.dcparser import Person
 from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource
+from librarian import ParseError, DCNS, get_resource, OutputFile
 from librarian import functions
+from librarian.cover import WLCover
 
 
 functions.reg_substitute_entities()
@@ -35,6 +34,13 @@ STYLESHEETS = {
     'wl2tex': 'pdf/wl2tex.xslt',
 }
 
+CUSTOMIZATIONS = [
+    'nofootnotes',
+    'nothemes',
+    'onehalfleading',
+    'doubleleading',
+    'nowlfont',
+    ]
 
 def insert_tags(doc, split_re, tagname, exclude=None):
     """ inserts <tagname> for every occurence of `split_re' in text nodes in the `doc' tree
@@ -152,7 +158,7 @@ def package_available(package, args='', verbose=False):
     fpath = os.path.join(tempdir, 'test.tex')
     f = open(fpath, 'w')
     f.write(r"""
-        \documentclass{book}
+        \documentclass{wl}
         \usepackage[%s]{%s}
         \begin{document}
         \end{document}
@@ -166,38 +172,33 @@ def package_available(package, args='', verbose=False):
     return p == 0
 
 
-def transform(provider, slug=None, file_path=None,
-              output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None, morefloats=None,
-              cover=None, flags=None):
+def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
+              cover=None, flags=None, customizations=None):
     """ produces a PDF file with XeLaTeX
 
-    provider: a DocProvider
-    slug: slug of file to process, available by provider
-    file_path can be provided instead of a slug
-    output_file: file-like object or path to output file
-    output_dir: path to directory to save output file to; either this or output_file must be present
-    make_dir: writes output to <output_dir>/<author>/<slug>.pdf istead of <output_dir>/<slug>.pdf
+    wldoc: a WLDocument
     verbose: prints all output from LaTeX
     save_tex: path to save the intermediary LaTeX file to
     morefloats (old/new/none): force specific morefloats
     cover: a cover.Cover object
     flags: less-advertising,
+    customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
     """
 
     # Parse XSLT
     try:
-        if file_path:
-            if slug:
-                raise ValueError('slug or file_path should be specified, not both')
-            document = load_including_children(provider, file_path=file_path)
-        else:
-            if not slug:
-                raise ValueError('either slug or file_path should be specified')
-            document = load_including_children(provider, slug=slug)
+        document = load_including_children(wldoc)
 
         if cover:
+            if cover is True:
+                cover = WLCover
             document.edoc.getroot().set('data-cover-width', str(cover.width))
             document.edoc.getroot().set('data-cover-height', str(cover.height))
+            if cover.uses_dc_cover:
+                if document.book_info.cover_by:
+                    document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+                if document.book_info.cover_source:
+                    document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
         if flags:
             for flag in flags:
                 document.edoc.getroot().set('flag-' + flag, 'yes')
@@ -208,6 +209,10 @@ def transform(provider, slug=None, file_path=None,
         elif package_available('morefloats', 'maxfloats=19'):
             document.edoc.getroot().set('morefloats', 'new')
 
+        # add customizations
+        if customizations is not None:
+            document.edoc.getroot().set('customizations', u','.join(customizations))
+
         # hack the tree
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
@@ -215,21 +220,17 @@ def transform(provider, slug=None, file_path=None,
         substitute_hyphens(document.edoc)
         fix_hanging(document.edoc)
 
-        # find output dir
-        if make_dir and output_dir is not None:
-            author = unicode(document.book_info.author)
-            output_dir = os.path.join(output_dir, author)
-
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
         style = etree.parse(style_filename)
+
         texml = document.transform(style)
 
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
 
         if cover:
-            c = cover(document.book_info.author.readable(), document.book_info.title)
+            c = cover(document.book_info)
             with open(os.path.join(temp, 'cover.png'), 'w') as f:
                 c.save(f)
 
@@ -245,7 +246,7 @@ def transform(provider, slug=None, file_path=None,
             shutil.copy(tex_path, save_tex)
 
         # LaTeX -> PDF
-        shutil.copy(get_resource('pdf/wl.sty'), temp)
+        shutil.copy(get_resource('pdf/wl.cls'), temp)
         shutil.copy(get_resource('res/wl-logo.png'), temp)
 
         cwd = os.getcwd()
@@ -260,56 +261,38 @@ def transform(provider, slug=None, file_path=None,
 
         os.chdir(cwd)
 
-        # save the PDF
+        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
         pdf_path = os.path.join(temp, 'doc.pdf')
-        if output_dir is not None:
-            try:
-                os.makedirs(output_dir)
-            except OSError:
-                pass
-            if slug:
-                output_path = os.path.join(output_dir, '%s.pdf' % slug)
-            else:
-                output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf')
-            shutil.move(pdf_path, output_path)
-        else:
-            if hasattr(output_file, 'write'):
-                # file-like object
-                with open(pdf_path) as f:
-                    output_file.write(f.read())
-                output_file.close()
-            else:
-                # path to output file
-                shutil.copy(pdf_path, output_file)
+        shutil.move(pdf_path, output_file.name)
         shutil.rmtree(temp)
+        return OutputFile.from_filename(output_file.name)
 
     except (XMLSyntaxError, XSLTApplyError), e:
         raise ParseError(e)
 
 
-def load_including_children(provider, slug=None, uri=None, file_path=None):
-    """ makes one big xml file with children inserted at end
-    either slug or uri must be provided
+def load_including_children(wldoc=None, provider=None, uri=None):
+    """ Makes one big xml file with children inserted at end.
+    
+    Either wldoc or provider and URI must be provided.
     """
 
-    if uri:
+    if uri and provider:
         f = provider.by_uri(uri)
-    elif slug:
-        f = provider[slug]
-    elif file_path:
-        f = open(file_path, 'r')
+        text = f.read().decode('utf-8')
+        f.close()
+    elif wldoc is not None:
+        text = etree.tostring(wldoc.edoc, encoding=unicode)
+        provider = wldoc.provider
     else:
-        raise ValueError('Neither slug, URI nor file path provided for a book.')
+        raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
 
-    text = f.read().decode('utf-8')
     text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
 
-    document = WLDocument.from_string(text, True,
-        parse_dublincore=True)
+    document = WLDocument.from_string(text, parse_dublincore=True)
+    document.swap_endlines()
 
-    f.close()
     for child_uri in document.book_info.parts:
-        print child_uri
-        child = load_including_children(provider, uri=child_uri)
+        child = load_including_children(provider=provider, uri=child_uri)
         document.edoc.getroot().append(child.edoc.getroot())
     return document
diff --git a/librarian/pdf/wl.cls b/librarian/pdf/wl.cls
new file mode 100644 (file)
index 0000000..c9305ca
--- /dev/null
@@ -0,0 +1,490 @@
+% -*- coding: utf-8 -*-
+\NeedsTeXFormat{LaTeX2e}
+\ProvidesClass{wl}[2011/11/28 wolnelektury.pl book style]
+
+% PDF customizations
+%
+% nofootnotes - disable generation of footnotes
+% nothemes - disable generation of themes
+% onehalfleading - leading of 1.5 (interlinia)
+% doubleleading - double leading (interlinia)
+% a4paper,... - paper size as required by LaTeX
+% nowlfont - don't use customized WL font
+
+\RequirePackage{setspace}
+\RequirePackage{type1cm}
+\DeclareOption{13pt}{%
+\AtEndOfClass{%
+% font size definitions, similar to ones in /usr/share/texmf-texlive/tex/latex/base/
+\renewcommand\normalsize{%
+   \@setfontsize\normalsize{13pt}{14.5pt}%
+   \abovedisplayskip 12\p@ \@plus3\p@ \@minus7\p@
+   \abovedisplayshortskip \z@ \@plus3\p@
+   \belowdisplayshortskip 6.5\p@ \@plus3.5\p@ \@minus3\p@
+   \belowdisplayskip \abovedisplayskip
+   \let\@listi\@listI}\normalsize%
+\renewcommand\footnotesize{%
+   \@setfontsize\footnotesize\@xpt\@xiipt
+   \abovedisplayskip 10\p@ \@plus2\p@ \@minus5\p@
+   \abovedisplayshortskip \z@ \@plus3\p@
+   \belowdisplayshortskip 6\p@ \@plus3\p@ \@minus3\p@
+   \def\@listi{\leftmargin\leftmargini
+               \topsep 6\p@ \@plus2\p@ \@minus2\p@
+               \parsep 3\p@ \@plus2\p@ \@minus\p@
+               \itemsep \parsep}%
+   \belowdisplayskip \abovedisplayskip
+}%
+}%
+}
+
+%% \DeclareOption{14pt}{\renewcommand{\normalsize}{\AtEndOfClass{\fontsize{14}{17}\selectfont}}}
+
+\DeclareOption{doubleleading}{\AtBeginDocument{\doublespacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}}
+\DeclareOption{onehalfleading}{\AtBeginDocument{\onehalfspacing}}%\setlength{\leading}{1em plus 0.5ex minus 0.2ex}}
+
+%% This does not really work, since dvipdfm(x) will use its configuration in /etc/texmf/dvipdfm(x) and force a global paper size setting.
+\DeclareOption{a5paper}{%
+      \setlength{\paperheight}{210mm}%
+      \setlength{\paperwidth}{148mm}}
+
+
+\newif\ifshowfootnotes \showfootnotestrue
+\DeclareOption{nofootnotes}{\showfootnotesfalse}
+
+\newif\ifshowthemes \showthemestrue
+\DeclareOption{nothemes}{\showthemesfalse}
+
+\newif\ifenablewlfont \enablewlfonttrue
+\DeclareOption{nowlfont}{\enablewlfontfalse}
+
+\DeclareOption*{\PassOptionsToClass{\CurrentOption}{book}}
+\ProcessOptions\relax
+\LoadClass[a4paper,oneside]{book}
+
+
+\usepackage{trace}
+
+\usepackage[MeX]{polski}
+
+\usepackage[xetex]{graphicx}
+\usepackage{fontspec}
+\usepackage{xunicode}
+\usepackage{xltxtra}
+
+\usepackage[overload]{textcase}
+\usepackage{scalefnt}
+\usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref}
+
+\ifenablewlfont
+\setmainfont [
+%ExternalLocation,
+UprightFont = JunicodeWL-Regular,
+ItalicFont = JunicodeWL-Italic,
+BoldFont = JunicodeWL-Regular,
+BoldItalicFont = JunicodeWL-Italic,
+SmallCapsFont = JunicodeWL-Regular,
+SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}},
+Numbers=OldStyle,
+Scale=1.04,
+LetterSpace=-1.0
+] {JunicodeWL}
+
+\newfontfamily\alien[
+SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}},
+Numbers=OldStyle,
+Scale=0.85,
+LetterSpace=-1.0
+] {DejaVu Serif}
+
+
+\defaultfontfeatures{
+SizeFeatures={
+  {Size={-10.5}, FakeStretch=1.02, LetterSpace=2.0 },
+  {Size={10.5-12}, FakeStretch=2.00, LetterSpace=0.0 },
+  {Size={12-}, FakeStretch=0.98, LetterSpace=-2.0 }
+}
+}
+
+\renewcommand{\textsc}[1]{%
+{\addfontfeature{
+SizeFeatures={
+  {Size={-10.5}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 },
+  {Size={10.5-12}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 },
+  {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 }
+},
+Letters={SmallCaps,UppercaseSmallCaps}
+}
+#1}
+}
+\fi% enablewlfont
+
+%{\itshape JunicodeWL-Italic.ttf }
+%{\bfseries Junicode-Bold.ttf }
+%{\bfseries\itshape Junicode-BoldItalic.ttf }
+
+\pagestyle{plain}
+\usepackage{fancyhdr}
+
+\makeatletter
+
+% bottom figure below footnotes
+\usepackage{fnpos}
+\makeFNabove
+
+\usepackage{color}
+\definecolor{theme}{gray}{.3}
+
+\setlength{\marginparsep}{2em}
+\setlength{\marginparwidth}{8.5em}
+\setlength{\oddsidemargin}{0pt}
+\setlength{\voffset}{0pt}
+\setlength{\topmargin}{0pt}
+\setlength{\headheight}{0pt}
+\setlength{\headsep}{0pt}
+\setlength{\textheight}{24cm}
+
+\pagestyle{fancy}
+\fancyhf{}
+\renewcommand{\headrulewidth}{0pt}
+\renewcommand{\footrulewidth}{0pt}
+\lfoot{{\footnotesize \textsc{\@author} \emph{\@title}}}
+\cfoot{}
+\rfoot{{\footnotesize \thepage}}
+
+\clubpenalty=100000
+\widowpenalty=100000
+
+
+% see http://osdir.com/ml/tex.xetex/2005-10/msg00003.html
+\newsavebox{\ximagebox}\newlength{\ximageheight}
+\newsavebox{\xglyphbox}\newlength{\xglyphheight}
+\newcommand{\xbox}[1]
+{\savebox{\ximagebox}{#1}\settoheight{\ximageheight}{\usebox {\ximagebox}}%
+\savebox{\xglyphbox}{\char32}\settoheight{\xglyphheight}{\usebox {\xglyphbox}}%
+\raisebox{\ximageheight}[0pt][0pt]{%\raisebox{-\xglyphheight}[0pt] [0pt]{%
+\makebox[0pt][l]{\usebox{\xglyphbox}}}%}%
+\usebox{\ximagebox}%
+\raisebox{0pt}[0pt][0pt]{\makebox[0pt][r]{\usebox{\xglyphbox}}}}
+
+\newcommand{\makecover}[2]{
+        \pdfpagewidth=#1
+        \pdfpageheight=#2
+
+        \thispagestyle{empty}
+        \newlength{\PictHOffset}
+        \newlength{\PictVOffset}
+        \setlength{\PictHOffset}{1in}
+        \addtolength{\PictHOffset}{\hoffset}
+        \addtolength{\PictHOffset}{\oddsidemargin}
+
+        \setlength{\PictVOffset}{1in}
+        \addtolength{\PictVOffset}{\voffset}
+        \addtolength{\PictVOffset}{\topmargin}
+        \addtolength{\PictVOffset}{\headheight}
+        \addtolength{\PictVOffset}{\headsep}
+        \addtolength{\PictVOffset}{\topskip}
+        \addtolength{\PictVOffset}{-\pdfpageheight}
+
+        \noindent\hspace*{-\PictHOffset}%
+        \raisebox{\PictVOffset}[0pt][0pt]{\makebox[0pt][l]{%
+            \includegraphics[height=\pdfpageheight,width=\pdfpagewidth]{cover.png}}}
+        \clearpage
+
+        \setlength{\pdfpagewidth}{210mm}
+        \setlength{\pdfpageheight}{297mm}
+}
+
+
+\renewcommand{\maketitle}{
+    {
+    \thispagestyle{empty}
+    \footnotesize
+    \color{theme}
+
+    \noindent \begin{minipage}[t]{.35\textwidth}\vspace{0pt}
+        \href{http://www.wolnelektury.pl}{\xbox{\includegraphics[width=\textwidth]{wl-logo.png}}}
+    \end{minipage}
+    \begin{minipage}[t]{.65\textwidth}\vspace{0pt}
+
+    \ifflaglessadvertising
+    \else
+        \href{\bookurl}{Ta lektura}, podobnie jak tysiące innych, jest dostępna on-line na stronie
+        \href{http://www.wolnelektury.pl/}{wolnelektury.pl}.
+        \vspace{.5em}
+    \fi
+
+    Utwór opracowany został w ramach projektu \href{http://www.wolnelektury.pl/}{Wolne Lektury}
+    przez \href{http://nowoczesnapolska.org.pl}{fundację Nowoczesna Polska}.
+
+    \end{minipage}
+    \noindent \rule{\linewidth}{0.4pt}
+
+    \vspace{.6em}
+    \color{black}
+    }
+}
+
+\newcommand{\editorialsection}{
+  \begin{figure}[b!]
+  {
+    \footnotesize
+    \color{theme}
+    \noindent \rule{\linewidth}{0.4pt}
+
+    \rightsinfo
+    \vspace{.6em}
+
+    Źródło: \href{\bookurl}{\bookurl}
+
+    \vspace{.6em}
+    \sourceinfo
+
+    \description
+    \vspace{.6em}
+
+    \editors
+
+    \vspace{.6em}
+    \coverby
+
+    \color{black}
+  }
+  \end{figure}
+}
+
+
+\newcommand{\typosubsubsection}[1]{%
+{\textsc{#1}}
+}
+
+\newcommand{\typosubsection}[1]{% letter-spaced, uppercased heading text
+{\addfontfeature{
+SizeFeatures={
+  {Size={-10.5}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 },
+  {Size={10.5-12}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 },
+  {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 }
+},
+Letters={Uppercase}
+}
+\MakeUppercase{#1}}
+}
+
+\newcommand{\typosection}[1]{%
+{\addfontfeature{FakeStretch=0.96, LetterSpace=-4.0}\emph{\scalefont{2}#1}}
+%{\addfontfeature{Scale=2.0, FakeStretch=0.98, LetterSpace=-2.0}\emph{#1}}
+}
+
+
+\newcommand{\tytul}[1]{%
+#1%
+\vspace{1em}%
+}
+
+\newcommand{\nazwapodutworu}[1]{%
+\section*{\typosection{#1}}%
+}
+
+\newcommand{\autorutworu}[1]{%
+\subsection*{\typosubsection{#1}}%
+}
+
+\newcommand{\dzielonadrzedne}[1]{%
+\subsection*{\typosubsubsection{#1}}%
+}
+
+\newcommand{\nazwautworu}[1]{%
+\section*{\typosection{#1}}%
+}
+
+\newcommand{\podtytul}[1]{%
+\subsection*{\typosubsubsection{#1}}%
+}
+
+\newcommand{\translator}[1]{%
+\subsection*{\typosubsubsection{tłum. #1}}%
+}
+
+
+\newcommand{\powiesc}[1]{#1}
+\newcommand{\opowiadanie}[1]{#1}
+\newcommand{\lirykal}[1]{#1}
+\newcommand{\lirykalp}[1]{#1}
+\newcommand{\dramatwierszowanyl}[1]{#1}
+\newcommand{\dramatwierszowanylp}[1]{#1}
+\newcommand{\dramatwspolczesny}[1]{#1}
+
+\newcommand{\nota}[1]{%
+\par{#1}%
+}
+
+\newcommand{\dedykacja}[1]{%
+\begin{em}%
+\begin{flushright}%
+#1%
+\end{flushright}%
+\end{em}%
+}
+
+\newcommand{\dlugicytat}[1]{%
+\begin{quotation}%
+#1%
+\end{quotation}%
+}
+
+\newcommand{\poezjacyt}[1]{%
+\begin{verse}%
+#1%
+\end{verse}%
+}
+\newcommand{\motto}[1]{%
+\begin{em}%
+#1%
+\end{em}%
+}
+\newcommand{\listaosob}[2]{%
+\par{#1}%
+\begin{itemize}%
+#2%
+\end{itemize}%
+}
+
+\newcommand{\nagloweklisty}[1]{%
+\typosubsubsection{#1}%
+}
+
+\newcommand{\listaosoba}[1]{%
+\item{#1}%
+}
+
+\newcommand{\kwestia}[1]{%
+\par{#1}%
+}
+
+\newcommand{\naglowekakt}[1]{%
+\pagebreak
+\subsection*{\typosubsection{#1}}%
+}
+\newcommand{\naglowekczesc}[1]{%
+\pagebreak
+\subsection*{\typosubsection{#1}}%
+}
+\newcommand{\srodtytul}[1]{%
+\subsection*{\typosubsection{#1}}%
+}
+
+\newcommand{\naglowekscena}[1]{%
+\subsubsection*{\typosubsubsection{#1}}%
+}
+\newcommand{\naglowekrozdzial}[1]{%
+\subsubsection*{\typosubsubsection{#1}}%
+}
+
+\newcommand{\naglowekosoba}[1]{%
+\par{\textsc{#1}}\nopagebreak%
+}
+\newcommand{\naglowekpodrozdzial}[1]{%
+\par{#1}\nopagebreak%
+}
+
+\newcommand{\miejsceczas}[1]{%
+\par{\emph{#1}}%
+}
+\newcommand{\didaskalia}[1]{%
+\par{\emph{#1}}%
+}
+
+\newcommand{\akap}[1]{%
+\par{#1}%
+}
+\newcommand{\akapdialog}[1]{%
+\par{#1}%
+}
+\newcommand{\akapcd}[1]{%
+\par{#1}%
+}
+
+\newcommand{\mottopodpis}[1]{%
+\begin{em}%
+\begin{flushright}%
+#1%
+\end{flushright}%
+\end{em}%
+}
+
+\newcommand{\strofa}[1]{%
+\par{\noindent{\ignorespaces#1\vspace{1em}}}%
+}
+
+\newcommand{\wers}[1]{#1}
+
+\newcommand{\wersakap}[1]{%
+\hspace*{1em}#1%
+}
+\newcommand{\werscd}[1]{%
+\hspace*{8em}#1%
+}
+\newcommand{\werswciety}[2][1em]{%
+\hspace*{#1}#2%
+}
+
+\ifshowfootnotes
+  \newcommand{\pa}[1]{\NoCaseChange{\footnote{#1 [przypis autorski]}}}
+  \newcommand{\pe}[1]{\NoCaseChange{\footnote{#1}}}
+  \newcommand{\pr}[1]{\NoCaseChange{\footnote{#1}}}
+  \newcommand{\pt}[1]{\NoCaseChange{\footnote{#1}}}
+\else
+  \newcommand{\pa}[1]{}
+  \newcommand{\pe}[1]{}
+  \newcommand{\pr}[1]{}
+  \newcommand{\pt}[1]{}
+\fi
+
+\newcommand{\mat}[1]{$#1$}
+
+\newcommand{\didasktekst}[1]{%
+\emph{#1}%
+}
+\newcommand{\slowoobce}[1]{%
+\emph{#1}%
+}
+\newcommand{\tytuldziela}[1]{%
+\emph{#1}%
+}
+\newcommand{\wyroznienie}[1]{%
+\emph{#1}%
+}
+
+\newcommand{\osoba}[1]{%
+#1%
+}
+
+\newcommand{\sekcjaswiatlo}{%
+\vspace{30pt}%
+}
+
+\newcommand{\sekcjaasterysk}{%
+\vspace{10pt}%
+\begin{center}%
+\par{*}%
+\end{center}%
+}
+
+\newcommand{\separatorlinia}{%
+\vspace{10pt}%
+\hrule{}%
+\vspace{10pt}%
+}
+
+\newcommand{\motyw}[2][0]{%
+\ifshowthemes
+\mbox{}%
+\marginpar{%
+\vspace{-8pt}%
+\vspace{-#1\baselineskip}%
+\raggedright{\hspace{0pt}%
+\footnotesize{\color{theme}{#2}}}%
+\vspace{\baselineskip}%
+}%
+\fi
+}
+
diff --git a/librarian/pdf/wl.sty b/librarian/pdf/wl.sty
deleted file mode 100644 (file)
index 28a23a5..0000000
+++ /dev/null
@@ -1,413 +0,0 @@
-% -*- coding: utf-8 -*-
-
-\usepackage[MeX]{polski}
-
-\usepackage[xetex]{graphicx}
-\usepackage{fontspec}
-\usepackage{xunicode}
-\usepackage{xltxtra}
-
-\usepackage[overload]{textcase}
-\usepackage{scalefnt}
-\usepackage[colorlinks=true,linkcolor=black,setpagesize=false,urlcolor=black,xetex]{hyperref}
-
-\setmainfont [
-%ExternalLocation,
-UprightFont = JunicodeWL-Regular,
-ItalicFont = JunicodeWL-Italic,
-BoldFont = JunicodeWL-Regular,
-BoldItalicFont = JunicodeWL-Italic,
-SmallCapsFont = JunicodeWL-Regular,
-SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}},
-Numbers=OldStyle,
-Scale=1.04,
-LetterSpace=-1.0
-] {JunicodeWL}
-
-\newfontfamily\alien[
-SmallCapsFeatures = {Letters={SmallCaps,UppercaseSmallCaps}},
-Numbers=OldStyle,
-Scale=0.85,
-LetterSpace=-1.0
-] {DejaVu Serif}
-
-
-\defaultfontfeatures{
-SizeFeatures={
-  {Size={-10}, FakeStretch=1.02, LetterSpace=2.0 },
-  {Size={10.5-12}, FakeStretch=2.00, LetterSpace=0.0 },
-  {Size={12-}, FakeStretch=0.98, LetterSpace=-2.0 }
-}
-}
-
-\renewcommand{\textsc}[1]{%
-{\addfontfeature{
-SizeFeatures={
-  {Size={-10}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 },
-  {Size={10.5-12}, Scale=1.2, FakeStretch=1.02, LetterSpace=8.0 },
-  {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 }
-},
-Letters={SmallCaps,UppercaseSmallCaps}
-}
-#1}
-}
-
-%{\itshape JunicodeWL-Italic.ttf }
-%{\bfseries Junicode-Bold.ttf }
-%{\bfseries\itshape Junicode-BoldItalic.ttf }
-
-\pagestyle{plain}
-\usepackage{fancyhdr}
-
-\makeatletter
-
-% bottom figure below footnotes
-\usepackage{fnpos}
-\makeFNabove
-
-\usepackage{color}
-\definecolor{theme}{gray}{.3}
-
-\setlength{\marginparsep}{2em}
-\setlength{\marginparwidth}{8.5em}
-\setlength{\oddsidemargin}{0pt}
-\setlength{\voffset}{0pt}
-\setlength{\topmargin}{0pt}
-\setlength{\headheight}{0pt}
-\setlength{\headsep}{0pt}
-\setlength{\textheight}{24cm}
-
-\pagestyle{fancy}
-\fancyhf{}
-\renewcommand{\headrulewidth}{0pt}
-\renewcommand{\footrulewidth}{0pt}
-\lfoot{{\footnotesize \textsc{\@author} \emph{\@title}}}
-\cfoot{}
-\rfoot{{\footnotesize \thepage}}
-
-\clubpenalty=100000
-\widowpenalty=100000
-
-
-% see http://osdir.com/ml/tex.xetex/2005-10/msg00003.html
-\newsavebox{\ximagebox}\newlength{\ximageheight}
-\newsavebox{\xglyphbox}\newlength{\xglyphheight}
-\newcommand{\xbox}[1]
-{\savebox{\ximagebox}{#1}\settoheight{\ximageheight}{\usebox {\ximagebox}}%
-\savebox{\xglyphbox}{\char32}\settoheight{\xglyphheight}{\usebox {\xglyphbox}}%
-\raisebox{\ximageheight}[0pt][0pt]{%\raisebox{-\xglyphheight}[0pt] [0pt]{%
-\makebox[0pt][l]{\usebox{\xglyphbox}}}%}%
-\usebox{\ximagebox}%
-\raisebox{0pt}[0pt][0pt]{\makebox[0pt][r]{\usebox{\xglyphbox}}}}
-
-\newcommand{\makecover}[2]{
-        \pdfpagewidth=#1
-        \pdfpageheight=#2
-
-        \thispagestyle{empty}
-        \newlength{\PictHOffset}
-        \newlength{\PictVOffset}
-        \setlength{\PictHOffset}{1in}
-        \addtolength{\PictHOffset}{\hoffset}
-        \addtolength{\PictHOffset}{\oddsidemargin}
-
-        \setlength{\PictVOffset}{1in}
-        \addtolength{\PictVOffset}{\voffset}
-        \addtolength{\PictVOffset}{\topmargin}
-        \addtolength{\PictVOffset}{\headheight}
-        \addtolength{\PictVOffset}{\headsep}
-        \addtolength{\PictVOffset}{\topskip}
-        \addtolength{\PictVOffset}{-\pdfpageheight}
-
-        \noindent\hspace*{-\PictHOffset}%
-        \raisebox{\PictVOffset}[0pt][0pt]{\makebox[0pt][l]{%
-            \includegraphics[height=\pdfpageheight,width=\pdfpagewidth]{cover.png}}}
-        \clearpage
-
-        \setlength{\pdfpagewidth}{210mm}
-        \setlength{\pdfpageheight}{297mm}
-}
-
-
-\renewcommand{\maketitle}{
-    {
-    \thispagestyle{empty}
-    \footnotesize
-    \color{theme}
-
-    \noindent \begin{minipage}[t]{.35\textwidth}\vspace{0pt}
-        \href{http://www.wolnelektury.pl}{\xbox{\includegraphics[width=\textwidth]{wl-logo.png}}}
-    \end{minipage}
-    \begin{minipage}[t]{.65\textwidth}\vspace{0pt}
-
-    \ifflaglessadvertising
-    \else
-        \href{\bookurl}{Ta lektura}, podobnie jak tysiące innych, jest dostępna on-line na stronie
-        \href{http://www.wolnelektury.pl/}{wolnelektury.pl}.
-        \vspace{.5em}
-    \fi
-
-    Utwór opracowany został w ramach projektu \href{http://www.wolnelektury.pl/}{Wolne Lektury}
-    przez \href{http://nowoczesnapolska.org.pl}{fundację Nowoczesna Polska}.
-
-    \end{minipage}
-    \noindent \rule{\linewidth}{0.4pt}
-
-    \vspace{.6em}
-    \color{black}
-    }
-}
-
-\newcommand{\editorialsection}{
-  \begin{figure}[b!]
-  {
-    \footnotesize
-    \color{theme}
-    \noindent \rule{\linewidth}{0.4pt}
-
-    \rightsinfo
-    \vspace{.6em}
-
-    Źródło: \href{\bookurl}{\bookurl}
-
-    \vspace{.6em}
-    \sourceinfo
-
-    \description
-    \vspace{.6em}
-
-    \editors
-
-    \color{black}
-  }
-  \end{figure}
-}
-
-
-\newcommand{\typosubsubsection}[1]{%
-{\textsc{#1}}
-}
-
-\newcommand{\typosubsection}[1]{%
-{\addfontfeature{
-SizeFeatures={
-  {Size={-10}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 },
-  {Size={10.5-12}, Scale=1.2, FakeStretch=1.00, LetterSpace=8.0 },
-  {Size={12-}, FakeStretch=1.0, LetterSpace=8.0 }
-},
-Letters={Uppercase}
-}
-\MakeUppercase{#1}}
-}
-
-\newcommand{\typosection}[1]{%
-{\addfontfeature{FakeStretch=0.96, LetterSpace=-4.0}\emph{\scalefont{2}#1}}
-%{\addfontfeature{Scale=2.0, FakeStretch=0.98, LetterSpace=-2.0}\emph{#1}}
-}
-
-
-\newcommand{\tytul}[1]{%
-#1%
-\vspace{1em}%
-}
-
-\newcommand{\nazwapodutworu}[1]{%
-\section*{\typosection{#1}}%
-}
-
-\newcommand{\autorutworu}[1]{%
-\subsection*{\typosubsection{#1}}%
-}
-
-\newcommand{\dzielonadrzedne}[1]{%
-\subsection*{\typosubsubsection{#1}}%
-}
-
-\newcommand{\nazwautworu}[1]{%
-\section*{\typosection{#1}}%
-}
-
-\newcommand{\podtytul}[1]{%
-\subsection*{\typosubsubsection{#1}}%
-}
-
-\newcommand{\translator}[1]{%
-\subsection*{\typosubsubsection{tłum. #1}}%
-}
-
-
-\newcommand{\powiesc}[1]{#1}
-\newcommand{\opowiadanie}[1]{#1}
-\newcommand{\lirykal}[1]{#1}
-\newcommand{\lirykalp}[1]{#1}
-\newcommand{\dramatwierszowanyl}[1]{#1}
-\newcommand{\dramatwierszowanylp}[1]{#1}
-\newcommand{\dramatwspolczesny}[1]{#1}
-
-\newcommand{\nota}[1]{%
-\par{#1}%
-}
-
-\newcommand{\dedykacja}[1]{%
-\begin{em}%
-\begin{flushright}%
-#1%
-\end{flushright}%
-\end{em}%
-}
-
-\newcommand{\dlugicytat}[1]{%
-\begin{quotation}%
-#1%
-\end{quotation}%
-}
-
-\newcommand{\poezjacyt}[1]{%
-\begin{verse}%
-#1%
-\end{verse}%
-}
-\newcommand{\motto}[1]{%
-\begin{em}%
-#1%
-\end{em}%
-}
-\newcommand{\listaosob}[2]{%
-\par{#1}%
-\begin{itemize}%
-#2%
-\end{itemize}%
-}
-
-\newcommand{\nagloweklisty}[1]{%
-\typosubsubsection{#1}%
-}
-
-\newcommand{\listaosoba}[1]{%
-\item{#1}%
-}
-
-\newcommand{\kwestia}[1]{%
-\par{#1}%
-}
-
-\newcommand{\naglowekakt}[1]{%
-\pagebreak
-\subsection*{\typosubsection{#1}}%
-}
-\newcommand{\naglowekczesc}[1]{%
-\pagebreak
-\subsection*{\typosubsection{#1}}%
-}
-\newcommand{\srodtytul}[1]{%
-\subsection*{\typosubsection{#1}}%
-}
-
-\newcommand{\naglowekscena}[1]{%
-\subsubsection*{\typosubsubsection{#1}}%
-}
-\newcommand{\naglowekrozdzial}[1]{%
-\subsubsection*{\typosubsubsection{#1}}%
-}
-
-\newcommand{\naglowekosoba}[1]{%
-\par{\textsc{#1}}\nopagebreak%
-}
-\newcommand{\naglowekpodrozdzial}[1]{%
-\par{#1}\nopagebreak%
-}
-
-\newcommand{\miejsceczas}[1]{%
-\par{\emph{#1}}%
-}
-\newcommand{\didaskalia}[1]{%
-\par{\emph{#1}}%
-}
-
-\newcommand{\akap}[1]{%
-\par{#1}%
-}
-\newcommand{\akapdialog}[1]{%
-\par{#1}%
-}
-\newcommand{\akapcd}[1]{%
-\par{#1}%
-}
-
-\newcommand{\mottopodpis}[1]{%
-\begin{em}%
-\begin{flushright}%
-#1%
-\end{flushright}%
-\end{em}%
-}
-
-\newcommand{\strofa}[1]{%
-\par{\noindent{\ignorespaces#1\vspace{1em}}}%
-}
-
-\newcommand{\wers}[1]{#1}
-
-\newcommand{\wersakap}[1]{%
-\hspace*{1em}#1%
-}
-\newcommand{\werscd}[1]{%
-\hspace*{8em}#1%
-}
-\newcommand{\werswciety}[2][1em]{%
-\hspace*{#1}#2%
-}
-
-
-\newcommand{\pa}[1]{\NoCaseChange{\footnote{#1 [przypis autorski]}}}
-\newcommand{\pe}[1]{\NoCaseChange{\footnote{#1}}}
-\newcommand{\pr}[1]{\NoCaseChange{\footnote{#1}}}
-\newcommand{\pt}[1]{\NoCaseChange{\footnote{#1}}}
-
-\newcommand{\mat}[1]{$#1$}
-
-\newcommand{\didasktekst}[1]{%
-\emph{#1}%
-}
-\newcommand{\slowoobce}[1]{%
-\emph{#1}%
-}
-\newcommand{\tytuldziela}[1]{%
-\emph{#1}%
-}
-\newcommand{\wyroznienie}[1]{%
-\emph{#1}%
-}
-
-\newcommand{\osoba}[1]{%
-#1%
-}
-
-\newcommand{\sekcjaswiatlo}{%
-\vspace{30pt}%
-}
-
-\newcommand{\sekcjaasterysk}{%
-\vspace{10pt}%
-\begin{center}%
-\par{*}%
-\end{center}%
-}
-
-\newcommand{\separatorlinia}{%
-\vspace{10pt}%
-\hrule{}%
-\vspace{10pt}%
-}
-
-\newcommand{\motyw}[2][0]{%
-\mbox{}%
-\marginpar{%
-\vspace{-8pt}%
-\vspace{-#1\baselineskip}%
-\raggedright{\hspace{0pt}%
-\footnotesize{\color{theme}{#2}}}%
-\vspace{\baselineskip}%
-}%
-}
-
index ec5e4d3..1a675ba 100644 (file)
@@ -15,7 +15,7 @@
 <xsl:template match="utwor">
     <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
         <TeXML escape="0">
-        \documentclass[a4paper, oneside, 11pt]{book}
+        \documentclass[<xsl:value-of select="@customizations"/>]{wl}
 
         <!-- flags and values set on root -->
 
@@ -31,8 +31,6 @@
                 \def\<xsl:value-of select="wl:texcommand(name())" />{<TeXML escape="1"><xsl:value-of select="."/></TeXML>}
             </TeXML>
         </xsl:for-each>
-
-        \usepackage{wl}
         </TeXML>
 
         <xsl:choose>
             </parm></cmd>
             <xsl:apply-templates select="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny" />
             <xsl:apply-templates select="utwor" mode="part" />
+
+            <TeXML escape="0">
+                \def\coverby{
+                <xsl:if test="@data-cover-by">Okładka na podstawie: 
+                    <xsl:choose>
+                    <xsl:when test="@data-cover-source">
+                        \href{\datacoversource}{\datacoverby}
+                    </xsl:when>
+                    <xsl:otherwise>
+                        \datacoverby{}
+                    </xsl:otherwise>
+                    </xsl:choose>
+                </xsl:if>
+                }
+            </TeXML>
+
             <cmd name="editorialsection" />
         </env>
     </TeXML>
diff --git a/librarian/picture.py b/librarian/picture.py
new file mode 100644 (file)
index 0000000..ee3c61d
--- /dev/null
@@ -0,0 +1,173 @@
+
+from dcparser import (as_person, as_date, Field, WorkInfo, DCNS)
+from librarian import (RDFNS, ValidationError, NoDublinCore, ParseError, WLURI)
+from xml.parsers.expat import ExpatError
+from os import path
+from StringIO import StringIO
+from lxml import etree
+from lxml.etree import (XMLSyntaxError, XSLTApplyError)
+import re
+
+
+class WLPictureURI(WLURI):
+    """
+    WLURI subclass for pictures.
+
+    Sets _re_wl_uri to a pattern for URIs of the form
+    http://wolnelektury.pl/katalog/obraz/<slug> with an optional
+    trailing slash, where slug consists of lowercase letters, digits
+    and hyphens. How the pattern is applied is decided by WLURI,
+    which is defined outside this module.
+    """
+    _re_wl_uri = re.compile('http://wolnelektury.pl/katalog/obraz/'
+            '(?P<slug>[-a-z0-9]+)/?$')
+
+    @classmethod
+    def from_slug(cls, slug):
+        """
+        Builds the catalogue URI for given slug and returns cls(uri).
+        """
+        uri = 'http://wolnelektury.pl/katalog/obraz/%s/' % slug
+        return cls(uri)
+
+def as_wlpictureuri_strict(text):
+    """
+    Returns WLPictureURI.strict(text).
+
+    Passed as the `strict` argument of the url field in PictureInfo.
+    """
+    return WLPictureURI.strict(text)
+
+
+class PictureInfo(WorkInfo):
+    """
+    Dublin core metadata for a picture
+
+    FIELDS lists the Dublin Core elements declared for a picture.
+    WLPicture reads the resulting `mime_type` and `url` attributes;
+    presumably the second argument of each Field is the attribute name
+    the value is stored under (Field is defined in dcparser -- confirm).
+    """
+    FIELDS = (
+        Field(DCNS('language'), 'language', required=False),
+        Field(DCNS('subject.period'), 'epochs', salias='epoch', multiple=True),
+        Field(DCNS('subject.type'), 'kinds', salias='kind', multiple=True),
+
+        Field(DCNS('format.dimensions'), 'dimensions', required=False),
+        # the only field in this list explicitly marked as required
+        Field(DCNS('format.checksum.sha1'), 'sha1', required=True),
+        Field(DCNS('description.medium'), 'medium', required=False),
+        Field(DCNS('description.dimensions'), 'original_dimensions', required=False),
+        Field(DCNS('format'), 'mime_type', required=False),
+        Field(DCNS('identifier.url'), 'url', WLPictureURI,
+            strict=as_wlpictureuri_strict),
+        )
+
+
+class ImageStore(object):
+    EXT = ['gif', 'jpeg', 'png', 'swf', 'psd', 'bmp'
+            'tiff', 'tiff', 'jpc', 'jp2', 'jpf', 'jb2', 'swc',
+            'aiff', 'wbmp', 'xbm']
+    MIME = ['image/gif', 'image/jpeg', 'image/png',
+            'application/x-shockwave-flash', 'image/psd', 'image/bmp',
+            'image/tiff', 'image/tiff', 'application/octet-stream',
+            'image/jp2', 'application/octet-stream', 'application/octet-stream',
+            'application/x-shockwave-flash', 'image/iff', 'image/vnd.wap.wbmp', 'image/xbm']
+
+    def __init__(self, dir_):
+        self.dir = dir_
+        return super(ImageStore, self).__init__()
+
+    def path(self, slug, mime_type):
+        """
+        Finds file by slug and mime type in our iamge store.
+        Returns a file objects (perhaps should return a filename?)
+        """
+        try:
+            i = self.MIME.index(mime_type)
+        except ValueError:
+            err = ValueError("Picture %s has unknown mime type: %s" % (slug, mime_type))
+            err.slug = slug
+            err.mime_type = mime_type
+            raise err
+        ext = self.EXT[i]
+        # add some common extensions tiff->tif, jpeg->jpg
+        return path.join(self.dir, slug + '.' + ext)
+
+
+class WLPicture(object):
+    def __init__(self, edoc, parse_dublincore=True, image_store=None):
+        self.edoc = edoc
+        self.image_store = image_store
+
+        root_elem = edoc.getroot()
+
+        dc_path = './/' + RDFNS('RDF')
+
+        if root_elem.tag != 'picture':
+            raise ValidationError("Invalid root element. Found '%s', should be 'picture'" % root_elem.tag)
+
+        if parse_dublincore:
+            self.rdf_elem = root_elem.find(dc_path)
+
+            if self.rdf_elem is None:
+                raise NoDublinCore('Document has no DublinCore - which is required.')
+
+            self.picture_info = PictureInfo.from_element(self.rdf_elem)
+        else:
+            self.picture_info = None
+
+    @classmethod
+    def from_string(cls, xml, *args, **kwargs):
+        return cls.from_file(StringIO(xml), *args, **kwargs)
+
+    @classmethod
+    def from_file(cls, xmlfile, parse_dublincore=True, image_store=None):
+
+        # first, prepare for parsing
+        if isinstance(xmlfile, basestring):
+            file = open(xmlfile, 'rb')
+            try:
+                data = file.read()
+            finally:
+                file.close()
+        else:
+            data = xmlfile.read()
+
+        if not isinstance(data, unicode):
+            data = data.decode('utf-8')
+
+        data = data.replace(u'\ufeff', '')
+
+        # assume images are in the same directory
+        if image_store is None and xmlfile.name is not None:
+            image_store = ImageStore(path.dirname(xmlfile.name))
+
+        try:
+            parser = etree.XMLParser(remove_blank_text=False)
+            tree = etree.parse(StringIO(data.encode('utf-8')), parser)
+
+            return cls(tree, parse_dublincore=parse_dublincore, image_store=image_store)
+        except (ExpatError, XMLSyntaxError, XSLTApplyError), e:
+            raise ParseError(e)
+
+    @property
+    def mime_type(self):
+        if self.picture_info is None:
+            raise ValueError('DC is not loaded, hence we don\'t know the image type')
+        return self.picture_info.mime_type
+
+    @property
+    def slug(self):
+        return self.picture_info.url.slug
+
+    @property
+    def image_path(self):
+        if self.image_store is None:
+            raise ValueError("No image store associated with whis WLPicture.")
+        return self.image_store.path(self.slug, self.mime_type)
+
+    def image_file(self, *args, **kwargs):
+        return open(self.image_path, *args, **kwargs)
+
+    def partiter(self):
+        """
+        Iterates the parts of this picture and returns them and their metadata
+        """
+        for part in self.edoc.iter("div"):
+            pd = {}
+            pd['type'] = part.get('type')
+            if pd['type'] == 'area':
+                pd['coords'] = ((int(part.get('x1')), int(part.get('y1'))),
+                                (int(part.get('x2')), int(part.get('y2'))))
+
+            pd['themes'] = []
+            pd['object'] = None
+            parent = part
+            while True:
+                parent = parent.getparent()
+                if parent is None:
+                    break
+                if parent.tag == 'sem':
+                    if parent.get('type') == 'theme':
+                        pd['themes'] += map(unicode.strip, unicode(parent.get('theme')).split(','))
+                    elif parent.get('type') == 'object' and pd['object'] is None:
+                        pd['object'] = parent.get('object')
+            yield pd
diff --git a/librarian/res/jedenprocent.png b/librarian/res/jedenprocent.png
new file mode 100644 (file)
index 0000000..1b88c54
Binary files /dev/null and b/librarian/res/jedenprocent.png differ
index c23bcd6..d99e7cf 100644 (file)
@@ -3,7 +3,8 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from librarian import dcparser, parser, functions
+import copy
+from librarian import functions, OutputFile
 from lxml import etree
 import os
 
@@ -28,7 +29,7 @@ Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowocz
 %(description)s%(contributors)s
 """
 
-def transform(input_file, output_file, parse_dublincore=True, flags=None, **options):
+def transform(wldoc, flags=None, **options):
     """
     Transforms input_file in XML to output_file in TXT.
     possible flags: raw-text,
@@ -37,7 +38,9 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
     style_filename = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
     style = etree.parse(style_filename)
 
-    document = parser.WLDocument.from_file(input_file, True, parse_dublincore=parse_dublincore)
+    document = copy.deepcopy(wldoc)
+    del wldoc
+    document.swap_endlines()
 
     if flags:
         for flag in flags:
@@ -46,10 +49,10 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
     result = document.transform(style, **options)
 
     if not flags or 'raw-text' not in flags:
-        if parse_dublincore:
-            parsed_dc = dcparser.BookInfo.from_element(document.edoc)
+        if document.book_info:
+            parsed_dc = document.book_info
             description = parsed_dc.description
-            url = parsed_dc.url
+            url = document.book_info.url
     
             license_description = parsed_dc.license_description
             license = parsed_dc.license
@@ -75,7 +78,7 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
             license_description = ""
             source = ""
             contributors = ""
-        output_file.write((TEMPLATE % {
+        return OutputFile.from_string((TEMPLATE % {
             'description': description,
             'url': url,
             'license_description': license_description,
@@ -84,5 +87,5 @@ def transform(input_file, output_file, parse_dublincore=True, flags=None, **opti
             'contributors': contributors,
         }).encode('utf-8'))
     else:
-        output_file.write(unicode(result).encode('utf-8'))
+        return OutputFile.from_string(unicode(result).encode('utf-8'))
 
diff --git a/scripts/book2cover b/scripts/book2cover
new file mode 100755 (executable)
index 0000000..d2befc3
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os
+import optparse
+
+from librarian import ParseError
+from librarian.parser import WLDocument
+from librarian.cover import WLCover
+
+
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Create cover images for SOURCE files."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+
+    options, input_filenames = parser.parse_args()
+
+    # At least one SOURCE is required; otherwise show the help and fail.
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+
+    # Do some real work
+    for input_filename in input_filenames:
+        if options.verbose:
+            print input_filename
+
+        # The cover is saved next to the source, with the extension replaced by .png.
+        output_filename = os.path.splitext(input_filename)[0] + '.png'
+
+        # NOTE(review): ParseError is imported by this script but never
+        # caught, so a broken source ends the run with a traceback --
+        # confirm whether it should be reported per file instead.
+        doc = WLDocument.from_file(input_filename)
+        WLCover(doc.book_info).save(output_filename)
index 82aaa2b..ce8adb5 100755 (executable)
@@ -7,8 +7,9 @@
 import os.path
 import optparse
 
-from librarian import epub, DirDocProvider, ParseError
+from librarian import DirDocProvider, ParseError
 from librarian.cover import ImageCover
+from librarian.parser import WLDocument
 
 
 if __name__ == '__main__':
@@ -20,6 +21,8 @@ if __name__ == '__main__':
 
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
         help='print status messages to stdout')
+    parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
+                      help='create default cover')
     parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
                       help='create a directory for author and put the PDF in it')
     parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
@@ -32,7 +35,7 @@ if __name__ == '__main__':
                       help='less advertising, for commercial purposes')
     parser.add_option('-W', '--not-wl', action='store_true', dest='not_wl', default=False,
                       help='not a WolneLektury book')
-    parser.add_option('-c', '--cover', dest='cover', metavar='FILE',
+    parser.add_option('--cover', dest='cover', metavar='FILE',
                       help='specifies the cover file')
 
     options, input_filenames = parser.parse_args()
@@ -46,20 +49,20 @@ if __name__ == '__main__':
         for main_input in input_filenames:
             if options.verbose:
                 print main_input
+
             path, fname = os.path.realpath(main_input).rsplit('/', 1)
             provider = DirDocProvider(path)
-
-            output_dir = output_file = None
-            if options.output_dir:
-                output_dir = options.output_dir
-            elif options.output_file:
-                output_file = options.output_file
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.epub'
             else:
-                output_dir = path
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
 
-            cover = None
             if options.cover:
                 cover = ImageCover(options.cover)
+            else:
+                cover = options.with_cover
 
             flags = []
             if options.images:
@@ -69,8 +72,11 @@ if __name__ == '__main__':
             if options.not_wl:
                 flags.append('not-wl')
 
-            epub.transform(provider, file_path=main_input, output_dir=output_dir, output_file=output_file, make_dir=options.make_dir,
-                cover=cover, flags=flags)
+            epub = doc.as_epub(cover=cover, flags=flags)
+
+            doc.save_output_file(epub,
+                output_file, options.output_dir, options.make_dir, 'epub')
+
     except ParseError, e:
         print '%(file)s:%(name)s:%(message)s' % {
             'file': main_input,
index d61b299..8adeb38 100755 (executable)
@@ -7,7 +7,8 @@
 import os
 import optparse
 
-from librarian import html, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
 
 
 if __name__ == '__main__':
@@ -21,6 +22,8 @@ if __name__ == '__main__':
         help='print status messages to stdout')
     parser.add_option('-i', '--ignore-dublin-core', action='store_false', dest='parse_dublincore', default=True,
         help='don\'t try to parse dublin core metadata')
+    parser.add_option('-r', '--raw', action='store_false', dest='full_page', default=True,
+        help='outpu raw text for use in templates')
 
     options, input_filenames = parser.parse_args()
 
@@ -35,7 +38,11 @@ if __name__ == '__main__':
 
         output_filename = os.path.splitext(input_filename)[0] + '.html'
         try:
-            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore, flags=('full-page',))
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            flags = ('full-page',) if options.full_page else None
+            html = doc.as_html(flags=flags)
+            doc.save_output_file(html, output_path=output_filename)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
index 97d8ebd..779f245 100755 (executable)
@@ -7,7 +7,8 @@
 import os
 import optparse
 
-from librarian import html, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
 
 
 if __name__ == '__main__':
@@ -35,8 +36,10 @@ if __name__ == '__main__':
 
         output_filename = os.path.splitext(input_filename)[0] + '.html'
         try:
-            html.transform(input_filename, output_filename, parse_dublincore=options.parse_dublincore,\
-                stylesheet='partial')
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            html = doc.as_html(flags=('full-page',), stylesheet='partial')
+            doc.save_output_file(html, output_path=output_filename)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
diff --git a/scripts/book2mobi b/scripts/book2mobi
new file mode 100755 (executable)
index 0000000..665dcfa
--- /dev/null
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+import optparse
+
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
+
+
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Convert SOURCE files to MOBI format."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+    parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
+                      help='create a directory for author and put the PDF in it')
+    parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+                      help='specifies the output file')
+    parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+                      help='specifies the directory for output')
+
+    options, input_filenames = parser.parse_args()
+
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+
+    # Do some real work
+    try:
+        for main_input in input_filenames:
+            path, fname = os.path.realpath(main_input).rsplit('/', 1)
+            provider = DirDocProvider(path)
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.mobi'
+            else:
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            mobi = doc.as_mobi()
+
+            doc.save_output_file(mobi,
+                output_file, options.output_dir, options.make_dir, 'mobi')
+    except ParseError, e:
+        print '%(file)s:%(name)s:%(message)s' % {
+            'file': main_input,
+            'name': e.__class__.__name__,
+            'message': e
+        }
index d10f400..258c20d 100755 (executable)
@@ -6,7 +6,10 @@
 #
 import os.path
 from optparse import OptionParser
-from librarian import pdf, DirDocProvider, ParseError
+
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
+
 
 if __name__ == '__main__':
     usage = """Usage: %prog [options] SOURCE [SOURCE...]
@@ -15,6 +18,8 @@ if __name__ == '__main__':
     parser = OptionParser(usage)
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
                       help='make lots of noise and revert to default interaction in LaTeX')
+    parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
+                      help='create default cover')
     parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
                       help='create a directory for author and put the PDF in it')
     parser.add_option('-t', '--save-tex', dest='save_tex', metavar='FILE',
@@ -31,33 +36,26 @@ if __name__ == '__main__':
         parser.print_help()
         exit(1)
 
-    try:
-        if options.output_dir and options.output_file:
-            raise ValueError("Either --output-dir or --output file should be specified")
+    if options.output_dir and options.output_file:
+        raise ValueError("Either --output-dir or --output file should be specified")
 
+    try:
         for main_input in args:
-            if options.verbose:
-                print main_input
             path, fname = os.path.realpath(main_input).rsplit('/', 1)
             provider = DirDocProvider(path)
-
-            output_file = output_dir = None
-            if options.output_dir:
-                output_dir = options.output_dir
-            elif options.output_file:
-                output_file = options.output_file
+            output_file, output_dir = options.output_file, options.output_dir
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.pdf'
             else:
-                output_dir = path
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            pdf = doc.as_pdf(save_tex=options.save_tex,
+                        cover=options.with_cover,
+                        morefloats=options.morefloats)
 
-            pdf.transform(provider,
-                file_path=main_input,
-                output_file=output_file,
-                output_dir=output_dir,
-                verbose=options.verbose,
-                make_dir=options.make_dir,
-                save_tex=options.save_tex,
-                morefloats=options.morefloats
-                )
+            doc.save_output_file(pdf,
+                output_file, options.output_dir, options.make_dir, 'pdf')
     except ParseError, e:
         print '%(file)s:%(name)s:%(message)s; use -v to see more output' % {
             'file': main_input,
index d56d6ff..9cfdef2 100755 (executable)
@@ -7,8 +7,8 @@
 import os
 import optparse
 
-from librarian import text
-from librarian import dcparser, ParseError
+from librarian import ParseError
+from librarian.parser import WLDocument
 
 
 if __name__ == '__main__':
@@ -38,9 +38,10 @@ if __name__ == '__main__':
 
         output_filename = os.path.splitext(input_filename)[0] + '.txt'
         try:
-            output_file = open(output_filename, 'w')
-            text.transform(open(input_filename), output_file, parse_dublincore=options.parse_dublincore,
-                wrapping=str(options.wrapping))
+            doc = WLDocument.from_file(input_filename,
+                parse_dublincore=options.parse_dublincore)
+            html = doc.as_text(wrapping=str(options.wrapping))
+            doc.save_output_file(html, output_path=output_filename)
         except ParseError, e:
             print '%(file)s:%(name)s:%(message)s' % {
                 'file': input_filename,
old mode 100644 (file)
new mode 100755 (executable)
index d1db01b..b1ea926
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ def whole_tree(prefix, path):
 
 setup(
     name='librarian',
-    version='1.3',
+    version='1.4.1',
     description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
     author="Marek Stępniowski",
     author_email='marek@stepniowski.com',
@@ -29,15 +29,17 @@ setup(
     maintainer_email='radek.czajka@gmail.com',
     url='http://github.com/fnp/librarian',
     packages=['librarian'],
-    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'pdf/*', 'fonts/*', 'res/*'] +
+    package_data={'librarian': ['xslt/*.xslt', 'epub/*', 'mobi/*', 'pdf/*', 'fonts/*', 'res/*'] +
                                 whole_tree(os.path.join(os.path.dirname(__file__), 'librarian'), 'font-optimizer')},
     include_package_data=True,
     install_requires=['lxml>=2.2'],
     scripts=['scripts/book2html',
              'scripts/book2txt',
              'scripts/book2epub',
+             'scripts/book2mobi',
              'scripts/book2pdf',
              'scripts/book2partner',
+             'scripts/book2cover',
              'scripts/bookfragments',
              'scripts/genslugs'],
     tests_require=['nose>=0.11', 'coverage>=3.0.1'],
index fda83eb..c0fb00b 100644 (file)
@@ -14,4 +14,6 @@
     'genre': u'Baśń',
     'technical_editors': [u'Gałecki, Dariusz'],
     'license_description': u'Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925',
+    'audiences': [u'SP1'],
+    'language': u'pol',
 }
index 1111b9c..a7eeffe 100644 (file)
@@ -1,17 +1,19 @@
 {
-            'editors': [u'Sekuła, Aleksandra'],
-            'publisher': u'Fundacja Nowoczesna Polska',
-            'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
-            'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
-            'author': u'Biedrzycki, Miłosz',
-            'url': u'http://wolnelektury.pl/katalog/lektura/akslop',
-            'created_at': u'2009-06-04',
-            'title': u'Akslop',
-            'kind': u'Liryka',
-            'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1',
-            'epoch': u'Współczesność',
-            'genre': u'Wiersz',
-            'technical_editors': [u'Sutkowska, Olga'],
-            'license': u'http://creativecommons.org/licenses/by-sa/3.0/',
-            'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL'
+    'editors': [u'Sekuła, Aleksandra'],
+    'publisher': u'Fundacja Nowoczesna Polska',
+    'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
+    'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
+    'author': u'Biedrzycki, Miłosz',
+    'url': u'http://wolnelektury.pl/katalog/lektura/akslop',
+    'created_at': u'2009-06-04',
+    'title': u'Akslop',
+    'kind': u'Liryka',
+    'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1',
+    'epoch': u'Współczesność',
+    'genre': u'Wiersz',
+    'technical_editors': [u'Sutkowska, Olga'],
+    'license': u'http://creativecommons.org/licenses/by-sa/3.0/',
+    'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL',
+    'audiences': [u'L'],
+    'language': u'pol',
 }
index 4c1dc8f..b3eba1e 100644 (file)
@@ -1,18 +1,20 @@
 {
-            'publisher': u'Fundacja Nowoczesna Polska',
-            'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
-            'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
-            'author': u'Kochanowski, Jan',
-            'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr',
-            'created_at': u'2007-08-31',
-            'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)',
-            'kind': u'Liryka',
-            'source_url': u'http://www.polona.pl/Content/1499',
-            'released_to_public_domain_at': u'1584-01-01',
-            'epoch': u'Renesans',
-            'genre': u'Pieśń',
-            'technical_editors': [u'Gałecki, Dariusz'],
-            'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ',
-            'editors': [u'Sekuła, Aleksandra', u'Krzyżanowski, Julian', u'Otwinowska, Barbara'],
+    'publisher': u'Fundacja Nowoczesna Polska',
+    'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
+    'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
+    'author': u'Kochanowski, Jan',
+    'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr',
+    'created_at': u'2007-08-31',
+    'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)',
+    'kind': u'Liryka',
+    'source_url': u'http://www.polona.pl/Content/1499',
+    'released_to_public_domain_at': u'1584-01-01',
+    'epoch': u'Renesans',
+    'genre': u'Pieśń',
+    'technical_editors': [u'Gałecki, Dariusz'],
+    'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ',
+    'editors': [u'Sekuła, Aleksandra', u'Krzyżanowski, Julian', u'Otwinowska, Barbara'],
+    'audiences': [u'L'],
+    'language': u'pol',
 }
 
index ff4bd98..a35f935 100644 (file)
@@ -1,18 +1,20 @@
 {
-            'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
-            'publisher': u'Fundacja Nowoczesna Polska',
-            'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
-            'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
-            'author': u'Mickiewicz, Adam',
-            'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka',
-            'created_at': u'2007-09-06',
-            'title': u'Rybka',
-            'kind': u'Liryka',
-            'source_url': u'http://www.polona.pl/Content/2222',
-            'released_to_public_domain_at': u'1855-01-01',
-            'epoch': u'Romantyzm',
-            'genre': u'Ballada',
-            'technical_editors': [u'Sutkowska, Olga'],
-            'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855',
+    'editors': [u'Sekuła, Aleksandra', u'Kallenbach, Józef'],
+    'publisher': u'Fundacja Nowoczesna Polska',
+    'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
+    'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
+    'author': u'Mickiewicz, Adam',
+    'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka',
+    'created_at': u'2007-09-06',
+    'title': u'Rybka',
+    'kind': u'Liryka',
+    'source_url': u'http://www.polona.pl/Content/2222',
+    'released_to_public_domain_at': u'1855-01-01',
+    'epoch': u'Romantyzm',
+    'genre': u'Ballada',
+    'technical_editors': [u'Sutkowska, Olga'],
+    'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855',
+    'audiences': [u'SP2', u'G', u'L'],
+    'language': u'pol',
 }
 
index 0f2b4d0..d934602 100644 (file)
@@ -1,19 +1,22 @@
 {
-            'editors': [u'Sekuła, Aleksandra'],
-            'publisher': u'Fundacja Nowoczesna Polska',
-            'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
-            'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
-            'author': u'Sofokles',
-            'url': u'http://wolnelektury.pl/katalog/lektura/antygona',
-            'created_at': u'2007-08-30',
-            'title': u'Antygona',
-            'kind': u'Dramat',
-            'source_url': u'http://www.polona.pl/Content/3768',
-            'translators': [u'Morawski, Kazimierz'],
-            'released_to_public_domain_at': u'1925-01-01',
-            'epoch': u'Starożytność',
-            'genre': u'Tragedia',
-            'technical_editors': [u'Gałecki, Dariusz'],
-            'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925',
+    'editors': [u'Sekuła, Aleksandra'],
+    'publisher': u'Fundacja Nowoczesna Polska',
+    'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
+    'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
+    'author': u'Sofokles',
+    'url': u'http://wolnelektury.pl/katalog/lektura/antygona',
+    'created_at': u'2007-08-30',
+    'title': u'Antygona',
+    'kind': u'Dramat',
+    'source_url': u'http://www.polona.pl/Content/3768',
+    'translators': [u'Morawski, Kazimierz'],
+    'released_to_public_domain_at': u'1925-01-01',
+    'epoch': u'Starożytność',
+    'genre': u'Tragedia',
+    'technical_editors': [u'Gałecki, Dariusz'],
+    'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925',
+    'audiences': [u'G'],
+    'language': u'pol',
 }
 
diff --git a/tests/files/picture/angelus-novus.png b/tests/files/picture/angelus-novus.png
new file mode 100644 (file)
index 0000000..9925dad
Binary files /dev/null and b/tests/files/picture/angelus-novus.png differ
diff --git a/tests/files/picture/angelus-novus.xml b/tests/files/picture/angelus-novus.xml
new file mode 100644 (file)
index 0000000..0f26730
--- /dev/null
@@ -0,0 +1,42 @@
+<picture>
+  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko">
+      <dc:creator xml:lang="pl">Klee, Paul</dc:creator>
+      <dc:title xml:lang="la">Angelus Novus</dc:title>
+      <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+      <dc:contributor.editor xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">Sekuła, Aleksandra</dc:contributor.editor>
+      <dc:contributor.editor xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">Kwiatkowska, Katarzyna</dc:contributor.editor>
+      <dc:contributor.technical_editor xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">Trzeciak, Weronika</dc:contributor.technical_editor>
+      <dc:subject.period xml:lang="pl">Modernizm</dc:subject.period>
+      <dc:subject.type xml:lang="pl">Obraz</dc:subject.type>
+      <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+      <dc:description.dimensions xml:lang="pl">31.8 × 24.2 cm</dc:description.dimensions>
+      <dc:description.medium xml:lang="pl">Akwarela na papierze</dc:description.medium>
+      <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/obraz/angelus-novus</dc:identifier.url>
+      <dc:source.URL xml:lang="pl">http://katilifox.files.wordpress.com/2011/04/1190051611_angelus-novus.jpg</dc:source.URL>
+      <dc:source xml:lang="pl">Muzeum Narodowe, inw. 00000000.</dc:source>
+      <dc:rights xml:lang="pl">Domena publiczna - Paul Klee zm. 1940</dc:rights>
+      <dc:date.pd xml:lang="pl">1940</dc:date.pd>
+      <dc:type>Image</dc:type>
+      <dc:format xml:lang="pl">image/png</dc:format>
+      <dc:format.dimensions xml:lang="pl">1645 x 2000 px</dc:format.dimensions>
+      <dc:format.checksum.sha1 xml:lang="pl">d9ead48f3442ac4e1add602aacdffa4638ae8e21</dc:format.checksum.sha1>
+      <dc:date xml:lang="pl">1920</dc:date>
+      <dc:language xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">lat</dc:language>
+    </rdf:Description>
+  </rdf:RDF>
+  <sem type="object" object="obraz cały">
+    <div type="whole"/>
+  </sem>
+  <sem type="theme" theme="anioł historii">
+    <div type="area" x1="462" y1="212" x2="1283" y2="1730"/>
+  </sem>
+  <sem type="theme" theme="spojrzenie">
+    <div type="area" x1="688" y1="500" x2="1054" y2="618"/>
+  </sem>
+  <sem type="object" object="skrzydło">
+    <div type="area" x1="468" y1="741" x2="694" y2="1027"/>
+    <div type="area" x1="1044" y1="762" x2="1260" y2="1041"/>
+  </sem>
+</picture>
diff --git a/tests/files/text/asnyk_miedzy_nami.xml b/tests/files/text/asnyk_miedzy_nami.xml
deleted file mode 100644 (file)
index 36d8df6..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-<?xml version='1.0' encoding='utf-8'?>
-<utwor>
-  <liryka_lp>
-
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
-<rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Asnyk/Między_nami_nic_nie_było">
-<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
-<dc:title xml:lang="pl">Między nami nic nie było</dc:title>
-<dc:contributor.editor xml:lang="pl" />
-<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
-<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
-<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
-<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
-<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
-<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
-<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
-<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:identifier.url>
-<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
-<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
-<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
-<dc:date.pd xml:lang="pl">1897</dc:date.pd>
-<dc:format xml:lang="pl">xml</dc:format>
-<dc:type xml:lang="pl">text</dc:type>
-<dc:type xml:lang="en">text</dc:type>
-<dc:date xml:lang="pl">2007-09-06</dc:date>
-<dc:audience xml:lang="pl">L</dc:audience>
-<dc:language xml:lang="pl">pol</dc:language>
-</rdf:Description>
-</rdf:RDF>
-
-
-<autor_utworu>Adam Asnyk</autor_utworu>
-
-<nazwa_utworu><begin id="b1189062500041"/><motyw id="m1189062500041">Miłość platoniczna</motyw>Między nami nic nie było</nazwa_utworu>
-
-
-
-<strofa>Między nami nic nie było!/
-Żadnych zwierzeń, wyznań żadnych!/
-Nic nas z sobą nie łączyło ---/
-Prócz wiosennych marzeń zdradnych;</strofa>
-
-
-
-<strofa><begin id="b1189062528872"/><motyw id="m1189062528872">Natura</motyw>Prócz tych woni, barw i blasków,/
-Unoszących się w przestrzeni;/
-Prócz szumiących śpiewem lasków/
-I tej świeżej łąk zieleni;</strofa>
-
-
-
-<strofa>Prócz tych kaskad i potoków,/
-Zraszających każdy parów,/
-Prócz girlandy tęcz, obłoków,/
-Prócz natury słodkich czarów;</strofa>
-
-
-
-<strofa>Prócz tych wspólnych, jasnych zdrojów,/
-Z których serce zachwyt piło;/
-Prócz pierwiosnków i powojów,---/
-Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
-
-</liryka_lp>
-</utwor>
diff --git a/tests/files/text/asnyk_zbior.xml b/tests/files/text/asnyk_zbior.xml
new file mode 100755 (executable)
index 0000000..c585a8b
--- /dev/null
@@ -0,0 +1,29 @@
+<?xml version='1.0' encoding='utf-8'?>
+<utwor>
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/asnyk-poezye/">
+<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
+<dc:title xml:lang="pl">Poezye</dc:title>
+<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
+<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/poezye</dc:identifier.url>
+<dc:relation.hasPart xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:relation.hasPart>
+<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
+<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
+<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
+<dc:date.pd xml:lang="pl">1897</dc:date.pd>
+<dc:format xml:lang="pl">xml</dc:format>
+<dc:type xml:lang="pl">text</dc:type>
+<dc:type xml:lang="en">text</dc:type>
+<dc:date xml:lang="pl">2007-09-06</dc:date>
+<dc:audience xml:lang="pl">L</dc:audience>
+<dc:language xml:lang="pl">pol</dc:language>
+</rdf:Description>
+</rdf:RDF>
+
+
+</utwor>
diff --git a/tests/files/text/miedzy-nami-nic-nie-bylo.xml b/tests/files/text/miedzy-nami-nic-nie-bylo.xml
new file mode 100644 (file)
index 0000000..124940e
--- /dev/null
@@ -0,0 +1,65 @@
+<?xml version='1.0' encoding='utf-8'?>
+<utwor>
+  <liryka_lp>
+
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/miedzy-nami-nic-nie-bylo/">
+<dc:creator xml:lang="pl">Asnyk, Adam</dc:creator>
+<dc:title xml:lang="pl">Między nami nic nie było</dc:title>
+<dc:contributor.editor xml:lang="pl" />
+<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
+<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+<dc:subject.period xml:lang="pl">Pozytywizm</dc:subject.period>
+<dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+<dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/miedzy-nami-nic-nie-bylo</dc:identifier.url>
+<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/5164</dc:source.URL>
+<dc:source xml:lang="pl">(Asnyk, Adam) El...y (1838-1897), Poezye, t. 3,  Gebethner i Wolff, wyd. nowe poprzedzone słowem wstępnym St. Krzemińskiego, Warszawa, 1898</dc:source>
+<dc:rights xml:lang="pl">Domena publiczna - Adam Asnyk zm. 1897</dc:rights>
+<dc:date.pd xml:lang="pl">1897</dc:date.pd>
+<dc:format xml:lang="pl">xml</dc:format>
+<dc:type xml:lang="pl">text</dc:type>
+<dc:type xml:lang="en">text</dc:type>
+<dc:date xml:lang="pl">2007-09-06</dc:date>
+<dc:audience xml:lang="pl">L</dc:audience>
+<dc:language xml:lang="pl">pol</dc:language>
+</rdf:Description>
+</rdf:RDF>
+
+
+<autor_utworu>Adam Asnyk</autor_utworu>
+
+<nazwa_utworu><begin id="b1189062500041"/><motyw id="m1189062500041">Miłość platoniczna</motyw>Między nami nic nie było</nazwa_utworu>
+
+
+
+<strofa>Między nami nic nie było!/
+Żadnych zwierzeń, wyznań żadnych!/
+Nic nas z sobą nie łączyło ---/
+Prócz wiosennych marzeń zdradnych;</strofa>
+
+
+
+<strofa><begin id="b1189062528872"/><motyw id="m1189062528872">Natura</motyw>Prócz tych woni, barw i blasków,/
+Unoszących się w przestrzeni;/
+Prócz szumiących śpiewem lasków/
+I tej świeżej łąk zieleni;</strofa>
+
+
+
+<strofa>Prócz tych kaskad i potoków,/
+Zraszających każdy parów,/
+Prócz girlandy tęcz, obłoków,/
+Prócz natury słodkich czarów;</strofa>
+
+
+
+<strofa>Prócz tych wspólnych, jasnych zdrojów,/
+Z których serce zachwyt piło;/
+Prócz pierwiosnków i powojów,---/
+Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
+
+</liryka_lp>
+</utwor>
diff --git a/tests/test_epub.py b/tests/test_epub.py
new file mode 100644 (file)
index 0000000..9fc5637
--- /dev/null
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from librarian import DirDocProvider
+from librarian.parser import WLDocument
+from nose.tools import *
+from utils import get_fixture
+
+
+def test_transform():
+    """Smoke test: the Asnyk collection fixture converts to EPUB without raising."""
+    collection_path = get_fixture('text', 'asnyk_zbior.xml')
+    # The collection declares a child via dc:relation.hasPart; the provider
+    # is pointed at the text fixture directory (presumably so that the
+    # child document can be located there -- confirm against DirDocProvider).
+    fixture_dir = get_fixture('text', '')
+    document = WLDocument.from_file(collection_path,
+                                    provider=DirDocProvider(fixture_dir))
+    document.as_epub(flags=['without_fonts'])
index 5187e06..51d6acd 100644 (file)
@@ -3,44 +3,38 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from librarian import html, NoDublinCore
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
 from nose.tools import *
-from utils import get_fixture, remove_output_file
+from utils import get_fixture
 
-def teardown_transform():
-    remove_output_file('text', 'asnyk_miedzy_nami.html')
 
-
-@with_setup(None, teardown_transform)
 def test_transform():
-    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.html')
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html')
 
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami.xml'),
-        output_file_path,
-    )
+    html = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_html().get_string()
 
-    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
+    assert_equal(html, file(expected_output_file_path).read())
 
 
-@with_setup(None, teardown_transform)
 @raises(NoDublinCore)
 def test_no_dublincore():
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
-        get_fixture('text', 'asnyk_miedzy_nami.html'),
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
+        ).as_html()
 
 
-@with_setup(None, teardown_transform)
 def test_passing_parse_dublincore_to_transform():
     """Passing parse_dublincore=False to transform omits DublinCore parsing."""
-    html.transform(
-        get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
-        get_fixture('text', 'asnyk_miedzy_nami.html'),
-        parse_dublincore=False,
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+            parse_dublincore=False,
+        ).as_html()
 
 def test_empty():
-    assert html.transform('<utwor />', is_file=False, parse_dublincore=False).find('empty')
+    assert not WLDocument.from_string(
+            '<utwor />',
+            parse_dublincore=False,
+        ).as_html()
diff --git a/tests/test_picture.py b/tests/test_picture.py
new file mode 100644 (file)
index 0000000..71a77dc
--- /dev/null
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from librarian import picture, dcparser
+from lxml import etree
+from nose.tools import *
+from os.path import splitext
+from tests.utils import get_all_fixtures, get_fixture
+import codecs
+from os import path
+
+def test_wlpictureuri():
+    """Smoke test: WLPictureURI accepts a catalogue picture URL."""
+    # Only construction is exercised; the resulting object is not inspected.
+    picture.WLPictureURI('http://wolnelektury.pl/katalog/obraz/angelus-novus')
+
+def check_load(xml_file):
+    """Assert that dcparser turns `xml_file` into a PictureInfo instance."""
+    info = dcparser.parse(xml_file, picture.PictureInfo)
+    # isinstance() is False for None, so a missing result is rejected too.
+    assert isinstance(info, picture.PictureInfo)
+    
+
+def test_load():
+    """Yield one check_load case for every XML fixture in the picture directory."""
+    for xml_path in get_all_fixtures('picture', '*.xml'):
+        yield check_load, xml_path
+
+
+def test_wlpicture():
+    """Load the Angelus Novus fixture and check its metadata and image file."""
+    # Keep the XML handle open for the whole test (in case WLPicture reads
+    # from it lazily) and make sure it is closed when the test ends.
+    with open(get_fixture('picture', 'angelus-novus.xml')) as xml_file:
+        wlp = picture.WLPicture.from_file(xml_file)
+        pi = wlp.picture_info
+
+        assert pi.type[0] == u"Image"
+        assert pi.mime_type == u'image/png' == wlp.mime_type
+        assert wlp.slug == 'angelus-novus'
+
+        assert path.exists(wlp.image_path)
+
+        # Opening the image must succeed; its content is not inspected.
+        f = wlp.image_file('r')
+        f.close()
+
+def test_picture_parts():
+    """Check the parts, themes and object names parsed from the fixture."""
+    with open(get_fixture('picture', 'angelus-novus.xml')) as xml_file:
+        wlp = picture.WLPicture.from_file(xml_file)
+        # Materialise inside the `with` block in case partiter() is lazy.
+        parts = list(wlp.partiter())
+
+    # Printed first so the parts show up in captured output on any failure.
+    print parts
+    # The fixture declares five <div> elements (presumably one part each).
+    assert len(parts) == 5, \
+        "there should be %d parts of the picture, got %d" % (5, len(parts))
+
+    motifs = set()
+    names = set()
+    for p in parts:
+        motifs.update(p['themes'])
+        # Parts without an object name are skipped.
+        if p['object']:
+            names.add(p['object'])
+
+    assert motifs == set([u'anioł historii', u'spojrzenie']), "missing motifs, got: %s" % motifs
+    assert names == set([u'obraz cały', u'skrzydło']), 'missing objects, got: %s' % names
+    
+        
index 7ff94ca..70dfb60 100644 (file)
@@ -3,42 +3,32 @@
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from librarian import text, NoDublinCore
+from librarian import NoDublinCore
+from librarian.parser import WLDocument
 from nose.tools import *
-from utils import get_fixture, remove_output_file
+from utils import get_fixture
 
 
-def teardown_transform():
-    remove_output_file('text', 'asnyk_miedzy_nami.txt')
-
-
-@with_setup(None, teardown_transform)
 def test_transform():
-    output_file_path = get_fixture('text', 'asnyk_miedzy_nami.txt')
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
 
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami.xml')),
-        open(output_file_path, 'w'),
-    )
+    text = WLDocument.from_file(
+            get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+        ).as_text().get_string()
 
-    assert_equal(file(output_file_path).read(), file(expected_output_file_path).read())
+    assert_equal(text, file(expected_output_file_path).read())
 
 
-@with_setup(None, teardown_transform)
 @raises(NoDublinCore)
 def test_no_dublincore():
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')),
-        open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'),
-    )
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
+        ).as_text()
 
 
-@with_setup(None, teardown_transform)
 def test_passing_parse_dublincore_to_transform():
-    """Passing parse_dublincore=False to transform omits DublinCore parsing."""
-    text.transform(
-        open(get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')),
-        open(get_fixture('text', 'asnyk_miedzy_nami.txt'), 'w'),
-        parse_dublincore=False,
-    )
+    """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
+    WLDocument.from_file(
+            get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
+            parse_dublincore=False,
+        ).as_text()
index b112066..3b1f4f5 100644 (file)
@@ -21,10 +21,3 @@ def get_fixture(dir_name, file_name):
 def get_all_fixtures(dir_name, glob_pattern='*'):
     """Returns list of paths for fixtures in directory dir_name matching the glob_pattern."""
     return [get_fixture(dir_name, file_name) for file_name in glob.glob(join(get_fixture_dir(dir_name), glob_pattern))]
-
-
-def remove_output_file(dir_name, file_name):
-    try:
-        os.remove(get_fixture(dir_name, file_name))
-    except:
-        pass