Initial FictionBook 2 output support.
authorMichał Górny <mgorny@gentoo.org>
Tue, 24 Apr 2012 18:29:12 +0000 (20:29 +0200)
committerMichał Górny <mgorny@gentoo.org>
Tue, 24 Apr 2012 18:29:30 +0000 (20:29 +0200)
This is a work-in-progress and still needs a lot of polishing.

librarian/fb2.py [new file with mode: 0644]
librarian/fb2/fb2.xslt [new file with mode: 0644]
librarian/fb2/footnotes.xslt [new file with mode: 0644]
librarian/fb2/inline.xslt [new file with mode: 0644]
librarian/fb2/paragraphs.xslt [new file with mode: 0644]
librarian/fb2/poems.xslt [new file with mode: 0644]
librarian/fb2/sections.xslt [new file with mode: 0644]
librarian/parser.py
scripts/book2fb2 [new file with mode: 0755]

diff --git a/librarian/fb2.py b/librarian/fb2.py
new file mode 100644 (file)
index 0000000..b0ad410
--- /dev/null
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+from copy import deepcopy
+from lxml import etree
+
+from librarian import functions, OutputFile
+
+
+functions.reg_substitute_entities()
+
+def transform(wldoc, verbose=False,
+              cover=None, flags=None):
+    """ produces a FB2 file, returned through OutputFile.from_string()
+
+    cover: a cover.Cover object or True for default (not used here yet)
+    flags: less-advertising, working-copy; each is set to 'yes' on the root
+    """
+    # NOTE: verbose is accepted but not used by this function either.
+    document = deepcopy(wldoc)
+    del wldoc  # only the copy is used from here on
+    # flags are set on the root element of the copy, not on the caller's document
+    if flags:
+        for flag in flags:
+            document.edoc.getroot().set(flag, 'yes')
+
+    style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
+    style = etree.parse(style_filename)
+
+    result = document.transform(style)
+    # the result is converted to text and handed over as UTF-8 bytes
+    return OutputFile.from_string(unicode(result).encode('utf-8'))
diff --git a/librarian/fb2/fb2.xslt b/librarian/fb2/fb2.xslt
new file mode 100644 (file)
index 0000000..e9ab6ce
--- /dev/null
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <xsl:include href="footnotes.xslt"/>
+       <xsl:include href="inline.xslt"/>
+       <xsl:include href="paragraphs.xslt"/>
+       <xsl:include href="poems.xslt"/>
+       <xsl:include href="sections.xslt"/>
+
+       <xsl:strip-space elements="*"/>
+       <xsl:output encoding="utf-8" method="xml" indent="yes"/>
+
+       <xsl:template match="utwor">
+               <FictionBook>
+                       <xsl:apply-templates mode="outer"/>
+                       <!-- main flow first; then a second pass in "footnotes" mode collects the note bodies -->
+                       <body name="footnotes"> <!-- NOTE(review): FB2 readers usually expect name="notes" here; confirm -->
+                               <xsl:apply-templates mode="footnotes"/>
+                       </body>
+               </FictionBook>
+       </xsl:template>
+
+       <xsl:template match="dc:*" mode="outer"> <!-- elements in the dc: namespace produce no output here -->
+       </xsl:template>
+
+       <xsl:template match="powiesc|opowiadanie" mode="outer">
+               <body> <!-- main body for main book flow; author and work title (if present) form its title -->
+                       <xsl:if test="autor_utworu or nazwa_utworu">
+                               <title>
+                                       <xsl:apply-templates mode="para"
+                                               select="autor_utworu|nazwa_utworu"/>
+                               </title>
+                       </xsl:if>
+                       <!-- all children are then processed in "sections" mode, where author and title are skipped -->
+                       <xsl:apply-templates mode="sections"/>
+               </body>
+       </xsl:template>
+</xsl:stylesheet>
diff --git a/librarian/fb2/footnotes.xslt b/librarian/fb2/footnotes.xslt
new file mode 100644 (file)
index 0000000..663b2a3
--- /dev/null
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <!-- footnote body mode: each pe becomes a section with id "fnN" holding the note text -->
+       <xsl:template match="pe" mode="footnotes">
+               <!-- we number them absolutely: N is one more than the number of pe elements preceding this one -->
+               <xsl:variable name="n" select="count(preceding::pe) + 1"/>
+
+               <xsl:element name="section">
+                       <xsl:attribute name="id">fn<xsl:value-of select="$n"/></xsl:attribute>
+                       <!-- the note content is rendered as a single paragraph, in inline mode -->
+                       <p><xsl:apply-templates mode="inline"/></p>
+               </xsl:element>
+       </xsl:template>
+       <xsl:template match="text()" mode="footnotes"/> <!-- text reached in this mode is dropped -->
+
+       <!-- footnote links: N is computed exactly as in footnotes mode, so "#fnN" points at the matching note -->
+       <xsl:template match="pe" mode="inline">
+               <xsl:variable name="n" select="count(preceding::pe) + 1"/>
+               <xsl:element name="a">
+                       <xsl:attribute name="type">note</xsl:attribute>
+                       <xsl:attribute name="l:href">#fn<xsl:value-of select="$n"/></xsl:attribute>
+
+                       [<xsl:value-of select="$n"/>]
+               </xsl:element>
+       </xsl:template>
+</xsl:stylesheet>
diff --git a/librarian/fb2/inline.xslt b/librarian/fb2/inline.xslt
new file mode 100644 (file)
index 0000000..4151845
--- /dev/null
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <!-- inline elements -->
+
+       <!-- ignored: motyw elements and their content produce no output in inline mode -->
+       <xsl:template match="motyw" mode="inline"/>
+
+       <!-- formatting; declared in "inline" mode, the mode in which paragraph and footnote content is processed -->
+       <xsl:template match="slowo_obce|tytul_dziela" mode="inline">
+               <emphasis>
+                       <xsl:apply-templates mode="inline"/>
+               </emphasis>
+       </xsl:template>
+       <xsl:template match="wyroznienie" mode="inline">
+               <strong>
+                       <xsl:apply-templates mode="inline"/>
+               </strong>
+       </xsl:template>
+
+       <!-- text: passed through wl:substitute_entities(), presumably the function fb2.py registers on import -->
+       <xsl:template match="text()" mode="inline">
+               <xsl:value-of select="wl:substitute_entities(.)"/>
+       </xsl:template>
+</xsl:stylesheet>
diff --git a/librarian/fb2/paragraphs.xslt b/librarian/fb2/paragraphs.xslt
new file mode 100644 (file)
index 0000000..9c7def8
--- /dev/null
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <!-- in paragraph mode -->
+
+       <xsl:template mode="para" match="autor_utworu|nazwa_utworu|akap|akap_dialog">
+               <!-- paragraphs & similar: each becomes one p element with its content rendered in inline mode -->
+
+               <p><xsl:apply-templates mode="inline"/></p>
+       </xsl:template>
+
+       <!-- in global scope -->
+       <xsl:template mode="sections" match="akap|akap_dialog">
+               <!-- paragraphs & similar; ones after a heading are already output inside its section, so skip them -->
+               <xsl:if test="not(preceding-sibling::naglowek_rozdzial)">
+                       <p><xsl:apply-templates mode="inline"/></p>
+               </xsl:if>
+       </xsl:template>
+       <xsl:template mode="sections" match="autor_utworu|nazwa_utworu"/>
+</xsl:stylesheet>
diff --git a/librarian/fb2/poems.xslt b/librarian/fb2/poems.xslt
new file mode 100644 (file)
index 0000000..a9ef901
--- /dev/null
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <!-- poems -->
+
+       <!-- match poem citations; "para" is the mode in which the content of a section is processed -->
+       <xsl:template mode="para" match="poezja_cyt">
+               <cite>
+                       <poem>
+                               <xsl:apply-templates mode="poem"/>
+                       </poem>
+               </cite>
+       </xsl:template>
+       <!-- / XXX: fb2 doesn't allow <poem/> inside <p/> /
+       <xsl:template mode="inline" match="poezja_cyt">
+
+               <poem>
+                       <xsl:apply-templates mode="poem"/>
+               </poem>
+       </xsl:template>
+       -->
+
+       <!-- regular poem elements: a strofa becomes a stanza; its string value is split into verses (markup is lost) -->
+       <xsl:template mode="poem" match="strofa">
+               <stanza>
+                       <xsl:call-template name="split-poem">
+                               <xsl:with-param name="list" select="."/>
+                       </xsl:call-template>
+               </stanza>
+       </xsl:template>
+
+       <!-- split into verses: emits one v element per '/'-separated piece of $list, recursively -->
+       <xsl:template name="split-poem">
+               <xsl:param name="list"></xsl:param>
+               <!-- the recursion ends once nothing is left to split -->
+               <xsl:if test="$list != ''">
+                       <xsl:variable name="before"
+                               select="substring-before(concat($list, '/'), '/')"/>
+                       <xsl:variable name="after"
+                               select="substring-after($list, '/')"/>
+                       <!-- one verse: the text up to the first '/' (or all of it, when there is no '/') -->
+                       <v>
+                               <xsl:value-of select="$before"/>
+                       </v>
+                       <!-- continue with whatever follows the first '/' -->
+                       <xsl:call-template name="split-poem">
+                               <xsl:with-param name="list" select="$after"/>
+                       </xsl:call-template>
+               </xsl:if>
+       </xsl:template>
+</xsl:stylesheet>
diff --git a/librarian/fb2/sections.xslt b/librarian/fb2/sections.xslt
new file mode 100644 (file)
index 0000000..7f34bc3
--- /dev/null
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+       This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+       Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+       xmlns:wl="http://wolnelektury.pl/functions"
+       xmlns:dc="http://purl.org/dc/elements/1.1/"
+       xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+       xmlns:l="http://www.w3.org/1999/xlink">
+
+       <!-- a nice epigraph section: the children of nota_red are rendered in paragraph mode inside epigraph -->
+       <xsl:template match="nota_red" mode="sections">
+               <epigraph>
+                       <xsl:apply-templates mode="para"/>
+               </epigraph>
+       </xsl:template>
+
+       <!-- main text is split by headings -->
+       <xsl:template match="naglowek_rozdzial" mode="sections">
+               <!--
+               This one's tricky - we need to split the text into sections.
+               In order to do that, all elements belonging to a single section
+               must have something in common. We assume that this common factor
+               is having the same number of following section headings.
+               The heading itself satisfies the condition too, so it is selected
+               along with its section and rendered as the title in "para" mode.
+               -->
+
+               <section>
+                       <xsl:apply-templates mode="para"
+                               select="../*[count(following-sibling::naglowek_rozdzial)
+                                       = count(current()/following-sibling::naglowek_rozdzial)]"/>
+               </section>
+       </xsl:template>
+
+       <!-- actual headings: within a section, the heading becomes its title (one paragraph, inline content) -->
+       <xsl:template match="naglowek_rozdzial" mode="para">
+               <title><p><xsl:apply-templates mode="inline"/></p></title>
+       </xsl:template>
+</xsl:stylesheet>
index 2ece72f..6343d21 100644 (file)
@@ -185,6 +185,10 @@ class WLDocument(object):
         from librarian import mobi
         return mobi.transform(self, *args, **kwargs)
 
+    def as_fb2(self, *args, **kwargs):
+        from librarian import fb2  # imported only when FB2 output is requested
+        return fb2.transform(self, *args, **kwargs)  # all arguments are passed through
+
     def save_output_file(self, output_file, output_path=None,
             output_dir_path=None, make_author_dir=False, ext=None):
         if output_dir_path:
diff --git a/scripts/book2fb2 b/scripts/book2fb2
new file mode 100755 (executable)
index 0000000..81a002b
--- /dev/null
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+import optparse
+
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
+
+
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Convert SOURCE files to FB2 format."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+    parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
+                      help='create default cover')
+    parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False,
+                      help='mark the output as a working copy')
+    parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
+                      help='create a directory for author and put the PDF in it')
+    parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+                      help='specifies the output file')
+    parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+                      help='specifies the directory for output')
+
+    options, input_filenames = parser.parse_args()
+
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+
+    flags = []
+    if options.working_copy:
+        flags.append('working-copy')
+
+    # Do some real work
+    try:
+        for main_input in input_filenames:
+            if options.verbose:
+                print main_input
+
+            path, fname = os.path.realpath(main_input).rsplit('/', 1)
+            provider = DirDocProvider(path)
+            if not (options.output_file or options.output_dir):
+                output_file = os.path.splitext(main_input)[0] + '.fb2'
+            else:
+                output_file = None
+
+            doc = WLDocument.from_file(main_input, provider=provider)
+            fb2 = doc.as_fb2(cover=options.with_cover, flags=flags)
+
+            doc.save_output_file(fb2,
+                output_file, options.output_dir, options.make_dir, 'fb2')
+
+    except ParseError, e:
+        print '%(file)s:%(name)s:%(message)s' % {
+            'file': main_input,
+            'name': e.__class__.__name__,
+            'message': e
+        }