+++ /dev/null
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import cStringIO
-import re
-import optparse
-import os
-import sys
-
-from lxml import etree
-
-
-ENTITY_SUBSTITUTIONS = [
- (u'---', u'—'),
- (u'--', u'–'),
- (u'...', u'…'),
- (u',,', u'„'),
- (u'"', u'”'),
-]
-
-
-def substitute_entities(context, text):
- """XPath extension function converting all entites in passed text."""
- if isinstance(text, list):
- text = ''.join(text)
- for entity, substitutution in ENTITY_SUBSTITUTIONS:
- text = text.replace(entity, substitutution)
- return text
-
-
-# Register substitute_entities function with lxml
-ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
-ns['substitute_entities'] = substitute_entities
-
-
-def transform(input_filename, output_filename):
- """Transforms file input_filename in XML to output_filename in XHTML."""
- # Parse XSLT
- style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
- style = etree.parse(style_filename)
-
- doc_file = cStringIO.StringIO()
- expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
-
- f = open(input_filename, 'r')
- for line in f:
- line = line.decode('utf-8')
- line = expr.sub(u'<br/>\n', line)
- doc_file.write(line.encode('utf-8'))
- f.close()
-
- doc_file.seek(0);
-
- parser = etree.XMLParser(remove_blank_text=True)
- doc = etree.parse(doc_file, parser)
-
- result = doc.xslt(style)
- result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
-
-
-if __name__ == '__main__':
- # Parse commandline arguments
- usage = """Usage: %prog [options] SOURCE [SOURCE...]
- Convert SOURCE files to HTML format."""
-
- parser = optparse.OptionParser(usage=usage)
-
- parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
-
- options, input_filenames = parser.parse_args()
-
- if len(input_filenames) < 1:
- parser.print_help()
- exit(1)
-
- # Do some real work
- for input_filename in input_filenames:
- if options.verbose:
- print input_filename
-
- output_filename = os.path.splitext(input_filename)[0] + '.html'
- transform(input_filename, output_filename)
-
+++ /dev/null
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:wl="http://wolnelektury.pl/functions" >
-
-<xsl:output method="xml" encoding="utf-8" doctype-public="-//W3C//DTD XHTML 1.1//EN" doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" indent="yes" />
-
-<xsl:template match="text()" />
-<xsl:template match="text()" mode="inline">
- <xsl:value-of select="wl:substitute_entities(.)" />
-</xsl:template>
-
-<xsl:template match="extra|uwaga" />
-<xsl:template match="extra|uwaga" mode="inline" />
-
-<xsl:template match="utwor">
- <html>
- <head>
- <title>book2html output</title>
- <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
- <link rel="stylesheet" href="master.css" type="text/css" media="all" charset="utf-8" />
- </head>
- <body>
- <xsl:apply-templates select="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny" />
- <xsl:if test="count(descendant::*[self::pe or self::pa or self::pr or self::pt][not(parent::extra)])">
- <div id="footnotes">
- <h3>Przypisy</h3>
- <xsl:for-each select="descendant::*[self::pe or self::pa or self::pr or self::pt][not(parent::extra)]">
- <div>
- <a name="{concat('footnote-', generate-id(.))}" />
- <a href="{concat('#anchor-', generate-id(.))}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
- <xsl:choose>
- <xsl:when test="count(akap|akap_cd|strofa) = 0">
- <p><xsl:apply-templates select="text()|*" mode="inline" /></p>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates select="text()|*" mode="inline" />
- </xsl:otherwise>
- </xsl:choose>
- </div>
- </xsl:for-each>
- </div>
- </xsl:if>
- </body>
- </html>
-</xsl:template>
-
-
-<xsl:template match="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny">
- <xsl:if test="nazwa_utworu">
- <h1>
- <xsl:apply-templates select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul" mode="header" />
- </h1>
- </xsl:if>
- <xsl:apply-templates />
-</xsl:template>
-
-<!-- ======================= -->
-<!-- = Header (title page) = -->
-<!-- ======================= -->
-<xsl:template match="autor_utworu" mode="header">
- <span class="author"><xsl:apply-templates mode="inline" /></span>
-</xsl:template>
-
-<xsl:template match="nazwa_utworu" mode="header">
- <span class="title"><xsl:apply-templates mode="inline" /></span>
-</xsl:template>
-
-<xsl:template match="dzielo_nadrzedne" mode="header">
- <span class="collection"><xsl:apply-templates mode="inline" /></span>
-</xsl:template>
-
-<xsl:template match="podtytul" mode="header">
- <span class="subtitle"><xsl:apply-templates mode="inline" /></span>
-</xsl:template>
-
-<xsl:template match="nota">
- <div class="note"><xsl:apply-templates /></div>
-</xsl:template>
-
-<xsl:template match="naglowek_akt|naglowek_czesc|srodtytul">
- <h2><xsl:apply-templates mode="inline" /></h2>
-</xsl:template>
-
-<xsl:template match="naglowek_scena|naglowek_rozdzial">
- <h3><xsl:apply-templates mode="inline" /></h3>
-</xsl:template>
-
-<xsl:template match="naglowek_osoba">
- <h4><xsl:apply-templates mode="inline" /></h4>
-</xsl:template>
-
-<xsl:template match="kwestia">
- <div class="kwestia">
- <xsl:apply-templates select="strofa|akap" />
- </div>
-</xsl:template>
-
-<xsl:template match="didaskalia">
- <div class="didaskalia"><xsl:apply-templates mode="inline" /></div>
-</xsl:template>
-
-<xsl:template match="lista_osob">
- <div class="person-list">
- <h3><xsl:value-of select="naglowek_listy" /></h3>
- <ol>
- <xsl:apply-templates select="lista_osoba" />
- </ol>
- </div>
-</xsl:template>
-
-<xsl:template match="lista_osoba">
- <li><xsl:apply-templates mode="inline" /></li>
-</xsl:template>
-
-<xsl:template match="begin" mode="inline">
- <xsl:variable name="mnum" select="concat('m', substring(@id, 2))" />
- <span class="theme-begin" fid="{substring(@id, 2)}">
- <xsl:value-of select="string(following::motyw[@id=$mnum]/text())" />
- </span>
-</xsl:template>
-
-<xsl:template match="end" mode="inline">
- <span class="theme-end" fid="{substring(@id, 2)}"> </span>
-</xsl:template>
-
-<xsl:template match="begin|end">
- <xsl:apply-templates select='.' mode="inline" />
-</xsl:template>
-
-<xsl:template name="verse">
- <xsl:param name="line-content" />
- <xsl:param name="line-number" />
- <p>
- <xsl:choose>
- <xsl:when test="name($line-content) = 'wers_akap'">
- <xsl:attribute name="style">indent: 1em</xsl:attribute>
- </xsl:when>
- <xsl:when test="name($line-content) = 'wers_wciety'">
- <xsl:attribute name="style">indent: 2em</xsl:attribute>
- </xsl:when>
- </xsl:choose>
- <xsl:apply-templates select="$line-content" mode="inline" />
- </p>
-</xsl:template>
-
-<xsl:template match="pa|pe|pr|pt" mode="inline">
- <a name="{concat('anchor-', generate-id(.))}" />
- <a href="{concat('#footnote-', generate-id(.))}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
-</xsl:template>
-
-<xsl:template match="strofa">
- <div class="stanza">
- <xsl:choose>
- <xsl:when test="count(br) > 0">
- <xsl:call-template name="verse">
- <xsl:with-param name="line-content" select="br[1]/preceding-sibling::text() | br[1]/preceding-sibling::node()" />
- <xsl:with-param name="line-number" select="1" />
- </xsl:call-template>
- <xsl:for-each select="br">
- <!-- Każdy BR "zjada" to co jest za nim -->
- <xsl:variable name="lnum" select="count(preceding-sibling::br)" />
- <xsl:call-template name="verse">
- <xsl:with-param name="line-number" select="$lnum+2" />
- <xsl:with-param name="line-content"
- select="following-sibling::text()[count(preceding-sibling::br) = $lnum+1] | following-sibling::node()[count(preceding-sibling::br) = $lnum+1]" />
- </xsl:call-template>
- </xsl:for-each>
- </xsl:when>
- <xsl:otherwise>
- <xsl:call-template name="verse">
- <xsl:with-param name="line-content" select="text() | node()" />
- <xsl:with-param name="line-number" select="1" />
- </xsl:call-template>
- </xsl:otherwise>
- </xsl:choose>
- </div>
-</xsl:template>
-
-<xsl:template match="akap|akap_dialog|akap_cd">
- <p class="paragraph"><xsl:apply-templates mode="inline" /></p>
-</xsl:template>
-
-<xsl:template match="motyw" mode="inline" />
-
-<xsl:template match="dlugi_cytat">
- <blockquote><xsl:apply-templates /></blockquote>
-</xsl:template>
-
-<xsl:template match="motto">
- <p class="motto"><xsl:apply-templates mode="inline" /></p>
-</xsl:template>
-
-<xsl:template match="motto_podpis">
- <p class="motto_podpis"><xsl:apply-templates mode="inline" /></p>
-</xsl:template>
-
-<xsl:template match="sekcja_swiatlo">
- <hr class="spacer" />
-</xsl:template>
-
-</xsl:stylesheet>
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import optparse
-import os
-import copy
-
-from lxml import etree
-
-
-class Fragment(object):
- def __init__(self, id, themes):
- super(Fragment, self).__init__()
- self.id = id
- self.themes = themes
- self.events = []
-
- def append(self, event, element):
- self.events.append((event, element))
-
- def closed_events(self):
- stack = []
- for event, element in self.events:
- if event == 'start':
- stack.append(('end', element))
- elif event == 'end':
- try:
- stack.pop()
- except IndexError:
- print 'CLOSED NON-OPEN TAG:', element
-
- stack.reverse()
- return self.events + stack
-
- def to_string(self):
- result = []
- for event, element in self.closed_events():
- if event == 'start':
- result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
- if element.text:
- result.append(element.text)
- elif event == 'end':
- result.append(u'</%s>' % element.tag)
- if element.tail:
- result.append(element.tail)
- else:
- result.append(element)
-
- return ''.join(result)
-
- def __unicode__(self):
- return self.to_string()
-
-
-def extract_fragments(input_filename):
- """Extracts theme fragments from input_filename."""
- open_fragments = {}
- closed_fragments = {}
-
- for event, element in etree.iterparse(input_filename, events=('start', 'end')):
- # Process begin and end elements
- if element.tag == 'span' and element.get('class', '') in ('theme-begin', 'theme-end'):
- if not event == 'end': continue # Process elements only once, on end event
-
- # Open new fragment
- if element.get('class', '') == 'theme-begin':
- fragment = Fragment(id=element.get('fid'), themes=element.text)
-
- # Append parents
- if element.getparent().tag != 'body':
- parents = [element.getparent()]
- while parents[-1].getparent().tag != 'body':
- parents.append(parents[-1].getparent())
-
- parents.reverse()
- for parent in parents:
- fragment.append('start', parent)
-
- open_fragments[fragment.id] = fragment
-
- # Close existing fragment
- else:
- try:
- fragment = open_fragments[element.get('fid')]
- except KeyError:
- print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
- else:
- closed_fragments[fragment.id] = fragment
- del open_fragments[fragment.id]
-
- # Append element tail to lost_text (we don't want to lose any text)
- if element.tail:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append('text', element.tail)
-
-
- # Process all elements except begin and end
- else:
- # Omit annotation tags
- if len(element.get('name', '')) or element.get('class', '') == 'annotation':
- if event == 'end' and element.tail:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append('text', element.tail)
- else:
- for fragment_id in open_fragments:
- open_fragments[fragment_id].append(event, copy.copy(element))
-
- return closed_fragments, open_fragments
-
-
-if __name__ == '__main__':
- # Parse commandline arguments
- usage = """Usage: %prog [options] SOURCE [SOURCE...]
- Extract theme fragments from SOURCE."""
-
- parser = optparse.OptionParser(usage=usage)
-
- parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
-
- options, input_filenames = parser.parse_args()
-
- if len(input_filenames) < 1:
- parser.print_help()
- exit(1)
-
- # Do some real work
- for input_filename in input_filenames:
- if options.verbose:
- print input_filename
-
- output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
-
- closed_fragments, open_fragments = extract_fragments(input_filename)
-
- for fragment_id in open_fragments:
- print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
-
- output_file = open(output_filename, 'w')
- output_file.write("""
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
- <html><head>
- <title>bookfragments output</title>
- <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
- <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
- </head>
- <body>""")
- for fragment in closed_fragments.values():
- html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
- output_file.write(html.encode('utf-8'))
- output_file.write('</body></html>')
- output_file.close()
-
+++ /dev/null
-body {
- font-size: 16px;
- font: Georgia, "Times New Roman", serif;
- line-height: 1.5em;
- margin: 3em;
- max-width: 36em;
-}
-
-a {
- color: blue;
- text-decoration: none;
-}
-
-/* =================================================== */
-/* = Common elements: headings, paragraphs and lines = */
-/* =================================================== */
-h1 {
- font-size: 3em;
- margin: 1.5em 0;
- text-align: center;
- line-height: 1.5em;
- font-weight: bold;
-}
-
-h2 {
- font-size: 2em;
- margin: 1.5em 0 0;
- font-weight: bold;
- line-height: 1.5em;
-}
-
-h3 {
- font-size: 1.5em;
- margin: 1.5em 0 0;
- font-weight: normal;
- line-height: 1.5em;
-}
-
-h4 {
- font-size: 1em;
- margin: 1.5em 0 0;
- line-height: 1.5em;
-}
-
-p {
- margin: 0;
-}
-
-/* ======================== */
-/* = Footnotes and themes = */
-/* ======================== */
-.theme-begin {
- border-left: 0.1em solid #DDDDDD;
- color: #666;
- float: right;
- margin: 0 -9.5em 0 0;
- padding: 0 0.5em;
- width: 7.5em;
- font-style: normal;
- font-weight: normal;
- font-size: 16px;
-}
-
-.annotation {
- font-style: normal;
- font-weight: normal;
- font-size: 16px;
-}
-
-#footnotes .annotation {
- display: block;
- float: left;
- width: 2.5em;
- clear: both;
-}
-
-#footnotes div {
- margin: 1.5em 0 0 0;
-}
-
-#footnotes p {
- margin-left: 2.5em;
-}
-
-/* =================== */
-/* = Custom elements = */
-/* =================== */
-span.author {
- font-size: 0.75em;
- display: block;
- line-height: 1.5em;
- margin-bottom: 0.25em;
-}
-
-span.collection {
- font-size: 0.75em;
- display: block;
- line-height: 1.5em;
- margin-bottom: -0.25em;
-}
-
-span.subtitle {
- font-size: 0.75em;
- display: block;
- line-height: 1.5em;
- margin-top: -0.25em;
-}
-
-div.didaskalia {
- font-style: italic;
- margin: 0.5em 0 0;
-}
-
-div.kwestia {
- margin: 0.5em 0 0;
-}
-
-div.stanza {
- margin: 1.5em 0 0;
-}
-
-div.kwestia div.stanza {
- margin: 0;
-}
-
-p.paragraph {
- text-align: justify;
- margin: 1.5em 0 0;
-}
-
-p.motto {
- text-align: justify;
- font-style: italic;
- margin: 1.5em 0 0;
-}
-
-p.motto_podpis {
- font-size: 0.875em;
-}
-
-div.fragment {
- border-bottom: 0.1em solid #999;
- padding-bottom: 1.5em;
-}
-
-div.note p, div.note p.paragraph {
- text-align: right;
- font-style: italic;
-}
-
-hr.spacer {
- height: 3em;
- visibility: hidden;
-}
--- /dev/null
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import html
+
+
+if __name__ == '__main__':
+    # sys is only needed for the exit status; exit() is injected by the site
+    # module for interactive use and is not guaranteed to exist in scripts.
+    import sys
+
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to HTML format."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+
+    options, input_filenames = parser.parse_args()
+
+    # At least one SOURCE is required
+    if not input_filenames:
+        parser.print_help()
+        sys.exit(1)
+
+    # Convert every SOURCE into an .html file with the same base name
+    for input_filename in input_filenames:
+        if options.verbose:
+            print(input_filename)
+
+        output_filename = os.path.splitext(input_filename)[0] + '.html'
+        html.transform(input_filename, output_filename)
+
+
--- /dev/null
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import html
+
+
+if __name__ == '__main__':
+    # sys is only needed for the exit status; exit() is injected by the site
+    # module for interactive use and is not guaranteed to exist in scripts.
+    import sys
+
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Extract theme fragments from SOURCE."""
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+
+    options, input_filenames = parser.parse_args()
+
+    # At least one SOURCE is required
+    if not input_filenames:
+        parser.print_help()
+        sys.exit(1)
+
+    # Write one SOURCE.fragments.html report per SOURCE
+    for input_filename in input_filenames:
+        if options.verbose:
+            print(input_filename)
+
+        output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
+
+        closed_fragments, open_fragments = html.extract_fragments(input_filename)
+
+        # Fragments whose end marker was never seen are reported, not written
+        for fragment_id in open_fragments:
+            print('%s:warning:unclosed fragment #%s' % (input_filename, fragment_id))
+
+        output_file = open(output_filename, 'w')
+        try:
+            output_file.write("""
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ <html><head>
+ <title>bookfragments output</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
+ <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
+ </head>
+ <body>""")
+            for fragment in closed_fragments.values():
+                fragment_html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
+                output_file.write(fragment_html.encode('utf-8'))
+            output_file.write('</body></html>')
+        finally:
+            # Close the report even if serialising a fragment fails
+            output_file.close()
+
+
--- /dev/null
+body {
+    font-size: 16px;
+    /* font-family rather than the `font` shorthand: the shorthand is invalid
+       without a size and the whole declaration would be ignored. */
+    font-family: Georgia, "Times New Roman", serif;
+    line-height: 1.5em;
+    margin: 3em;
+    max-width: 36em;
+}
+
+a {
+ color: blue;
+ text-decoration: none;
+}
+
+/* =================================================== */
+/* = Common elements: headings, paragraphs and lines = */
+/* =================================================== */
+h1 {
+ font-size: 3em;
+ margin: 1.5em 0;
+ text-align: center;
+ line-height: 1.5em;
+ font-weight: bold;
+}
+
+h2 {
+ font-size: 2em;
+ margin: 1.5em 0 0;
+ font-weight: bold;
+ line-height: 1.5em;
+}
+
+h3 {
+ font-size: 1.5em;
+ margin: 1.5em 0 0;
+ font-weight: normal;
+ line-height: 1.5em;
+}
+
+h4 {
+ font-size: 1em;
+ margin: 1.5em 0 0;
+ line-height: 1.5em;
+}
+
+p {
+ margin: 0;
+}
+
+/* ======================== */
+/* = Footnotes and themes = */
+/* ======================== */
+.theme-begin {
+ border-left: 0.1em solid #DDDDDD;
+ color: #666;
+ float: right;
+ margin: 0 -9.5em 0 0;
+ padding: 0 0.5em;
+ width: 7.5em;
+ font-style: normal;
+ font-weight: normal;
+ font-size: 16px;
+}
+
+.annotation {
+ font-style: normal;
+ font-weight: normal;
+ font-size: 16px;
+}
+
+#footnotes .annotation {
+ display: block;
+ float: left;
+ width: 2.5em;
+ clear: both;
+}
+
+#footnotes div {
+ margin: 1.5em 0 0 0;
+}
+
+#footnotes p {
+ margin-left: 2.5em;
+}
+
+/* =================== */
+/* = Custom elements = */
+/* =================== */
+span.author {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: 0.25em;
+}
+
+span.collection {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: -0.25em;
+}
+
+span.subtitle {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-top: -0.25em;
+}
+
+div.didaskalia {
+ font-style: italic;
+ margin: 0.5em 0 0;
+}
+
+div.kwestia {
+ margin: 0.5em 0 0;
+}
+
+div.stanza {
+ margin: 1.5em 0 0;
+}
+
+div.kwestia div.stanza {
+ margin: 0;
+}
+
+p.paragraph {
+ text-align: justify;
+ margin: 1.5em 0 0;
+}
+
+p.motto {
+ text-align: justify;
+ font-style: italic;
+ margin: 1.5em 0 0;
+}
+
+p.motto_podpis {
+ font-size: 0.875em;
+}
+
+div.fragment {
+ border-bottom: 0.1em solid #999;
+ padding-bottom: 1.5em;
+}
+
+div.note p, div.note p.paragraph {
+ text-align: right;
+ font-style: italic;
+}
+
+hr.spacer {
+ height: 3em;
+ visibility: hidden;
+}
--- /dev/null
+body {
+    font-size: 16px;
+    /* font-family rather than the `font` shorthand: the shorthand is invalid
+       without a size and the whole declaration would be ignored. */
+    font-family: Georgia, "Times New Roman", serif;
+    line-height: 1.5em;
+    margin: 3em;
+    max-width: 36em;
+}
+
+a {
+ color: blue;
+ text-decoration: none;
+}
+
+/* =================================================== */
+/* = Common elements: headings, paragraphs and lines = */
+/* =================================================== */
+h1 {
+ font-size: 3em;
+ margin: 1.5em 0;
+ text-align: center;
+ line-height: 1.5em;
+ font-weight: bold;
+}
+
+h2 {
+ font-size: 2em;
+ margin: 1.5em 0 0;
+ font-weight: bold;
+ line-height: 1.5em;
+}
+
+h3 {
+ font-size: 1.5em;
+ margin: 1.5em 0 0;
+ font-weight: normal;
+ line-height: 1.5em;
+}
+
+h4 {
+ font-size: 1em;
+ margin: 1.5em 0 0;
+ line-height: 1.5em;
+}
+
+p {
+ margin: 0;
+}
+
+/* ======================== */
+/* = Footnotes and themes = */
+/* ======================== */
+.theme-begin {
+ border-left: 0.1em solid #DDDDDD;
+ color: #666;
+ float: right;
+ margin: 0 -9.5em 0 0;
+ padding: 0 0.5em;
+ width: 7.5em;
+ font-style: normal;
+ font-weight: normal;
+ font-size: 16px;
+ display: none;
+}
+
+.annotation {
+ font-style: normal;
+ font-weight: normal;
+ font-size: 16px;
+ display: none;
+}
+
+#footnotes {
+ display: none;
+}
+
+#footnotes .annotation {
+ display: block;
+ float: left;
+ width: 2.5em;
+ clear: both;
+}
+
+#footnotes div {
+ margin: 1.5em 0 0 0;
+}
+
+#footnotes p {
+ margin-left: 2.5em;
+}
+
+/* =================== */
+/* = Custom elements = */
+/* =================== */
+span.author {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: 0.25em;
+}
+
+span.collection {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: -0.25em;
+}
+
+span.subtitle {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-top: -0.25em;
+}
+
+div.didaskalia {
+ font-style: italic;
+ margin: 0.5em 0 0;
+}
+
+div.kwestia {
+ margin: 0.5em 0 0;
+}
+
+div.stanza {
+ margin: 1.5em 0 0;
+}
+
+div.kwestia div.stanza {
+ margin: 0;
+}
+
+p.paragraph {
+ text-align: justify;
+ margin: 1.5em 0 0;
+}
+
+p.motto {
+ text-align: justify;
+ font-style: italic;
+ margin: 1.5em 0 0;
+}
+
+p.motto_podpis {
+ font-size: 0.875em;
+}
+
+div.fragment {
+ border-bottom: 0.1em solid #999;
+ padding-bottom: 1.5em;
+}
+
+div.note p, div.note p.paragraph {
+ text-align: right;
+ font-style: italic;
+}
+
+hr.spacer {
+ height: 3em;
+ visibility: hidden;
+}
--- /dev/null
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions" >
+
+<xsl:output method="xml" encoding="utf-8" doctype-public="-//W3C//DTD XHTML 1.1//EN" doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" indent="yes" />
+
+<xsl:template match="text()" />
+<xsl:template match="text()" mode="inline">
+ <xsl:value-of select="wl:substitute_entities(.)" />
+</xsl:template>
+
+<xsl:template match="extra|uwaga" />
+<xsl:template match="extra|uwaga" mode="inline" />
+
+<xsl:template match="utwor">
+ <html>
+ <head>
+ <title>book2html output</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
+ <link rel="stylesheet" href="master.css" type="text/css" media="all" charset="utf-8" />
+ </head>
+ <body>
+ <xsl:apply-templates select="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny" />
+ <xsl:if test="count(descendant::*[self::pe or self::pa or self::pr or self::pt][not(parent::extra)])">
+ <div id="footnotes">
+ <h3>Przypisy</h3>
+ <xsl:for-each select="descendant::*[self::pe or self::pa or self::pr or self::pt][not(parent::extra)]">
+ <div>
+ <a name="{concat('footnote-', generate-id(.))}" />
+ <a href="{concat('#anchor-', generate-id(.))}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
+ <xsl:choose>
+ <xsl:when test="count(akap|akap_cd|strofa) = 0">
+ <p><xsl:apply-templates select="text()|*" mode="inline" /></p>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:apply-templates select="text()|*" mode="inline" />
+ </xsl:otherwise>
+ </xsl:choose>
+ </div>
+ </xsl:for-each>
+ </div>
+ </xsl:if>
+ </body>
+ </html>
+</xsl:template>
+
+
+<xsl:template match="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny">
+ <xsl:if test="nazwa_utworu">
+ <h1>
+ <xsl:apply-templates select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul" mode="header" />
+ </h1>
+ </xsl:if>
+ <xsl:apply-templates />
+</xsl:template>
+
+<!-- ======================= -->
+<!-- = Header (title page) = -->
+<!-- ======================= -->
+<xsl:template match="autor_utworu" mode="header">
+ <span class="author"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="nazwa_utworu" mode="header">
+ <span class="title"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="dzielo_nadrzedne" mode="header">
+ <span class="collection"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="podtytul" mode="header">
+ <span class="subtitle"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="nota">
+ <div class="note"><xsl:apply-templates /></div>
+</xsl:template>
+
+<xsl:template match="naglowek_akt|naglowek_czesc|srodtytul">
+ <h2><xsl:apply-templates mode="inline" /></h2>
+</xsl:template>
+
+<xsl:template match="naglowek_scena|naglowek_rozdzial">
+ <h3><xsl:apply-templates mode="inline" /></h3>
+</xsl:template>
+
+<xsl:template match="naglowek_osoba">
+ <h4><xsl:apply-templates mode="inline" /></h4>
+</xsl:template>
+
+<xsl:template match="kwestia">
+ <div class="kwestia">
+ <xsl:apply-templates select="strofa|akap" />
+ </div>
+</xsl:template>
+
+<xsl:template match="didaskalia">
+ <div class="didaskalia"><xsl:apply-templates mode="inline" /></div>
+</xsl:template>
+
+<xsl:template match="lista_osob">
+ <div class="person-list">
+ <h3><xsl:value-of select="naglowek_listy" /></h3>
+ <ol>
+ <xsl:apply-templates select="lista_osoba" />
+ </ol>
+ </div>
+</xsl:template>
+
+<xsl:template match="lista_osoba">
+ <li><xsl:apply-templates mode="inline" /></li>
+</xsl:template>
+
+<xsl:template match="begin" mode="inline">
+ <xsl:variable name="mnum" select="concat('m', substring(@id, 2))" />
+ <span class="theme-begin" fid="{substring(@id, 2)}">
+ <xsl:value-of select="string(following::motyw[@id=$mnum]/text())" />
+ </span>
+</xsl:template>
+
+<xsl:template match="end" mode="inline">
+ <span class="theme-end" fid="{substring(@id, 2)}"> </span>
+</xsl:template>
+
+<xsl:template match="begin|end">
+ <xsl:apply-templates select='.' mode="inline" />
+</xsl:template>
+
+<!-- Emits one verse line as a <p>, rendering $line-content in inline mode.
+     When name($line-content) is wers_akap or wers_wciety the paragraph gets
+     an inline text-indent of 1em or 2em respectively.
+     $line-number is accepted from callers but not used here. -->
+<xsl:template name="verse">
+    <xsl:param name="line-content" />
+    <xsl:param name="line-number" />
+    <p>
+        <xsl:choose>
+            <xsl:when test="name($line-content) = 'wers_akap'">
+                <xsl:attribute name="style">text-indent: 1em</xsl:attribute>
+            </xsl:when>
+            <xsl:when test="name($line-content) = 'wers_wciety'">
+                <xsl:attribute name="style">text-indent: 2em</xsl:attribute>
+            </xsl:when>
+        </xsl:choose>
+        <xsl:apply-templates select="$line-content" mode="inline" />
+    </p>
+</xsl:template>
+
+<xsl:template match="pa|pe|pr|pt" mode="inline">
+ <a name="{concat('anchor-', generate-id(.))}" />
+ <a href="{concat('#footnote-', generate-id(.))}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
+</xsl:template>
+
+<xsl:template match="strofa">
+ <div class="stanza">
+ <xsl:choose>
+ <xsl:when test="count(br) > 0">
+ <xsl:call-template name="verse">
+ <xsl:with-param name="line-content" select="br[1]/preceding-sibling::text() | br[1]/preceding-sibling::node()" />
+ <xsl:with-param name="line-number" select="1" />
+ </xsl:call-template>
+ <xsl:for-each select="br">
+ <!-- Każdy BR "zjada" to co jest za nim -->
+ <xsl:variable name="lnum" select="count(preceding-sibling::br)" />
+ <xsl:call-template name="verse">
+ <xsl:with-param name="line-number" select="$lnum+2" />
+ <xsl:with-param name="line-content"
+ select="following-sibling::text()[count(preceding-sibling::br) = $lnum+1] | following-sibling::node()[count(preceding-sibling::br) = $lnum+1]" />
+ </xsl:call-template>
+ </xsl:for-each>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:call-template name="verse">
+ <xsl:with-param name="line-content" select="text() | node()" />
+ <xsl:with-param name="line-number" select="1" />
+ </xsl:call-template>
+ </xsl:otherwise>
+ </xsl:choose>
+ </div>
+</xsl:template>
+
+<xsl:template match="akap|akap_dialog|akap_cd">
+ <p class="paragraph"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+<xsl:template match="motyw" mode="inline" />
+
+<xsl:template match="dlugi_cytat">
+ <blockquote><xsl:apply-templates /></blockquote>
+</xsl:template>
+
+<xsl:template match="motto">
+ <p class="motto"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+<xsl:template match="motto_podpis">
+ <p class="motto_podpis"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+<xsl:template match="sekcja_swiatlo">
+ <hr class="spacer" />
+</xsl:template>
+
+</xsl:stylesheet>
--- /dev/null
+# -*- coding: utf-8 -*-
+import os
+import cStringIO
+import re
+import copy
+import pkgutil
+
+from lxml import etree
+
+
+# Ordered (sequence, replacement) pairs applied one after another by
+# substitute_entities.  Order matters: u'---' is listed before u'--' so the
+# longer sequence is replaced before the shorter one can match part of it.
+ENTITY_SUBSTITUTIONS = [
+ (u'---', u'—'),
+ (u'--', u'–'),
+ (u'...', u'…'),
+ (u',,', u'„'),
+ (u'"', u'”'),
+]
+
+
+def substitute_entities(context, text):
+    """XPath extension function replacing ASCII sequences with typographic characters.
+
+    ``context`` is accepted but not used.  ``text`` may be a string or a list
+    of strings; a list (presumably an XPath node-set) is concatenated first.
+    Every pair in ENTITY_SUBSTITUTIONS is then applied in order and the
+    resulting string is returned.
+    """
+    converted = ''.join(text) if isinstance(text, list) else text
+    for sequence, replacement in ENTITY_SUBSTITUTIONS:
+        converted = converted.replace(sequence, replacement)
+    return converted
+
+
+# Register substitute_entities function with lxml under the namespace URI
+# that the XSLT stylesheet binds to the "wl" prefix (xmlns:wl), which is how
+# the stylesheet is able to call wl:substitute_entities(.).
+ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+ns['substitute_entities'] = substitute_entities
+
+
+def transform(input_filename, output_filename):
+    """Transform the XML file input_filename into the XHTML file output_filename.
+
+    The source is read as UTF-8 and every "/" followed by a whitespace
+    character is rewritten to "<br/>" plus a newline before parsing.  The
+    document is then run through the book2html.xslt stylesheet located next
+    to this module and written out as UTF-8 with an XML declaration.
+    """
+    # Parse XSLT
+    style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
+    style = etree.parse(style_filename)
+
+    # Pre-process the source line by line into an in-memory buffer
+    doc_file = cStringIO.StringIO()
+    expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
+
+    f = open(input_filename, 'r')
+    try:
+        for line in f:
+            line = line.decode('utf-8')
+            line = expr.sub(u'<br/>\n', line)
+            doc_file.write(line.encode('utf-8'))
+    finally:
+        # Close the input even when decoding fails part-way through
+        f.close()
+
+    # Rewind so the parser reads the buffer from the beginning
+    doc_file.seek(0)
+
+    parser = etree.XMLParser(remove_blank_text=True)
+    doc = etree.parse(doc_file, parser)
+
+    result = doc.xslt(style)
+    result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
+
+
+class Fragment(object):
+    """A theme fragment: an id, its themes and the parse events recorded for it.
+
+    Events are (kind, payload) pairs.  For 'start' and 'end' the payload is an
+    element exposing tag, attrib, text and tail; for any other kind (callers
+    in this module use 'text') the payload is a plain string.
+    """
+
+    def __init__(self, id, themes):
+        super(Fragment, self).__init__()
+        self.id = id
+        self.themes = themes
+        self.events = []
+
+    def append(self, event, element):
+        """Record one (event, element) pair at the end of the fragment."""
+        self.events.append((event, element))
+
+    def closed_events(self):
+        """Return the recorded events plus the 'end' events needed to close them.
+
+        Every element that was started but never ended gets a synthetic
+        ('end', element) pair appended, innermost element first.  An 'end'
+        without a matching 'start' is reported on stdout and otherwise kept.
+        """
+        stack = []
+        for event, element in self.events:
+            if event == 'start':
+                stack.append(('end', element))
+            elif event == 'end':
+                try:
+                    stack.pop()
+                except IndexError:
+                    print('CLOSED NON-OPEN TAG: %s' % (element,))
+
+        stack.reverse()
+        return self.events + stack
+
+    def to_string(self):
+        """Serialise the fragment, with unclosed elements closed, to markup.
+
+        Text and attribute values are XML-escaped so that characters such as
+        "&", "<" and '"' in the recorded content do not corrupt the output.
+        """
+        # Imported here so the class does not depend on a file-level import
+        from xml.sax.saxutils import escape
+
+        result = []
+        for event, element in self.closed_events():
+            if event == 'start':
+                # Each attribute carries its own leading space, so an element
+                # without attributes is written as <tag> rather than <tag >.
+                attributes = u''.join(
+                    u' %s="%s"' % (k, escape(v, {u'"': u'&quot;'}))
+                    for k, v in element.attrib.items())
+                result.append(u'<%s%s>' % (element.tag, attributes))
+                if element.text:
+                    result.append(escape(element.text))
+            elif event == 'end':
+                result.append(u'</%s>' % element.tag)
+                if element.tail:
+                    result.append(escape(element.tail))
+            else:
+                # Any other event carries plain text
+                result.append(escape(element))
+
+        return u''.join(result)
+
+    def __unicode__(self):
+        return self.to_string()
+
+
+def _ancestors_below_body(element):
+    """Return the ancestors of element, outermost first, stopping below <body>.
+
+    The walk also stops at the document root, so an element that is not
+    inside a <body> yields all of its ancestors instead of raising.
+    """
+    ancestors = []
+    parent = element.getparent()
+    while parent is not None and parent.tag != 'body':
+        ancestors.append(parent)
+        parent = parent.getparent()
+    ancestors.reverse()
+    return ancestors
+
+
+def extract_fragments(input_filename):
+    """Extract theme fragments from input_filename.
+
+    A fragment is opened by <span class="theme-begin" fid="..."> and closed
+    by the <span class="theme-end"> carrying the same fid.  Returns a
+    (closed_fragments, open_fragments) pair of dicts keyed by fragment id:
+    the first holds fragments whose end marker was seen, the second those
+    still open when the document ended.  An end marker without a matching
+    open fragment is reported on stdout and skipped.
+    """
+    open_fragments = {}
+    closed_fragments = {}
+
+    for event, element in etree.iterparse(input_filename, events=('start', 'end')):
+        css_class = element.get('class', '')
+
+        # Process begin and end elements
+        if element.tag == 'span' and css_class in ('theme-begin', 'theme-end'):
+            if event != 'end':
+                continue  # Process elements only once, on end event
+
+            if css_class == 'theme-begin':
+                # Open new fragment, starting with the marker's enclosing
+                # elements so the fragment is well nested on its own
+                fragment = Fragment(id=element.get('fid'), themes=element.text)
+                for parent in _ancestors_below_body(element):
+                    fragment.append('start', parent)
+                open_fragments[fragment.id] = fragment
+            else:
+                # Close existing fragment
+                try:
+                    fragment = open_fragments[element.get('fid')]
+                except KeyError:
+                    print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
+                else:
+                    closed_fragments[fragment.id] = fragment
+                    del open_fragments[fragment.id]
+
+            # Text following the marker belongs to every fragment still open
+            # (we don't want to lose any text)
+            if element.tail:
+                for fragment_id in open_fragments:
+                    open_fragments[fragment_id].append('text', element.tail)
+
+        # Omit annotation tags, but keep the text that follows them
+        elif element.get('name', '') or css_class == 'annotation':
+            if event == 'end' and element.tail:
+                for fragment_id in open_fragments:
+                    open_fragments[fragment_id].append('text', element.tail)
+
+        # Every other element is recorded in all open fragments
+        else:
+            # NOTE(review): the copy is taken on 'start' as well as 'end';
+            # confirm element.text is already populated on 'start' events.
+            for fragment_id in open_fragments:
+                open_fragments[fragment_id].append(event, copy.copy(element))
+
+    return closed_fragments, open_fragments
+