--- /dev/null
+.DS_Store
+*.pyc
+MANIFEST
+dist
--- /dev/null
+include librarian/*.xslt
+recursive-include librarian/tests/files/ *.xml
--- /dev/null
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions" >
+
+<xsl:output encoding="utf-8" indent="yes" omit-xml-declaration = "yes" version="2.0" />
+
+
+<!--
+  Document root. Emits only the <div id="book-text"> fragment: the converted
+  work followed, when the source contains footnotes, by a "Przypisy" list.
+  No <html>/<head>/<body> wrapper and no CSS are produced by this stylesheet.
+-->
+<xsl:template match="utwor">
+  <!-- Footnote elements (pa, pe, pr, pt) anywhere in the work, except direct children of <extra>. -->
+  <xsl:variable name="footnotes" select="descendant::*[self::pe or self::pa or self::pr or self::pt][not(parent::extra)]" />
+  <div id="book-text">
+    <xsl:apply-templates select="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny" />
+    <xsl:if test="$footnotes">
+      <div id="footnotes">
+        <h3>Przypisy</h3>
+        <xsl:for-each select="$footnotes">
+          <div>
+            <!-- Target of the in-text marker, followed by a link back to that marker. -->
+            <a name="{concat('footnote-', generate-id(.))}" />
+            <!-- Same numbering expression as the in-text marker, so both show the same number. -->
+            <a href="{concat('#anchor-', generate-id(.))}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
+            <xsl:choose>
+              <!-- A footnote without paragraph or stanza children gets its own <p> wrapper. -->
+              <xsl:when test="count(akap|akap_cd|strofa) = 0">
+                <p><xsl:apply-templates select="text()|*" mode="inline" /></p>
+              </xsl:when>
+              <xsl:otherwise>
+                <xsl:apply-templates select="text()|*" mode="inline" />
+              </xsl:otherwise>
+            </xsl:choose>
+          </div>
+        </xsl:for-each>
+      </div>
+    </xsl:if>
+  </div>
+</xsl:template>
+
+
+<!-- ============================================================================== -->
+<!-- = MASTER TAG = -->
+<!-- = (can contain block tags, paragraph tags, standalone tags and special tags) = -->
+<!-- ============================================================================== -->
+<!--
+  Top-level work element. When a nazwa_utworu child exists, author, parent
+  work, title and subtitle are rendered (mode "header") inside a single <h1>;
+  afterwards all children are processed in the default mode.
+-->
+<xsl:template match="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny">
+ <xsl:if test="nazwa_utworu">
+ <h1>
+ <xsl:apply-templates select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul" mode="header" />
+ </h1>
+ </xsl:if>
+ <xsl:apply-templates />
+</xsl:template>
+
+
+<!-- ==================================================================================== -->
+<!-- = BLOCK TAGS = -->
+<!-- = (can contain other block tags, paragraph tags, standalone tags and special tags) = -->
+<!-- ==================================================================================== -->
+<!-- nota: children processed in the default mode inside div.note. -->
+<xsl:template match="nota">
+ <div class="note"><xsl:apply-templates /></div>
+</xsl:template>
+
+<!-- lista_osob: naglowek_listy becomes the <h3>; only lista_osoba children become list items. -->
+<xsl:template match="lista_osob">
+ <div class="person-list">
+ <h3><xsl:value-of select="naglowek_listy" /></h3>
+ <ol>
+ <xsl:apply-templates select="lista_osoba" />
+ </ol>
+ </div>
+</xsl:template>
+
+<!-- dedykacja: children processed in the default mode inside div.dedication. -->
+<xsl:template match="dedykacja">
+ <div class="dedication"><xsl:apply-templates /></div>
+</xsl:template>
+
+<!-- kwestia: only strofa, akap and didaskalia children are rendered; other children are not selected. -->
+<xsl:template match="kwestia">
+ <div class="kwestia">
+ <xsl:apply-templates select="strofa|akap|didaskalia" />
+ </div>
+</xsl:template>
+
+<!-- Long quotations (prose or verse) become a <blockquote>. -->
+<xsl:template match="dlugi_cytat|poezja_cyt">
+ <blockquote><xsl:apply-templates /></blockquote>
+</xsl:template>
+
+<!-- motto: unlike the other block tags, children are processed in mode "inline". -->
+<xsl:template match="motto">
+ <div class="motto"><xsl:apply-templates mode="inline" /></div>
+</xsl:template>
+
+
+<!-- ========================================== -->
+<!-- = PARAGRAPH TAGS = -->
+<!-- = (can contain inline and special tags) = -->
+<!-- ========================================== -->
+<!-- Title page -->
+<!-- Title-page parts: each becomes a <span> with its own class; they are applied from inside the <h1> of the work template. -->
+<xsl:template match="autor_utworu" mode="header">
+ <span class="author"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="nazwa_utworu" mode="header">
+ <span class="title"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="dzielo_nadrzedne" mode="header">
+ <span class="collection"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<xsl:template match="podtytul" mode="header">
+ <span class="subtitle"><xsl:apply-templates mode="inline" /></span>
+</xsl:template>
+
+<!-- Section headers (included in index)-->
+<!-- Three heading levels: act/part/mid-title = h2, scene/chapter = h3, speaker/subchapter = h4. -->
+<xsl:template match="naglowek_akt|naglowek_czesc|srodtytul">
+ <h2><xsl:apply-templates mode="inline" /></h2>
+</xsl:template>
+
+<xsl:template match="naglowek_scena|naglowek_rozdzial">
+ <h3><xsl:apply-templates mode="inline" /></h3>
+</xsl:template>
+
+<xsl:template match="naglowek_osoba|naglowek_podrozdzial">
+ <h4><xsl:apply-templates mode="inline" /></h4>
+</xsl:template>
+
+<!-- Other paragraph tags -->
+<!-- Each of these switches its children to mode "inline". -->
+<xsl:template match="miejsce_czas">
+ <p class="place-and-time"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+<xsl:template match="didaskalia">
+ <div class="didaskalia"><xsl:apply-templates mode="inline" /></div>
+</xsl:template>
+
+<xsl:template match="lista_osoba">
+ <li><xsl:apply-templates mode="inline" /></li>
+</xsl:template>
+
+<!-- All three paragraph variants render as the same p.paragraph. -->
+<xsl:template match="akap|akap_dialog|akap_cd">
+ <p class="paragraph"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+<!--
+  A stanza, rendered as div.stanza with one "verse" call per line of verse.
+  Lines are delimited by <br/> children: the nodes before the first <br/> form
+  the first verse and every <br/> owns the sibling nodes up to the next <br/>.
+  The verse-type parameter carries the wers_wciety / wers_akap / wers_cd
+  marker found in that line, if any.
+-->
+<xsl:template match="strofa">
+  <div class="stanza">
+    <xsl:choose>
+      <xsl:when test="br">
+        <!-- First verse: everything in front of the first <br/>. -->
+        <xsl:call-template name="verse">
+          <xsl:with-param name="verse-content" select="br[1]/preceding-sibling::node()" />
+          <xsl:with-param name="verse-type" select="br[1]/preceding-sibling::*[name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd'][1]" />
+        </xsl:call-template>
+        <xsl:for-each select="br">
+          <!-- Nodes belonging to this <br/> have exactly this many <br/> siblings before them. -->
+          <xsl:variable name="line-index" select="count(preceding-sibling::br) + 1" />
+          <xsl:call-template name="verse">
+            <xsl:with-param name="verse-content" select="following-sibling::node()[count(preceding-sibling::br) = $line-index]" />
+            <xsl:with-param name="verse-type" select="following-sibling::*[count(preceding-sibling::br) = $line-index and (name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd')][1]" />
+          </xsl:call-template>
+        </xsl:for-each>
+      </xsl:when>
+      <xsl:otherwise>
+        <!-- No <br/> at all: the whole stanza is a single verse. -->
+        <xsl:call-template name="verse">
+          <xsl:with-param name="verse-content" select="node()" />
+          <!-- Note: the [1] predicate binds to wers_cd only. -->
+          <xsl:with-param name="verse-type" select="wers_wciety|wers_akap|wers_cd[1]" />
+        </xsl:call-template>
+      </xsl:otherwise>
+    </xsl:choose>
+  </div>
+</xsl:template>
+
+<!--
+  Renders one line of verse as p.verse.
+    verse-content: the nodes making up the line (rendered in mode "inline").
+    verse-type:    node-set holding the wers_akap / wers_wciety / wers_cd marker
+                   of the line, or empty for a plain verse.
+  The marker selects the left padding: wers_akap = 1em, wers_cd = 12em,
+  wers_wciety = the marker's own @typ in em (1em when @typ is absent).
+-->
+<xsl:template name="verse">
+  <xsl:param name="verse-content" />
+  <xsl:param name="verse-type" />
+  <!-- name() only looks at the first node of a node-set; read @typ from that same node. -->
+  <xsl:variable name="marker" select="$verse-type[1]" />
+  <p class="verse">
+    <xsl:choose>
+      <xsl:when test="name($marker) = 'wers_akap'">
+        <xsl:attribute name="style">padding-left: 1em</xsl:attribute>
+      </xsl:when>
+      <xsl:when test="name($marker) = 'wers_wciety'">
+        <xsl:choose>
+          <!-- Indent width comes from the marker itself, not from any other node of the line that happens to carry @typ. -->
+          <xsl:when test="$marker/@typ">
+            <xsl:attribute name="style">padding-left: <xsl:value-of select="$marker/@typ" />em</xsl:attribute>
+          </xsl:when>
+          <xsl:otherwise>
+            <xsl:attribute name="style">padding-left: 1em</xsl:attribute>
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:when>
+      <xsl:when test="name($marker) = 'wers_cd'">
+        <xsl:attribute name="style">padding-left: 12em</xsl:attribute>
+      </xsl:when>
+    </xsl:choose>
+    <xsl:apply-templates select="$verse-content" mode="inline" />
+  </p>
+</xsl:template>
+
+<!-- Signature under a motto: a paragraph with class "motto_podpis", children in mode "inline". -->
+<xsl:template match="motto_podpis">
+ <p class="motto_podpis"><xsl:apply-templates mode="inline" /></p>
+</xsl:template>
+
+
+<!-- ================================================ -->
+<!-- = INLINE TAGS = -->
+<!-- = (contain other inline tags and special tags) = -->
+<!-- ================================================ -->
+<!-- Annotations -->
+<!--
+  In-text footnote marker. Emits a named anchor ("anchor-" + generated id)
+  followed by a numbered link to "#footnote-" + the same id. The footnote body
+  itself is not rendered here. The number is the count of footnote elements
+  preceding this one in document order, plus one.
+-->
+<xsl:template match="pa|pe|pr|pt" mode="inline">
+  <xsl:variable name="id" select="generate-id()" />
+  <a name="anchor-{$id}" />
+  <a href="#footnote-{$id}" class="annotation">[<xsl:number value="count(preceding::*[self::pa or self::pe or self::pr or self::pt]) + 1" />]</a>
+</xsl:template>
+
+<!-- Other inline tags -->
+<!-- Every inline tag below becomes an <em> whose class names the kind of emphasis. -->
+<xsl:template match="mat" mode="inline">
+ <em class="math"><xsl:apply-templates mode="inline" /></em>
+</xsl:template>
+
+<xsl:template match="didask_tekst" mode="inline">
+ <em class="didaskalia"><xsl:apply-templates mode="inline" /></em>
+</xsl:template>
+
+<xsl:template match="slowo_obce" mode="inline">
+ <em class="foreign-word"><xsl:apply-templates mode="inline" /></em>
+</xsl:template>
+
+<!-- A title with typ="1" is additionally wrapped in Polish quotation marks. -->
+<xsl:template match="tytul_dziela" mode="inline">
+ <em class="book-title">
+ <xsl:if test="@typ = '1'">„</xsl:if><xsl:apply-templates mode="inline" /><xsl:if test="@typ = '1'">”</xsl:if>
+ </em>
+</xsl:template>
+
+<xsl:template match="wyroznienie" mode="inline">
+ <em class="author-emphasis"><xsl:apply-templates mode="inline" /></em>
+</xsl:template>
+
+<xsl:template match="osoba" mode="inline">
+ <em class="person"><xsl:apply-templates mode="inline" /></em>
+</xsl:template>
+
+
+<!-- ============================================== -->
+<!-- = STANDALONE TAGS = -->
+<!-- = (cannot contain any other tags) = -->
+<!-- ============================================== -->
+<!-- Section break rendered as an <hr> with class "spacer". -->
+<xsl:template match="sekcja_swiatlo">
+ <hr class="spacer" />
+</xsl:template>
+
+<!-- Section break rendered as a paragraph containing a single asterisk. -->
+<xsl:template match="sekcja_asterysk">
+ <p class="spacer-asterisk">*</p>
+</xsl:template>
+
+<!-- Section break rendered as an <hr> with class "spacer-line". -->
+<xsl:template match="separator_linia">
+ <hr class="spacer-line" />
+</xsl:template>
+
+
+<!-- ================ -->
+<!-- = SPECIAL TAGS = -->
+<!-- ================ -->
+<!-- Themes -->
+<!--
+  Start of a theme fragment. The fragment number is @id without its first
+  character; it is exposed as the anchor name ("m" + number) and as the "fid"
+  attribute. The anchor text is the first text node of the following <motyw>
+  whose id is "m" + number.
+-->
+<xsl:template match="begin" mode="inline">
+  <xsl:variable name="fid" select="substring(@id, 2)" />
+  <xsl:variable name="motyw-id" select="concat('m', $fid)" />
+  <a name="{$motyw-id}" class="theme-begin" fid="{$fid}">
+    <xsl:value-of select="string(following::motyw[@id=$motyw-id]/text())" />
+  </a>
+</xsl:template>
+
+<!-- End of a theme fragment: a span.theme-end carrying the same "fid" number as the matching begin marker. -->
+<xsl:template match="end" mode="inline">
+ <span class="theme-end" fid="{substring(@id, 2)}"> </span>
+</xsl:template>
+
+<!-- begin/end met in the default mode are rendered exactly as in mode "inline". -->
+<xsl:template match="begin|end">
+ <xsl:apply-templates select='.' mode="inline" />
+</xsl:template>
+
+<!-- motyw produces no output of its own in mode "inline"; its text is read by the "begin" template. -->
+<xsl:template match="motyw" mode="inline" />
+
+
+<!-- ================ -->
+<!-- = IGNORED TAGS = -->
+<!-- ================ -->
+<!-- extra and uwaga are dropped, with their whole content, in both modes. -->
+<xsl:template match="extra|uwaga" />
+<xsl:template match="extra|uwaga" mode="inline" />
+
+
+<!-- ======== -->
+<!-- = TEXT = -->
+<!-- ======== -->
+<!-- Text is emitted only in mode "inline", after passing through the wl:substitute_entities extension function; in the default mode text nodes are suppressed. -->
+<xsl:template match="text()" />
+<xsl:template match="text()" mode="inline">
+ <xsl:value-of select="wl:substitute_entities(.)" />
+</xsl:template>
+
+
+</xsl:stylesheet>
+
--- /dev/null
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions" >
+
+<xsl:output encoding="utf-8" method="text" />
+
+<!-- ============================================================================== -->
+<!-- = MASTER TAG = -->
+<!-- = (can contain block tags, paragraph tags, standalone tags and special tags) = -->
+<!-- ============================================================================== -->
+<!--
+  Top-level work element for the plain-text output: a fixed preamble, then the
+  title-page parts (mode "header") when nazwa_utworu exists, a blank line, and
+  finally all children in the default mode.
+  The "%s" in the preamble is emitted literally by this stylesheet; presumably
+  the caller substitutes the book URL afterwards (TODO confirm).
+  Line breaks inside xsl:text are part of the output; do not reformat them.
+-->
+<xsl:template match="powiesc|opowiadanie|liryka_l|liryka_lp|dramat_wierszowany_l|dramat_wierszowany_lp|dramat_wspolczesny">
+<xsl:text>Kodowanie znaków w dokumencie: UTF-8.
+-----
+Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl/). Reprodukcja cyfrowa wykonana przez
+Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. Ten utwór nie jest chroniony prawem autorskim i znajduje
+się w domenie publicznej, co oznacza, że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać.
+
+Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %s.
+-----
+
+
+</xsl:text>
+<xsl:if test="nazwa_utworu"><xsl:apply-templates select="autor_utworu|dzielo_nadrzedne|nazwa_utworu|podtytul" mode="header" /></xsl:if>
+<xsl:text>
+
+</xsl:text>
+<xsl:apply-templates />
+</xsl:template>
+
+
+<!-- ==================================================================================== -->
+<!-- = BLOCK TAGS = -->
+<!-- = (can contain other block tags, paragraph tags, standalone tags and special tags) = -->
+<!-- ==================================================================================== -->
+<!-- Block tags in the text output add vertical spacing only; the line breaks inside each xsl:text are emitted verbatim. -->
+<xsl:template match="nota">
+<xsl:apply-templates />
+</xsl:template>
+
+<!-- lista_osob: spacing, the list heading, then only the lista_osoba children. -->
+<xsl:template match="lista_osob">
+<xsl:text>
+
+
+</xsl:text>
+<xsl:value-of select="naglowek_listy" />
+<xsl:apply-templates select="lista_osoba" />
+<xsl:text>
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="dedykacja">
+<xsl:text>
+
+</xsl:text>
+<xsl:apply-templates />
+</xsl:template>
+
+<!-- kwestia: only strofa, akap and didaskalia children are rendered. -->
+<xsl:template match="kwestia">
+<xsl:apply-templates select="strofa|akap|didaskalia" />
+</xsl:template>
+
+<xsl:template match="dlugi_cytat|poezja_cyt">
+<xsl:text>
+</xsl:text>
+<xsl:apply-templates />
+</xsl:template>
+
+<!-- motto: children are processed in the default mode, surrounded by blank lines. -->
+<xsl:template match="motto">
+<xsl:text>
+
+
+
+</xsl:text>
+<xsl:apply-templates /><xsl:text>
+
+</xsl:text>
+</xsl:template>
+
+
+<!-- ========================================== -->
+<!-- = PARAGRAPH TAGS = -->
+<!-- = (can contain inline and special tags) = -->
+<!-- ========================================== -->
+<!-- Title page -->
+<!-- Title-page parts and headers: each emits leading line breaks (verbatim from xsl:text) followed by its inline content. -->
+<xsl:template match="autor_utworu" mode="header">
+<xsl:text>
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<xsl:template match="nazwa_utworu" mode="header">
+<xsl:text>
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<xsl:template match="dzielo_nadrzedne" mode="header">
+<xsl:text>
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<xsl:template match="podtytul" mode="header">
+<xsl:text>
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<!-- Section headers (included in index)-->
+<!-- Higher-level headers are preceded by more line breaks than lower-level ones. -->
+<xsl:template match="naglowek_akt|naglowek_czesc|srodtytul">
+<xsl:text>
+
+
+
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<xsl:template match="naglowek_scena|naglowek_rozdzial">
+<xsl:text>
+
+
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<xsl:template match="naglowek_osoba|naglowek_podrozdzial">
+<xsl:text>
+
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<!-- Other paragraph tags -->
+<xsl:template match="miejsce_czas">
+<xsl:text>
+
+
+
+</xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<!--
+  Stage direction: inline content is collected into a variable, passed through
+  the wl:strip and wl:wrap_words extension functions, and emitted between
+  "/ " and " /" after leading line breaks.
+-->
+<xsl:template match="didaskalia">
+<xsl:variable name="content">
+ <xsl:apply-templates select="*|text()" mode="inline" />
+</xsl:variable>
+<xsl:text>
+
+/ </xsl:text><xsl:value-of select="wl:wrap_words(wl:strip($content))" /><xsl:text> /</xsl:text>
+</xsl:template>
+
+<!-- One entry of the person list: a new line with a "*" bullet, then the inline content. -->
+<xsl:template match="lista_osoba">
+<xsl:text>
+ * </xsl:text>
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+<!-- Paragraph: inline content is collected, stripped and word-wrapped by the wl: extension functions, preceded by a blank line. -->
+<xsl:template match="akap|akap_dialog|akap_cd">
+<xsl:variable name="content">
+ <xsl:apply-templates select="*|text()" mode="inline" />
+</xsl:variable>
+<xsl:text>
+
+</xsl:text>
+<xsl:value-of select="wl:wrap_words(wl:strip($content))" />
+</xsl:template>
+
+<!--
+  A stanza in the text output: one line break, then one "verse" call per line
+  of verse. Lines are delimited by <br/> children: the nodes before the first
+  <br/> form the first verse and every <br/> owns the sibling nodes up to the
+  next <br/>. verse-type carries the wers_wciety / wers_akap / wers_cd marker
+  found in that line, if any.
+-->
+<xsl:template match="strofa">
+<xsl:text>
+</xsl:text>
+  <xsl:choose>
+    <xsl:when test="br">
+      <!-- First verse: everything in front of the first <br/>. -->
+      <xsl:call-template name="verse">
+        <xsl:with-param name="verse-content" select="br[1]/preceding-sibling::node()" />
+        <xsl:with-param name="verse-type" select="br[1]/preceding-sibling::*[name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd'][1]" />
+      </xsl:call-template>
+      <xsl:for-each select="br">
+        <!-- Nodes belonging to this <br/> have exactly this many <br/> siblings before them. -->
+        <xsl:variable name="line-index" select="count(preceding-sibling::br) + 1" />
+        <xsl:call-template name="verse">
+          <xsl:with-param name="verse-content" select="following-sibling::node()[count(preceding-sibling::br) = $line-index]" />
+          <xsl:with-param name="verse-type" select="following-sibling::*[count(preceding-sibling::br) = $line-index and (name() = 'wers_wciety' or name() = 'wers_akap' or name() = 'wers_cd')][1]" />
+        </xsl:call-template>
+      </xsl:for-each>
+    </xsl:when>
+    <xsl:otherwise>
+      <!-- No <br/> at all: the whole stanza is a single verse. -->
+      <xsl:call-template name="verse">
+        <xsl:with-param name="verse-content" select="node()" />
+        <!-- Note: the [1] predicate binds to wers_cd only. -->
+        <xsl:with-param name="verse-type" select="wers_wciety|wers_akap|wers_cd[1]" />
+      </xsl:call-template>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+<!--
+  Emits one line of verse: a line break, an indent chosen from the verse
+  marker, then the inline content passed through wl:strip.
+    verse-content: the nodes making up the line.
+    verse-type:    the wers_akap / wers_wciety / wers_cd marker, or empty.
+  NOTE(review): as written, every marker branch emits the same one-space
+  indent and the value of @typ is never used; confirm that the indent strings
+  have not lost whitespace.
+-->
+<xsl:template name="verse">
+ <xsl:param name="verse-content" />
+ <xsl:param name="verse-type" />
+<xsl:text>
+</xsl:text>
+ <xsl:variable name="content">
+ <xsl:apply-templates select="$verse-content" mode="inline" />
+ </xsl:variable>
+ <xsl:choose>
+ <xsl:when test="name($verse-type) = 'wers_akap'">
+ <xsl:text> </xsl:text>
+ </xsl:when>
+ <xsl:when test="name($verse-type) = 'wers_wciety'">
+ <xsl:choose>
+ <xsl:when test="$verse-content/@typ">
+ <xsl:text> </xsl:text>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:text> </xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:when>
+ <xsl:when test="name($verse-type) = 'wers_cd'">
+ <xsl:text> </xsl:text>
+ </xsl:when>
+ </xsl:choose>
+<xsl:value-of select="wl:strip($content)" />
+</xsl:template>
+
+<!-- Signature under a motto: inline content only, no extra spacing. -->
+<xsl:template match="motto_podpis">
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+
+<!-- ================================================ -->
+<!-- = INLINE TAGS = -->
+<!-- = (contain other inline tags and special tags) = -->
+<!-- ================================================ -->
+<!-- Annotations -->
+<!-- Footnotes are omitted entirely from the text output. -->
+<xsl:template match="pa|pe|pr|pt" mode="inline" />
+
+<!-- Other inline tags -->
+<!-- These pass their content through without any markup. -->
+<xsl:template match="mat" mode="inline"><xsl:apply-templates mode="inline" /></xsl:template>
+
+<xsl:template match="didask_tekst" mode="inline"><xsl:apply-templates mode="inline" /></xsl:template>
+
+<xsl:template match="slowo_obce" mode="inline"><xsl:apply-templates mode="inline" /></xsl:template>
+
+<!-- A title with typ="1" is wrapped in Polish quotation marks. -->
+<xsl:template match="tytul_dziela" mode="inline">
+<xsl:if test="@typ = '1'">„</xsl:if><xsl:apply-templates mode="inline" /><xsl:if test="@typ = '1'">”</xsl:if>
+</xsl:template>
+
+<!-- Author emphasis is marked with surrounding asterisks. -->
+<xsl:template match="wyroznienie" mode="inline">
+<xsl:text>*</xsl:text><xsl:apply-templates mode="inline" /><xsl:text>*</xsl:text>
+</xsl:template>
+
+<xsl:template match="osoba" mode="inline">
+<xsl:apply-templates mode="inline" />
+</xsl:template>
+
+
+<!-- ============================================== -->
+<!-- = STANDALONE TAGS = -->
+<!-- = (cannot contain any other tags) = -->
+<!-- ============================================== -->
+<!-- Section break made of blank lines only. -->
+<xsl:template match="sekcja_swiatlo">
+<xsl:text>
+
+
+
+</xsl:text>
+</xsl:template>
+
+<!-- Section break: a single asterisk on its own line, surrounded by blank lines. -->
+<xsl:template match="sekcja_asterysk">
+<xsl:text>
+
+*
+
+</xsl:text>
+</xsl:template>
+
+<!-- Section break: a line of dashes surrounded by blank lines. -->
+<xsl:template match="separator_linia">
+<xsl:text>
+
+------------------------------------------------
+
+</xsl:text>
+</xsl:template>
+
+
+<!-- ================ -->
+<!-- = SPECIAL TAGS = -->
+<!-- ================ -->
+<!-- Themes -->
+<!-- Theme markers (begin, end, motyw) produce nothing in the text output, in either mode. -->
+<xsl:template match="begin" mode="inline" />
+
+<xsl:template match="end" mode="inline" />
+
+<xsl:template match="begin|end" />
+
+<xsl:template match="motyw" mode="inline" />
+
+
+<!-- ================ -->
+<!-- = IGNORED TAGS = -->
+<!-- ================ -->
+<!-- extra and uwaga are dropped, with their whole content, in both modes. -->
+<xsl:template match="extra|uwaga" />
+<xsl:template match="extra|uwaga" mode="inline" />
+
+
+<!-- ======== -->
+<!-- = TEXT = -->
+<!-- ======== -->
+<!-- Text is emitted only in mode "inline", after passing through wl:substitute_entities; in the default mode text nodes are suppressed. -->
+<xsl:template match="text()" />
+<xsl:template match="text()" mode="inline">
+ <xsl:value-of select="wl:substitute_entities(.)" />
+</xsl:template>
+
+
+</xsl:stylesheet>
+
--- /dev/null
+# -*- coding: utf-8 -*-
+from xml.parsers.expat import ExpatError
+from datetime import date
+import time
+
+# Import ElementTree from anywhere
+try:
+ import xml.etree.ElementTree as etree # Python >= 2.5
+except ImportError:
+ try:
+ import elementtree.ElementTree as etree # effbot's pure Python module
+ except ImportError:
+ import lxml.etree as etree # ElementTree API using libxml2
+
+
+# ==============
+# = Converters =
+# ==============
+class Person(object):
+ """Single person with last name and a list of first names."""
+ def __init__(self, last_name, *first_names):
+ self.last_name = last_name
+ self.first_names = first_names
+
+
+ def __eq__(self, right):
+ return self.last_name == right.last_name and self.first_names == right.first_names
+
+
+ def __unicode__(self):
+ if len(self.first_names) > 0:
+ return '%s, %s' % (self.last_name, ' '.join(self.first_names))
+ else:
+ return self.last_name
+
+
+ def __repr__(self):
+ return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)
+
+
+def str_to_unicode(value, previous):
+    """Converter: return value as a unicode string.
+
+    previous (the attribute's earlier value) is part of the common converter
+    signature and is ignored here.
+    """
+    return unicode(value)
+
+
+def str_to_unicode_list(value, previous):
+ if previous is None:
+ previous = []
+ previous.append(str_to_unicode(value, None))
+ return previous
+
+
+def str_to_person(value, previous):
+ comma_count = value.count(',')
+
+ if comma_count == 0:
+ last_name, first_names = value, []
+ elif comma_count == 1:
+ last_name, first_names = value.split(',')
+ first_names = [name for name in first_names.split(' ') if len(name)]
+ else:
+ raise ValueError("value contains more than one comma: %r" % value)
+
+ return Person(last_name.strip(), *first_names)
+
+
+def str_to_date(value, previous):
+ try:
+ t = time.strptime(value, '%Y-%m-%d')
+ except ValueError:
+ t = time.strptime(value, '%Y')
+ return date(t[0], t[1], t[2])
+
+
+# ==========
+# = Parser =
+# ==========
+class ParseError(Exception):
+    """Raised by BookInfo.from_file when the document cannot be parsed or has no Description element."""
+
+    def __init__(self, message):
+        # Takes exactly one argument: a message, or the underlying exception.
+        super(ParseError, self).__init__(message)
+
+
+class XMLNamespace(object):
+ '''Represents XML namespace.'''
+
+ def __init__(self, uri):
+ self.uri = uri
+
+ def __call__(self, tag):
+ return '{%s}%s' % (self.uri, tag)
+
+ def __contains__(self, tag):
+ return tag.startswith(str(self))
+
+ def __repr__(self):
+ return 'XMLNamespace(%r)' % self.uri
+
+ def __str__(self):
+ return '%s' % self.uri
+
+
+class BookInfo(object):
+ RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+ DC = XMLNamespace('http://purl.org/dc/elements/1.1/')
+
+ mapping = {
+ DC('creator') : ('author', str_to_person),
+ DC('title') : ('title', str_to_unicode),
+ DC('subject.period') : ('epoch', str_to_unicode),
+ DC('subject.type') : ('kind', str_to_unicode),
+ DC('subject.genre') : ('genre', str_to_unicode),
+ DC('date') : ('created_at', str_to_date),
+ DC('date.pd') : ('released_to_public_domain_at', str_to_date),
+ DC('contributor.translator') : ('translator', str_to_person),
+ DC('contributor.technical_editor') : ('technical_editor', str_to_person),
+ DC('publisher') : ('publisher', str_to_unicode),
+ DC('source') : ('source_name', str_to_unicode),
+ DC('source.URL') : ('source_url', str_to_unicode),
+ DC('identifier.url') : ('url', str_to_unicode),
+ DC('relation.hasPart') : ('parts', str_to_unicode_list),
+ DC('rights.license') : ('license', str_to_unicode),
+ DC('rights') : ('license_description', str_to_unicode),
+ }
+
+ @classmethod
+ def from_string(cls, xml):
+ from StringIO import StringIO
+ return cls.from_file(StringIO(xml))
+
+ @classmethod
+ def from_file(cls, xml_file):
+ book_info = cls()
+
+ try:
+ tree = etree.parse(xml_file)
+ except ExpatError, e:
+ raise ParseError(e)
+
+ description = tree.find('//' + book_info.RDF('Description'))
+ book_info.wiki_url = description.get(cls.RDF('about'), None)
+
+ if description is None:
+ raise ParseError('no Description tag found in document')
+
+ for element in description.findall('*'):
+ book_info.parse_element(element)
+
+ return book_info
+
+ def parse_element(self, element):
+ try:
+ attribute, converter = self.mapping[element.tag]
+ setattr(self, attribute, converter(element.text, getattr(self, attribute, None)))
+ except KeyError:
+ pass
+
+ def to_xml(self):
+ """XML representation of this object."""
+ etree._namespace_map[str(self.RDF)] = 'rdf'
+ etree._namespace_map[str(self.DC)] = 'dc'
+
+ root = etree.Element(self.RDF('RDF'))
+ description = etree.SubElement(root, self.RDF('Description'))
+
+ if self.wiki_url:
+ description.set(self.RDF('about'), self.wiki_url)
+
+ for tag, (attribute, converter) in self.mapping.iteritems():
+ if hasattr(self, attribute):
+ e = etree.Element(tag)
+ e.text = unicode(getattr(self, attribute))
+ description.append(e)
+
+ return unicode(etree.tostring(root, 'utf-8'), 'utf-8')
+
+ def to_dict(self):
+ etree._namespace_map[str(self.RDF)] = 'rdf'
+ etree._namespace_map[str(self.DC)] = 'dc'
+
+ result = {'about': self.wiki_url}
+ for tag, (attribute, converter) in self.mapping.iteritems():
+ if hasattr(self, attribute):
+ result[attribute] = unicode(getattr(self, attribute))
+
+ return result
+
+
+def parse(file_name):
+    """Shortcut for BookInfo.from_file(file_name)."""
+    return BookInfo.from_file(file_name)
+
+
+# Command-line use: parse the file named by the first argument and print every
+# mapped attribute (None for attributes the document did not provide).
+if __name__ == '__main__':
+    import sys
+
+    info = parse(sys.argv[1])
+    for attribute, _ in BookInfo.mapping.values():
+        print '%s: %r' % (attribute, getattr(info, attribute, None))
+
--- /dev/null
+# -*- coding: utf-8 -*-
+import os
+import cStringIO
+import re
+import copy
+import pkgutil
+
+from lxml import etree
+
+
+# (ASCII shorthand, typographic replacement) pairs, applied in this order by
+# substitute_entities. Order matters: '---' must be replaced before '--',
+# otherwise its first two characters would be consumed by the shorter rule.
+ENTITY_SUBSTITUTIONS = [
+    (u'---', u'—'),
+    (u'--', u'–'),
+    (u'...', u'…'),
+    (u',,', u'„'),
+    (u'"', u'”'),
+]
+
+
+def substitute_entities(context, text):
+ """XPath extension function converting all entites in passed text."""
+ if isinstance(text, list):
+ text = ''.join(text)
+ for entity, substitutution in ENTITY_SUBSTITUTIONS:
+ text = text.replace(entity, substitutution)
+ return text
+
+
+# Register substitute_entities function with lxml, under the namespace the
+# stylesheets bind to the "wl" prefix.
+# NOTE(review): the plain-text stylesheet also calls wl:strip and
+# wl:wrap_words; they are not registered in this part of the module, so
+# confirm they are registered elsewhere.
+ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+ns['substitute_entities'] = substitute_entities
+
+
+def transform(input_filename, output_filename):
+ """Transforms file input_filename in XML to output_filename in XHTML."""
+ # Parse XSLT
+ style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
+ style = etree.parse(style_filename)
+
+ doc_file = cStringIO.StringIO()
+ expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
+
+ f = open(input_filename, 'r')
+ for line in f:
+ line = line.decode('utf-8')
+ line = expr.sub(u'<br/>\n', line)
+ doc_file.write(line.encode('utf-8'))
+ f.close()
+
+ doc_file.seek(0);
+
+ parser = etree.XMLParser(remove_blank_text=True)
+ doc = etree.parse(doc_file, parser)
+
+ result = doc.xslt(style)
+ if result.find('//p') is not None:
+ add_anchors(result.getroot())
+ add_table_of_contents(result.getroot())
+ result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8')
+ return True
+ else:
+ return False
+
+
+class Fragment(object):
+ def __init__(self, id, themes):
+ super(Fragment, self).__init__()
+ self.id = id
+ self.themes = themes
+ self.events = []
+
+ def append(self, event, element):
+ self.events.append((event, element))
+
+ def closed_events(self):
+ stack = []
+ for event, element in self.events:
+ if event == 'start':
+ stack.append(('end', element))
+ elif event == 'end':
+ try:
+ stack.pop()
+ except IndexError:
+ print 'CLOSED NON-OPEN TAG:', element
+
+ stack.reverse()
+ return self.events + stack
+
+ def to_string(self):
+ result = []
+ for event, element in self.closed_events():
+ if event == 'start':
+ result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
+ if element.text:
+ result.append(element.text)
+ elif event == 'end':
+ result.append(u'</%s>' % element.tag)
+ if element.tail:
+ result.append(element.tail)
+ else:
+ result.append(element)
+
+ return ''.join(result)
+
+ def __unicode__(self):
+ return self.to_string()
+
+
+def extract_fragments(input_filename):
+ """Extracts theme fragments from input_filename."""
+ open_fragments = {}
+ closed_fragments = {}
+
+ for event, element in etree.iterparse(input_filename, events=('start', 'end')):
+ # Process begin and end elements
+ if element.get('class', '') in ('theme-begin', 'theme-end'):
+ if not event == 'end': continue # Process elements only once, on end event
+
+ # Open new fragment
+ if element.get('class', '') == 'theme-begin':
+ fragment = Fragment(id=element.get('fid'), themes=element.text)
+
+ # Append parents
+ if element.getparent().get('id', None) != 'book-text':
+ parents = [element.getparent()]
+ while parents[-1].getparent().get('id', None) != 'book-text':
+ parents.append(parents[-1].getparent())
+
+ parents.reverse()
+ for parent in parents:
+ fragment.append('start', parent)
+
+ open_fragments[fragment.id] = fragment
+
+ # Close existing fragment
+ else:
+ try:
+ fragment = open_fragments[element.get('fid')]
+ except KeyError:
+ print '%s:closed not open fragment #%s' % (input_filename, element.get('fid'))
+ else:
+ closed_fragments[fragment.id] = fragment
+ del open_fragments[fragment.id]
+
+ # Append element tail to lost_text (we don't want to lose any text)
+ if element.tail:
+ for fragment_id in open_fragments:
+ open_fragments[fragment_id].append('text', element.tail)
+
+
+ # Process all elements except begin and end
+ else:
+ # Omit annotation tags
+ if len(element.get('name', '')) or element.get('class', '') == 'annotation':
+ if event == 'end' and element.tail:
+ for fragment_id in open_fragments:
+ open_fragments[fragment_id].append('text', element.tail)
+ else:
+ for fragment_id in open_fragments:
+ open_fragments[fragment_id].append(event, copy.copy(element))
+
+ return closed_fragments, open_fragments
+
+
+def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
+ if with_link:
+ if link_text is None:
+ link_text = prefix
+ anchor = etree.Element('a', href='#%s' % prefix)
+ anchor.set('class', 'anchor')
+ anchor.text = unicode(link_text)
+ if element.text:
+ anchor.tail = element.text
+ element.text = u''
+ element.insert(0, anchor)
+
+ if with_target:
+ anchor_target = etree.Element('a', name='%s' % prefix)
+ anchor_target.set('class', 'target')
+ anchor_target.text = u' '
+ if element.text:
+ anchor_target.tail = element.text
+ element.text = u''
+ element.insert(0, anchor_target)
+
+
+def any_ancestor(element, test):
+ for ancestor in element.iterancestors():
+ if test(ancestor):
+ return True
+ return False
+
+
+def add_anchors(root):
+ counter = 1
+ for element in root.iterdescendants():
+ if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
+ or e.tag == 'blockquote'):
+ continue
+
+ if element.tag == 'p' and 'verse' in element.get('class', ''):
+ if counter == 1 or counter % 5 == 0:
+ add_anchor(element, "f%d" % counter, link_text=counter)
+ counter += 1
+ elif 'paragraph' in element.get('class', ''):
+ add_anchor(element, "f%d" % counter, link_text=counter)
+ counter += 1
+
+
+def add_table_of_contents(root):
+ sections = []
+ counter = 1
+ for element in root.iterdescendants():
+ if element.tag in ('h2', 'h3'):
+ if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
+ continue
+
+ if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
+ sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), []))
+ else:
+ sections.append((counter, element.tag, ''.join(element.xpath('text()')), []))
+ add_anchor(element, "s%d" % counter, with_link=False)
+ counter += 1
+
+ toc = etree.Element('div')
+ toc.set('id', 'toc')
+ toc_header = etree.SubElement(toc, 'h2')
+ toc_header.text = u'Spis treści'
+ toc_list = etree.SubElement(toc, 'ol')
+
+ for n, section, text, subsections in sections:
+ section_element = etree.SubElement(toc_list, 'li')
+ add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)
+
+ if len(subsections):
+ subsection_list = etree.SubElement(section_element, 'ol')
+ for n, subsection, text, _ in subsections:
+ subsection_element = etree.SubElement(subsection_list, 'li')
+ add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)
+
+ root.insert(0, toc)
+
--- /dev/null
+# -*- coding: utf-8 -*-
+import unittest
+from os.path import dirname, join, realpath
+
+from lxml import etree
+from librarian import dcparser, html
+
+
+def test_file_path(dir_name, file_name):
+ return realpath(join(dirname(__file__), 'files', dir_name, file_name))
+
+
+class TestDCParser(unittest.TestCase):
+ KNOWN_RESULTS = (
+ ('dcparser', 'andersen_brzydkie_kaczatko.xml', {
+ 'publisher': u'Fundacja Nowoczesna Polska',
+ 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko',
+ 'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925',
+ 'author': u'Andersen, Hans Christian',
+ 'url': u'http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko',
+ 'created_at': u'2007-08-14',
+ 'title': u'Brzydkie kaczątko',
+ 'kind': u'Epika',
+ 'source_url': u'http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4',
+ 'translator': u'Niewiadomska, Cecylia',
+ 'released_to_public_domain_at': u'1925-01-01',
+ 'epoch': u'Romantyzm',
+ 'genre': u'Baśń',
+ 'technical_editor': u'Gałecki, Dariusz',
+ 'license_description': u'Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925',
+ }),
+ ('dcparser', 'kochanowski_piesn7.xml', {
+ 'publisher': u'Fundacja Nowoczesna Polska',
+ 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)',
+ 'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976',
+ 'author': u'Kochanowski, Jan',
+ 'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr',
+ 'created_at': u'2007-08-31',
+ 'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)',
+ 'kind': u'Liryka',
+ 'source_url': u'http://www.polona.pl/Content/1499',
+ 'released_to_public_domain_at': u'1584-01-01',
+ 'epoch': u'Renesans',
+ 'genre': u'Pieśń',
+ 'technical_editor': u'Gałecki, Dariusz',
+ 'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ',
+ }),
+ ('dcparser', 'mickiewicz_rybka.xml', {
+ 'publisher': u'Fundacja Nowoczesna Polska',
+ 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka',
+ 'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922',
+ 'author': u'Mickiewicz, Adam',
+ 'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka',
+ 'created_at': u'2007-09-06',
+ 'title': u'Rybka',
+ 'kind': u'Liryka',
+ 'source_url': u'http://www.polona.pl/Content/2222',
+ 'released_to_public_domain_at': u'1855-01-01',
+ 'epoch': u'Romantyzm',
+ 'genre': u'Ballada',
+ 'technical_editor': u'Sutkowska, Olga',
+ 'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855',
+ }),
+ ('dcparser', 'sofokles_antygona.xml', {
+ 'publisher': u'Fundacja Nowoczesna Polska',
+ 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona',
+ 'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939',
+ 'author': u'Sofokles',
+ 'url': u'http://wolnelektury.pl/katalog/lektura/antygona',
+ 'created_at': u'2007-08-30',
+ 'title': u'Antygona',
+ 'kind': u'Dramat',
+ 'source_url': u'http://www.polona.pl/Content/3768',
+ 'translator': u'Morawski, Kazimierz',
+ 'released_to_public_domain_at': u'1925-01-01',
+ 'epoch': u'Starożytność',
+ 'genre': u'Tragedia',
+ 'technical_editor': u'Gałecki, Dariusz',
+ 'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925',
+ }),
+ ('dcparser', 'biedrzycki_akslop.xml', {
+ 'publisher': u'Fundacja Nowoczesna Polska',
+ 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop',
+ 'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993',
+ 'author': u'Biedrzycki, Miłosz',
+ 'url': u'http://wolnelektury.pl/katalog/lektura/akslop',
+ 'created_at': u'2009-06-04',
+ 'title': u'Akslop',
+ 'kind': u'Liryka',
+ 'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1',
+ 'epoch': u'Współczesność',
+ 'genre': u'Wiersz',
+ 'technical_editor': u'Sutkowska, Olga',
+ 'license': u'http://creativecommons.org/licenses/by-sa/3.0/',
+ 'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL'
+ }),
+ )
+
+ def test_parse(self):
+ for dir_name, file_name, result in self.KNOWN_RESULTS:
+ self.assertEqual(dcparser.parse(test_file_path(dir_name, file_name)).to_dict(), result)
+
+
+class TestParserErrors(unittest.TestCase):
+ def test_error(self):
+ try:
+ html.transform(test_file_path('erroneous', 'asnyk_miedzy_nami.xml'),
+ test_file_path('erroneous', 'asnyk_miedzy_nami.html'))
+ self.fail()
+ except etree.XMLSyntaxError, e:
+ self.assertEqual(e.position, (25, 13))
+
+
+# Run the whole test suite when this module is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
--- /dev/null
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko">
+ <dc:creator xml:lang="pl">Andersen, Hans Christian</dc:creator>
+ <dc:title xml:lang="pl">Brzydkie kaczątko</dc:title>
+ <dc:contributor.translator xml:lang="pl">Niewiadomska, Cecylia</dc:contributor.translator>
+ <dc:contributor.technical_editor xml:lang="pl">Gałecki, Dariusz</dc:contributor.technical_editor>
+ <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+ <dc:subject.period xml:lang="pl">Romantyzm</dc:subject.period>
+ <dc:subject.type xml:lang="pl">Epika</dc:subject.type>
+ <dc:subject.genre xml:lang="pl">Baśń</dc:subject.genre>
+ <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+ <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko</dc:identifier.url>
+ <dc:source.URL xml:lang="pl">http://www.polona.pl/dlibra/doccontent2?id=3563&amp;dirids=4</dc:source.URL>
+ <dc:source xml:lang="pl">Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925</dc:source>
+ <dc:rights xml:lang="pl">Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925</dc:rights>
+ <dc:date.pd xml:lang="pl">1925</dc:date.pd>
+ <dc:format xml:lang="pl">xml</dc:format>
+ <dc:type xml:lang="pl">text</dc:type>
+ <dc:type xml:lang="en">text</dc:type>
+ <dc:date xml:lang="pl">2007-08-14</dc:date>
+ <dc:audience xml:lang="pl">SP1</dc:audience>
+ <dc:language xml:lang="pl">pol</dc:language>
+ </rdf:Description>
+</rdf:RDF>
\ No newline at end of file
--- /dev/null
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop">
+ <dc:creator xml:lang="pl">Biedrzycki, Miłosz</dc:creator>
+ <dc:title xml:lang="pl">Akslop</dc:title>
+ <dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+ <dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
+ <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+ <dc:subject.period xml:lang="pl">Współczesność</dc:subject.period>
+ <dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+ <dc:subject.genre xml:lang="pl">Wiersz</dc:subject.genre>
+ <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).</dc:description>
+ <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/akslop</dc:identifier.url>
+ <dc:source.URL xml:lang="pl">http://free.art.pl/mlb/gwiazdka.html#t1</dc:source.URL>
+ <dc:source xml:lang="pl">Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993</dc:source>
+ <dc:rights xml:lang="pl">Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL</dc:rights>
+ <dc:rights.license>http://creativecommons.org/licenses/by-sa/3.0/</dc:rights.license>
+ <dc:format xml:lang="pl">xml</dc:format>
+ <dc:type xml:lang="pl">text</dc:type>
+ <dc:type xml:lang="en">text</dc:type>
+ <dc:date xml:lang="pl">2009-06-04</dc:date>
+ <dc:audience xml:lang="pl">L</dc:audience>
+ <dc:language xml:lang="pl">pol</dc:language>
+ </rdf:Description>
+</rdf:RDF>
\ No newline at end of file
--- /dev/null
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)">
+ <dc:creator xml:lang="pl">Kochanowski, Jan</dc:creator>
+ <dc:title xml:lang="pl">Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)</dc:title>
+ <dc:relation.isPartOf xml:lang="pl">http://www.wolnelektury.pl/lektura/piesni-ksiegi-pierwsze</dc:relation.isPartOf>
+ <dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+ <dc:contributor.editor xml:lang="pl">Krzyżanowski, Julian</dc:contributor.editor>
+ <dc:contributor.editor xml:lang="pl">Otwinowska, Barbara</dc:contributor.editor>
+ <dc:contributor.technical_editor xml:lang="pl">Gałecki, Dariusz</dc:contributor.technical_editor>
+ <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+ <dc:subject.period xml:lang="pl">Renesans</dc:subject.period>
+ <dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+ <dc:subject.genre xml:lang="pl">Pieśń</dc:subject.genre>
+ <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+ <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr</dc:identifier.url>
+ <dc:source.URL xml:lang="pl">http://www.polona.pl/Content/1499</dc:source.URL>
+ <dc:source xml:lang="pl">Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976</dc:source>
+ <dc:rights xml:lang="pl">Domena publiczna - Jan Kochanowski zm. 1584 </dc:rights>
+ <dc:date.pd xml:lang="pl">1584</dc:date.pd>
+ <dc:format xml:lang="pl">xml</dc:format>
+ <dc:type xml:lang="pl">text</dc:type>
+ <dc:type xml:lang="en">text</dc:type>
+ <dc:date xml:lang="pl">2007-08-31</dc:date>
+ <dc:audience xml:lang="pl">L</dc:audience>
+ <dc:language xml:lang="pl">pol</dc:language>
+ </rdf:Description>
+</rdf:RDF>
\ No newline at end of file
--- /dev/null
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka">
+ <dc:creator xml:lang="pl">Mickiewicz, Adam</dc:creator>
+ <dc:title xml:lang="pl">Rybka</dc:title>
+ <dc:relation.isPartOf xml:lang="pl">http://www.wolnelektury.pl/lektura/ballady-i-romanse</dc:relation.isPartOf>
+ <dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+ <dc:contributor.editor xml:lang="pl">Kallenbach, Józef</dc:contributor.editor>
+ <dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor>
+ <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+ <dc:subject.period xml:lang="pl">Romantyzm</dc:subject.period>
+ <dc:subject.type xml:lang="pl">Liryka</dc:subject.type>
+ <dc:subject.genre xml:lang="pl">Ballada</dc:subject.genre>
+ <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+ <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka</dc:identifier.url>
+ <dc:source.URL xml:lang="pl">http://www.polona.pl/Content/2222</dc:source.URL>
+ <dc:source xml:lang="pl">Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922</dc:source>
+ <dc:rights xml:lang="pl">Domena publiczna - Adam Mickiewicz zm. 1855</dc:rights>
+ <dc:date.pd xml:lang="pl">1855</dc:date.pd>
+ <dc:format xml:lang="pl">xml</dc:format>
+ <dc:type xml:lang="pl">text</dc:type>
+ <dc:type xml:lang="en">text</dc:type>
+ <dc:date xml:lang="pl">2007-09-06</dc:date>
+ <dc:audience xml:lang="pl">SP2</dc:audience>
+ <dc:audience xml:lang="pl">G</dc:audience>
+ <dc:audience xml:lang="pl">L</dc:audience>
+ <dc:language xml:lang="pl">pol</dc:language>
+ </rdf:Description>
+</rdf:RDF>
\ No newline at end of file
--- /dev/null
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona">
+ <dc:creator xml:lang="pl">Sofokles</dc:creator>
+ <dc:title xml:lang="pl">Antygona</dc:title>
+ <dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor>
+ <dc:contributor.translator xml:lang="pl">Morawski, Kazimierz</dc:contributor.translator>
+ <dc:contributor.technical_editor xml:lang="pl">Gałecki, Dariusz</dc:contributor.technical_editor>
+ <dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher>
+ <dc:subject.period xml:lang="pl">Starożytność</dc:subject.period>
+ <dc:subject.type xml:lang="pl">Dramat</dc:subject.type>
+ <dc:subject.genre xml:lang="pl">Tragedia</dc:subject.genre>
+ <dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description>
+ <dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/antygona</dc:identifier.url>
+ <dc:source.URL xml:lang="pl">http://www.polona.pl/Content/3768</dc:source.URL>
+ <dc:source xml:lang="pl">Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939</dc:source>
+ <dc:rights xml:lang="pl">Domena publiczna - tłumacz Kazimierz Morawski zm. 1925</dc:rights>
+ <dc:date.pd xml:lang="pl">1925</dc:date.pd>
+ <dc:format xml:lang="pl">xml</dc:format>
+ <dc:type xml:lang="pl">text</dc:type>
+ <dc:type xml:lang="en">text</dc:type>
+ <dc:date xml:lang="pl">2007-08-30</dc:date>
+ <dc:audience xml:lang="pl">G</dc:audience>
+ <dc:language xml:lang="pl">pol</dc:language>
+ </rdf:Description>
+</rdf:RDF>
\ No newline at end of file
--- /dev/null
+<div xmlns:wl="http://wolnelektury.pl/functions" id="book-text">
+ <div id="toc">
+ <h2>Spis treści</h2>
+ <ol/>
+ </div>
+ <h1>
+ <span class="author">Adam Asnyk</span>
+ <span class="title">Między nami nic nie było</span>
+ </h1>
+ <div class="stanza">
+ <p class="verse"><a name="f1" class="target"> </a><a href="#f1" class="anchor">1</a>Między nami nic nie było!</p>
+ <p class="verse">
+ Żadnych zwierzeń, wyznań żadnych!</p>
+ <p class="verse">
+ Nic nas z sobą nie łączyło —</p>
+ <p class="verse">
+ Prócz wiosennych marzeń zdradnych;</p>
+ </div>
+ <div class="stanza">
+ <p class="verse"><a name="f5" class="target"> </a><a href="#f5" class="anchor">5</a>Prócz tych woni, barw i blasków,</p>
+ <p class="verse">
+ Unoszących się w przestrzeni;</p>
+ <p class="verse">
+ Prócz szumiących śpiewem lasków</p>
+ <p class="verse">
+ I tej świeżej łąk zieleni;</p>
+ </div>
+ <div class="stanza">
+ <p class="verse">Prócz tych kaskad i potoków,</p>
+ <p class="verse"><a name="f10" class="target"> </a><a href="#f10" class="anchor">10</a>
+ Zraszających każdy parów,</p>
+ <p class="verse">
+ Prócz girlandy tęcz, obłoków,</p>
+ <p class="verse">
+ Prócz natury słodkich czarów;</p>
+ </div>
+ <div class="stanza">
+ <p class="verse">Prócz tych wspólnych, jasnych zdrojów,</p>
+ <p class="verse">
+ Z których serce zachwyt piło;</p>
+ <p class="verse"><a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a>
+ Prócz pierwiosnków i powojów,—</p>
+ <p class="verse">
+ Między nami nic nie było!</p>
+ </div>
+</div>
--- /dev/null
+<?xml version='1.0' encoding='utf-8'?>
+<utwor><liryka_lp>
+ <autor_utworu>Adam Asnyk</autor_utworu>
+ <nazwa_utworu>Między nami nic nie było</nazwa_utworu>
+
+ <strofa>Między nami nic nie było!/
+ Żadnych zwierzeń, wyznań żadnych!/
+ Nic nas z sobą nie łączyło ---/
+ Prócz wiosennych marzeń zdradnych;</strofa>
+
+ <strofa>Prócz tych woni, barw i blasków,/
+ Unoszących się w przestrzeni;/
+ Prócz szumiących śpiewem lasków/
+ I tej świeżej łąk zieleni;
+
+ <strofa>Prócz tych kaskad i potoków,/
+ Zraszających każdy parów,/
+ Prócz girlandy tęcz, obłoków,/
+ Prócz natury słodkich czarów;</strofa>
+
+ <strofa>Prócz tych wspólnych, jasnych zdrojów,/
+ Z których serce zachwyt piło;/
+ Prócz pierwiosnków i powojów,---/
+ Między nami nic nie było!</strofa>
+</liryka_lp></utwor>
--- /dev/null
+# -*- coding: utf-8 -*-
+import os
+import cStringIO
+import re
+import codecs
+
+from lxml import etree
+
+from librarian import dcparser
+
+
+# Plain-text sequences and the typographic characters that replace them.
+# substitute_entities applies the pairs one after another in list order, so
+# '---' has to precede '--'; otherwise the shorter sequence would consume
+# part of the longer one first.
+ENTITY_SUBSTITUTIONS = [
+ (u'---', u'—'),
+ (u'--', u'–'),
+ (u'...', u'…'),
+ (u',,', u'„'),
+ (u'"', u'”'),
+]
+
+
+# Line-length threshold, in characters, used by wrap_words.
+MAX_LINE_LENGTH = 80
+
+
+def strip(context, text):
+ """Remove unneeded whitespace from beginning and end"""
+ if isinstance(text, list):
+ text = ''.join(text)
+ return re.sub(r'\s+', ' ', text).strip()
+
+
+def substitute_entities(context, text):
+ """XPath extension function converting all entites in passed text."""
+ if isinstance(text, list):
+ text = ''.join(text)
+ for entity, substitutution in ENTITY_SUBSTITUTIONS:
+ text = text.replace(entity, substitutution)
+ return text
+
+
+def wrap_words(context, text):
+ """XPath extension function automatically wrapping words in passed text"""
+ if isinstance(text, list):
+ text = ''.join(text)
+ words = re.split(r'\s', text)
+
+ line_length = 0
+ lines = [[]]
+ for word in words:
+ line_length += len(word) + 1
+ if line_length > MAX_LINE_LENGTH:
+ # Max line length was exceeded. We create new line
+ lines.append([])
+ line_length = len(word)
+ lines[-1].append(word)
+ return '\n'.join(' '.join(line) for line in lines)
+
+
+# Register strip, substitute_entities and wrap_words as XPath extension
+# functions in the http://wolnelektury.pl/functions namespace.
+ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+ns['strip'] = strip
+ns['substitute_entities'] = substitute_entities
+ns['wrap_words'] = wrap_words
+
+
+def transform(input_filename, output_filename):
+ """Transforms file input_filename in XML to output_filename in TXT."""
+ # Parse XSLT
+ style_filename = os.path.join(os.path.dirname(__file__), 'book2txt.xslt')
+ style = etree.parse(style_filename)
+
+ doc_file = cStringIO.StringIO()
+ expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
+
+ f = open(input_filename, 'r')
+ for line in f:
+ line = line.decode('utf-8')
+ line = expr.sub(u'<br/>\n', line)
+ doc_file.write(line.encode('utf-8'))
+ f.close()
+
+ doc_file.seek(0)
+
+ parser = etree.XMLParser(remove_blank_text=True)
+ doc = etree.parse(doc_file, parser)
+
+ result = doc.xslt(style)
+ output_file = codecs.open(output_filename, 'wb', encoding='utf-8')
+ output_file.write(unicode(result) % dcparser.parse(input_filename).url)
+
--- /dev/null
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import html
+
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to HTML format."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ # Do some real work
+ for input_filename in input_filenames:
+ if options.verbose:
+ print input_filename
+
+ output_filename = os.path.splitext(input_filename)[0] + '.html'
+ html.transform(input_filename, output_filename)
+
--- /dev/null
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import text
+
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to TXT format."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ # Do some real work
+ for input_filename in input_filenames:
+ if options.verbose:
+ print input_filename
+
+ output_filename = os.path.splitext(input_filename)[0] + '.txt'
+ text.transform(input_filename, output_filename)
+
--- /dev/null
+#!/usr/bin/env python
+import os
+import optparse
+
+from librarian import html
+
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Extract theme fragments from SOURCE."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ # Do some real work
+ for input_filename in input_filenames:
+ if options.verbose:
+ print input_filename
+
+ output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
+
+ closed_fragments, open_fragments = html.extract_fragments(input_filename)
+
+ for fragment_id in open_fragments:
+ print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+
+ output_file = open(output_filename, 'w')
+ output_file.write("""
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ <html><head>
+ <title>bookfragments output</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
+ <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
+ </head>
+ <body>""")
+ for fragment in closed_fragments.values():
+ fragment_html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
+ output_file.write(fragment_html.encode('utf-8'))
+ output_file.write('</body></html>')
+ output_file.close()
+
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import optparse
+
+from lxml import etree
+from librarian import html
+from slughifi import slughifi
+
+
+BOOK_URL = 'http://wolnelektury.pl/katalog/lektura/'
+
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Generate slugs for SOURCE."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+ parser.add_option('-f', '--force', action='store_true', dest='force', default=False,
+ help='overwrite current identifiers')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ # Do some real work
+ for input_filename in input_filenames:
+ if options.verbose:
+ print input_filename
+
+ doc = etree.parse(input_filename)
+ try:
+ title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text
+ except AttributeError:
+ print '%s:error:Book title not found. Skipping.' % input_filename
+ continue
+
+ parent = ''
+ try:
+ parent_url = doc.find('//{http://purl.org/dc/elements/1.1/}relation.isPartOf').text
+ parent = parent_url.rsplit('/', 1)[1] + ' '
+ except AttributeError:
+ pass
+ except IndexError:
+ print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url)
+
+ book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url')
+ if book_url is None:
+ book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')
+ book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url')
+ if not options.force and book_url.text.startswith('http://'):
+ print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text)
+ continue
+
+ book_url.text = BOOK_URL + slughifi(parent + title)[:60]
+
+ doc.write(input_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
+
--- /dev/null
+body {
+    font-size: 16px;
+    /* The "font" shorthand is invalid without a size and is dropped by
+       browsers, so the family is set with font-family instead. */
+    font-family: Georgia, "Times New Roman", serif;
+    line-height: 1.5em;
+    margin: 3em;
+    max-width: 36em;
+}
+
+a {
+ color: blue;
+ text-decoration: none;
+}
+
+/* =================================================== */
+/* = Common elements: headings, paragraphs and lines = */
+/* =================================================== */
+h1 {
+ font-size: 3em;
+ margin: 1.5em 0;
+ text-align: center;
+ line-height: 1.5em;
+ font-weight: bold;
+}
+
+h2 {
+ font-size: 2em;
+ margin: 1.5em 0 0;
+ font-weight: bold;
+ line-height: 1.5em;
+}
+
+h3 {
+ font-size: 1.5em;
+ margin: 1.5em 0 0;
+ font-weight: normal;
+ line-height: 1.5em;
+}
+
+h4 {
+ font-size: 1em;
+ margin: 1.5em 0 0;
+ line-height: 1.5em;
+}
+
+p {
+ margin: 0;
+}
+
+/* ======================== */
+/* = Footnotes and themes = */
+/* ======================== */
+.theme-begin {
+ border-left: 0.1em solid #DDDDDD;
+ color: #777;
+ padding: 0 0.5em;
+ width: 7.5em;
+ font-style: normal;
+ font-weight: normal;
+ font-size: 16px;
+ position: absolute;
+ left: 40em;
+ line-height: 1.5em;
+ text-align: left;
+}
+
+.annotation {
+ font-style: normal;
+ font-weight: normal;
+ font-size: 12px;
+}
+
+#footnotes .annotation {
+ display: block;
+ float: left;
+ width: 2.5em;
+ clear: both;
+}
+
+#footnotes div {
+ margin: 1.5em 0 0 0;
+}
+
+#footnotes p {
+ margin-left: 2.5em;
+}
+
+
+/* ============= */
+/* = Numbering = */
+/* ============= */
+.anchor {
+ float: left;
+ margin: -0.2em -0.5em -0.2em -3.5em;
+ color: #777;
+ font-size: 12px;
+ width: 2em;
+ text-align: center;
+ padding: 0.2em 0.5em;
+}
+
+.anchor:hover, .anchor:active {
+ color: #FFF;
+ background-color: #CCC;
+}
+
+
+/* =================== */
+/* = Custom elements = */
+/* =================== */
+span.author {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: 0.25em;
+}
+
+span.collection {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-bottom: -0.25em;
+}
+
+span.subtitle {
+ font-size: 0.75em;
+ display: block;
+ line-height: 1.5em;
+ margin-top: -0.25em;
+}
+
+div.didaskalia {
+ font-style: italic;
+ margin: 0.5em 0 0;
+}
+
+div.kwestia {
+ margin: 0.5em 0 0;
+}
+
+div.stanza {
+ margin: 1.5em 0 0;
+}
+
+div.kwestia div.stanza {
+ margin: 0;
+}
+
+p.paragraph {
+ text-align: justify;
+ margin: 1.5em 0 0;
+}
+
+p.motto {
+ text-align: justify;
+ font-style: italic;
+ margin: 1.5em 0 0;
+}
+
+p.motto_podpis {
+ font-size: 0.875em;
+}
+
+div.fragment {
+ border-bottom: 0.1em solid #999;
+ padding-bottom: 1.5em;
+}
+
+div.note p, div.dedication p, div.note p.paragraph, div.dedication p.paragraph {
+ text-align: right;
+ font-style: italic;
+}
+
+hr.spacer {
+ height: 3em;
+ visibility: hidden;
+}
+
+hr.spacer-line {
+ margin: 1.5em 0;
+ border: none;
+ border-bottom: 0.1em solid #000;
+}
+
+p.spacer-asterisk {
+ padding: 0;
+ margin: 1.5em 0;
+ text-align: center;
+}
+
+/* Person list: no list markers, indented 1.5em from the left. */
+div.person-list ol {
+ padding: 0 0 0 1.5em;
+ list-style: none;
+}
+
+p.place-and-time {
+ font-style: italic;
+}
+
+/* Inline emphasis classes that all render as plain italics. */
+em.author-emphasis, em.book-title, em.didaskalia, em.foreign-word, em.math {
+ font-style: italic;
+}
+
+/* em.person: upright small capitals instead of italics. */
+em.person {
+ font-variant: small-caps;
+ font-style: normal;
+}
+
--- /dev/null
+/* Page-wide defaults: 16px serif text with 1.5em line height, laid out in a
+   column at most 36em wide with a 3em margin on every side. */
+body {
+ font-size: 16px;
+ /* Use font-family rather than the `font` shorthand: the shorthand is only
+    valid when it also specifies a font-size, so `font: Georgia, ...` was
+    discarded by browsers and the serif stack never took effect. */
+ font-family: Georgia, "Times New Roman", serif;
+ line-height: 1.5em;
+ margin: 3em;
+ max-width: 36em;
+}
+
+/* Links: blue, without an underline. */
+a {
+ text-decoration: none;
+ color: blue;
+}
+
+/* =================================================== */
+/* = Common elements: headings, paragraphs and lines = */
+/* =================================================== */
+/* All four heading levels use the same 1.5em line height. */
+h1, h2, h3, h4 {
+ line-height: 1.5em;
+}
+
+/* h1: large, bold, centered, with 1.5em above and below. */
+h1 {
+ margin: 1.5em 0;
+ font-size: 3em;
+ font-weight: bold;
+ text-align: center;
+}
+
+/* h2-h4: progressively smaller, each with a 1.5em gap above only. */
+h2 {
+ margin: 1.5em 0 0;
+ font-size: 2em;
+ font-weight: bold;
+}
+
+h3 {
+ margin: 1.5em 0 0;
+ font-size: 1.5em;
+ font-weight: normal;
+}
+
+h4 {
+ margin: 1.5em 0 0;
+ font-size: 1em;
+}
+
+/* Paragraphs carry no margin unless a more specific rule adds one. */
+p {
+ margin: 0;
+}
+
+/* ======================== */
+/* = Footnotes and themes = */
+/* ======================== */
+/* Theme label: not displayed in this stylesheet (display: none). The remaining
+   declarations describe a 7.5em-wide box floated right and pushed 9.5em past
+   the right edge, in upright, normal-weight 16px grey text. */
+.theme-begin {
+ display: none;
+ float: right;
+ width: 7.5em;
+ margin: 0 -9.5em 0 0;
+ padding: 0 0.5em;
+ border-left: 0.1em solid #DDDDDD;
+ color: #666;
+ font-size: 16px;
+ font-style: normal;
+ font-weight: normal;
+}
+
+/* Annotations are likewise not displayed by default. */
+.annotation {
+ display: none;
+ font-size: 16px;
+ font-style: normal;
+ font-weight: normal;
+}
+
+/* The whole footnotes container is not displayed in this stylesheet. */
+#footnotes {
+ display: none;
+}
+
+/* Within #footnotes, .annotation is switched back to a block: a 2.5em-wide
+   box floated left that always starts below any earlier float. */
+#footnotes .annotation {
+ clear: both;
+ float: left;
+ display: block;
+ width: 2.5em;
+}
+
+/* Every div inside #footnotes gets 1.5em of space above it. */
+#footnotes div {
+ margin: 1.5em 0 0;
+}
+
+/* Paragraphs are indented by 2.5em, the width of the floated .annotation. */
+#footnotes p {
+ margin-left: 2.5em;
+}
+
+/* =================== */
+/* = Custom elements = */
+/* =================== */
+/* Author line: block-level, three-quarter size, 0.25em of space below. */
+span.author {
+ display: block;
+ margin-bottom: 0.25em;
+ font-size: 0.75em;
+ line-height: 1.5em;
+}
+
+/* Collection line: same type setting, with a negative bottom margin. */
+span.collection {
+ display: block;
+ margin-bottom: -0.25em;
+ font-size: 0.75em;
+ line-height: 1.5em;
+}
+
+/* Subtitle line: same type setting, with a negative top margin. */
+span.subtitle {
+ display: block;
+ margin-top: -0.25em;
+ font-size: 0.75em;
+ line-height: 1.5em;
+}
+
+/* div.didaskalia: italic block with a 0.5em gap above. */
+div.didaskalia {
+ margin: 0.5em 0 0;
+ font-style: italic;
+}
+
+/* div.kwestia: same 0.5em gap above, upright text. */
+div.kwestia {
+ margin: 0.5em 0 0;
+}
+
+/* A stanza normally gets a 1.5em gap above it... */
+div.stanza {
+ margin: 1.5em 0 0;
+}
+
+/* ...but no margin at all when it sits inside a div.kwestia. */
+div.kwestia div.stanza {
+ margin: 0;
+}
+
+/* Body paragraph: justified, 1.5em gap above. */
+p.paragraph {
+ margin: 1.5em 0 0;
+ text-align: justify;
+}
+
+/* Motto: same spacing and justification as a paragraph, set in italics. */
+p.motto {
+ margin: 1.5em 0 0;
+ font-style: italic;
+ text-align: justify;
+}
+
+/* p.motto_podpis is set slightly smaller (0.875em). */
+p.motto_podpis {
+ font-size: 0.875em;
+}
+
+/* Each fragment ends with a thin grey line drawn 1.5em below its content. */
+div.fragment {
+ padding-bottom: 1.5em;
+ border-bottom: 0.1em solid #999;
+}
+
+/* Paragraphs in notes: right-aligned italics. The `p.paragraph` variant
+   carries an extra class in the selector, which gives it higher specificity
+   than a bare `p.paragraph` rule (justified text). */
+div.note p.paragraph, div.note p {
+ font-style: italic;
+ text-align: right;
+}
+
+/* Invisible rule: still occupies its 3em of height, but is not painted. */
+hr.spacer {
+ visibility: hidden;
+ height: 3em;
+}
--- /dev/null
+# -*- coding: utf-8 -*-
+from distutils.core import setup
+
+
+setup(
+ name='librarian',
+ version='1.1',
+ description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
+ author='Marek Stępniowski',
+ author_email='marek@stepniowski.com',
+ url='http://redmine.nowoczesnapolska.org.pl/',
+ packages=['librarian', 'librarian.tests'],
+ package_dir={'librarian': 'librarian'},
+ package_data={
+ 'librarian': ['*.xslt'],
+ 'librarian.tests': ['files/dcparser/*.xml', 'files/erroneous/*.xml'],
+ },
+ scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/bookfragments', 'scripts/genslugs'],
+)