setup(
name='librarian',
- version='2.4.13',
+ version='2.5',
description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
author="Marek Stępniowski",
author_email='marek@stepniowski.com',
from collections import OrderedDict
from .txt import TxtBuilder
-from .html import HtmlBuilder, StandaloneHtmlBuilder, DaisyHtmlBuilder
+from .html import HtmlBuilder, SnippetHtmlBuilder, StandaloneHtmlBuilder, DaisyHtmlBuilder
from .sanitize import Sanitizer
from .daisy import DaisyBuilder
from .epub import EpubBuilder
builders = OrderedDict([
("txt", TxtBuilder),
("html", HtmlBuilder),
+ ("html-snippet", SnippetHtmlBuilder),
("html-standalone", StandaloneHtmlBuilder),
("html-daisy", DaisyHtmlBuilder),
("daisy", DaisyBuilder),
self._base_url = base_url
self.tree = text = etree.Element('div', **{'id': 'book-text'})
- self.header = etree.SubElement(text, 'h1')
+ self.header = etree.Element('h1')
self.footnotes = etree.Element('div', id='footnotes')
self.footnote_counter = 0
if self.with_toc:
add_table_of_contents(self.tree)
+ if len(self.header):
+ self.tree.insert(0, self.header)
+
if self.footnote_counter:
fnheader = etree.Element("h3")
fnheader.text = _("Footnotes")
)
class SnippetHtmlBuilder(HtmlBuilder):
    """HTML builder with the six ``with_*`` switches listed below turned off.

    Registered in the ``builders`` mapping under the ``"html-snippet"`` key.
    """

    # Every switch is disabled explicitly; nothing else is overridden here.
    with_anchors = with_themes = with_toc = False
    with_footnotes = with_nota_red = with_refs = False
+
+
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
file_extension = 'xhtml'
with_anchors = False
persons.remove(None)
return persons
def references(self):
    """Return the list of all ``ref`` elements that descend from ``self.tree``."""
    ref_path = './/ref'
    return self.tree.findall(ref_path)
from lxml import etree
from . import (blocks, comments, drama, figures, footnotes, front, headers,
masters, paragraphs, poetry, ref, root, separators, styles, themes,
- tools)
+ tools, base)
WL_ELEMENTS = {
+ 'snippet': base.Snippet,
'meta': etree.ElementBase,
'coverClass': etree.ElementBase,
"developmentStage": etree.ElementBase,
-# -*- coding: utf-8
-
+import copy
import re
from lxml import etree
from librarian import dcparser, RDFNS
from librarian.util import get_translation
def last_words(text, n):
    """Cut the trailing part of *text* that contains its last *n* long words.

    Words are whitespace-separated. Only words longer than two characters
    count towards *n*; shorter ones that lie between them are carried along
    without being counted.

    Args:
        text: the string to take words from.
        n: how many long words are wanted.

    Returns:
        A ``(remaining, fragment)`` tuple. ``remaining`` is the number of
        long words still missing. When the text runs out before *n* long
        words are found, ``fragment`` is *text* itself, unchanged. Otherwise
        ``remaining`` is 0 and ``fragment`` is the selected words joined with
        single spaces. For a non-positive *n* nothing is taken and
        ``fragment`` is the empty string.
    """
    if n <= 0:
        # No words requested: without this guard the loop below could never
        # reach zero and the whole text (or an arbitrary tail) was returned.
        return n, ''

    words = []
    for w in reversed(text.split()):
        words.append(w)
        if len(w) > 2:
            n -= 1
            if not n:
                break

    if n:
        # Not enough long words: hand back the text untouched so that its
        # original whitespace is preserved.
        return n, text
    return n, ' '.join(reversed(words))
+
class WLElement(etree.ElementBase):
SECTION_PRECEDENCE = None
+ ASIDE = False
TXT_TOP_MARGIN = 0
TXT_BOTTOM_MARGIN = 0
# always copy the id attribute (?)
if self.attrib.get('id'):
attr['id'] = self.attrib['id']
- elif '_compat_section_id' in self.attrib:
+ elif getattr(self, 'SHOULD_HAVE_ID', False) and '_compat_section_id' in self.attrib:
attr['id'] = self.attrib['_compat_section_id']
return attr
for e in self:
if isinstance(e, WLElement):
e.sanitize()
+
def snip(self, words, before=None, sub=False):
    """Collect up to *words* trailing words of this element as snippet events.

    Children are visited from last to first (starting just before *before*
    when it is given). For each child, words are taken from its tail text
    and then from the child itself; finally this element's own leading text
    is used. Word counting is delegated to ``last_words``.

    Args:
        words: number of words still wanted.
        before: optional child of this element; only content that precedes
            it is considered.
        sub: True when called for a descendant of the element the walk
            started from. Elements flagged ``ASIDE`` then contribute
            nothing, and the walk never climbs to the parent.

    Returns:
        A ``(remaining, events)`` tuple. ``remaining`` is the number of
        words still missing; ``events`` is a list of
        ``('start', tag, attrib)``, ``('text', str)`` and ``('end',)``
        tuples in document order.
    """
    if sub and self.ASIDE:
        return words, []

    snippet = []
    i = self.index(before) if before is not None else len(self)

    # Walk backwards; once enough words are gathered nothing more is added.
    while i > 0 and words:
        i -= 1
        child = self[i]
        if child.tail:
            words, text = last_words(child.tail, words)
            snippet.insert(0, ('text', text))
        if words:
            words, subsnip = child.snip(words, sub=True)
            snippet[:0] = subsnip

    if words and self.text:
        words, text = last_words(self.text, words)
        snippet.insert(0, ('text', text))

    snippet = [('start', self.tag, self.attrib)] + snippet + [('end',)]

    if not sub and words and not self.ASIDE:
        # Still short of words: continue in the parent, but only if the
        # parent is an element that may carry text of its own.
        # NOTE(review): this path used to be blocked by a debug
        # print/`assert False`; confirm climbing is the intended behaviour.
        parent = self.getparent()
        if parent is not None and parent.CAN_HAVE_TEXT:
            words, parsnip = parent.snip(words, before=self)
            # Nest our events inside the parent's start/end pair, after
            # whatever the parent collected from preceding content.
            return words, parsnip[:-1] + snippet + parsnip[-1:]

    return words, snippet
+
def get_snippet(self, words=15):
    """Build a ``snippet`` element from the content preceding this element.

    The parent is asked (through ``snip``) for up to *words* words located
    before this element, and the returned start/text/end events are
    replayed into a new tree created with the library parser.

    Returns:
        The new ``snippet`` root element; its ``_meta_object`` is set to
        this element's ``meta``.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from librarian.parser import parser

    _, events = self.getparent().snip(words=words, before=self)

    root = parser.makeelement('snippet')
    root._meta_object = self.meta

    current = root
    for event in events:
        kind = event[0]
        if kind == 'start':
            node = parser.makeelement(event[1], **event[2])
            current.append(node)
            current = node
        elif kind == 'end':
            current = current.getparent()
        elif len(current):
            # Text that follows a child belongs to that child's tail.
            last = current[-1]
            last.tail = (last.tail or '') + event[1]
        else:
            current.text = (current.text or '') + event[1]

    return root
+
def get_link(self):
    """Return the nearest section id at or before this element.

    The element's own ``_compat_section_id`` attribute is used when the
    element is flagged ``SHOULD_HAVE_ID`` and the attribute is non-empty.
    Otherwise the lookup continues with the immediately preceding sibling,
    or with the parent when this element is its parent's first child.

    Returns:
        The section id string, or ``None`` when the search reaches an
        element without a parent and no id has been found.
    """
    sec = (
        getattr(self, 'SHOULD_HAVE_ID', False)
        and self.attrib.get('_compat_section_id')
    )
    if sec:
        return sec

    parent = self.getparent()
    if parent is None:
        # Reached the top of the tree without finding an id; previously
        # this raised AttributeError on ``None.index``.
        return None

    position = parent.index(self)
    if position:
        return parent[position - 1].get_link()
    return parent.get_link()
+
+
class Snippet(WLElement):
    """Element class mapped to the ``snippet`` tag; adds nothing to ``WLElement``."""
class Footnote(WLElement):
NO_TOC = True
START_INLINE = True
+ ASIDE = True
def signal(self, signal):
if signal == 'INLINE':
class Ref(WLElement):
+ ASIDE = True
+ HTML_TAG = 'a'
+
def txt_build(self, builder):
pass
- def html_build(self, builder):
- pass
+ def get_html_attr(self, builder):
+ return {
+ 'class': 'reference',
+ 'data-uri': self.attrib.get('href', ''),
+ }
def epub_build(self, builder):
pass
-
class Motyw(WLElement):
+ ASIDE = True
HTML_TAG = "a"
def txt_build(self, builder):
<div class="stanza" id="sec7">
<div class="verse">Prócz tych wspólnych, jasnych zdrojów,</div>
<div class="verse">Z których serce zachwyt piło;</div>
-<a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a><div class="verse">Prócz pierwiosnków i powojów,—</div>
+<a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a><div class="verse">Prócz pierwiosnków<a class="reference" data-uri="https://www.wikidata.org/wiki/Q158974"></a> i powojów,—</div>
<div class="verse">Między nami nic nie było!<span class="theme-end" fid="1189062528872"></span><span class="theme-end" fid="1189062500041"></span>
</div>
</div>
<div class="stanza">
<a name="sec7"></a><div class="verse">Prócz tych wspólnych, jasnych zdrojów,</div>
<div class="verse">Z których serce zachwyt piło;</div>
-<a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a><div class="verse">Prócz pierwiosnków i powojów,—</div>
+<a name="f15" class="target"> </a><a href="#f15" class="anchor">15</a><div class="verse">Prócz pierwiosnków<a class="reference" data-uri="https://www.wikidata.org/wiki/Q158974"></a> i powojów,—</div>
<div class="verse">Między nami nic nie było!<span class="theme-end" fid="1189062528872"></span><span class="theme-end" fid="1189062500041"></span>
</div>
</div>
<div class="stanza">
<div class="verse">Prócz tych wspólnych, jasnych zdrojów,</div>
<div class="verse">Z których serce zachwyt piło;</div>
-<div class="verse">Prócz pierwiosnków i powojów,—</div>
+<div class="verse">Prócz pierwiosnków<a class="reference" data-uri="https://www.wikidata.org/wiki/Q158974"></a> i powojów,—</div>
<div class="verse">Między nami nic nie było!</div>
</div>
<div class="stanza">
<div class="verse">Prócz tych wspólnych, jasnych zdrojów,</div>
<div class="verse">Z których serce zachwyt piło;</div>
-<div class="verse">Prócz pierwiosnków i powojów,—</div>
+<div class="verse">Prócz pierwiosnków<a class="reference" data-uri="https://www.wikidata.org/wiki/Q158974"></a> i powojów,—</div>
<div class="verse">Między nami nic nie było!</div>
</div>
<strofa>Prócz tych wspólnych, jasnych zdrojów,/
Z których serce zachwyt piło;/
-Prócz pierwiosnków i powojów,---/
+Prócz pierwiosnków<pe><slowo_obce>pierwiosnek</slowo_obce> --- taki kwiatek</pe><ref href="https://www.wikidata.org/wiki/Q158974"/> i powojów,---/
Między nami nic nie było!<end id="e1189062528872"/><end id="e1189062500041"/></strofa>
</liryka_lp>