X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/d04e61819290fc8d6d71b1932c55a774014c1f05..261a3308d235fc252f30ae03603d0e964bb60223:/src/librarian/elements/base.py diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py index b9df185..5317268 100644 --- a/src/librarian/elements/base.py +++ b/src/librarian/elements/base.py @@ -3,10 +3,13 @@ import re from lxml import etree from librarian import dcparser, RDFNS +from librarian.html import raw_printable_text from librarian.util import get_translation class WLElement(etree.ElementBase): + SECTION_PRECEDENCE = None + TXT_TOP_MARGIN = 0 TXT_BOTTOM_MARGIN = 0 TXT_PREFIX = "" @@ -15,17 +18,24 @@ class WLElement(etree.ElementBase): HTML_TAG = None HTML_ATTR = {} HTML_CLASS = None - + + EPUB_TAG = None + EPUB_ATTR = {} + EPUB_CLASS = None + EPUB_START_CHUNK = False + CAN_HAVE_TEXT = True STRIP = False text_substitutions = [ (u'---', u'—'), (u'--', u'–'), - (u'...', u'…'), + #(u'...', u'…'), # Temporary turnoff for epub (u',,', u'„'), (u'"', u'”'), ('\ufeff', ''), + + ("'", "\u2019"), # This was enabled for epub. ] @property @@ -52,11 +62,25 @@ class WLElement(etree.ElementBase): def gettext(self): return get_translation(self.meta.language).gettext + def raw_printable_text(self): + # TODO: podtagi, wyroznienia, etc + t = '' + t += self.normalize_text(self.text) + for c in self: + if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'): + t += c.raw_printable_text() + t += self.normalize_text(c.tail) + return t + def normalize_text(self, text): text = text or '' for e, s in self.text_substitutions: text = text.replace(e, s) - text = re.sub(r'\s+', ' ', text) + # FIXME: TEmporary turnoff +# text = re.sub(r'\s+', ' ', text) +### TODO: Added now for epub + text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text) + return text def _build_inner(self, builder, build_method): @@ -118,6 +142,48 @@ class WLElement(etree.ElementBase): if self.HTML_TAG: builder.end_element() + def _epub_build_inner(self, builder): + self._build_inner(builder, 'epub_build') + + def get_epub_attr(self, builder): + attr = self.EPUB_ATTR.copy() + if self.EPUB_CLASS: + attr['class'] = self.EPUB_CLASS + return attr + + def epub_build(self, builder): + # TEMPORARY + self.CAN_HAVE_TEXT = True + self.STRIP = False + + if self.EPUB_START_CHUNK: + builder.start_chunk() + + fragment = None + if self.SECTION_PRECEDENCE: + if not self.EPUB_START_CHUNK: + fragment = 'sub%d' % builder.assign_section_number() + self.attrib['id'] = fragment + + builder.add_toc_entry( + fragment, + self.raw_printable_text(), + self.SECTION_PRECEDENCE + ) + + if self.EPUB_TAG: + attr = self.get_epub_attr(builder) + if fragment: + attr['id'] = fragment + builder.start_element( + self.EPUB_TAG, + attr + ) + + self._epub_build_inner(builder) + if self.EPUB_TAG: + builder.end_element() + def sanitize(self): # TODO: Remove insanity here. for e in self: