X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/67a177f2ec1fa2eac56e7fb07ccaf32bcd33d8ce..86becb1ae4d215dd782dfc054471d724f6dcd1d7:/src/librarian/elements/base.py diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py index 1d3edd6..6036d16 100644 --- a/src/librarian/elements/base.py +++ b/src/librarian/elements/base.py @@ -3,7 +3,6 @@ import re from lxml import etree from librarian import dcparser, RDFNS -from librarian.html import raw_printable_text from librarian.util import get_translation @@ -62,33 +61,58 @@ class WLElement(etree.ElementBase): def gettext(self): return get_translation(self.meta.language).gettext - def raw_printable_text(self): + def in_context_of(self, setting): + parent = self.getparent() + if parent is None: + return False + try: + return getattr(parent, setting) + except AttributeError: + return parent.in_context_of(setting) + + def signal(self, signal): + parent = self.getparent() + if parent is not None: + parent.signal(signal) + + def raw_printable_text(self, builder): + from librarian.html import raw_printable_text + # TODO: podtagi, wyroznienia, etc t = '' - t += self.normalize_text(self.text) + t += self.normalize_text(self.text, builder) for c in self: if not isinstance(c, WLElement): continue if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'): - t += c.raw_printable_text() - t += self.normalize_text(c.tail) + t += c.raw_printable_text(builder) + t += self.normalize_text(c.tail, builder) return t - def normalize_text(self, text): + def normalize_text(self, text, builder): text = text or '' for e, s in self.text_substitutions: text = text.replace(e, s) # FIXME: TEmporary turnoff # text = re.sub(r'\s+', ' ', text) ### TODO: Added now for epub - text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text) + + if getattr(builder, 'hyphenator', None) is not None: + newt = '' + wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text) + for w in wlist: + newt += builder.hyphenator.inserted(w, u'\u00AD') + text = newt + + if builder.orphans: + text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text) return text def _build_inner(self, builder, build_method): child_count = len(self) if self.CAN_HAVE_TEXT and self.text: - text = self.normalize_text(self.text) + text = self.normalize_text(self.text, builder) if self.STRIP: text = text.lstrip() if not child_count: @@ -98,7 +122,7 @@ class WLElement(etree.ElementBase): if isinstance(child, WLElement): getattr(child, build_method)(builder) if self.CAN_HAVE_TEXT and child.tail: - text = self.normalize_text(child.tail) + text = self.normalize_text(child.tail, builder) if self.STRIP and i == child_count - 1: text = text.rstrip() builder.push_text(text) @@ -166,14 +190,14 @@ class WLElement(etree.ElementBase): builder.start_chunk() fragment = None - if self.SECTION_PRECEDENCE: + if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'): if not start_chunk: fragment = 'sub%d' % builder.assign_section_number() self.attrib['id'] = fragment builder.add_toc_entry( fragment, - self.raw_printable_text(), + self.raw_printable_text(builder), self.SECTION_PRECEDENCE ) @@ -193,11 +217,12 @@ class WLElement(etree.ElementBase): def validate(self): from librarian.elements.masters import Master from librarian.elements.blocks import DlugiCytat, PoezjaCyt + from librarian.elements.footnotes import Footnote if self.SECTION_PRECEDENCE: - assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt)), \ + assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt, Footnote)), \ 'Header {} inside a <{}> instead of a master.'.format( - etree.tostring(self), self.getparent().tag) + etree.tostring(self, encoding='unicode'), self.getparent().tag) for c in self: if isinstance(c, WLElement):