1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 
   7 from librarian import dcparser, RDFNS
 
   8 from librarian.util import get_translation
 
  10 def last_words(text, n):
 
  12     for w in reversed(text.split()):
 
  20         return n, ' '.join(reversed(words))
 
  23 class WLElement(etree.ElementBase):
 
  24     SECTION_PRECEDENCE = None
 
  39     EPUB_START_CHUNK = False
 
  45     text_substitutions = [
 
  56     def meta_object(self):
 
  57         if not hasattr(self, '_meta_object'):
 
  58             elem = self.find(RDFNS('RDF'))
 
  60                 self._meta_object = dcparser.BookInfo.from_element(elem)
 
  62                 self._meta_object = None
 
  63         return self._meta_object
 
  67         if self.meta_object is not None:
 
  68             return self.meta_object
 
  70             if self.getparent() is not None:
 
  71                 return self.getparent().meta
 
  73                 return self.document.base_meta
 
  77         return get_translation(self.meta.language).gettext
 
  79     def in_context_of(self, setting):
 
  80         parent = self.getparent()
 
  84             return getattr(parent, setting)
 
  85         except AttributeError:
 
  86             return parent.in_context_of(setting)
 
  88     def get_context_map(self, setting, key, default=None):
 
  89         parent = self.getparent()
 
  93             return getattr(parent, setting)[key]
 
  94         except AttributeError:
 
  95             return parent.get_context_map(setting, key, default)
 
  97     def signal(self, signal):
 
  98         parent = self.getparent()
 
  99         if parent is not None:
 
 100             parent.signal(signal)
 
 102     def raw_printable_text(self, builder):
 
 103         from librarian.html import raw_printable_text
 
 105         # TODO: podtagi, wyroznienia, etc
 
 107         t += self.normalize_text(self.text, builder)
 
 109             if not isinstance(c, WLElement):
 
 111             if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 
 112                 t += c.raw_printable_text(builder)
 
 113             t += self.normalize_text(c.tail, builder)
 
 116     def normalize_text(self, text, builder):
 
 118         for e, s in self.text_substitutions:
 
 119             text = text.replace(e, s)
 
 121         if getattr(builder, 'normalize_whitespace', False):
 
 122             text = re.sub(r'\s+', ' ', text)
 
 124         if getattr(builder, 'hyphenator', None) is not None:
 
 126             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
 
 128                 newt += builder.hyphenator.inserted(w, '\u00AD')
 
 132             text = re.sub(r'(?<=\s\w)\s+', '\u00A0', text)
 
 136     def _build_inner(self, builder, build_method):
 
 137         child_count = len(self)
 
 138         if self.CAN_HAVE_TEXT and self.text:
 
 139             text = self.normalize_text(self.text, builder)
 
 144             builder.push_text(text)
 
 145         for i, child in enumerate(self):
 
 147             if isinstance(child, WLElement):
 
 148                 getattr(child, build_method)(builder)
 
 149                 self.after_child(builder, real_child_count)
 
 150                 real_child_count += 1
 
 152             # FIXME base builder api
 
 153             elif getattr(builder, 'debug', False) and child.tag is etree.Comment:
 
 154                 builder.process_comment(child)
 
 155             if self.CAN_HAVE_TEXT and child.tail:
 
 156                 text = self.normalize_text(child.tail, builder)
 
 157                 if self.STRIP and i == child_count - 1:
 
 159                 builder.push_text(text)
 
 161     def after_child(self, builder, child_count):
 
 162         fn = getattr(builder, 'after_child_fn', None)
 
 164             getattr(self, builder.after_child_fn)(builder, child_count)
 
 166     def txt_after_child(self, builder, child_count):
 
 169     def _txt_build_inner(self, builder):
 
 170         self._build_inner(builder, 'txt_build')
 
 172     def txt_build(self, builder):
 
 173         builder.push_margin(self.TXT_TOP_MARGIN)
 
 174         builder.push_text(self.TXT_PREFIX, True)
 
 175         self._txt_build_inner(builder)
 
 176         builder.push_text(self.TXT_SUFFIX, True)
 
 177         builder.push_margin(self.TXT_BOTTOM_MARGIN)
 
 179     def _html_build_inner(self, builder):
 
 180         self._build_inner(builder, 'html_build')
 
 182     def get_html_attr(self, builder):
 
 183         attr = self.HTML_ATTR.copy()
 
 185             attr['class'] = self.HTML_CLASS
 
 187             # always copy the id attribute (?)
 
 188             if self.attrib.get('id'):
 
 189                 attr['id'] = self.attrib['id']
 
 190             if self.attrib.get('_id'):
 
 191                 attr['id'] = self.attrib['_id']
 
 194     def html_build(self, builder):
 
 195         # Do we need a number?
 
 196         numbering = self.numbering
 
 197         if numbering == 'main':
 
 198             if builder.with_numbering and self.has_visible_numbering:
 
 199                 builder.add_visible_number(self)
 
 202             builder.start_element(
 
 204                 self.get_html_attr(builder),
 
 207         self._html_build_inner(builder)
 
 209             builder.end_element()
 
 211     def _epub_build_inner(self, builder):
 
 212         self._build_inner(builder, 'epub_build')
 
 214     def get_epub_attr(self, builder):
 
 215         attr = self.EPUB_ATTR.copy()
 
 217             attr['class'] = self.EPUB_CLASS
 
 220     def epub_build(self, builder):
 
 221         from librarian.elements.masters import Master
 
 224         self.CAN_HAVE_TEXT = True
 
 227         start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
 
 230             builder.start_chunk()
 
 233         if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'):
 
 235                 fragment = 'sub%d' % builder.assign_section_number()
 
 236                 self.attrib['id'] = fragment
 
 238             builder.add_toc_entry(
 
 240                 self.raw_printable_text(builder),
 
 241                 self.SECTION_PRECEDENCE
 
 245             attr = self.get_epub_attr(builder)
 
 247                 attr['id'] = fragment
 
 249                 chunkno, sourceline = 0, self.sourceline
 
 251                     chunkno, sourceline = len(builder.splits), sourceline - builder.splits[-1]
 
 252                 attr['data-debug'] = f'{chunkno}:{sourceline}'
 
 253             builder.start_element(
 
 258         self._epub_build_inner(builder)
 
 260             builder.end_element()
 
 263         from librarian.elements.masters import Master
 
 264         from librarian.elements.blocks import DlugiCytat, PoezjaCyt
 
 265         from librarian.elements.footnotes import Footnote
 
 267         if self.SECTION_PRECEDENCE:
 
 268             assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt, Footnote)), \
 
 269                     'Header {} inside a <{}> instead of a master.'.format(
 
 270                             etree.tostring(self, encoding='unicode'), self.getparent().tag)
 
 273             if isinstance(c, WLElement):
 
 278         # TODO: Remove insanity here.
 
 280             if isinstance(e, WLElement):
 
 283     def snip(self, words, before=None, sub=False):
 
 284         if sub and self.ASIDE:
 
 288         if before is not None:
 
 289             i = self.index(before)
 
 297                     words, text = last_words(self[i].tail, words)
 
 298                     snippet = [('text', text)] + snippet
 
 301                 words, subsnip = self[i].snip(words, sub=True)
 
 302                 snippet = subsnip + snippet
 
 304         if words and self.text:
 
 305             words, text = last_words(self.text, words)
 
 306             snippet = [('text', text)] + snippet
 
 308         snippet = [('start', self.tag, self.attrib)] + snippet + [('end',)]
 
 310         if not sub and words and not self.ASIDE:
 
 312             parent = self.getparent()
 
 313             if parent is not None and parent.CAN_HAVE_TEXT:
 
 314                 words, parsnip = parent.snip(words, before=self)
 
 315                 return words, parsnip[:-1] + snippet + parsnip[-1:]
 
 317         return words, snippet
 
 319     def get_snippet(self, words=15):
 
 320         from librarian.parser import parser
 
 322         words, snippet = self.getparent().snip(words=words, before=self)
 
 324         cursor = snipelem = parser.makeelement('snippet')
 
 325         snipelem._meta_object = self.meta
 
 328                 elem = parser.makeelement(s[1], **s[2])
 
 332                 cursor = cursor.getparent()
 
 335                     cursor[-1].tail = (cursor[-1].tail or '') + s[1]
 
 337                     cursor.text = (cursor.text or '') + s[1]
 
 343         numbering = self.NUMBERING
 
 344         if numbering is None or self.in_context_of('DISABLE_NUMBERING'):
 
 346         numbering = self.get_context_map('SUPPRESS_NUMBERING', numbering, numbering)
 
 351         prefix = self.numbering
 
 353             # TODO: self.context.main_numbering_prefix
 
 354             prefix = 'f' # default numbering prefix
 
 357     def assign_id(self, builder):
 
 358         numbering = self.numbering
 
 360             number = str(builder.counters[numbering])
 
 361             self.attrib['_id'] = self.id_prefix + number
 
 362             builder.counters[numbering] += 1
 
 364             if numbering == 'main':
 
 365                 self.attrib['_visible_numbering'] = str(builder.counters['_visible'])
 
 366                 builder.counters['_visible'] += 1
 
 368             if numbering == 'fn':
 
 369                 self.attrib['_visible_numbering'] = number
 
 372         return self.attrib.get('_id') or self.getparent().get_link()
 
 375 class Snippet(WLElement):