1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
7 from librarian import dcparser, RDFNS
8 from librarian.util import get_translation
# Take words from the end of `text` and return a pair: the (possibly updated)
# count `n` and the collected words joined with single spaces.
# snip() feeds the returned count back into later calls, so `n` presumably
# acts as a remaining-word budget.
# NOTE(review): the initialisation of `words`, the loop body and any update of
# `n` are not visible in this view - confirm against the full source.
10 def last_words(text, n):
# Walk the whitespace-separated words from last to first.
12 for w in reversed(text.split()):
# `words` is reversed again here, so it was presumably collected back-to-front.
20 return n, ' '.join(reversed(words))
# Element class used for document nodes; a subclass of etree.ElementBase.
# NOTE(review): only some of the class attributes are visible in this view.
23 class WLElement(etree.ElementBase):
# Read by epub_build(): when truthy (and not inside a 'NO_TOC' context) it is
# passed to builder.add_toc_entry().
24 SECTION_PRECEDENCE = None
# Read by epub_build() together with an isinstance check on the parent to
# compute its `start_chunk` value.
39 EPUB_START_CHUNK = False
# Pairs unpacked as (old, new) by normalize_text(), applied in order with
# str.replace(). The entries themselves are not visible in this view.
45 text_substitutions = [
# Compute once, and cache in self._meta_object, the metadata for this element
# based on the RDF element located with self.find().
# It is read elsewhere as an attribute (self.meta_object), so it is presumably
# wrapped as a property; the decorator is not visible here.
# NOTE(review): the branch headers around the two assignments below are not
# visible - presumably BookInfo is built when an RDF element was found and
# None is cached otherwise.
56 def meta_object(self):
# Only compute on first access; later accesses reuse the cached attribute.
57 if not hasattr(self, '_meta_object'):
58 elem = self.find(RDFNS('RDF'))
60 self._meta_object = dcparser.BookInfo.from_element(elem)
62 self._meta_object = None
63 return self._meta_object
# Fragment: the enclosing def line is not visible in this view (presumably the
# `meta` accessor, given the `.meta` lookup on the parent below).
# Sources appearing in the visible lines, in order: this element's own
# meta_object, the parent's `meta`, and self.document.base_meta.
67 if self.meta_object is not None:
68 return self.meta_object
70 if self.getparent() is not None:
# Delegate upwards to the parent element.
71 return self.getparent().meta
# NOTE(review): presumably reached only when there is no parent - the branch
# header is not visible here.
73 return self.document.base_meta
# Fragment: the enclosing def line is not visible in this view.
# Returns the `gettext` attribute of whatever get_translation() yields for
# the language recorded in this element's metadata (self.meta.language).
77 return get_translation(self.meta.language).gettext
# Look up the attribute named `setting` on the ancestors, nearest first, and
# return it from the closest ancestor that has it.
# NOTE(review): the lines between fetching the parent and the getattr call
# (presumably a no-parent guard and the `try:`) are not visible in this view,
# so the result for an element without a parent cannot be told from here.
79 def in_context_of(self, setting):
80 parent = self.getparent()
84 return getattr(parent, setting)
# The parent lacks the attribute: continue the search from the parent.
85 except AttributeError:
86 return parent.in_context_of(setting)
# Like in_context_of(), but the ancestor attribute named `setting` is indexed
# with `key`; `default` is passed along unchanged while walking up.
# NOTE(review): the lines between fetching the parent and the lookup
# (presumably a no-parent guard returning `default`, and the `try:`) are not
# visible in this view - confirm against the full source.
88 def get_context_map(self, setting, key, default=None):
89 parent = self.getparent()
93 return getattr(parent, setting)[key]
# Only a missing attribute is handled by this visible clause; it continues the
# search from the parent.
94 except AttributeError:
95 return parent.get_context_map(setting, key, default)
def signal(self, signal):
    """Forward `signal` to the parent element, if there is one.

    This implementation does nothing with the signal itself; it only
    hands it one level up the tree.
    """
    ancestor = self.getparent()
    if ancestor is None:
        # No parent: nothing to forward to.
        return
    ancestor.signal(signal)
# Accumulate into `t` the normalised text of this element: its own text plus
# text contributed by its children and their tails.
# NOTE(review): the initialisation of `t`, the child loop header, the handling
# of non-WLElement children and the return are not visible in this view.
102 def raw_printable_text(self, builder):
# Function-scope import; its use is not visible in this view (presumably for
# children that are not WLElement instances).
103 from librarian.html import raw_printable_text
105 # TODO: sub-tags, highlights (emphasis), etc
107 t += self.normalize_text(self.text, builder)
109 if not isinstance(c, WLElement):
# Children with these tags do not contribute their own text.
111 if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
112 t += c.raw_printable_text(builder)
# NOTE(review): whether the tail is added for skipped tags too depends on
# indentation that is not recoverable from this view.
113 t += self.normalize_text(c.tail, builder)
# Prepare `text` for output: apply the text substitutions, then optional
# steps selected by attributes of `builder`.
# NOTE(review): several lines (including any guard for empty text, the
# hyphenation loop header and the return) are not visible in this view.
116 def normalize_text(self, text, builder):
# Apply each (old, new) substitution in order.
118 for e, s in self.text_substitutions:
119 text = text.replace(e, s)
# Optional: collapse every run of whitespace into a single space.
121 if getattr(builder, 'normalize_whitespace', False):
122 text = re.sub(r'\s+', ' ', text)
# Optional: hyphenation, when the builder provides a hyphenator.
124 if getattr(builder, 'hyphenator', None) is not None:
# Split into runs of word characters and single non-word characters.
126 wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
# '\u00AD' is SOFT HYPHEN, handed to the hyphenator as the insertion string.
128 newt += builder.hyphenator.inserted(w, '\u00AD')
# Replace whitespace that follows "whitespace + one word character" with
# '\u00A0' (NO-BREAK SPACE). The condition guarding this line is not visible.
132 text = re.sub(r'(?<=\s\w)\s+', '\u00A0', text)
# Shared traversal behind the per-format builds: push this element's text,
# build each WLElement child through the method named `build_method`, and push
# the children's tails.
# NOTE(review): some lines (including the initialisation of `real_child_count`
# and the text adjustments before each push) are not visible in this view.
136 def _build_inner(self, builder, build_method):
137 child_count = len(self)
# Leading text is only emitted when this element may carry text.
138 if self.CAN_HAVE_TEXT and self.text:
139 text = self.normalize_text(self.text, builder)
144 builder.push_text(text)
145 for i, child in enumerate(self):
147 if isinstance(child, WLElement):
# Dispatch by name to the child's format-specific build method.
148 getattr(child, build_method)(builder)
# The hook receives the counter value from before the increment below.
149 self.after_child(builder, real_child_count)
150 real_child_count += 1
152 # FIXME base builder api
# Comment nodes are passed to the builder only when it has a truthy `debug`.
153 elif getattr(builder, 'debug', False) and child.tag is etree.Comment:
154 builder.process_comment(child)
155 if self.CAN_HAVE_TEXT and child.tail:
156 text = self.normalize_text(child.tail, builder)
# Special case for the last child's tail when STRIP is set; what is done to
# `text` here is not visible in this view.
157 if self.STRIP and i == child_count - 1:
159 builder.push_text(text)
# Hook called by _build_inner() after each built child: calls the method of
# this element whose name is stored in builder.after_child_fn.
# NOTE(review): the line between the lookup and the call is not visible -
# presumably a check that `fn` is set, since `fn` defaults to None.
161 def after_child(self, builder, child_count):
162 fn = getattr(builder, 'after_child_fn', None)
164 getattr(self, builder.after_child_fn)(builder, child_count)
# The body is not visible in this view. The signature matches the arguments
# after_child() passes on, so this is presumably the hook selected by the
# plain-text builder - confirm against the full source.
166 def txt_after_child(self, builder, child_count):
def _txt_build_inner(self, builder):
    """Emit this element's content for the plain-text build.

    Delegates to _build_inner(), asking it to build every child through
    the child's `txt_build` method.
    """
    child_method = 'txt_build'
    self._build_inner(builder, child_method)
def txt_build(self, builder):
    """Render this element with the plain-text builder.

    The content produced by _txt_build_inner() is wrapped symmetrically:
    top margin and prefix before it, suffix and bottom margin after it.
    The order of the builder calls is significant.
    """
    # Opening part: margin first, then the prefix text.
    builder.push_margin(self.TXT_TOP_MARGIN)
    builder.push_text(self.TXT_PREFIX, True)

    # The element's own text and children.
    self._txt_build_inner(builder)

    # Closing part mirrors the opening: suffix text, then margin.
    builder.push_text(self.TXT_SUFFIX, True)
    builder.push_margin(self.TXT_BOTTOM_MARGIN)
def _html_build_inner(self, builder):
    """Emit this element's content for the HTML build.

    Delegates to _build_inner(), asking it to build every child through
    the child's `html_build` method.
    """
    child_method = 'html_build'
    self._build_inner(builder, child_method)
# Assemble the attribute mapping used for this element's HTML output.
# NOTE(review): the condition guarding the 'class' assignment and the return
# statement are not visible in this view.
182 def get_html_attr(self, builder):
# Work on a copy so self.HTML_ATTR itself is not modified.
183 attr = self.HTML_ATTR.copy()
185 attr['class'] = self.HTML_CLASS
187 # always copy the id attribute (?)
188 if self.attrib.get('id'):
189 attr['id'] = self.attrib['id']
# Checked after 'id'; if the two checks are siblings (indentation is not
# recoverable here), a non-empty '_id' overrides the value set above.
190 if self.attrib.get('_id'):
191 attr['id'] = self.attrib['_id']
# Render this element with the HTML builder: optionally register a visible
# number, then open an element, emit the content and close the element.
# NOTE(review): some lines (other numbering branches, the remaining
# start_element arguments) are not visible in this view.
194 def html_build(self, builder):
195 # Do we need a number?
196 numbering = self.numbering
197 if numbering == 'main':
# A visible number needs both the builder option and the element's own flag.
198 if builder.with_numbering and self.has_visible_numbering:
199 builder.add_visible_number(self)
202 builder.start_element(
204 self.get_html_attr(builder),
207 self._html_build_inner(builder)
209 builder.end_element()
def _epub_build_inner(self, builder):
    """Emit this element's content for the EPUB build.

    Delegates to _build_inner(), asking it to build every child through
    the child's `epub_build` method.
    """
    child_method = 'epub_build'
    self._build_inner(builder, child_method)
# Assemble the attribute mapping used for this element's EPUB output.
# NOTE(review): the condition guarding the 'class' assignment and the return
# statement are not visible in this view.
214 def get_epub_attr(self, builder):
# Work on a copy so self.EPUB_ATTR itself is not modified.
215 attr = self.EPUB_ATTR.copy()
217 attr['class'] = self.EPUB_CLASS
# Render this element with the EPUB builder: chunk handling, an optional
# table-of-contents entry, then open an element, emit content, close it.
# NOTE(review): many branch headers and call arguments are not visible in this
# view, so the conditions on individual statements below are partly unknown.
220 def epub_build(self, builder):
# Function-scope import of the class used in the parent check below.
221 from librarian.elements.masters import Master
# Sets CAN_HAVE_TEXT on this instance; the guarding condition is not visible.
224 self.CAN_HAVE_TEXT = True
227 start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
230 builder.start_chunk()
# Elements with a section precedence get a TOC entry unless an ancestor
# provides a truthy NO_TOC setting.
233 if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'):
# The builder-assigned section number becomes the element's id.
235 fragment = 'sub%d' % builder.assign_section_number()
236 self.attrib['id'] = fragment
238 builder.add_toc_entry(
240 self.raw_printable_text(builder),
241 self.SECTION_PRECEDENCE
245 attr = self.get_epub_attr(builder)
247 attr['id'] = fragment
# Debug marker "<chunkno>:<sourceline>"; the second assignment offsets the
# line by the last entry of builder.splits (its condition is not visible).
249 chunkno, sourceline = 0, self.sourceline
251 chunkno, sourceline = len(builder.splits), sourceline - builder.splits[-1]
252 attr['data-debug'] = f'{chunkno}:{sourceline}'
253 builder.start_element(
258 self._epub_build_inner(builder)
260 builder.end_element()
# Fragment: the enclosing def line is not visible in this view (presumably a
# validation method - the name is an assumption to confirm).
# Function-scope imports of the classes used in the parent check below.
263 from librarian.elements.masters import Master
264 from librarian.elements.blocks import DlugiCytat, PoezjaCyt
265 from librarian.elements.footnotes import Footnote
# An element with a section precedence must sit directly inside one of the
# listed container types.
# NOTE(review): this is an `assert`, so the check is skipped under `python -O`.
267 if self.SECTION_PRECEDENCE:
268 assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt, Footnote)), \
269 'Header {} inside a <{}> instead of a master.'.format(
270 etree.tostring(self, encoding='unicode'), self.getparent().tag)
# Per-child step for WLElement children; the loop header and the action taken
# are not visible in this view.
273 if isinstance(c, WLElement):
# Fragment: the enclosing def line, the loop header and the action taken for
# each WLElement child are not visible in this view.
278 # TODO: Remove insanity here.
280 if isinstance(e, WLElement):
# Collect, working backwards, up to `words` words of content and return
# (remaining word count, snippet). The snippet is a list of event tuples:
# ('text', str), ('start', tag, attrib) and ('end',).
# `before` marks the child to start in front of; `sub` is True on the
# recursive calls into children.
# NOTE(review): the loop over preceding children, the initialisation of
# `snippet` and several branch headers are not visible in this view.
283 def snip(self, words, before=None, sub=False):
# What happens for ASIDE elements reached as children is not visible here.
284 if sub and self.ASIDE:
288 if before is not None:
289 i = self.index(before)
# Take words from the tail of child `i`; the text goes in front of the snippet.
297 words, text = last_words(self[i].tail, words)
298 snippet = [('text', text)] + snippet
# Recurse into child `i` and put its events in front of the snippet.
301 words, subsnip = self[i].snip(words, sub=True)
302 snippet = subsnip + snippet
# Budget left over: also take words from this element's own leading text.
304 if words and self.text:
305 words, text = last_words(self.text, words)
306 snippet = [('text', text)] + snippet
# Wrap everything collected so far in this element's start/end events.
308 snippet = [('start', self.tag, self.attrib)] + snippet + [('end',)]
# Still short of words: continue in the parent, with content before `self`.
310 if not sub and words and not self.ASIDE:
312 parent = self.getparent()
313 if parent is not None and parent.CAN_HAVE_TEXT:
314 words, parsnip = parent.snip(words, before=self)
# Insert our events inside the parent's, just before its last event.
315 return words, parsnip[:-1] + snippet + parsnip[-1:]
317 return words, snippet
# Build a new 'snippet' element from the events that the parent's snip()
# returns for the content preceding this element (budget: `words` words).
# NOTE(review): the loop over the events, its branch headers and the return
# statement are not visible in this view.
319 def get_snippet(self, words=15):
# Function-scope import of the parser used to create the new elements.
320 from librarian.parser import parser
322 words, snippet = self.getparent().snip(words=words, before=self)
# `cursor` tracks the element currently being filled.
324 cursor = snipelem = parser.makeelement('snippet')
# Pre-fill the attribute that meta_object caches its result in.
325 snipelem._meta_object = self.meta
# Create an element from s[1] with s[2] as attributes (presumably for a
# 'start' event, whose tuple is (kind, tag, attrib) in snip()).
328 elem = parser.makeelement(s[1], **s[2])
# Step back up one level (presumably for an 'end' event).
332 cursor = cursor.getparent()
# Append s[1] to the last child's tail, or to the cursor's own text; the
# condition choosing between the two is not visible here.
335 cursor[-1].tail = (cursor[-1].tail or '') + s[1]
337 cursor.text = (cursor.text or '') + s[1]
# Fragment: the enclosing def line and the return statements are not visible
# in this view (presumably the `numbering` accessor read by html_build() and
# assign_id()).
# Start from the NUMBERING attribute.
343 numbering = self.NUMBERING
# No numbering configured, or an ancestor provides a truthy DISABLE_NUMBERING;
# the statement under this condition is not visible.
344 if numbering is None or self.in_context_of('DISABLE_NUMBERING'):
# Look the value up in an ancestor's SUPPRESS_NUMBERING mapping; the value
# itself is also passed as the default.
346 numbering = self.get_context_map('SUPPRESS_NUMBERING', numbering, numbering)
# Fragment: the enclosing def line, the condition between the two assignments
# and the return are not visible in this view (presumably the `id_prefix`
# accessor used by assign_id()).
351 prefix = self.numbering
353 # TODO: self.context.main_numbering_prefix
354 prefix = 'f' # default numbering prefix
# Give this element an '_id' attribute built from self.id_prefix and the
# builder's counter for this element's numbering, then advance that counter.
# NOTE(review): the line after reading `numbering` is not visible - presumably
# a guard that skips elements without numbering.
357 def assign_id(self, builder):
358 numbering = self.numbering
# The counter value is read before it is incremented below.
360 number = str(builder.counters[numbering])
361 self.attrib['_id'] = self.id_prefix + number
362 builder.counters[numbering] += 1
# 'main' numbering: the visible number comes from the separate '_visible'
# counter.
364 if numbering == 'main':
365 self.attrib['_visible_numbering'] = str(builder.counters['_visible'])
366 builder.counters['_visible'] += 1
# 'fn' numbering: the visible number is the same as the id number.
368 if numbering == 'fn':
369 self.attrib['_visible_numbering'] = number
# Fragment: the enclosing def line is not visible in this view (presumably
# get_link, given the recursive call on the parent).
# Use this element's own '_id' when it is set and non-empty, otherwise ask the
# parent.
# NOTE(review): there is no guard for a missing parent on this line.
372 return self.attrib.get('_id') or self.getparent().get_link()
375 class Snippet(WLElement):