4 from librarian import dcparser, RDFNS
5 from librarian.util import get_translation
# Walk the whitespace-separated words of `text` from the end and return a
# 2-tuple: `n` and the collected `words` re-joined with single spaces in their
# original (forward) order.
# NOTE(review): the loop body that fills `words` and (presumably) decrements
# `n` is not visible in this listing -- confirm the exact counting rule there.
7 def last_words(text, n):
9 for w in reversed(text.split()):
17 return n, ' '.join(reversed(words))
20 class WLElement(etree.ElementBase):
21 SECTION_PRECEDENCE = None
36 EPUB_START_CHUNK = False
41 text_substitutions = [
44 #(u'...', u'…'), # Temporary turnoff for epub
49 ("'", "\u2019"), # This was enabled for epub.
# Return this element's own metadata object, computing it once and caching it
# in `self._meta_object`.
# On the first call the RDF child is looked up with `self.find(RDFNS('RDF'))`;
# the cache is then set either to `dcparser.BookInfo.from_element(elem)` or to
# None.
# NOTE(review): the condition selecting between the two assignments is not
# visible here -- presumably "an RDF child was found"; confirm. Other code
# reads `self.meta_object` without calling it, so this is presumably wrapped
# in @property on a line not shown.
53 def meta_object(self):
54 if not hasattr(self, '_meta_object'):
55 elem = self.find(RDFNS('RDF'))
57 self._meta_object = dcparser.BookInfo.from_element(elem)
59 self._meta_object = None
60 return self._meta_object
64 if self.meta_object is not None:
65 return self.meta_object
67 if self.getparent() is not None:
68 return self.getparent().meta
70 return self.document.base_meta
74 return get_translation(self.meta.language).gettext
# Resolve `setting` by walking up the element tree: return the attribute named
# `setting` from the parent if it has one, otherwise (AttributeError) delegate
# to the parent's own in_context_of().
# NOTE(review): the handling of a missing parent (getparent() returning None)
# is on lines not visible in this listing -- confirm what is returned at the
# root.
76 def in_context_of(self, setting):
77 parent = self.getparent()
81 return getattr(parent, setting)
82 except AttributeError:
83 return parent.in_context_of(setting)
# React to `signal`; the visible lines fetch the parent element and only act
# when a parent exists.
# NOTE(review): the action taken is not visible here -- presumably the signal
# is forwarded to the parent; confirm.
85 def signal(self, signal):
86 parent = self.getparent()
87 if parent is not None:
# Build the plain printable text of this element by accumulating into `t`:
# the element's own normalized text, the raw_printable_text() of child
# WLElements whose tag is not one of 'pe', 'pa', 'pt', 'pr', 'motyw', and
# normalized child tails.
# NOTE(review): the initialisation of `t`, the child loop header, the
# handling of non-WLElement children and the return statement are on lines
# not visible in this listing.
90 def raw_printable_text(self, builder):
# Function-scope import of a helper from librarian.html.
91 from librarian.html import raw_printable_text
93 # TODO: subtags, emphasis, etc
95 t += self.normalize_text(self.text, builder)
97 if not isinstance(c, WLElement):
# Children with these tags contribute no text of their own here.
99 if c.tag not in ('pe', 'pa', 'pt', 'pr', 'motyw'):
100 t += c.raw_printable_text(builder)
101 t += self.normalize_text(c.tail, builder)
# Normalize a piece of text for output.
# Visible steps:
#   * apply every (old, new) pair from `self.text_substitutions` via
#     str.replace;
#   * if `builder` has a non-None `hyphenator` attribute, split the text into
#     runs of word characters and single non-word characters and pass pieces
#     to `builder.hyphenator.inserted(w, u'\u00AD')` (U+00AD is the soft
#     hyphen);
#   * replace a whitespace run that directly follows "whitespace + one word
#     character" with U+00A0 (no-break space).
# NOTE(review): the guard for empty/None text, the loop assembling `newt`,
# the nesting of the last substitution and the return statement are on lines
# not visible in this listing.
104 def normalize_text(self, text, builder):
106 for e, s in self.text_substitutions:
107 text = text.replace(e, s)
108 # FIXME: Temporary turnoff
109 # text = re.sub(r'\s+', ' ', text)
110 ### TODO: Added now for epub
112 if getattr(builder, 'hyphenator', None) is not None:
# Tokenize into word runs and individual non-word characters.
114 wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
116 newt += builder.hyphenator.inserted(w, u'\u00AD')
# Lookbehind: only whitespace that comes right after a one-character word.
120 text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
# Shared driver for the per-format builders: emit this element's content into
# `builder`, dispatching to the method named `build_method` on each child.
#   * Own text: pushed (after normalize_text) when CAN_HAVE_TEXT is truthy and
#     the text is non-empty.
#   * Children: for each WLElement child, `getattr(child, build_method)` is
#     called with `builder`.
#   * Tails: a child's tail is normalized and pushed when CAN_HAVE_TEXT is
#     truthy and the tail is non-empty.
# NOTE(review): what happens to the text when STRIP is set (first text / last
# child's tail) is on lines not visible here -- presumably whitespace
# stripping; confirm.
124 def _build_inner(self, builder, build_method):
125 child_count = len(self)
126 if self.CAN_HAVE_TEXT and self.text:
127 text = self.normalize_text(self.text, builder)
132 builder.push_text(text)
133 for i, child in enumerate(self):
134 if isinstance(child, WLElement):
135 getattr(child, build_method)(builder)
136 if self.CAN_HAVE_TEXT and child.tail:
137 text = self.normalize_text(child.tail, builder)
# Special case for the tail of the last child.
138 if self.STRIP and i == child_count - 1:
140 builder.push_text(text)
# Emit this element's content for the text output by running _build_inner
# with 'txt_build' as the child build method.
142 def _txt_build_inner(self, builder):
143 self._build_inner(builder, 'txt_build')
# Emit this element into the text `builder`: top margin, TXT_PREFIX, inner
# content, TXT_SUFFIX, bottom margin.
# If the element has a TXT_LEGACY_TOP_MARGIN / TXT_LEGACY_BOTTOM_MARGIN
# attribute, it is passed to builder.push_legacy_margin(); the regular
# TXT_TOP_MARGIN / TXT_BOTTOM_MARGIN go to builder.push_margin().
# NOTE(review): the lines between the legacy and regular margin calls are not
# visible -- presumably an `else:`; confirm that the two are alternatives.
145 def txt_build(self, builder):
146 if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
147 builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
149 builder.push_margin(self.TXT_TOP_MARGIN)
# Prefix and suffix are pushed with a second positional argument of True.
150 builder.push_text(self.TXT_PREFIX, True)
151 self._txt_build_inner(builder)
152 builder.push_text(self.TXT_SUFFIX, True)
153 if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
154 builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
156 builder.push_margin(self.TXT_BOTTOM_MARGIN)
# Emit this element's content for the HTML output by running _build_inner
# with 'html_build' as the child build method.
158 def _html_build_inner(self, builder):
159 self._build_inner(builder, 'html_build')
# Compute the attribute dict for this element's HTML tag.
# Starts from a copy of HTML_ATTR (so the class-level dict is not mutated),
# may set 'class' from HTML_CLASS, and sets 'id' from the element's own 'id'
# attribute or, failing that, from '_compat_section_id' when the element has
# a truthy SHOULD_HAVE_ID.
# NOTE(review): the condition guarding the 'class' assignment and the return
# statement are on lines not visible in this listing.
161 def get_html_attr(self, builder):
162 attr = self.HTML_ATTR.copy()
164 attr['class'] = self.HTML_CLASS
165 # always copy the id attribute (?)
166 if self.attrib.get('id'):
167 attr['id'] = self.attrib['id']
168 elif getattr(self, 'SHOULD_HAVE_ID', False) and '_compat_section_id' in self.attrib:
169 attr['id'] = self.attrib['_compat_section_id']
# Emit this element into the HTML `builder`: open an element with the
# attributes from get_html_attr(), emit the inner content, close the element.
# NOTE(review): the tag argument to start_element and any condition around
# the start/end calls are on lines not visible in this listing.
172 def html_build(self, builder):
174 builder.start_element(
176 self.get_html_attr(builder),
179 self._html_build_inner(builder)
181 builder.end_element()
# Emit this element's content for the EPUB output by running _build_inner
# with 'epub_build' as the child build method.
183 def _epub_build_inner(self, builder):
184 self._build_inner(builder, 'epub_build')
# Compute the attribute dict for this element's EPUB tag: a copy of EPUB_ATTR
# with 'class' possibly set from EPUB_CLASS.
# NOTE(review): the condition guarding the 'class' assignment and the return
# statement are on lines not visible in this listing.
186 def get_epub_attr(self, builder):
187 attr = self.EPUB_ATTR.copy()
189 attr['class'] = self.EPUB_CLASS
# Emit this element into the EPUB `builder`.
# Visible steps:
#   * CAN_HAVE_TEXT is set to True on the instance (guarding condition not
#     visible);
#   * a new chunk is started when EPUB_START_CHUNK is truthy and the parent
#     is a Master;
#   * when SECTION_PRECEDENCE is truthy and no ancestor setting 'NO_TOC'
#     applies, a fragment id 'sub<N>' is taken from
#     builder.assign_section_number(), stored as the element's 'id'
#     attribute, and a TOC entry is added using raw_printable_text() and
#     SECTION_PRECEDENCE;
#   * the element is opened with get_epub_attr() attributes (with 'id' set to
#     the fragment), inner content is emitted, and the element is closed.
# NOTE(review): several guarding conditions and call arguments are on lines
# not visible in this listing; confirm nesting before relying on the above.
192 def epub_build(self, builder):
# Function-scope import of Master from librarian.elements.masters.
193 from librarian.elements.masters import Master
196 self.CAN_HAVE_TEXT = True
199 start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
202 builder.start_chunk()
205 if self.SECTION_PRECEDENCE and not self.in_context_of('NO_TOC'):
207 fragment = 'sub%d' % builder.assign_section_number()
208 self.attrib['id'] = fragment
210 builder.add_toc_entry(
212 self.raw_printable_text(builder),
213 self.SECTION_PRECEDENCE
217 attr = self.get_epub_attr(builder)
219 attr['id'] = fragment
220 builder.start_element(
225 self._epub_build_inner(builder)
227 builder.end_element()
230 from librarian.elements.masters import Master
231 from librarian.elements.blocks import DlugiCytat, PoezjaCyt
232 from librarian.elements.footnotes import Footnote
234 if self.SECTION_PRECEDENCE:
235 assert isinstance(self.getparent(), (Master, DlugiCytat, PoezjaCyt, Footnote)), \
236 'Header {} inside a <{}> instead of a master.'.format(
237 etree.tostring(self, encoding='unicode'), self.getparent().tag)
240 if isinstance(c, WLElement):
245 # TODO: Remove insanity here.
247 if isinstance(e, WLElement):
# Build a snippet of the content that precedes `before` inside this element.
# The snippet is a list of tuples: ('start', tag, attrib), ('text', str) and
# ('end',). Returns a 2-tuple (words, snippet), where `words` is the count
# handed back by last_words() (presumably the words still wanted -- confirm).
# Text is taken, back to front, from child tails, from child sub-snippets
# (snip(..., sub=True)) and finally from this element's own text.
# When called with sub falsy, `words` is still truthy and the element is not
# ASIDE, the parent (if it exists and CAN_HAVE_TEXT) is asked to continue,
# and this snippet is spliced in just before the parent's closing ('end',).
# NOTE(review): the early return for `sub and self.ASIDE`, the backwards loop
# over children and the initialisation of `snippet` are on lines not visible
# in this listing.
250 def snip(self, words, before=None, sub=False):
251 if sub and self.ASIDE:
255 if before is not None:
256 i = self.index(before)
264 words, text = last_words(self[i].tail, words)
265 snippet = [('text', text)] + snippet
268 words, subsnip = self[i].snip(words, sub=True)
269 snippet = subsnip + snippet
271 if words and self.text:
272 words, text = last_words(self.text, words)
273 snippet = [('text', text)] + snippet
# Wrap the collected content in this element's own start/end markers.
275 snippet = [('start', self.tag, self.attrib)] + snippet + [('end',)]
277 if not sub and words and not self.ASIDE:
279 parent = self.getparent()
280 if parent is not None and parent.CAN_HAVE_TEXT:
# NOTE(review): prints the serialized element to stdout -- looks like
# leftover debug output; confirm whether it is intended.
281 print(etree.tostring(self, encoding='unicode'))
283 words, parsnip = parent.snip(words, before=self)
284 return words, parsnip[:-1] + snippet + parsnip[-1:]
286 return words, snippet
# Build a 'snippet' element holding the context that precedes this element.
# Asks the parent for a snip() of `words` words before `self`, creates a
# <snippet> element with the project parser, gives it this element's `meta`
# as `_meta_object`, and replays the snippet entries into it: an element is
# made from each entry's tag and attributes (s[1], s[2]), the cursor moves
# back to its parent, and text is appended either to the tail of the cursor's
# last child or to the cursor's own text.
# NOTE(review): the loop over the snippet entries, the conditions choosing
# between these actions and the return statement are on lines not visible in
# this listing.
288 def get_snippet(self, words=15):
# Function-scope import of the project parser.
289 from librarian.parser import parser
291 words, snippet = self.getparent().snip(words=words, before=self)
293 cursor = snipelem = parser.makeelement('snippet')
294 snipelem._meta_object = self.meta
297 elem = parser.makeelement(s[1], **s[2])
301 cursor = cursor.getparent()
304 cursor[-1].tail = (cursor[-1].tail or '') + s[1]
306 cursor.text = (cursor.text or '') + s[1]
311 sec = getattr(self, 'SHOULD_HAVE_ID', False) and self.attrib.get('_compat_section_id')
314 parent_index = self.getparent().index(self)
316 return self.getparent()[parent_index - 1].get_link()
318 return self.getparent().get_link()
321 class Snippet(WLElement):