+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
+#
import copy
import re
from lxml import etree
STRIP = False
text_substitutions = [
- (u'---', u'—'),
- (u'--', u'–'),
- #(u'...', u'…'), # Temporary turnoff for epub
- (u',,', u'„'),
- (u'"', u'”'),
+ ('---', '—'),
+ ('--', '–'),
+ #('...', '…'), # Temporarily disabled for epub
+ (',,', '„'),
+ ('"', '”'),
('\ufeff', ''),
("'", "\u2019"), # This was enabled for epub.
newt = ''
wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
for w in wlist:
- newt += builder.hyphenator.inserted(w, u'\u00AD')
+ newt += builder.hyphenator.inserted(w, '\u00AD')
text = newt
if builder.orphans:
- text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
+ text = re.sub(r'(?<=\s\w)\s+', '\u00A0', text)
return text
for i, child in enumerate(self):
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
+ elif getattr(builder, 'debug') and child.tag is etree.Comment:
+ builder.process_comment(child)
if self.CAN_HAVE_TEXT and child.tail:
text = self.normalize_text(child.tail, builder)
if self.STRIP and i == child_count - 1:
# TEMPORARY
self.CAN_HAVE_TEXT = True
self.STRIP = False
-
+
start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
if start_chunk:
attr = self.get_epub_attr(builder)
if fragment:
attr['id'] = fragment
+ if builder.debug:
+ chunkno, sourceline = 0, self.sourceline
+ if builder.splits:
+ chunkno, sourceline = len(builder.splits), sourceline - builder.splits[-1]
+ attr['data-debug'] = f'{chunkno}:{sourceline}'
builder.start_element(
self.EPUB_TAG,
attr
# do we dare go up?
parent = self.getparent()
if parent is not None and parent.CAN_HAVE_TEXT:
- print(etree.tostring(self, encoding='unicode'))
- assert False
words, parsnip = parent.snip(words, before=self)
return words, parsnip[:-1] + snippet + parsnip[-1:]