+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
+#
import copy
import re
from lxml import etree
STRIP = False
text_substitutions = [
- (u'---', u'—'),
- (u'--', u'–'),
- #(u'...', u'…'), # Temporary turnoff for epub
- (u',,', u'„'),
- (u'"', u'”'),
+ ('---', '—'),
+ ('--', '–'),
+ #('...', '…'), # Temporarily disabled for epub
+ (',,', '„'),
+ ('"', '”'),
('\ufeff', ''),
("'", "\u2019"), # This was enabled for epub.
newt = ''
wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
for w in wlist:
- newt += builder.hyphenator.inserted(w, u'\u00AD')
+ newt += builder.hyphenator.inserted(w, '\u00AD')
text = newt
if builder.orphans:
- text = re.sub(r'(?<=\s\w)\s+', u'\u00A0', text)
+ text = re.sub(r'(?<=\s\w)\s+', '\u00A0', text)
return text
# do we dare go up?
parent = self.getparent()
if parent is not None and parent.CAN_HAVE_TEXT:
- print(etree.tostring(self, encoding='unicode'))
- assert False
words, parsnip = parent.snip(words, before=self)
return words, parsnip[:-1] + snippet + parsnip[-1:]