fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
bit longer author field
[librarian.git]
/
src
/
librarian
/
elements
/
base.py
diff --git
a/src/librarian/elements/base.py
b/src/librarian/elements/base.py
index
646067e
..
b0d16ed
100644
(file)
--- a/
src/librarian/elements/base.py
+++ b/
src/librarian/elements/base.py
@@
-1,3
+1,6
@@
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
+#
import copy
import re
from lxml import etree
import copy
import re
from lxml import etree
@@
-39,11
+42,11
@@
class WLElement(etree.ElementBase):
STRIP = False
text_substitutions = [
STRIP = False
text_substitutions = [
- (
u'---', u
'—'),
- (
u'--', u
'–'),
- #(
u'...', u
'…'), # Temporary turnoff for epub
- (
u',,', u
'„'),
- (
u'"', u
'”'),
+ (
'---',
'—'),
+ (
'--',
'–'),
+ #(
'...',
'…'), # Temporary turnoff for epub
+ (
',,',
'„'),
+ (
'"',
'”'),
('\ufeff', ''),
("'", "\u2019"), # This was enabled for epub.
('\ufeff', ''),
("'", "\u2019"), # This was enabled for epub.
@@
-113,11
+116,11
@@
class WLElement(etree.ElementBase):
newt = ''
wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
for w in wlist:
newt = ''
wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(text)
for w in wlist:
- newt += builder.hyphenator.inserted(w,
u
'\u00AD')
+ newt += builder.hyphenator.inserted(w, '\u00AD')
text = newt
if builder.orphans:
text = newt
if builder.orphans:
- text = re.sub(r'(?<=\s\w)\s+',
u
'\u00A0', text)
+ text = re.sub(r'(?<=\s\w)\s+', '\u00A0', text)
return text
return text
@@
-133,6
+136,8
@@
class WLElement(etree.ElementBase):
for i, child in enumerate(self):
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
for i, child in enumerate(self):
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
+ elif getattr(builder, 'debug') and child.tag is etree.Comment:
+ builder.process_comment(child)
if self.CAN_HAVE_TEXT and child.tail:
text = self.normalize_text(child.tail, builder)
if self.STRIP and i == child_count - 1:
if self.CAN_HAVE_TEXT and child.tail:
text = self.normalize_text(child.tail, builder)
if self.STRIP and i == child_count - 1:
@@
-195,7
+200,7
@@
class WLElement(etree.ElementBase):
# TEMPORARY
self.CAN_HAVE_TEXT = True
self.STRIP = False
# TEMPORARY
self.CAN_HAVE_TEXT = True
self.STRIP = False
-
+
start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
if start_chunk:
start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
if start_chunk:
@@
-217,6
+222,11
@@
class WLElement(etree.ElementBase):
attr = self.get_epub_attr(builder)
if fragment:
attr['id'] = fragment
attr = self.get_epub_attr(builder)
if fragment:
attr['id'] = fragment
+ if builder.debug:
+ chunkno, sourceline = 0, self.sourceline
+ if builder.splits:
+ chunkno, sourceline = len(builder.splits), sourceline - builder.splits[-1]
+ attr['data-debug'] = f'{chunkno}:{sourceline}'
builder.start_element(
self.EPUB_TAG,
attr
builder.start_element(
self.EPUB_TAG,
attr
@@
-278,8
+288,6
@@
class WLElement(etree.ElementBase):
# do we dare go up?
parent = self.getparent()
if parent is not None and parent.CAN_HAVE_TEXT:
# do we dare go up?
parent = self.getparent()
if parent is not None and parent.CAN_HAVE_TEXT:
- print(etree.tostring(self, encoding='unicode'))
- assert False
words, parsnip = parent.snip(words, before=self)
return words, parsnip[:-1] + snippet + parsnip[-1:]
words, parsnip = parent.snip(words, before=self)
return words, parsnip[:-1] + snippet + parsnip[-1:]