This document records all notable changes to Librarian.
+## 23.12
+
+- Added a debug mode for EPUB output and an `epubcheck` utility.
+
## 23.10
- Remove direct verse styling from HTML.
setup(
name='librarian',
- version='23.10',
+ version='23.12',
description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
author="Marek Stępniowski",
author_email='marek@stepniowski.com',
from datetime import date
import io
import os
+import re
import tempfile
from ebooklib import epub
from lxml import etree
isbn_field = 'isbn_epub'
orphans = True
def __init__(self, *args, debug=False, **kwargs):
    """Set up per-build state, then defer to the parent initializer.

    `debug` is keyword-only; every other positional and keyword argument
    is forwarded unchanged to the parent class.
    """
    # Debug bookkeeping: the flag itself, plus the list that
    # process_comment() appends to.
    self.debug = debug
    self.splits = []
    # Remaining per-instance state, initialised before the parent runs.
    self.chars = set()
    self.fundr = 0
    super().__init__(*args, **kwargs)
def build(self, document, **kwargs):
file_name=name
)
return name
+
def process_comment(self, comment):
    """Record a split offset when `comment` starts with a ``TRIM:<n>`` marker.

    The comment text is matched from its beginning against ``TRIM:`` followed
    by digits. On a match, ``comment.sourceline - n`` is appended to
    ``self.splits``; any other comment is ignored.
    """
    trim = re.match(r'TRIM:(\d+)', comment.text)
    if trim is None:
        return
    # NOTE(review): n presumably counts source lines trimmed before this
    # point -- confirm against whatever emits the TRIM comments.
    trimmed = int(trim.group(1))
    self.splits.append(comment.sourceline - trimmed)
for i, child in enumerate(self):
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
+ elif getattr(builder, 'debug') and child.tag is etree.Comment:
+ builder.process_comment(child)
if self.CAN_HAVE_TEXT and child.tail:
text = self.normalize_text(child.tail, builder)
if self.STRIP and i == child_count - 1:
# TEMPORARY
self.CAN_HAVE_TEXT = True
self.STRIP = False
-
+
start_chunk = self.EPUB_START_CHUNK and isinstance(self.getparent(), Master)
if start_chunk:
attr = self.get_epub_attr(builder)
if fragment:
attr['id'] = fragment
+ if builder.debug:
+ chunkno, sourceline = 0, self.sourceline
+ if builder.splits:
+ chunkno, sourceline = len(builder.splits), sourceline - builder.splits[-1]
+ attr['data-debug'] = f'{chunkno}:{sourceline}'
builder.start_element(
self.EPUB_TAG,
attr
--- /dev/null
+import json
+import re
+import subprocess
+import zipfile
+
+
def epubcheck(filename):
    """Run the ``epubcheck`` tool on an EPUB and annotate its messages.

    ``epubcheck -q -j - <filename>`` is executed and its standard output is
    parsed as JSON. For every reported location whose ``path`` starts with
    ``EPUB/part``, the corresponding file is read from the EPUB archive and
    the reported line, cut just before the reported column, is searched for
    ``data-debug="<chunk>:<line>"`` attributes. When at least one is found,
    the last one is stored on the location as integer ``wl_chunk`` and
    ``wl_line`` keys.

    Locations without a usable position (missing, non-positive or
    out-of-range ``line``) are left unannotated; a missing or non-positive
    ``column`` means the whole line is searched.

    Returns the list under the ``messages`` key of the report (an empty list
    when the key is absent).

    Raises whatever ``subprocess.run``, ``json.loads`` and ``zipfile`` raise,
    e.g. when the tool is not installed, prints no JSON, or `filename` is
    not a valid ZIP archive.
    """
    p = subprocess.run(
        ['epubcheck', '-q', '-j', '-', filename],
        capture_output=True
    )
    output = json.loads(p.stdout)
    messages = output.get('messages', [])

    debug_attr = re.compile(r' data-debug="(\d+):(\d+)')
    # Decoded lines of each part file, so a file referenced by many
    # locations is read from the archive only once.
    part_lines = {}

    # The context manager closes the archive even if annotation fails.
    with zipfile.ZipFile(filename) as epub:
        for message in messages:
            for loc in message.get('locations', []):
                path = loc.get('path') or ''
                if not path.startswith('EPUB/part'):
                    continue

                if path not in part_lines:
                    with epub.open(path) as zfile:
                        part_lines[path] = zfile.read().decode('utf-8').split('\n')
                lines = part_lines[path]

                # Skip unknown positions instead of letting a negative
                # index silently pick an unrelated line.
                line_no = loc.get('line') or 0
                if not 1 <= line_no <= len(lines):
                    continue
                line = lines[line_no - 1]

                column = loc.get('column') or 0
                if column >= 1:
                    line = line[:column - 1]

                found = debug_attr.findall(line)
                if found:
                    # The last marker before the error position is the
                    # closest one preceding it on that line.
                    chunk, source_line = found[-1]
                    loc['wl_chunk'] = int(chunk)
                    loc['wl_line'] = int(source_line)
    return messages
+
+
+