X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/223fd8f247b4a588d263afaf798dca4cb9ffa639..e316fc14bef26f958937aec0e6854b61f71a3b34:/librarian/pdf.py diff --git a/librarian/pdf.py b/librarian/pdf.py index 169d661..b9ead15 100644 --- a/librarian/pdf.py +++ b/librarian/pdf.py @@ -3,12 +3,17 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from __future__ import with_statement import os import os.path import shutil from StringIO import StringIO from tempfile import mkdtemp import re +from copy import deepcopy + +import sys +sys.path.append('..') # for running from working copy from Texml.processor import process from lxml import etree @@ -18,7 +23,13 @@ from librarian.parser import WLDocument from librarian import ParseError from librarian import functions + + functions.reg_substitute_entities() +functions.reg_person_name() +functions.reg_strip() +functions.reg_starts_white() +functions.reg_ends_white() STYLESHEETS = { 'wl2tex': 'xslt/wl2tex.xslt', @@ -26,7 +37,14 @@ STYLESHEETS = { def insert_tags(doc, split_re, tagname): - print tagname + """ inserts for every occurence of `split_re' in text nodes in the `doc' tree + + >>> t = etree.fromstring('A-B-CX-Y-Z'); + >>> insert_tags(t, re.compile('-'), 'd'); + >>> print etree.tostring(t) + ABCXYZ + """ + for elem in doc.iter(): if elem.text: chunks = split_re.split(elem.text) @@ -42,7 +60,7 @@ def insert_tags(doc, split_re, tagname): elem.tail = chunks.pop(0) while chunks: ins = etree.Element(tagname) - ins.tail = chunks.pop(0) + ins.tail = chunks.pop() parent.insert(ins_index, ins) @@ -58,8 +76,11 @@ def fix_hanging(doc): "nbsp") +def get_resource(path): + return os.path.join(os.path.dirname(__file__), path) + def get_stylesheet(name): - return os.path.join(os.path.dirname(__file__), STYLESHEETS[name]) + return get_resource(STYLESHEETS[name]) def transform(provider, slug, output_file=None, output_dir=None): """ produces a pdf file @@ -76,10 +97,34 @@ def transform(provider, slug, output_file=None, output_dir=None): document = load_including_children(provider, slug) + # dirty hack for the marginpar-creates-orphans LaTeX problem + # see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304 + for motif in document.edoc.findall('//strofa//motyw'): + # find relevant verse-level tag + verse, stanza = motif, motif.getparent() + while stanza is not None and stanza.tag != 'strofa': + verse, stanza = stanza, stanza.getparent() + breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True)) + breaks_after = sum(1 for i in verse.itersiblings('br')) + if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1: + move_by = 1 + if breaks_after == 2: + move_by += 1 + moved_motif = deepcopy(motif) + motif.tag = 'span' + motif.text = None + moved_motif.tail = None + moved_motif.set('moved', str(move_by)) + + for br in verse.itersiblings(tag='br'): + if move_by > 1: + move_by -= 1 + continue + br.addnext(moved_motif) + break + substitute_hyphens(document.edoc) fix_hanging(document.edoc) - - print etree.tostring(document.edoc) # if output to dir, create the file if output_dir is not None: @@ -96,6 +141,8 @@ def transform(provider, slug, output_file=None, output_dir=None): fout.close() del texml + shutil.copy(get_resource('pdf/wl.sty'), temp) + shutil.copy(get_resource('pdf/wl-logo.png'), temp) print "pdflatex -output-directory %s %s" % (temp, os.path.join(temp, 'doc.tex')) if os.system("pdflatex -output-directory %s %s" % (temp, os.path.join(temp, 'doc.tex'))): raise ParseError("Error parsing .tex file")