# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+from __future__ import with_statement
import os
import os.path
import shutil
from StringIO import StringIO
from tempfile import mkdtemp
import re
+from copy import deepcopy
+
+import sys
+sys.path.append('..') # for running from working copy
from Texml.processor import process
from lxml import etree
from librarian import ParseError
from librarian import functions
+
+
functions.reg_substitute_entities()
+functions.reg_person_name()
+functions.reg_strip()
+functions.reg_starts_white()
+functions.reg_ends_white()
STYLESHEETS = {
'wl2tex': 'xslt/wl2tex.xslt',
def insert_tags(doc, split_re, tagname):
- print tagname
+ """ inserts <tagname> for every occurrence of `split_re' in text nodes in the `doc' tree
+
+ >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
+ >>> insert_tags(t, re.compile('-'), 'd');
+ >>> print etree.tostring(t)
+ <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
+ """
+
for elem in doc.iter():
if elem.text:
chunks = split_re.split(elem.text)
elem.tail = chunks.pop(0)
while chunks:
ins = etree.Element(tagname)
- ins.tail = chunks.pop(0)
+ ins.tail = chunks.pop()
parent.insert(ins_index, ins)
"nbsp")
+def get_resource(path):
+ """Return `path` joined onto the directory that contains this module file."""
+ return os.path.join(os.path.dirname(__file__), path)
+
def get_stylesheet(name):
+ """Return the path of the stylesheet stored under key `name` in STYLESHEETS, resolved against this module's directory."""
- return os.path.join(os.path.dirname(__file__), STYLESHEETS[name])
+ return get_resource(STYLESHEETS[name])
def transform(provider, slug, output_file=None, output_dir=None):
""" produces a pdf file
document = load_including_children(provider, slug)
+ # dirty hack for the marginpar-creates-orphans LaTeX problem
+ # see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304
+ for motif in document.edoc.findall('//strofa//motyw'):
+ # find relevant verse-level tag
+ verse, stanza = motif, motif.getparent()
+ while stanza is not None and stanza.tag != 'strofa':
+ verse, stanza = stanza, stanza.getparent()
+ breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
+ breaks_after = sum(1 for i in verse.itersiblings('br'))
+ if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
+ move_by = 1
+ if breaks_after == 2:
+ move_by += 1
+ moved_motif = deepcopy(motif)
+ motif.tag = 'span'
+ motif.text = None
+ moved_motif.tail = None
+ moved_motif.set('moved', str(move_by))
+
+ for br in verse.itersiblings(tag='br'):
+ if move_by > 1:
+ move_by -= 1
+ continue
+ br.addnext(moved_motif)
+ break
+
substitute_hyphens(document.edoc)
fix_hanging(document.edoc)
-
- print etree.tostring(document.edoc)
# if output to dir, create the file
if output_dir is not None:
fout.close()
del texml
+ shutil.copy(get_resource('pdf/wl.sty'), temp)
+ shutil.copy(get_resource('pdf/wl-logo.png'), temp)
print "pdflatex -output-directory %s %s" % (temp, os.path.join(temp, 'doc.tex'))
if os.system("pdflatex -output-directory %s %s" % (temp, os.path.join(temp, 'doc.tex'))):
raise ParseError("Error parsing .tex file")