1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 from __future__ import with_statement
10 from StringIO import StringIO
11 from tempfile import mkdtemp
13 from copy import deepcopy
14 from subprocess import call, PIPE
18 from Texml.processor import process
19 from lxml import etree
20 from lxml.etree import XMLSyntaxError, XSLTApplyError
22 from librarian.parser import WLDocument
23 from librarian import ParseError
24 from librarian import functions
# Module-level setup: register helper functions provided by librarian.functions
# (presumably extension functions used from the XSLT stylesheet -- TODO confirm).
28 functions.reg_substitute_entities()
29 functions.reg_person_name()
31 functions.reg_starts_white()
32 functions.reg_ends_white()
# Stylesheet registry entry: logical name -> path relative to this module.
# get_stylesheet() looks names up in STYLESHEETS and resolves them via get_resource();
# presumably this line is an entry of that mapping -- verify against the full file.
35 'wl2tex': 'xslt/wl2tex.xslt',
39 def insert_tags(doc, split_re, tagname):
40 """ inserts <tagname> for every occurrence of `split_re' in text nodes in the `doc' tree
42 >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>');
43 >>> insert_tags(t, re.compile('-'), 'd');
44 >>> print etree.tostring(t)
45 <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
48 for elem in doc.iter():
# Text part: split elem.text on the pattern. Chunks are popped from the end and
# each one becomes the tail of a freshly created <tagname> element; the first
# chunk is what remains as elem.text.
# NOTE(review): the statement attaching `ins` to `elem` is not visible in this excerpt.
51 chunks = split_re.split(elem.text)
52 while len(chunks) > 1:
53 ins = etree.Element(tagname)
54 ins.tail = chunks.pop()
56 elem.text = chunks.pop(0)
# Tail part: same idea, but the new elements become siblings placed right after
# `elem`. Chunks are taken last-to-first and always inserted at the same index,
# so the inserted elements end up in document order.
58 chunks = split_re.split(elem.tail)
59 parent = elem.getparent()
60 ins_index = parent.index(elem) + 1
61 while len(chunks) > 1:
62 ins = etree.Element(tagname)
63 ins.tail = chunks.pop()
64 parent.insert(ins_index, ins)
65 elem.tail = chunks.pop(0)
67 # element with no children, like comment
71 def substitute_hyphens(doc):
# Pattern: a single hyphen that has a non-hyphen, non-whitespace character
# directly on each side (an intra-word hyphen; dash runs and spaced hyphens
# do not match). Presumably handed to insert_tags() -- TODO confirm.
73 re.compile("(?<=[^-\s])-(?=[^-\s])"),
# Pattern: the whitespace run that follows a single word character which is
# itself preceded by whitespace, i.e. the gap after a one-letter word.
# NOTE(review): presumably part of fix_hanging() (called from transform());
# the enclosing definition is not visible in this excerpt.
79 re.compile("(?<=\s\w)\s+"),
83 def move_motifs_inside(doc):
84 """ moves motifs that are direct children of a master element into block elements """
# Master elements: the top-level text containers selected by the XPath union below.
85 for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
# Only motifs sitting directly under the master element are considered.
86 for motif in master.xpath('motyw'):
# Scan the following siblings for the first one whose tag is not in the skip list.
88 for sib in motif.itersiblings():
89 if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia', 'begin', 'end', 'motyw', 'extra', 'uwaga'):
90 # motif shouldn't have a tail - it would be untagged text
# Detach the motif from its current parent.
# NOTE(review): the re-insertion into `sib` is not visible in this excerpt.
92 motif.getparent().remove(motif)
98 """ dirty hack for the marginpar-creates-orphans LaTeX problem
99 see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304
101 moves motifs in stanzas from first verse to second
102 and from next to last to last, then inserts negative vspace before them
104 for motif in doc.findall('//strofa//motyw'):
105 # find relevant verse-level tag
# Climb from the motif until the parent is the stanza; `verse` is then the
# stanza's direct child that contains the motif.
106 verse, stanza = motif, motif.getparent()
107 while stanza is not None and stanza.tag != 'strofa':
108 verse, stanza = stanza, stanza.getparent()
# Count the <br> siblings on either side of that node to locate the verse
# within its stanza.
109 breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
110 breaks_after = sum(1 for i in verse.itersiblings('br'))
# Affected positions: no break before but at least one after (first verse of
# a multi-verse stanza), or exactly one break after (next-to-last verse).
111 if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
113 if breaks_after == 2:
# The relocated motif is a deep copy without tail text; the 'moved' attribute
# records the offset as a string.
115 moved_motif = deepcopy(motif)
118 moved_motif.tail = None
119 moved_motif.set('moved', str(move_by))
# The copy is placed directly after a following <br>.
# NOTE(review): the logic choosing which <br> (based on move_by) is not visible in this excerpt.
121 for br in verse.itersiblings('br'):
125 br.addnext(moved_motif)
def get_resource(path):
    """ returns `path` joined onto the directory that contains this module """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, path)
def get_stylesheet(name):
    """ returns the resource path of the stylesheet registered under `name` in STYLESHEETS """
    relative_path = STYLESHEETS[name]
    return get_resource(relative_path)
136 def package_available(package, args='', verbose=False):
137 """ check if a version of a LaTeX package accepting given args is available """
# Scratch directory holding a throw-away test document; removed again at the end.
138 tempdir = mkdtemp('-wl2pdf-test')
139 fpath = os.path.join(tempdir, 'test.tex')
146 """ % (args, package))
# Compile the test document with xelatex: one call lets the output through,
# the other runs in batch mode with stdout/stderr piped away
# (presumably selected by `verbose` -- TODO confirm).
149 p = call(['xelatex', '-output-directory', tempdir, fpath])
# NOTE(review): subprocess.call() never reads from PIPE, so the child can block
# once the OS pipe buffer fills; the subprocess docs advise against PIPE here.
151 p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
# NOTE(review): no try/finally is visible around call(); if it raises
# (e.g. xelatex is not installed) the temp dir would be left behind.
152 shutil.rmtree(tempdir)
156 def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, save_tex=None):
157 """ produces a PDF file with XeLaTeX
159 provider: a DocProvider
160 slug: slug of file to process, available by provider
161 file_path can be provided instead of a slug
162 output_file: file-like object or path to output file
163 output_dir: path to directory to save output file to; either this or output_file must be present
164 make_dir: writes output to <output_dir>/<author>/<slug>.pdf instead of <output_dir>/<slug>.pdf
165 verbose: prints all output from LaTeX
166 save_tex: path to save the intermediary LaTeX file to
173 raise ValueError('slug or file_path should be specified, not both')
# Exactly one of slug / file_path selects the source document.
174 document = load_including_children(provider, file_path=file_path)
177 raise ValueError('either slug or file_path should be specified')
178 document = load_including_children(provider, slug=slug)
180 # check for LaTeX packages
181 if not package_available('morefloats', 'maxfloats=19'):
182 # using old morefloats or none at all
183 document.edoc.getroot().set('old-morefloats', 'yes')
# Tree fix-ups applied to the document before the stylesheet is run.
186 move_motifs_inside(document.edoc)
187 hack_motifs(document.edoc)
188 substitute_hyphens(document.edoc)
189 fix_hanging(document.edoc)
# make_dir: nest the output under a per-author subdirectory.
192 if make_dir and output_dir is not None:
193 author = unicode(document.book_info.author)
194 output_dir = os.path.join(output_dir, author)
# Run the wl2tex stylesheet; its result is fed to the TeXML processor below.
197 style_filename = get_stylesheet("wl2tex")
198 style = etree.parse(style_filename)
199 texml = document.transform(style)
200 del document # no longer needed large object :)
# Build inside a fresh temp dir: TeXML -> doc.tex.
203 temp = mkdtemp('-wl2pdf')
204 tex_path = os.path.join(temp, 'doc.tex')
# NOTE(review): the close of `fout` is not visible in this excerpt; a `with`
# block would guarantee the .tex file is flushed before xelatex reads it.
205 fout = open(tex_path, 'w')
206 process(StringIO(texml), fout, 'utf-8')
# Optionally keep a copy of the intermediate LaTeX source (save_tex).
211 shutil.copy(tex_path, save_tex)
# Copy the bundled style file and logo into the build directory.
214 shutil.copy(get_resource('pdf/wl.sty'), temp)
215 shutil.copy(get_resource('pdf/wl-logo.png'), temp)
# Compile with xelatex: one call shows the output, the other runs in batch mode
# with output piped away (presumably selected by `verbose` -- TODO confirm).
221 p = call(['xelatex', tex_path])
# NOTE(review): subprocess.call() never reads from PIPE, so a long LaTeX log can
# block the child once the OS pipe buffer fills; the subprocess docs advise against it.
223 p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
225 raise ParseError("Error parsing .tex file")
# NOTE(review): xelatex is called without -output-directory, so doc.pdf landing
# in `temp` presumably relies on a working-directory change not visible here.
230 pdf_path = os.path.join(temp, 'doc.pdf')
# Output to a directory: file name comes from the slug or from file_path's base name.
231 if output_dir is not None:
233 os.makedirs(output_dir)
237 output_path = os.path.join(output_dir, '%s.pdf' % slug)
239 output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.pdf')
240 shutil.move(pdf_path, output_path)
# Output to output_file: anything with a write() method is treated as a file object.
242 if hasattr(output_file, 'write'):
# NOTE(review): the PDF is opened in text mode; 'rb' would be the safe choice for binary data.
244 with open(pdf_path) as f:
245 output_file.write(f.read())
248 # path to output file
249 shutil.copy(pdf_path, output_file)
# XML parsing / XSLT failures are caught here; the handler body is not visible in this excerpt.
252 except (XMLSyntaxError, XSLTApplyError), e:
256 def load_including_children(provider, slug=None, uri=None, file_path=None):
257 """ makes one big xml file with children inserted at end
258 either slug, uri or file_path must be provided
262 f = provider.by_uri(uri)
# NOTE(review): no close of this file handle is visible in this excerpt.
266 f = open(file_path, 'r')
268 raise ValueError('Neither slug, URI nor file path provided for a book.')
# Parse the source including its Dublin Core metadata, without preserving lines.
270 document = WLDocument.from_file(f, True,
271 parse_dublincore=True,
272 preserve_lines=False)
# Recursively load every part listed in the book metadata and append the
# child's root element to this document's root element.
276 for child_uri in document.book_info.parts:
277 child = load_including_children(provider, uri=child_uri)
278 document.edoc.getroot().append(child.edoc.getroot())