# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
"""PDF creation library.

Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it through XeLaTeX.
"""
from __future__ import with_statement

import os
import re
import shutil
from copy import deepcopy
from StringIO import StringIO
from subprocess import call, PIPE
from tempfile import mkdtemp, NamedTemporaryFile

from Texml.processor import process
from lxml import etree
from lxml.etree import XMLSyntaxError, XSLTApplyError

from librarian.dcparser import Person
from librarian.parser import WLDocument
from librarian import ParseError, DCNS, get_resource, OutputFile
from librarian import functions
from librarian.cover import DefaultEbookCover
from .sponsor import sponsor_logo
# Import-time registration calls from librarian.functions.
# Presumably these register extension functions used by the XSLT stylesheet;
# confirm in librarian.functions.
# NOTE(review): the excerpt drops one line between reg_substitute_entities()
# and reg_starts_white(); it is not reconstructed here -- check the original.
functions.reg_substitute_entities()
functions.reg_starts_white()
functions.reg_ends_white()
functions.reg_texcommand()

# Stylesheet name -> resource path, looked up by get_stylesheet().
STYLESHEETS = {
    'wl2tex': 'pdf/wl2tex.xslt',
}
def insert_tags(doc, split_re, tagname, exclude=None):
    """Insert an empty <tagname/> for every occurrence of `split_re` in text.

    Walks every element of the `doc` tree and splits both its text and its
    tail on `split_re`; each match is removed and replaced by a new, empty
    `tagname` element.  Elements whose tag is listed in `exclude` are skipped
    entirely (neither their text nor their tail is touched).

    The tree is modified in place; nothing is returned.

    >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
    >>> insert_tags(t, re.compile('-'), 'd')
    >>> print etree.tostring(t)
    <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
    """
    for elem in doc.iter(tag=etree.Element):
        if exclude and elem.tag in exclude:
            continue

        if elem.text:
            chunks = split_re.split(elem.text)
            # Consume chunks from the end and always insert at position 0,
            # so the new elements end up in document order.
            while len(chunks) > 1:
                ins = etree.Element(tagname)
                ins.tail = chunks.pop()
                elem.insert(0, ins)
            elem.text = chunks.pop(0)

        if elem.tail:
            chunks = split_re.split(elem.tail)
            parent = elem.getparent()
            # New siblings go right after `elem`; inserting repeatedly at the
            # same index while popping from the end keeps document order.
            ins_index = parent.index(elem) + 1
            while len(chunks) > 1:
                ins = etree.Element(tagname)
                ins.tail = chunks.pop()
                parent.insert(ins_index, ins)
            elem.tail = chunks.pop(0)
def substitute_hyphens(doc):
    """Replace in-word hyphens in `doc` with a dedicated empty element.

    A hyphen matches only when the characters on both sides are neither a
    hyphen nor whitespace.  The dc:identifier.url and dc:rights.license
    elements are excluded.
    """
    insert_tags(
        doc,
        re.compile(r"(?<=[^-\s])-(?=[^-\s])"),
        # NOTE(review): the tag name argument is missing from the excerpt;
        # "dywiz" is reconstructed -- confirm against the wl2tex stylesheet.
        "dywiz",
        exclude=[DCNS("identifier.url"), DCNS("rights.license")]
    )
def fix_hanging(doc):
    """Replace whitespace after a one-letter word with a dedicated element.

    The pattern matches a whitespace run preceded by a whitespace character
    and a single word character.  The dc:identifier.url and
    dc:rights.license elements are excluded.
    """
    insert_tags(
        doc,
        re.compile(r"(?<=\s\w)\s+"),
        # NOTE(review): the tag name argument is missing from the excerpt;
        # "nbsp" is reconstructed -- confirm against the wl2tex stylesheet.
        "nbsp",
        exclude=[DCNS("identifier.url"), DCNS("rights.license")]
    )
def fix_tables(doc):
    """Prepare <tabela> elements in `doc` for LaTeX output.

    Sets a `_format` attribute on every table with one 'X' column per child
    of the table's first child, with '|' separators when the table has
    ramka="1" or ramki="1".  Modifies the tree in place.
    """
    for kol in doc.iter(tag='kol'):
        if kol.tail is not None:
            if not kol.tail.strip():
                # NOTE(review): the action for a whitespace-only tail is
                # missing from the excerpt; dropping the tail is reconstructed.
                kol.tail = None

    for table in doc.iter(tag='tabela'):
        # Column count is taken from the table's first child (table[0]).
        if table.get('ramka') == '1' or table.get('ramki') == '1':
            table.set('_format', '|' + 'X|' * len(table[0]))
        else:
            table.set('_format', 'X' * len(table[0]))
def move_motifs_inside(doc):
    """Move motifs that are direct children of a master element into blocks.

    For every <motyw> directly under one of the master elements, the first
    following sibling that is not a separator, marker, motif or note becomes
    its new parent.  Modifies the tree in place.
    """
    for master in doc.xpath('//powiesc|//opowiadanie|//liryka_l|//liryka_lp|'
                            '//dramat_wierszowany_l|//dramat_wierszowany_lp|//dramat_wspolczesny'):
        for motif in master.xpath('motyw'):
            for sib in motif.itersiblings():
                if sib.tag not in ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
                                   'begin', 'end', 'motyw', 'extra', 'uwaga'):
                    # motif shouldn't have a tail - it would be untagged text
                    motif.tail = None
                    motif.getparent().remove(motif)
                    # NOTE(review): re-insertion at the start of the sibling
                    # and the break are reconstructed (missing from excerpt).
                    sib.insert(0, motif)
                    break
def hack_motifs(doc):
    """Dirty hack for the marginpar-creates-orphans LaTeX problem.

    See http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304

    Moves motifs in stanzas from the first verse to the second, and from the
    next-to-last verse to the last one; the moved copy carries a `moved`
    attribute with the number of verses it was shifted by.  The docstring of
    the original says negative vspace is then inserted before them, which
    presumably happens in the stylesheet based on that attribute.

    NOTE(review): the `move_by` bookkeeping, the neutralising of the original
    motif (tag/text reset) and the loop control below are reconstructed;
    those lines are missing from the excerpt.
    """
    for motif in doc.findall('//strofa//motyw'):
        # find relevant verse-level tag
        verse, stanza = motif, motif.getparent()
        while stanza is not None and stanza.tag != 'strofa':
            verse, stanza = stanza, stanza.getparent()
        breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
        breaks_after = sum(1 for i in verse.itersiblings('br'))
        if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
            move_by = 1
            if breaks_after == 2:
                move_by += 1
            moved_motif = deepcopy(motif)
            # The original element stays in place as an inert span so that
            # its tail text keeps its position.
            motif.tag = 'span'
            motif.text = None
            moved_motif.tail = None
            moved_motif.set('moved', str(move_by))

            # Skip move_by - 1 line breaks, then attach after the next one.
            for br in verse.itersiblings('br'):
                if move_by > 1:
                    move_by -= 1
                    continue
                br.addnext(moved_motif)
                break
def parse_creator(doc):
    """Generate readable versions of creator and translator tags.

    Finds all dc:creator and dc:contributor.translator tags and, for each,
    inserts a copy named *_parsed at the start of the same parent.  The copy
    holds the text returned by Person.readable() and keeps the original
    text in a `sortkey` attribute.
    """
    xpath = "|".join('//dc:' + tag for tag in ('creator', 'contributor.translator'))
    # Iterate in reverse: each copy is inserted at index 0, so reversing
    # keeps the *_parsed elements in the same relative order as the sources.
    for person in doc.xpath(xpath, namespaces={'dc': str(DCNS)})[::-1]:
        # NOTE(review): this empty-text guard is reconstructed (the excerpt
        # drops two lines here).
        if not person.text:
            continue
        p = Person.from_text(person.text)
        person_parsed = deepcopy(person)
        person_parsed.tag = person.tag + '_parsed'
        person_parsed.set('sortkey', person.text)
        person_parsed.text = p.readable()
        person.getparent().insert(0, person_parsed)
def get_stylesheet(name):
    """Return get_resource() applied to the STYLESHEETS entry for `name`.

    Raises KeyError when `name` is not a key of STYLESHEETS.
    """
    return get_resource(STYLESHEETS[name])
def package_available(package, args='', verbose=False):
    """Check if a version of a LaTeX package accepting given args is available.

    Writes a minimal test document loading `package` with options `args`
    into a scratch directory and runs xelatex on it.

    package: LaTeX package name
    args: option string passed to the package
    verbose: when true, let xelatex print its output instead of silencing it

    Returns True when xelatex exits with status 0.
    """
    tempdir = mkdtemp('-wl2pdf-test')
    try:
        fpath = os.path.join(tempdir, 'test.tex')
        with open(fpath, 'w') as f:
            # NOTE(review): the template body is missing from the excerpt and
            # is reconstructed here -- confirm the document class and layout.
            f.write(r"""
        \documentclass{wl}
        \usepackage[%s]{%s}
        \begin{document}
        \end{document}
        """ % (args, package))
        if verbose:
            p = call(['xelatex', '-output-directory', tempdir, fpath])
        else:
            # Discard output via os.devnull: call() never reads from a PIPE,
            # so a chatty child could block forever on a full pipe buffer.
            with open(os.devnull, 'w') as devnull:
                p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath],
                         stdout=devnull, stderr=devnull)
    finally:
        # Remove the scratch directory even when writing or xelatex fails.
        shutil.rmtree(tempdir)
    # NOTE(review): the return statement is missing from the excerpt; the
    # caller uses the result as a boolean.
    return p == 0
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
              cover=None, flags=None, customizations=None):
    """Produce a PDF file with XeLaTeX.

    wldoc: a WLDocument
    verbose: prints all output from LaTeX
    save_tex: path to save the intermediary LaTeX file to
    morefloats (old/new/none): force specific morefloats
    cover: a cover.Cover factory or True for default
    flags: less-advertising,
    customizations: user requested customizations regarding various
        formatting parameters (passed to wl LaTeX class)

    Returns an OutputFile built from a temporary .pdf file.
    Raises ParseError when xelatex exits with a non-zero status or when
    lxml reports an XML syntax / XSLT error.

    NOTE(review): this block is reconstructed from a damaged excerpt.  The
    `if cover` / `if flags` / `if morefloats` / `if editors` / `if logo` /
    `if save_tex` guards, the attachment of the sponsor element to the root,
    the removal of the scratch directory and the body of the final except
    clause were inferred -- confirm against the original.
    """
    try:
        book_info = wldoc.book_info
        document = load_including_children(wldoc)
        root = document.edoc.getroot()

        if cover:
            if cover is True:
                cover = DefaultEbookCover
            bound_cover = cover(book_info, width=1200)
            root.set('data-cover-width', str(bound_cover.width))
            root.set('data-cover-height', str(bound_cover.height))
            if bound_cover.uses_dc_cover:
                if book_info.cover_by:
                    root.set('data-cover-by', book_info.cover_by)
                if book_info.cover_source:
                    root.set('data-cover-source', book_info.cover_source)
        if flags:
            for flag in flags:
                root.set('flag-' + flag, 'yes')

        # check for LaTeX packages
        if morefloats:
            root.set('morefloats', morefloats.lower())
        elif package_available('morefloats', 'maxfloats=19'):
            root.set('morefloats', 'new')

        # add customizations
        if customizations is not None:
            root.set('customizations', u','.join(customizations))

        # add editors info
        editors = document.editors()
        if editors:
            root.set('editors', u', '.join(sorted(
                editor.readable() for editor in editors)))
        if document.book_info.funders:
            root.set('funders', u', '.join(document.book_info.funders))
        if document.book_info.thanks:
            root.set('thanks', document.book_info.thanks)

        # hack the tree
        move_motifs_inside(document.edoc)
        hack_motifs(document.edoc)
        parse_creator(document.edoc)
        substitute_hyphens(document.edoc)
        fix_hanging(document.edoc)
        fix_tables(document.edoc)

        # wl -> TeXML
        style_filename = get_stylesheet("wl2tex")
        style = etree.parse(style_filename)
        functions.reg_mathml_latex()

        # TeXML -> LaTeX
        temp = mkdtemp('-wl2pdf')

        for sponsor in book_info.sponsors:
            ins = etree.Element("data-sponsor", name=sponsor)
            logo = sponsor_logo(sponsor)
            if logo:
                # Copy the logo next to the .tex file and reference it by
                # its bare file name.
                fname = 'sponsor-%s' % os.path.basename(logo)
                shutil.copy(logo, os.path.join(temp, fname))
                ins.set('src', fname)
            root.insert(0, ins)

        if book_info.sponsor_note:
            root.set("sponsor-note", book_info.sponsor_note)

        texml = document.transform(style)

        if cover:
            # Binary mode: the cover is image data, not text.
            with open(os.path.join(temp, 'cover.png'), 'wb') as f:
                bound_cover.save(f, quality=80)

        del document  # no longer needed large object :)

        tex_path = os.path.join(temp, 'doc.tex')
        # `with` guarantees the file is flushed and closed before xelatex
        # reads it, even if TeXML processing raises.
        with open(tex_path, 'w') as fout:
            process(StringIO(texml), fout, 'utf-8')
        del texml

        if save_tex:
            shutil.copy(tex_path, save_tex)

        # LaTeX -> PDF
        shutil.copy(get_resource('pdf/wl.cls'), temp)
        shutil.copy(get_resource('res/wl-logo.png'), temp)

        # Run xelatex with the scratch directory as the child's working
        # directory, so doc.pdf lands there and the class/logo files are
        # found, without changing this process's own working directory.
        if verbose:
            p = call(['xelatex', tex_path], cwd=temp)
        else:
            # Discard output via os.devnull: call() never reads from a PIPE,
            # so a chatty child could block forever on a full pipe buffer.
            with open(os.devnull, 'w') as devnull:
                p = call(['xelatex', '-interaction=batchmode', tex_path],
                         cwd=temp, stdout=devnull, stderr=devnull)
        if p:
            raise ParseError("Error parsing .tex file")

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
def load_including_children(wldoc=None, provider=None, uri=None):
    """Make one big XML document with children inserted at the end.

    Either wldoc or provider and URI must be provided.

    The text is read from `provider.by_uri(uri)` or serialized from
    `wldoc.edoc`, runs of characters in the U+0400-U+04FF range are wrapped
    in <alien> tags, and the result is parsed into a WLDocument.  Every URI
    in the document's `book_info.parts` is loaded recursively and its root
    element appended to this document's root.

    Returns the assembled WLDocument.
    Raises ValueError when neither input combination is given.

    NOTE(review): the first branch condition and the final return are
    missing from the excerpt and are reconstructed from the docstring and
    from how callers use the result.
    """
    if uri and provider:
        f = provider.by_uri(uri)
        try:
            text = f.read().decode('utf-8')
        finally:
            # Release the source even when reading or decoding fails.
            f.close()
    elif wldoc is not None:
        text = etree.tostring(wldoc.edoc, encoding=unicode)
        provider = wldoc.provider
    else:
        raise ValueError('Neither a WLDocument, nor provider and URI were provided.')

    # Same pattern and replacement values as the ur"..." literals, spelled
    # without the Python-2-only `ur` prefix.
    text = re.sub(u"([\u0400-\u04ff]+)", u"<alien>\\1</alien>", text)

    document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
    document.swap_endlines()

    for child_uri in document.book_info.parts:
        child = load_including_children(provider=provider, uri=child_uri)
        document.edoc.getroot().append(child.edoc.getroot())
    return document