1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
16 from StringIO import StringIO
17 from tempfile import mkdtemp, NamedTemporaryFile
19 from copy import deepcopy
20 from subprocess import call, PIPE
22 from Texml.processor import process
23 from lxml import etree
25 from librarian.dcparser import Person
26 from librarian.parser import WLDocument
27 from librarian import ParseError, DCNS, get_resource, IOFile, Format
28 from librarian import functions
31 functions.reg_substitute_entities()
33 functions.reg_starts_white()
34 functions.reg_ends_white()
35 functions.reg_texcommand()
38 'wl2tex': 'pdf/wl2tex.xslt',
def insert_tags(doc, split_re, tagname, exclude=None):
    """Insert an empty <tagname/> element at every occurrence of `split_re`.

    Walks the elements of the `doc` tree and splits both the text and the
    tail of each one on `split_re`.  Every match is replaced by a new, empty
    `tagname` element; the text between matches is kept.  The tree is
    modified in place and nothing is returned.

    doc      -- lxml element or tree to process
    split_re -- compiled regular expression marking the places to tag
    tagname  -- tag of the elements to insert
    exclude  -- optional collection of tags; elements with these tags are
                skipped (neither their text nor their tail is processed)

    >>> t = etree.fromstring('<a><b>A-B-C</b>X-Y-Z</a>')
    >>> insert_tags(t, re.compile('-'), 'd')
    >>> print etree.tostring(t)
    <a><b>A<d/>B<d/>C</b>X<d/>Y<d/>Z</a>
    """
    for elem in doc.iter(tag=etree.Element):
        if exclude and elem.tag in exclude:
            continue
        # .text is None for an element without leading character data,
        # and re.split() does not accept None.
        if elem.text:
            chunks = split_re.split(elem.text)
            # Consume the pieces from the right: every new marker is put in
            # as the first child, so the markers end up in reading order.
            while len(chunks) > 1:
                ins = etree.Element(tagname)
                ins.tail = chunks.pop()
                elem.insert(0, ins)
            elem.text = chunks.pop(0)
        # Same treatment for the text that follows the element.
        if elem.tail:
            chunks = split_re.split(elem.tail)
            parent = elem.getparent()
            ins_index = parent.index(elem) + 1
            # Again right to left, always inserting directly after `elem`.
            while len(chunks) > 1:
                ins = etree.Element(tagname)
                ins.tail = chunks.pop()
                parent.insert(ins_index, ins)
            elem.tail = chunks.pop(0)
72 def substitute_hyphens(doc):
74 re.compile("(?<=[^-\s])-(?=[^-\s])"),
76 exclude=[DCNS("identifier.url"), DCNS("rights.license"), 'www']
82 re.compile("(?<=\s\w)\s+"),
84 exclude=[DCNS("identifier.url"), DCNS("rights.license")]
88 def move_motifs_inside(doc):
89 """ moves motifs to be into block elements """
90 main_tags = ('powiesc', 'opowiadanie', 'liryka_l', 'liryka_lp',
91 'dramat_wierszowany_l', 'dramat_wierszowany_lp', 'dramat_wspolczesny')
92 for master in doc.xpath('|'.join('//' + tag for tag in main_tags)):
93 for motif in master.xpath('motyw'):
94 for sib in motif.itersiblings():
95 special_tags = ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
96 'begin', 'end', 'motyw', 'extra', 'uwaga')
97 if sib.tag not in special_tags:
98 # motif shouldn't have a tail - it would be untagged text
100 motif.getparent().remove(motif)
105 def hack_motifs(doc):
106 """ dirty hack for the marginpar-creates-orphans LaTeX problem
107 see http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304
109 moves motifs in stanzas from first verse to second
110 and from next to last to last, then inserts negative vspace before them
112 for motif in doc.findall('//strofa//motyw'):
113 # find relevant verse-level tag
114 verse, stanza = motif, motif.getparent()
115 while stanza is not None and stanza.tag != 'strofa':
116 verse, stanza = stanza, stanza.getparent()
117 breaks_before = sum(1 for i in verse.itersiblings('br', preceding=True))
118 breaks_after = sum(1 for i in verse.itersiblings('br'))
119 if (breaks_before == 0 and breaks_after > 0) or breaks_after == 1:
121 if breaks_after == 2:
123 moved_motif = deepcopy(motif)
126 moved_motif.tail = None
127 moved_motif.set('moved', str(move_by))
129 for br in verse.itersiblings('br'):
133 br.addnext(moved_motif)
def parse_creator(doc):
    """Generate readable versions of creator and translator tags.

    Finds all dc:creator and dc:contributor.translator elements in `doc`
    and, for each of them, adds a copy whose tag has the suffix `_parsed`.
    The copy keeps the original text in its `sortkey` attribute and gets
    the text returned by Person.readable() (forenames first).

    Elements without text are skipped.  The tree is modified in place and
    nothing is returned.
    """
    # The matches are walked in reverse because each copy is inserted at
    # position 0 of its parent: this keeps the copies in document order.
    persons = doc.xpath(
        "|".join('//dc:' + tag for tag in ('creator', 'contributor.translator')),
        namespaces={'dc': str(DCNS)})[::-1]
    for person in persons:
        # An empty element has nothing to parse.
        if not person.text:
            continue
        p = Person.from_text(person.text)
        person_parsed = deepcopy(person)
        person_parsed.tag = person.tag + '_parsed'
        person_parsed.set('sortkey', person.text)
        person_parsed.text = p.readable()
        person.getparent().insert(0, person_parsed)
def get_stylesheet(name):
    """Return the resource registered for the stylesheet called `name`.

    The name is looked up in the module-level STYLESHEETS mapping and the
    value found there is handed to get_resource().
    """
    resource_path = STYLESHEETS[name]
    return get_resource(resource_path)
161 def package_available(package, args='', verbose=False):
162 """ check if a verion of a latex package accepting given args is available """
163 tempdir = mkdtemp('-wl2pdf-test')
164 fpath = os.path.join(tempdir, 'test.tex')
171 """ % (args, package))
174 p = call(['xelatex', '-output-directory', tempdir, fpath])
176 p = call(['xelatex', '-interaction=batchmode', '-output-directory', tempdir, fpath], stdout=PIPE, stderr=PIPE)
177 shutil.rmtree(tempdir)
182 def load_including_children(wldoc=None, provider=None, uri=None):
183 """ Makes one big xml file with children inserted at end.
185 Either wldoc or provider and URI must be provided.
189 f = provider.by_uri(uri)
190 # WTF DocProvider.by_uri() returns IOFile, so no .read() there
191 text = f.read().decode('utf-8')
193 elif wldoc is not None:
194 text = etree.tostring(wldoc.edoc, encoding=unicode)
195 provider = wldoc.provider
197 raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
199 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
201 document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
202 document.swap_endlines()
204 for child_uri in document.book_info.parts:
205 child = load_including_children(provider=provider, uri=child_uri)
206 document.edoc.getroot().append(child.edoc.getroot())
210 class PDFFormat(Format):
213 Available customization:
214 nofootnotes: Doesn't do footnotes.
215 nothemes: Doesn't do themes.
216 defaultleading: Default leading.
217 onehalfleading: Bigger leading.
218 doubleleading: Big leading.
219 nowlfont: Uses standard TeX font instead of JUnicodeWL.
225 style = get_resource('pdf/default.sty')
230 """ For use in XSLT. """
231 return self.cover is not None
def customization_str(self):
    """ For use in XSLT.

    Returns the names of all customization options whose value is true,
    joined with commas.
    """
    enabled = [option for option, value in self.customization.items() if value]
    return u','.join(enabled)
239 raise NotImplementedError
241 def get_tex_dir(self):
242 texml = self.get_texml()
243 temp = mkdtemp('-wl2pdf')
245 tex_path = os.path.join(temp, 'doc.tex')
246 with open(tex_path, 'w') as fout:
247 process(StringIO(texml), fout, 'utf-8')
249 shutil.copy(tex_path, self.save_tex)
251 shutil.copy(get_resource('pdf/wl.cls'), temp)
252 shutil.copy(self.style, os.path.join(temp, 'style.sty'))
253 # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
254 # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
258 self.cover.for_pdf().dump_to(os.path.join(temp, 'makecover.sty'))
262 temp = self.get_tex_dir()
263 tex_path = os.path.join(temp, 'doc.tex')
272 for i in xrange(self.tex_passes):
273 p = call(['xelatex', tex_path])
275 for i in xrange(self.tex_passes):
276 p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
278 raise ParseError("Error parsing .tex file: %s" % tex_path)
283 output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
284 pdf_path = os.path.join(temp, 'doc.pdf')
285 shutil.move(pdf_path, output_file.name)
287 return IOFile.from_filename(output_file.name)
289 def build(self, verbose=False, save_tex=None, morefloats=None):
290 """ morefloats: new/old/none
292 self.verbose = verbose
293 self.save_tex = save_tex
295 if morefloats is None and package_available('morefloats', 'maxfloats=19'):
297 self.morefloats = morefloats
299 book_info = self.wldoc.book_info
301 self.cover = self.cover_class(book_info)
303 return self.get_pdf()