2 runs in creating pdf
[librarian.git] / librarian / pdf.py
index b8aafdb..5df8dad 100644 (file)
@@ -13,6 +13,7 @@ from __future__ import with_statement
 import os
 import os.path
 import shutil
+from distutils.dir_util import copy_tree # shutil.copytree cannot copy into an already existing directory.
 from StringIO import StringIO
 from tempfile import mkdtemp, NamedTemporaryFile
 import re
@@ -29,6 +30,7 @@ from librarian import ParseError, DCNS, get_resource, OutputFile
 from librarian import functions
 from librarian.cover import WLCover
 
+import itertools, operator
 
 functions.reg_substitute_entities()
 functions.reg_strip()
@@ -83,7 +85,7 @@ def substitute_hyphens(doc):
     insert_tags(doc,
                 re.compile("(?<=[^-\s])-(?=[^-\s])"),
                 "dywiz",
-                exclude=[DCNS("identifier.url"), DCNS("rights.license")]
+                exclude=[DCNS("identifier.url"), DCNS("rights.license"), "www"]
                 )
 
 
@@ -94,6 +96,29 @@ def fix_hanging(doc):
                 exclude=[DCNS("identifier.url"), DCNS("rights.license")]
                 )
 
+def fake_tables(doc):
+    """ retags well-formed <tabela> elements as rows ('r') of cells ('c')
+
+    A table qualifies when it has at least one child, all of its children
+    share a single tag name, it has at least one grandchild, and all of its
+    grandchildren share a single tag name.  In a qualifying table every
+    child is retagged 'r' and every grandchild 'c'.  Tables that do not
+    qualify are left untouched.
+
+    doc: the tree searched with findall("//tabela"); modified in place
+    """
+    for tabela in doc.findall("//tabela"):
+        rows = list(tabela)
+        cells = [cell for row in rows for cell in row]
+
+        # A set of tag names has exactly one member only when the level is
+        # both non-empty and homogeneous.
+        # A table that fails the test is skipped on its own; it must not
+        # stop the processing of the tables that follow it.
+        if len(set(row.tag for row in rows)) != 1:
+            continue
+        if len(set(cell.tag for cell in cells)) != 1:
+            continue
+        # NOTE(review): the previous version also tested that the per-tag
+        # grandchild counts were all equal, but with a single grandchild tag
+        # that test could never fail.  If rows are required to hold the same
+        # number of cells, compare len(row) across rows here -- confirm the
+        # intended behaviour before enforcing it.
+
+        for row in rows:
+            row.tag = 'r'
+        for cell in cells:
+            cell.tag = 'c'
 
 def move_motifs_inside(doc):
     """ moves motifs to be into block elements """
@@ -183,36 +208,40 @@ def package_available(package, args='', verbose=False):
     return p == 0
 
 
-def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
-              cover=None, flags=None, customizations=None):
+def transform(wldoc, verbose=False, save_tex=None, save_texml=None, morefloats=None,
+              cover=None, cover_file=None, flags=None, customizations=None, documentclass='wl', resources=None, tworuns=False):
     """ produces a PDF file with XeLaTeX
 
     wldoc: a WLDocument
     verbose: prints all output from LaTeX
     save_tex: path to save the intermediary LaTeX file to
+    save_texml: path to save the intermediary TeXML file to
     morefloats (old/new/none): force specific morefloats
     cover: a cover.Cover factory or True for default
     flags: less-advertising,
     customizations: user requested customizations regarding various formatting parameters (passed to wl LaTeX class)
+    documentclass: LaTeX document class, defaults to wl
+    resources: a directory with resources, copied to place where LaTeX compilation is made
     """
 
     # Parse XSLT
     try:
+        book_info = wldoc.book_info
         document = load_including_children(wldoc)
         root = document.edoc.getroot()
 
         if cover:
             if cover is True:
                 cover = WLCover
-            bound_cover = cover(document.book_info)
+            bound_cover = cover(book_info)
             root.set('data-cover-width', str(bound_cover.width))
             root.set('data-cover-height', str(bound_cover.height))
             if bound_cover.uses_dc_cover:
-                if document.book_info.cover_by:
-                    root.set('data-cover-by', document.book_info.cover_by)
-                if document.book_info.cover_source:
+                if book_info.cover_by:
+                    root.set('data-cover-by', book_info.cover_by)
+                if book_info.cover_source:
                     root.set('data-cover-source',
-                            document.book_info.cover_source)
+                            book_info.cover_source)
         if flags:
             for flag in flags:
                 root.set('flag-' + flag, 'yes')
@@ -227,6 +256,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         if customizations is not None:
             root.set('customizations', u','.join(customizations))
 
+        root.set('documentclass', documentclass or 'wl')
+
         # add editors info
         root.set('editors', u', '.join(sorted(
             editor.readable() for editor in document.editors())))
@@ -234,6 +265,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         # hack the tree
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
+        fake_tables(document.edoc)
         parse_creator(document.edoc)
         substitute_hyphens(document.edoc)
         fix_hanging(document.edoc)
@@ -244,6 +276,9 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
 
         texml = document.transform(style)
 
+        if save_texml:
+            texml.write(save_texml)
+
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
 
@@ -264,7 +299,12 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
 
         # LaTeX -> PDF
         shutil.copy(get_resource('pdf/wl.cls'), temp)
+        shutil.copy(get_resource('pdf/wlpub.cls'), temp)
+        shutil.copy(get_resource('pdf/fnprep.cls'), temp)
         shutil.copy(get_resource('res/wl-logo.png'), temp)
+        shutil.copy(get_resource('res/cover.jpg'), temp)
+        if resources:
+            copy_tree(resources, temp)
 
         try:
             cwd = os.getcwd()
@@ -272,12 +312,16 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
             cwd = None
         os.chdir(temp)
 
-        if verbose:
-            p = call(['xelatex', tex_path])
-        else:
-            p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
-        if p:
-            raise ParseError("Error parsing .tex file")
+        if resources:
+            os.putenv("TEXINPUTS", "::.:%s" % resources)
+
+        for run in range(1 + (tworuns and 1 or 0)):
+            if verbose:
+                p = call(['xelatex', tex_path])
+            else:
+                p = call(['xelatex', '-interaction=batchmode', tex_path], stdout=PIPE, stderr=PIPE)
+            if p:
+                raise ParseError("Error parsing .tex file")
 
         if cwd is not None:
             os.chdir(cwd)