- # Parse XSLT
- try:
- if file_path:
- if slug:
- raise ValueError('slug or file_path should be specified, not both')
- document = load_including_children(provider, file_path=file_path)
- else:
- if not slug:
- raise ValueError('either slug or file_path should be specified')
- document = load_including_children(provider, slug=slug)
-
- # check for LaTeX packages
- if not package_available('morefloats', 'maxfloats=19'):
- # using old morefloats or none at all
- document.edoc.getroot().set('old-morefloats', 'yes')
-
- # hack the tree
- move_motifs_inside(document.edoc)
- hack_motifs(document.edoc)
- substitute_hyphens(document.edoc)
- fix_hanging(document.edoc)
-
- # find output dir
- if make_dir and output_dir is not None:
- author = unicode(document.book_info.author)
- output_dir = os.path.join(output_dir, author)
-
- # wl -> TeXML
- style_filename = get_stylesheet("wl2tex")
- style = etree.parse(style_filename)
- texml = document.transform(style)
- del document # no longer needed large object :)
-
- # TeXML -> LaTeX
- temp = mkdtemp('-wl2pdf')
- tex_path = os.path.join(temp, 'doc.tex')
- fout = open(tex_path, 'w')
- process(StringIO(texml), fout, 'utf-8')
- fout.close()
- del texml
+ if uri and provider:
+ f = provider.by_uri(uri)
+ # WTF DocProvider.by_uri() returns IOFile, so no .read() there
+ text = f.read().decode('utf-8')
+ f.close()
+ elif wldoc is not None:
+ text = etree.tostring(wldoc.edoc, encoding=unicode)
+ provider = wldoc.provider
+ else:
+ raise ValueError('Neither a WLDocument, nor provider and URI were provided.')
+
+ text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
+
+ document = WLDocument.from_string(text, parse_dublincore=True, provider=provider)
+ document.swap_endlines()
+
+ for child_uri in document.book_info.parts:
+ child = load_including_children(provider=provider, uri=child_uri)
+ document.edoc.getroot().append(child.edoc.getroot())
+ return document
+
+
+class PDFFormat(Format):
+ """ Base PDF format.