1 """ TeXML Writer and string services """
2 # $Id: texmlwr.py,v 1.9 2006-07-20 03:56:27 olpa Exp $
5 # Modes of processing of special characters
12 WEAK_WS_IS_NEWLINE = 2
29 # Handling of '--', '---' and other ligatures
32 # Modes of transformation can be tuned and nested
42 # Current length of the line that is being written. The value is usually
43 # only approximate, but it is always reliable for detecting the start of a line (0)
44 # > approx_current_line_len
45 # If the length of the current line is greater than this value,
46 # then the writer converts weak whitespaces into newlines.
47 # The flag tells whether this conversion is allowed
48 # > autonewline_after_len
49 # > allow_weak_ws_to_nl
51 # We usually don't allow empty lines in output because such lines
52 # cause a paragraph break in TeX.
55 # always_ascii: If attempts to write a character to the output
56 # stream have failed, then the code converts the symbol to bytes,
57 # and these bytes are written in the ^^xx format.
59 # bad_enc_warned: TeXML issues a warning if it fails to convert
60 # a symbol. This flag ensures that the warning is issued only once.
# Constructor: stores the settings, wraps `stream` in a stream_encoder for
# `encoding`, and resets the writer state (stacks, line-length counter, flags).
63 def __init__(self, stream, encoding, autonl_width, use_context = 0, always_ascii = 0):
64 """ Remember output stream, initialize data structures """
66 self.always_ascii = always_ascii
67 self.encoding = encoding
# Wrap the raw stream so that everything written goes through the encoder.
71 self.stream = stream_encoder(stream, encoding)
# NOTE(review): the try/except around the line above is not visible in this
# listing; presumably a failure to create the encoder is re-raised as
# ValueError with the original error text -- confirm.
73 raise ValueError("Can't create encoder: '%s'" % e)
74 # Continue initialization: per-line state, policy stacks and flags
78 self.line_is_blank = 1
# Each *_stack holds the previous values saved by the matching stack_*() method.
82 self.escape_stack = []
84 self.ligatures_stack = []
86 self.emptylines_stack = []
# 0 means "at the start of a line" (see conditionalNewline).
87 self.approx_current_line_len = 0
# Line length above which writeWeakWS may break the line.
88 self.autonewline_after_len = autonl_width
89 self.allow_weak_ws_to_nl = 1
# 0 = no weak whitespace is pending.
90 self.is_after_weak_ws = 0
91 self.use_context = use_context
# Set to 1 after the first "not all XML symbols are converted" warning.
92 self.bad_enc_warned = 0
# Save the current mode on self.mode_stack; unstack_mode() pops it back.
# NOTE(review): the statement that makes `mode` the current mode is not
# visible in this listing -- confirm that it follows the push.
94 def stack_mode(self, mode):
95 """ Put new mode into the stack of modes """
96 self.mode_stack.append(self.mode)
# Restore the mode saved by stack_mode() by popping the top of self.mode_stack.
100 def unstack_mode(self):
102 self.mode = self.mode_stack.pop()
# Save the current escaping policy on self.escape_stack.
# NOTE(review): the statement that applies `ifdo` is not visible in this
# listing -- confirm how (and whether conditionally) the new value is set.
104 def stack_escape(self, ifdo):
105 """ Set if escaping is required. Remember old value. """
106 self.escape_stack.append(self.escape)
# Counterpart of stack_escape(): pops the saved value back into self.escape.
110 def unstack_escape(self):
111 """ Restore old policy of escaping """
112 self.escape = self.escape_stack.pop()
# Save the current ligature policy on self.ligatures_stack, then install `ifdo`.
114 def stack_ligatures(self, ifdo):
115 """ Set if breaking of ligatures is required. Remember old value. """
116 self.ligatures_stack.append(self.ligatures)
# NOTE(review): a line between the push and this assignment is missing from
# the listing, so the assignment may be conditional -- confirm.
118 self.ligatures = ifdo
# Counterpart of stack_ligatures(): pops the saved value back into self.ligatures.
120 def unstack_ligatures(self):
121 """ Restore old policy of breaking ligatures """
122 self.ligatures = self.ligatures_stack.pop()
# Save the current empty-line policy on self.emptylines_stack, then install `ifdo`.
124 def stack_emptylines(self, ifdo):
125 """ Set if empty lines are required. Remember old value. """
126 self.emptylines_stack.append(self.emptylines)
# NOTE(review): a line between the push and this assignment is missing from
# the listing, so the assignment may be conditional -- confirm.
128 self.emptylines = ifdo
# Counterpart of stack_emptylines(): pops the saved value back into self.emptylines.
130 def unstack_emptylines(self):
131 """ Restore old policy of handling of empty lines """
132 self.emptylines = self.emptylines_stack.pop()
# Plain setter for the flag that writeWeakWS() checks before breaking a long line.
134 def set_allow_weak_ws_to_nl(self, flag):
135 """ Set flag if weak spaces can be converted to new lines """
136 self.allow_weak_ws_to_nl = flag
# Emit '\n' through writech() only when something is already on the current line.
138 def conditionalNewline(self):
139 """ Write a new line unless already at the start of a line """
# A counter of 0 means the writer is at the start of a line.
140 if self.approx_current_line_len != 0:
# Second argument 0: the newline is written with esc_specials switched off.
141 self.writech('\n', 0)
# Record that a weak whitespace is pending (stored in self.is_after_weak_ws,
# consumed later by writech) and break the line first if it is already too long.
143 def writeWeakWS(self, hint=1):
144 """ Write a whitespace instead of whitespaces deleted from source XML. Parameter 'hint' is a hack to make <opt/> and <parm/> in <env/> working good. hint=WEAK_WS_IS_NEWLINE if weak space should be converted to newline, not to a space """
145 # A pending weak WS that is a newline must not be downgraded to a plain space
# A pending hint that is at least as strong as the new one is kept.
146 if hint <= self.is_after_weak_ws:
147 # return or avoid next if(). I prefer return.
# NOTE(review): the return statement announced above is not visible in this listing.
149 self.is_after_weak_ws = hint
150 #self.last_ch = ' ' # no, setting so is an error: new lines are not corrected after it. Anyway, check for weak ws is the first action in writech, so it should not lead to errors
152 # Break line if it is too long
153 # We should not break lines if we regard spaces
154 # Check for WEAK_WS_IS_NEWLINE in order to avoid line break in
155 # \begin{foo}[aa.....aa]<no line break here!>[bbb]
# The line is broken only when all three hold: it is longer than
# autonewline_after_len, conversion is allowed, and hint is not WEAK_WS_IS_NEWLINE.
157 if (self.approx_current_line_len > self.autonewline_after_len) and self.allow_weak_ws_to_nl and (hint != WEAK_WS_IS_NEWLINE):
158 self.conditionalNewline()
# Read the pending weak-whitespace hint and reset the pending flag to 0.
# NOTE(review): the return statement is not visible in this listing; per the
# docstring the saved `hint` is what gets returned -- confirm.
161 def ungetWeakWS(self):
162 """ Returns whitespace state and clears WS flag """
163 hint = self.is_after_weak_ws
164 self.is_after_weak_ws = 0
# Write the single character `ch` to the output. Visible steps, in order:
# flush a pending weak whitespace, update the approximate line length, handle
# ligatures and end-of-line characters, escape specials according to the
# current mode, and finally fall back through several ways of writing the
# character. `esc_specials` is the escaping switch passed by callers.
# NOTE(review): many control-flow lines (else/try/except/return/for) are
# missing from this listing; the comments below describe only what is visible.
167 def writech(self, ch, esc_specials):
168 """ Write a char, (maybe) escaping specials """
170 # Write for PDF string
173 self.stack_mode(TEXT)
178 # Write a suspended whitespace
180 if self.is_after_weak_ws:
# Consume the pending hint before anything is written.
181 hint = self.is_after_weak_ws
182 self.is_after_weak_ws = 0
183 if hint == WEAK_WS_IS_NEWLINE:
# No extra newline is needed when the character itself is a line break.
184 if ('\n' != ch) and ('\r' != ch):
185 self.conditionalNewline()
# NOTE(review): the body of the next condition is not visible here.
187 if (self.approx_current_line_len != 0) and not(ch in string.whitespace):
192 self.approx_current_line_len = self.approx_current_line_len + 1
194 # Handle well-known standard TeX ligatures
# The checks compare against self.last_ch, i.e. the previously written
# character; the actions taken for each case are not visible in this listing.
196 if not(self.ligatures):
198 if '-' == self.last_ch:
202 if "'" == self.last_ch:
206 if ('`' == self.last_ch) or ('!' == self.last_ch) or ('?' == self.last_ch):
210 # Handle end-of-line symbols.
211 # XML spec says: 2.11 End-of-Line Handling:
212 # ... contains either the literal two-character sequence "#xD#xA" or
213 # a standalone literal #xD, an XML processor must pass to the
214 # application the single character #xA.
216 if ('\n' == ch) or ('\r' == ch):
218 # We should never get '\r', but only '\n'.
219 # Anyway, someone will copy and paste this code, and code will
220 # get '\r'. In this case rewrite '\r' as '\n'.
225 # TeX interprets empty line as \par, fix this problem
227 if self.line_is_blank and (not self.emptylines):
230 # Now create newline, update counters and return
# The platform line separator goes straight to the stream, bypassing writech.
232 self.stream.write(os.linesep)
233 self.approx_current_line_len = 0
235 self.line_is_blank = 1
238 # Remember the last character
242 # Reset the flag of a blank line
# Only a space (0x20) or a tab (0x09) leaves the line "blank".
244 if not ch in ('\x20', '\x09'):
245 self.line_is_blank = 0
# Escaping of specials: the replacement text comes from the specmap tables
# and is written with escaping switched off (second argument 0).
251 if self.mode == TEXT:
252 # Paul Tremblay changed this code on 2005-03-08
254 self.write(specmap.textescmap_context[ch], 0)
256 self.write(specmap.textescmap[ch], 0)
258 self.write(specmap.mathescmap[ch], 0)
263 # First attempt to write symbol as-is
266 self.stream.write(ch)
271 # Try to write the symbol in the ^^XX form
273 if self.always_ascii:
275 bytes = ch.encode(self.encoding)
# NOTE(review): the loop header is not visible; presumably `by` iterates over
# `bytes`. ord(by) requires 1-character strings (Python 2 str); on Python 3,
# iterating a bytes object yields ints and ord() would fail -- confirm the
# targeted Python version.
277 self.write('^^%02x' % ord(by), 0)
282 # The symbol has to be rewritten using the unimap tables. The table of
# the current mode is consulted first, then the table of the other mode.
285 if self.mode == TEXT:
287 # Text mode, lookup text map
290 self.write(unimap.textmap[chord], 0)
294 # Text mode, lookup math map
296 tostr = unimap.mathmap.get(chord, None)
297 else: # self.mode == MATH:
299 # Math mode, lookup math map
302 self.write(unimap.mathmap[chord], 0)
306 # Math mode, lookup text map
308 tostr = unimap.textmap.get(chord, None)
310 # If mapping in another mode table is found, use a wrapper
# In TEXT mode the wrapper opens with \ensuremath{; the other visible opener
# is \ensuretext{ (NOTE(review): the else line between them is not visible).
313 if self.mode == TEXT:
314 self.write('\\ensuremath{', 0)
316 self.write('\\ensuretext{', 0)
321 # Finally, warn about the bad symbol (once) and write it in the
# \unicodechar{NNN} form, where NNN is the decimal value of chord
323 if not self.bad_enc_warned:
324 sys.stderr.write("texml: not all XML symbols are converted\n");
325 self.bad_enc_warned = 1
326 self.write('\\unicodechar{%d}' % chord, 0)
# Pass characters to writech() together with the `escape` value.
# NOTE(review): the loop header and any handling of the default escape=None
# are not visible in this listing -- presumably `ch` iterates over `str`.
328 def write(self, str, escape = None):
329 """ Write symbols char-by-char in current mode of escaping """
333 self.writech(ch, escape)
# Encode `ch` as UTF-16 big-endian and write the result as backslash +
# three-digit octal escapes, with escaping switched off.
335 def writepdfch(self, ch):
336 """ Write char in Acrobat utf16be encoding """
337 bytes = ch.encode('utf_16_be')
# NOTE(review): the loop header is not visible; presumably `by` iterates over
# `bytes`. ord(by) requires 1-character strings (Python 2 str) -- confirm.
339 self.write('\\%03o' % ord(by), 0)
342 # Wrapper over an output stream that writes in the desired encoding
344 class stream_encoder:
# Look up the encoder function for `encoding` once and keep it for write().
# NOTE(review): the statement storing `stream` on the instance is not visible
# in this listing; write() reads self.stream, so presumably it is set here.
346 def __init__(self, stream, encoding):
347 """ Construct a wrapper by stream and encoding """
# codecs.getencoder raises LookupError for an unknown encoding name.
349 self.encode = codecs.getencoder(encoding)
# Encode `str` and pass the encoded data to the underlying stream.
351 def write(self, str):
352 """ Write string encoded """
# The encoder returns an (encoded_data, length_consumed) pair; only the data is written.
353 self.stream.write(self.encode(str)[0])
356 """ Close underlying stream """