1 """ TeXML Writer and string services """
2 # $Id: texmlwr.py,v 1.9 2006-07-20 03:56:27 olpa Exp $
5 # Modes of processing of special characters
12 WEAK_WS_IS_NEWLINE = 2
14 from Texml import unimap
15 from Texml import specmap
21 if sys.version_info[0] >= 3:
34 # Handling of '--', '---' and other ligatures
37 # Modes of transformation can be tuned and nested
47 # Current length of a line that is being written. Value usually
48 # incorrect, but always correct to detect the start of a line (0)
49 # > approx_current_line_len
# If the length of the current line is greater than this value,
# then the writer converts weak whitespaces into newlines.
52 # And flag if it is possible
53 # > autonewline_after_len
54 # > allow_weak_ws_to_nl
56 # We usually don't allow empty lines in output because such lines
57 # cause a paragraph break in TeX.
60 # always_ascii: If attempts to write a character to the output
61 # stream have failed, then the code converts the symbol to bytes,
62 # and these bytes are written in the ^^xx format.
# bad_enc_warned: TeXML issues a warning if it fails to convert
# a symbol. This flag ensures that the warning is issued only once.
68 def __init__(self, stream, encoding, autonl_width, use_context = 0, always_ascii = 0):
69 """ Remember output stream, initialize data structures """
71 self.always_ascii = always_ascii
72 self.encoding = encoding
76 self.stream = stream_encoder(stream, encoding)
77 except Exception as e:
78 raise ValueError("Can't create encoder: '%s'" % e)
79 # Continue initialization
83 self.line_is_blank = 1
87 self.escape_stack = []
89 self.ligatures_stack = []
91 self.emptylines_stack = []
92 self.approx_current_line_len = 0
93 self.autonewline_after_len = autonl_width
94 self.allow_weak_ws_to_nl = 1
95 self.is_after_weak_ws = 0
96 self.use_context = use_context
97 self.bad_enc_warned = 0
99 def stack_mode(self, mode):
100 """ Put new mode into the stack of modes """
101 self.mode_stack.append(self.mode)
def unstack_mode(self):
    """ Restore the mode that was saved by the latest stack_mode() """
    # The most recent entry of the mode stack becomes the current mode.
    saved_mode = self.mode_stack.pop()
    self.mode = saved_mode
109 def stack_escape(self, ifdo):
110 """ Set if escaping is required. Remember old value. """
111 self.escape_stack.append(self.escape)
def unstack_escape(self):
    """ Bring back the escaping policy saved by the latest stack_escape() """
    # Take the most recently remembered value off the stack and reinstate it.
    saved_policy = self.escape_stack.pop()
    self.escape = saved_policy
119 def stack_ligatures(self, ifdo):
120 """ Set if breaking of ligatures is required. Remember old value. """
121 self.ligatures_stack.append(self.ligatures)
123 self.ligatures = ifdo
def unstack_ligatures(self):
    """ Bring back the ligature-breaking policy saved by stack_ligatures() """
    # Take the most recently remembered value off the stack and reinstate it.
    saved_policy = self.ligatures_stack.pop()
    self.ligatures = saved_policy
129 def stack_emptylines(self, ifdo):
130 """ Set if empty lines are required. Remember old value. """
131 self.emptylines_stack.append(self.emptylines)
133 self.emptylines = ifdo
def unstack_emptylines(self):
    """ Bring back the empty-line policy saved by stack_emptylines() """
    # Take the most recently remembered value off the stack and reinstate it.
    saved_policy = self.emptylines_stack.pop()
    self.emptylines = saved_policy
def set_allow_weak_ws_to_nl(self, flag):
    """ Allow or forbid turning weak whitespace into a line break.

    The value is stored as-is in allow_weak_ws_to_nl, which writeWeakWS()
    consults before it breaks an over-long line.
    """
    self.allow_weak_ws_to_nl = flag
def conditionalNewline(self):
    """ Emit a line break, but only if the current line is not empty """
    # A zero length counter means we are at the start of a line already.
    if self.approx_current_line_len == 0:
        return
    self.writech('\n', 0)
148 def writeWeakWS(self, hint=1):
149 """ Write a whitespace instead of whitespaces deleted from source XML. Parameter 'hint' is a hack to make <opt/> and <parm/> in <env/> working good. hint=WEAK_WS_IS_NEWLINE if weak space should be converted to newline, not to a space """
150 # weak WS that is newline can not be converted to ws that is space
151 if hint <= self.is_after_weak_ws:
152 # return or avoid next if(). I prefer return.
154 self.is_after_weak_ws = hint
155 #self.last_ch = ' ' # no, setting so is an error: new lines are not corrected after it. Anyway, check for weak ws is the first action in writech, so it should not lead to errors
157 # Break line if it is too long
158 # We should not break lines if we regard spaces
159 # Check for WEAK_WS_IS_NEWLINE in order to avoid line break in
160 # \begin{foo}[aa.....aa]<no line break here!>[bbb]
162 if (self.approx_current_line_len > self.autonewline_after_len) and self.allow_weak_ws_to_nl and (hint != WEAK_WS_IS_NEWLINE):
163 self.conditionalNewline()
def ungetWeakWS(self):
    """ Returns whitespace state and clears WS flag

    The returned value is whatever is_after_weak_ws held on entry
    (0 when no weak whitespace was pending); afterwards the flag is 0.
    """
    hint = self.is_after_weak_ws
    self.is_after_weak_ws = 0
    # Hand the saved state back to the caller, as the docstring promises.
    return hint
172 def writech(self, ch, esc_specials):
173 """ Write a char, (maybe) escaping specials """
175 # Write for PDF string
178 self.stack_mode(TEXT)
183 # Write a suspended whitespace
185 if self.is_after_weak_ws:
186 hint = self.is_after_weak_ws
187 self.is_after_weak_ws = 0
188 if hint == WEAK_WS_IS_NEWLINE:
189 if ('\n' != ch) and ('\r' != ch):
190 self.conditionalNewline()
192 if (self.approx_current_line_len != 0) and not(ch in string.whitespace):
197 self.approx_current_line_len = self.approx_current_line_len + 1
199 # Handle well-known standard TeX ligatures
201 if not(self.ligatures):
203 if '-' == self.last_ch:
207 if "'" == self.last_ch:
211 if ('`' == self.last_ch) or ('!' == self.last_ch) or ('?' == self.last_ch):
215 # Handle end-of-line symbols.
216 # XML spec says: 2.11 End-of-Line Handling:
217 # ... contains either the literal two-character sequence "#xD#xA" or
218 # a standalone literal #xD, an XML processor must pass to the
219 # application the single character #xA.
221 if ('\n' == ch) or ('\r' == ch):
223 # We should never get '\r', but only '\n'.
224 # Anyway, someone will copy and paste this code, and code will
225 # get '\r'. In this case rewrite '\r' as '\n'.
230 # TeX interprets empty line as \par, fix this problem
232 if self.line_is_blank and (not self.emptylines):
235 # Now create newline, update counters and return
237 self.stream.write(os.linesep)
238 self.approx_current_line_len = 0
240 self.line_is_blank = 1
243 # Remember the last character
247 # Reset the flag of a blank line
249 if not ch in ('\x20', '\x09'):
250 self.line_is_blank = 0
256 if self.mode == TEXT:
257 # Paul Tremblay changed this code on 2005-03-08
259 self.write(specmap.textescmap_context[ch], 0)
261 self.write(specmap.textescmap[ch], 0)
263 self.write(specmap.mathescmap[ch], 0)
268 # First attempt to write symbol as-is
271 self.stream.write(ch)
276 # Try write the symbol in the ^^XX form
278 if self.always_ascii:
280 bytes = ch.encode(self.encoding)
282 self.write('^^%02x' % byteord(by), 0)
284 except Exception as e:
287 # Symbol have to be rewritten. Let start with math mode.
290 if self.mode == TEXT:
292 # Text mode, lookup text map
295 self.write(unimap.textmap[chord], 0)
299 # Text mode, lookup math map
301 tostr = unimap.mathmap.get(chord, None)
302 else: # self.mode == MATH:
304 # Math mode, lookup math map
307 self.write(unimap.mathmap[chord], 0)
311 # Math mode, lookup text map
313 tostr = unimap.textmap.get(chord, None)
315 # If mapping in another mode table is found, use a wrapper
318 if self.mode == TEXT:
319 self.write('\\ensuremath{', 0)
321 self.write('\\ensuretext{', 0)
326 # Finally, warn about bad symbol and write it in the &#xNNN; form
328 if not self.bad_enc_warned:
329 sys.stderr.write("texml: not all XML symbols are converted\n");
330 self.bad_enc_warned = 1
331 self.write('\\unicodechar{%d}' % chord, 0)
333 def write(self, str, escape = None):
334 """ Write symbols char-by-char in current mode of escaping """
338 self.writech(ch, escape)
def writepdfch(self, ch):
    """ Write char in Acrobat utf16be encoding

    The character is encoded as UTF-16 big-endian, and each byte of the
    result is passed to write() as a backslash followed by three octal
    digits, with the escape argument set to 0.
    """
    # Iterate over the encoded bytes explicitly; the per-byte name 'by'
    # must be bound by this loop. The local is not called 'bytes' so the
    # builtin is not shadowed.
    encoded = ch.encode('utf_16_be')
    for by in encoded:
        # byteord is defined elsewhere in this module (not shown here).
        self.write('\\%03o' % byteord(by), 0)
# Wrapper over output stream to write in desired encoding
349 class stream_encoder:
351 def __init__(self, stream, encoding):
352 """ Construct a wrapper by stream and encoding """
354 self.encode = codecs.getencoder(encoding)
356 def write(self, str):
357 """ Write string encoded """
358 self.stream.write(self.encode(str)[0])
361 """ Close underlying stream """