#1557: colouring ampersand breaks XML
[redakcja.git] / redakcja / static / js / wiki / xslt.js
1 /*
2  *
3  * XSLT STUFF
4  *
5  */
/**
 * Builds an XSLTProcessor with the given stylesheet already imported.
 *
 * @param xsl  stylesheet passed straight to XSLTProcessor.importStylesheet
 * @returns the configured XSLTProcessor
 */
function createXSLT(xsl) {
    var processor = new XSLTProcessor();
    processor.importStylesheet(xsl);
    return processor;
}
11
// Cached processor for the XML -> HTML transform; stays null until the first successful load.
var xml2htmlStylesheet = null;

/**
 * Runs code_block with the stylesheets loaded.
 *
 * If the stylesheet is already cached, code_block is called immediately.
 * Otherwise the UI is blocked, the stylesheet is fetched, and code_block is
 * called after it has been compiled and cached.
 *
 * @param code_block  function called (without arguments) once the stylesheet is available
 * @param onError     optional function called with jQuery's ajax error arguments when loading fails
 */
function withStylesheets(code_block, onError)
{
    if (xml2htmlStylesheet) {
        code_block();
        return;
    }

    $.blockUI({message: 'Ładowanie arkuszy stylów...'});
    $.ajax({
        url: STATIC_URL + 'xsl/wl2html_client.xsl?20110112',
        dataType: 'xml',
        timeout: 10000,
        success: function(data) {
            xml2htmlStylesheet = createXSLT(data);
            $.unblockUI();
            code_block();
        },
        error: function() {
            // Release the overlay on failure as well; otherwise the page stays blocked.
            $.unblockUI();
            if (onError)
                onError.apply(this, arguments);
        }
    });
}
36
37
/**
 * Runs code_block with the canonical themes loaded.
 *
 * The list is fetched from '/themes' once and cached on withThemes.canon as
 * an array of lines. If the request fails, null is cached instead and passed
 * to code_block; later calls reuse the cached null without retrying.
 *
 * @param code_block  function called with the cached theme list (array or null)
 * @param onError     accepted but never invoked by this function
 */
function withThemes(code_block, onError)
{
    if (typeof withThemes.canon != 'undefined') {
        code_block(withThemes.canon);
        return;
    }

    // Store the outcome and hand it to the caller.
    function finish(themes) {
        withThemes.canon = themes;
        code_block(withThemes.canon);
    }

    $.ajax({
        url: '/themes',
        dataType: 'text',
        success: function(data) {
            finish(data.split('\n'));
        },
        error: function() {
            finish(null);
        }
    });
}
59
60
/**
 * Converts XML source text into an HTML node using the client-side stylesheet.
 *
 * @param options.xml      XML source text
 * @param options.success  called with the first child of the transformed fragment
 * @param options.error    optional; called with an error message and, for parser
 *                         errors, the text of the parser's 'sourcetext' element
 */
function xml2html(options) {
    withStylesheets(function() {
        // A slash followed by whitespace becomes a <br /> element (whitespace kept).
        var xml = options.xml.replace(/\/(\s+)/g, '<br />$1');
        // Runs of characters outside the accepted set are wrapped in <alien>.
        xml = xml.replace(/([^a-zA-Z0-9ąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s<>«»\\*_!,:;?&%."'=#()\/-]+)/g, '<alien>$1</alien>');
        var parser = new DOMParser();
        var doc = parser.parseFromString(xml, 'text/xml');
        var error = $('parsererror', doc);

        if (error.length == 0) {
            doc = xml2htmlStylesheet.transformToFragment(doc, document);

            // A missing or empty fragment means the transform produced nothing usable.
            if (!doc || doc.firstChild === null) {
                if (options.error)
                    options.error("Błąd w przetwarzaniu XML.");
                return;
            }

            error = $('parsererror', doc);
        }

        if (error.length > 0 && options.error) {
            var source = $('sourcetext', doc);
            var source_text = source.text();
            // Empty the source excerpt so it is not repeated inside the error message text.
            source.text('');
            options.error(error.text(), source_text);
        } else {
            options.success(doc.firstChild);

            withThemes(function(canonThemes) {
                if (canonThemes != null) {
                    $('.theme-text-list').addClass('canon').each(function(){
                        var themes = $(this).html().split(',');
                        for (var i = 0; i < themes.length; i++) {
                            themes[i] = $.trim(themes[i]);
                            // Mark themes that are not on the canonical list.
                            if (canonThemes.indexOf(themes[i]) == -1)
                                themes[i] = '<span x-pass-thru="true" class="noncanon">' + themes[i] + "</span>";
                        }
                        $(this).html(themes.join(', '));
                    });
                }
            });
        }
    }, function() { options.error && options.error('Nie udało się załadować XSLT'); });
}
106
/* USEFUL CONSTANTS */

/* Numeric node type codes, matching the values of DOM Node.nodeType. */
const ELEMENT_NODE = 1;
const ATTRIBUTE_NODE = 2;
const TEXT_NODE = 3;
const CDATA_SECTION_NODE = 4;
const ENTITY_REFERENCE_NODE = 5;
const ENTITY_NODE = 6;
const PROCESSING_INSTRUCTION_NODE = 7;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;
const NOTATION_NODE = 12;

/* Matches "x-attr-name-<id>" attribute names and captures <id>. */
const XATTR_RE = /^x-attr-name-(.*)$/;

/* Token kinds stored on the serializer's stack. */
const ELEM_START = 1;
const ELEM_END = 2;
const NS_END = 3;

/* Preferred prefixes for known namespace URIs; other URIs get a generated prefix. */
const NAMESPACES = {
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://purl.org/dc/elements/1.1/": "dc",
    "http://www.w3.org/XML/1998/namespace": "xml"
};
132
/**
 * Serializer object; the constructor sets no state.
 * Working state is created by _prepare().
 */
function HTMLSerializer() {
}

/**
 * Resets the working state: an empty token stack, an empty result string,
 * a namespace map holding only the implicit "xml" prefix, and the counter
 * used for generated prefixes set back to 1.
 */
HTMLSerializer.prototype._prepare = function() {
    var implicitNamespaces = {};
    // the XML namespace never has to be declared
    implicitNamespaces["http://www.w3.org/XML/1998/namespace"] = "xml";

    this.stack = [];
    this.nsMap = implicitNamespaces;
    this.result = "";
    this.nsCounter = 1;
};
148
/**
 * Pushes an ELEM_START token for the given node onto the stack.
 */
HTMLSerializer.prototype._pushElement = function(element) {
    var startToken = {};
    startToken["type"] = ELEM_START;
    startToken["node"] = element;
    this.stack.push(startToken);
};
155
/**
 * Pushes every child of the element onto the stack, last child first,
 * so that children are popped in document order.
 */
HTMLSerializer.prototype._pushChildren = function(element) {
    var index = element.childNodes.length;
    while (--index >= 0)
        this._pushElement(element.childNodes.item(index));
};
160
/**
 * Pushes an ELEM_END token carrying the name of the tag to close.
 */
HTMLSerializer.prototype._pushTagEnd = function(tagName) {
    var endToken = {};
    endToken["type"] = ELEM_END;
    endToken["tagName"] = tagName;
    this.stack.push(endToken);
};
167
/**
 * Tells whether the nearest preceding element sibling of node is a verse
 * (has an 'x-verse' attribute) inside the same stanza ('strofa') parent.
 *
 * @returns true only when the parent is an element with x-node="strofa"
 *          and the previous element sibling has 'x-verse'; false otherwise
 */
HTMLSerializer.prototype._verseBefore = function(node) {
    var parent = node.parentNode;

    // Documents and fragments have no attribute API, so only element parents qualify.
    if (!parent || parent.nodeType != ELEMENT_NODE)
        return false;
    if (!parent.hasAttribute('x-node') || parent.getAttribute('x-node') != 'strofa')
        return false;

    // Skip text and other non-element siblings.
    var prev = node.previousSibling;
    while ((prev !== null) && (prev.nodeType != ELEMENT_NODE)) {
        prev = prev.previousSibling;
    }

    return (prev !== null) && prev.hasAttribute('x-verse');
};
182
/**
 * Tells whether the node is a 'wers' node, which is serialized without a tag of its own.
 */
HTMLSerializer.prototype._nodeIgnored = function(node) {
    var kind = node.getAttribute('x-node');
    return kind == 'wers';
};
186
/**
 * Descends through first children for as long as the current node is an
 * ignored element with children, then checks the node reached.
 *
 * @returns the match result of /^\s/ against the text (an array, or null)
 *          when a text node is reached; false otherwise
 */
HTMLSerializer.prototype._ignoredWithWhitespace = function(node) {
    var current = node;
    for (;;) {
        var descend = current.nodeType == ELEMENT_NODE
            && this._nodeIgnored(current)
            && current.childNodes.length > 0;
        if (!descend)
            break;
        current = current.childNodes[0];
    }

    if (current.nodeType != TEXT_NODE)
        return false;
    return current.nodeValue.match(/^\s/);
};
194
195
/**
 * Serializes an HTML tree back into XML text.
 *
 * Elements with 'x-pass-thru' / 'data-pass-thru', and elements whose x-node is
 * 'out-of-flow-text', contribute only their children. Other elements without
 * 'x-node' are skipped entirely. Text is escaped (& and <) so the output stays
 * well-formed XML.
 *
 * @param rootElement  node to serialize
 * @param stripOuter   when truthy, only the children of rootElement are serialized
 * @returns the serialized text
 */
HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
{
    var self = this;

    // DOM text values are unescaped; markup-significant characters must be escaped again.
    function escapeText(text) {
        return text.replace(/&/g, '&amp;').replace(/</g, '&lt;');
    }

    self._prepare();

    if (!stripOuter)
        self._pushElement(rootElement);
    else
        self._pushChildren(rootElement);

    // The latest text node is held back so that a verse separator can be
    // written in front of it when the next element is a following verse.
    var text_buffer = '';

    while (self.stack.length > 0) {
        var token = self.stack.pop();

        if (token.type === ELEM_END) {
            self.result += text_buffer;
            text_buffer = '';
            // an empty tag name only flushes the buffered text
            if (token.tagName != '')
                self.result += "</" + token.tagName + ">";
            continue;
        }

        if (token.type === NS_END) {
            self._unassignNamespace(token.namespace);
            continue;
        }

        switch (token.node.nodeType) {
            case ELEMENT_NODE:
                if (token.node.hasAttribute('x-pass-thru')
                 || token.node.hasAttribute('data-pass-thru')) {
                    self._pushChildren(token.node);
                    break;
                }

                if (!token.node.hasAttribute('x-node'))
                    break;

                var xnode = token.node.getAttribute('x-node');

                if (xnode === 'out-of-flow-text') {
                    self._pushChildren(token.node);
                    break;
                }

                if (token.node.hasAttribute('x-verse') && self._verseBefore(token.node)) {
                    self.result += '/';
                    // add whitespace if there's none
                    if (!(text_buffer.match(/^\s/) || self._ignoredWithWhitespace(token.node)))
                        self.result += ' ';
                }

                self.result += text_buffer;
                text_buffer = '';
                self._serializeElement(token.node);
                break;
            case TEXT_NODE:
                self.result += text_buffer;
                text_buffer = escapeText(token.node.nodeValue);
                break;
        }
    }
    self.result += text_buffer;

    return self.result;
};
264
/*
 * Clears the prefix recorded for nsData.uri (the entry is set to undefined, not deleted).
 * TODO: this doesn't support prefix redefinitions
 */
HTMLSerializer.prototype._unassignNamespace = function(nsData) {
    var uri = nsData.uri;
    this.nsMap[uri] = undefined;
};
271
/**
 * Resolves the prefix for a namespace URI, registering one if needed.
 *
 * @param uri  namespace URI, or null for the default namespace
 * @returns an object with "prefix", "uri" and "fresh"; "fresh" is true only
 *          when the prefix was registered by this call
 */
HTMLSerializer.prototype._assignNamespace = function(uri) {
    // default namespace
    if (uri === null)
        return {"prefix": "", "uri": "", "fresh": false};

    var known = this.nsMap[uri];
    if (known !== undefined)
        return {"prefix": known, "uri": uri, "fresh": false};

    // not defined in the current context yet: use the predefined prefix or generate one
    var prefix = NAMESPACES[uri];
    if (prefix === undefined) {
        prefix = "ns" + this.nsCounter;
        this.nsCounter += 1;
    }

    this.nsMap[uri] = prefix;
    return {"prefix": prefix, "uri": uri, "fresh": true};
};
297
/**
 * Builds "prefix:name", or just name when the prefix is empty or missing.
 */
HTMLSerializer.prototype._join = function(prefix, name) {
    return prefix ? prefix + ":" + name : name;
};
303
/**
 * Builds "prefix:name", or just prefix when the name is empty or missing.
 */
HTMLSerializer.prototype._rjoin = function(prefix, name) {
    return name ? prefix + ":" + name : prefix;
};
309
/**
 * Writes the opening tag of an element to this.result and schedules its
 * children and closing tag on the stack.
 *
 * Ignored nodes (see _nodeIgnored) write no tag: only an empty tag end and
 * their children are scheduled. For other nodes the tag name comes from
 * 'x-node' / 'x-ns', and attributes are read from the 'x-attr-name-<id>',
 * 'x-attr-ns-<id>' and 'x-attr-value-<id>' attribute triples. Attribute
 * values and namespace URIs are escaped for use inside double quotes.
 */
HTMLSerializer.prototype._serializeElement = function(node) {
    var self = this;

    if (self._nodeIgnored(node)) {
        // the empty tag end only flushes buffered text when it is popped
        self._pushTagEnd('');
        self._pushChildren(node);
        return;
    }

    // Values are emitted between double quotes, so &, < and " must be escaped.
    function escapeAttr(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/"/g, '&quot;');
    }

    var newNamespaces = [];

    // Resolve a namespace; one introduced here is declared on this tag and
    // scheduled for removal through an NS_END token.
    function useNamespace(uri) {
        var nsData = self._assignNamespace(uri);
        if (nsData.fresh) {
            newNamespaces.push(nsData);
            self.stack.push({
                "type": NS_END,
                "namespace": nsData
            });
        }
        return nsData;
    }

    var elementNs = useNamespace(node.getAttribute('x-ns'));
    var tagName = self._join(elementNs.prefix, node.getAttribute('x-node'));

    self.result += '<' + tagName;

    /* attributes */
    for (var i = 0; i < node.attributes.length; i++) {
        // only names of the form "x-attr-name-<id>" describe an output attribute
        var m = node.attributes.item(i).name.match(XATTR_RE);
        if (m === null)
            continue;

        var id = m[1];
        var attrNs = useNamespace(node.getAttribute('x-attr-ns-' + id));

        self.result += ' ' + self._join(attrNs.prefix, node.getAttribute('x-attr-name-' + id));
        self.result += '="' + escapeAttr(node.getAttribute('x-attr-value-' + id)) + '"';
    }

    /* new namespace declarations */
    for (var j = 0; j < newNamespaces.length; j++) {
        self.result += " " + self._rjoin("xmlns", newNamespaces[j].prefix);
        self.result += '="' + escapeAttr(newNamespaces[j].uri) + '"';
    }

    if (node.childNodes.length > 0) {
        self.result += ">";
        self._pushTagEnd(tagName);
        self._pushChildren(node);
    }
    else {
        self.result += "/>";
    }
};
380
/**
 * Serializes params.element with a new HTMLSerializer and reports the outcome.
 *
 * @param params.element     node to serialize
 * @param params.stripOuter  passed through to HTMLSerializer.serialize
 * @param params.success     called with the serialized text
 * @param params.error       called with a message if anything in the try block throws
 */
function html2text(params) {
    try {
        var serializer = new HTMLSerializer();
        var text = serializer.serialize(params.element, params.stripOuter);
        params.success(text);
    } catch (err) {
        params.error("Nie udało się zserializować tekstu:" + err);
    }
}