move xml check to celery,
[redakcja.git] / redakcja / static / js / wiki / xslt.js
1 /*
2  *
3  * XSLT STUFF
4  *
5  */
/**
 * Builds an XSLT processor with the given stylesheet imported.
 *
 * @param xsl  stylesheet node handed to XSLTProcessor.importStylesheet()
 * @returns {XSLTProcessor} processor ready to run transformations
 */
function createXSLT(xsl) {
    var processor = new XSLTProcessor();
    processor.importStylesheet(xsl);
    return processor;
}
11
// Cached XSLT processor for the XML -> HTML transformation; null until loaded.
var xml2htmlStylesheet = null;

/**
 * Runs code_block once the XML -> HTML stylesheet has been loaded.
 *
 * The stylesheet is fetched on first use and cached in xml2htmlStylesheet;
 * later calls invoke code_block immediately. While the request is running
 * the page is covered with a blocking overlay.
 *
 * @param {function} code_block  called with no arguments when the stylesheet is ready
 * @param {function} [onError]   called with the jQuery AJAX error arguments
 *                               when the stylesheet cannot be fetched
 */
function withStylesheets(code_block, onError)
{
    if (xml2htmlStylesheet) {
        code_block();
        return;
    }

    $.blockUI({message: 'Ładowanie arkuszy stylów...'});
    $.ajax({
        url: STATIC_URL + 'xsl/wl2html_client.xsl?20110520',
        dataType: 'xml',
        timeout: 10000,
        success: function(data) {
            try {
                xml2htmlStylesheet = createXSLT(data);
            } finally {
                // Release the overlay even when the stylesheet fails to compile.
                $.unblockUI();
            }
            code_block();
        },
        error: function() {
            // Without this the overlay would stay up forever after a failed load.
            $.unblockUI();
            if (typeof onError === 'function')
                onError.apply(this, arguments);
        }
    });
}
36
37
/**
 * Runs code_block with the list of canonical themes.
 *
 * The list is requested from '/editor/themes' on first use and stored on
 * withThemes.canon (one entry per line of the response). If the request
 * fails, null is stored and passed instead. Later calls reuse the stored
 * value without issuing a new request.
 *
 * @param {function} code_block  receives the theme array, or null after a failed load
 * @param onError                accepted but not used by this function
 */
function withThemes(code_block, onError)
{
    if (typeof withThemes.canon !== 'undefined') {
        code_block(withThemes.canon);
        return;
    }

    // Remember the outcome (list or null) and hand it to the caller.
    function finish(themes) {
        withThemes.canon = themes;
        code_block(withThemes.canon);
    }

    $.ajax({
        url: '/editor/themes',
        dataType: 'text',
        success: function(data) {
            finish(data.split('\n'));
        },
        error: function() {
            finish(null);
        }
    });
}
59
60
/**
 * Converts XML source text into HTML nodes using the client-side stylesheet.
 *
 * @param {Object}   options
 * @param {string}   options.xml      XML source text
 * @param {function} options.success  called with the child nodes of the
 *                                    transformed fragment
 * @param {function} [options.error]  called with (message) when the
 *                                    stylesheet cannot be loaded or the
 *                                    transformation yields nothing, and with
 *                                    (message, sourceText) on a parser error
 *
 * Note: when a parser error is detected but no error callback was supplied,
 * options.success is still invoked (behaviour kept from the original code).
 */
function xml2html(options) {
    withStylesheets(function() {
        // A slash followed by whitespace becomes <br />; the whitespace is kept.
        var xml = options.xml.replace(/\/(\s+)/g, '<br />$1');
        // Runs of characters outside the accepted set are wrapped in <alien>.
        xml = xml.replace(/([^a-zA-Z0-9ąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s<>«»\\*_!,:;?&%."'=#()\/-]+)/g, '<alien>$1</alien>');

        var doc = new DOMParser().parseFromString(xml, 'text/xml');
        var error = $('parsererror', doc);

        if (error.length == 0) {
            doc = xml2htmlStylesheet.transformToFragment(doc, document);

            // Some engines return null instead of an empty fragment on failure.
            if (!doc || doc.firstChild === null) {
                if (options.error)
                    options.error("Błąd w przetwarzaniu XML.");
                return;
            }

            error = $('parsererror', doc);
        }

        if (error.length > 0 && options.error) {
            var source = $('sourcetext', doc);
            var source_text = source.text();
            // Empty the source excerpt so it is not repeated inside the message.
            source.text('');
            options.error(error.text(), source_text);
            return;
        }

        options.success(doc.childNodes);

        withThemes(function(canonThemes) {
            if (canonThemes == null)
                return;

            // Mark every theme that is not on the canonical list.
            $('.theme-text-list').addClass('canon').each(function() {
                var themes = $(this).html().split(',');
                for (var i = 0; i < themes.length; i++) {
                    themes[i] = $.trim(themes[i]);
                    if (canonThemes.indexOf(themes[i]) == -1)
                        themes[i] = '<span x-pass-thru="true" class="noncanon">' + themes[i] + "</span>";
                }
                $(this).html(themes.join(', '));
            });
        });
    }, function() { options.error && options.error('Nie udało się załadować XSLT'); });
}
106
/* USEFUL CONSTANTS */

/* Node type codes, compared against node.nodeType */
const ELEMENT_NODE = 1;
const ATTRIBUTE_NODE = 2;
const TEXT_NODE = 3;
const CDATA_SECTION_NODE = 4;
const ENTITY_REFERENCE_NODE = 5;
const ENTITY_NODE = 6;
const PROCESSING_INSTRUCTION_NODE = 7;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;
const NOTATION_NODE = 12;

/* Matches "x-attr-name-<id>" attribute names and captures <id> */
const XATTR_RE = /^x-attr-name-(.*)$/;

/* Token kinds kept on the serializer stack */
const ELEM_START = 1;
const ELEM_END = 2;
const NS_END = 3;

/* Preferred prefixes for well-known namespace URIs */
const NAMESPACES = {
    // namespaces not listed here will be assigned generated names
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://purl.org/dc/elements/1.1/": "dc",
    "http://www.w3.org/XML/1998/namespace": "xml"
};
132
/**
 * Serializer object; all working state is set up by _prepare() when
 * serialize() is called, so the constructor itself does nothing.
 */
function HTMLSerializer() {
}
136
137
138
/**
 * Resets the working state: empty token stack, empty output, prefix counter
 * back at 1 and a namespace map that only knows the XML namespace.
 */
HTMLSerializer.prototype._prepare = function() {
    this.result = "";
    this.stack = [];
    this.nsCounter = 1;

    // The XML namespace is implicit, so it starts out already mapped.
    this.nsMap = {"http://www.w3.org/XML/1998/namespace": "xml"};
};
148
/**
 * Queues a node for serialization by pushing an ELEM_START token for it.
 *
 * @param element  node to be processed later by serialize()
 */
HTMLSerializer.prototype._pushElement = function(element) {
    var token = {"type": ELEM_START, "node": element};
    this.stack.push(token);
};
155
/**
 * Queues all child nodes of element. They are pushed last-to-first so that
 * popping the stack yields them in document order.
 *
 * @param element  node whose childNodes are queued
 */
HTMLSerializer.prototype._pushChildren = function(element) {
    var children = element.childNodes;
    var index = children.length;
    while (index-- > 0)
        this._pushElement(children.item(index));
};
160
/**
 * Pushes an ELEM_END token so the closing tag is written after everything
 * pushed on top of it has been processed.
 *
 * @param tagName  name to close; serialize() writes no tag for an empty string
 */
HTMLSerializer.prototype._pushTagEnd = function(tagName) {
    var token = {"type": ELEM_END, "tagName": tagName};
    this.stack.push(token);
};
167
/**
 * Tells whether the closest preceding element sibling of node is a verse
 * of the same stanza.
 *
 * @param node  element to inspect
 * @returns {boolean} true when node's parent is an element with
 *          x-node="strofa" and the nearest previous element sibling
 *          carries an x-verse attribute
 */
HTMLSerializer.prototype._verseBefore = function(node) {
    var parent = node.parentNode;

    // The parent may be a Document or DocumentFragment, which has no
    // attribute methods; such a parent is never a stanza.
    if (!parent || parent.nodeType != ELEMENT_NODE || parent.getAttribute('x-node') != 'strofa')
        return false;

    // Skip text and comment nodes between the verses.
    var prev = node.previousSibling;
    while ((prev !== null) && (prev.nodeType != ELEMENT_NODE)) {
        prev = prev.previousSibling;
    }

    return (prev !== null) && prev.hasAttribute('x-verse');
};
182
/**
 * Tells whether the element is written without its own tag; this is the
 * case for elements whose x-node attribute is "wers".
 *
 * @param node  element to test
 * @returns {boolean}
 */
HTMLSerializer.prototype._nodeIgnored = function(node) {
    var kind = node.getAttribute('x-node');
    return kind == 'wers';
};
186
/**
 * Follows first children through ignored elements and reports whether the
 * node reached is text that begins with whitespace.
 *
 * @param node  starting node
 * @returns the match result (truthy) or null for a text node, false when
 *          the node reached is not a text node
 */
HTMLSerializer.prototype._ignoredWithWhitespace = function(node) {
    var current = node;

    for (;;) {
        var descend = current.nodeType == ELEMENT_NODE
            && this._nodeIgnored(current)
            && current.childNodes.length > 0;
        if (!descend)
            break;
        current = current.childNodes[0];
    }

    if (current.nodeType != TEXT_NODE)
        return false;
    return current.nodeValue.match(/^\s/);
};
194
195
/**
 * Serializes the annotated HTML tree back into markup text.
 *
 * The tree is walked with an explicit stack of tokens. Elements without an
 * x-node attribute are dropped together with their content; pass-through
 * and "out-of-flow-text" elements contribute only their children.
 *
 * @param rootElement  root of the tree to serialize
 * @param stripOuter   when truthy only the children of rootElement are written
 * @returns {string} the serialized text
 */
HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
{
    var self = this;
    self._prepare();

    if (stripOuter)
        self._pushChildren(rootElement);
    else
        self._pushElement(rootElement);

    // Text is held back by one step so that a verse separator can be
    // written in front of the whitespace that precedes the next verse.
    var pending = '';

    while (self.stack.length > 0) {
        var token = self.stack.pop();

        if (token.type === NS_END) {
            self._unassignNamespace(token.namespace);
            continue;
        }

        if (token.type === ELEM_END) {
            self.result += pending;
            pending = '';
            if (token.tagName != '')
                self.result += "</" + token.tagName + ">";
            continue;
        }

        var node = token.node;
        var kind = node.nodeType;

        if (kind === TEXT_NODE) {
            self.result += pending;
            pending = node.nodeValue.replace(/&/g, '&amp;').replace(/</g, '&lt;');
            continue;
        }

        if (kind === COMMENT_NODE) {
            self.result += pending + '<!--' + node.nodeValue + '-->';
            pending = '';
            continue;
        }

        if (kind !== ELEMENT_NODE)
            continue;

        // Pass-through wrappers vanish; only their content is serialized.
        if (node.hasAttribute('x-pass-thru') || node.hasAttribute('data-pass-thru')) {
            self._pushChildren(node);
            continue;
        }

        // Elements without x-node do not belong to the document.
        if (!node.hasAttribute('x-node'))
            continue;

        if (node.getAttribute('x-node') === 'out-of-flow-text') {
            self._pushChildren(node);
            continue;
        }

        if (node.hasAttribute('x-verse') && self._verseBefore(node)) {
            self.result += '/';
            // add whitespace if there's none
            var spaced = pending.match(/^\s/) || self._ignoredWithWhitespace(node);
            if (!spaced)
                self.result += ' ';
        }

        self.result += pending;
        pending = '';
        self._serializeElement(node);
    }

    self.result += pending;

    return self.result;
};
269
/*
 * Marks a namespace URI as no longer bound to a prefix.
 *
 * TODO: this doesn't support prefix redefinitions
 */
HTMLSerializer.prototype._unassignNamespace = function(nsData) {
    var uri = nsData.uri;
    this.nsMap[uri] = undefined;
};
276
/**
 * Resolves the prefix to use for a namespace URI, binding a new one if the
 * URI is not mapped in the current context.
 *
 * @param uri  namespace URI, or null for "no namespace"
 * @returns {{prefix: string, uri: string, fresh: boolean}} fresh is true
 *          when the binding was created by this call
 */
HTMLSerializer.prototype._assignNamespace = function(uri) {
    // default namespace
    if (uri === null)
        return {"prefix": "", "uri": "", "fresh": false};

    var known = this.nsMap[uri];
    if (known !== undefined)
        return {"prefix": known, "uri": uri, "fresh": false};

    // Not bound in the current context: prefer a predefined prefix,
    // otherwise generate "ns<N>".
    var prefix = NAMESPACES[uri];
    if (prefix === undefined) {
        prefix = "ns" + this.nsCounter;
        this.nsCounter += 1;
    }

    this.nsMap[uri] = prefix;
    return {"prefix": prefix, "uri": uri, "fresh": true};
};
302
/**
 * Builds a qualified name: "prefix:name", or just name when prefix is empty.
 */
HTMLSerializer.prototype._join = function(prefix, name) {
    return !!prefix ? prefix + ":" + name : name;
};
308
/**
 * Builds "prefix:name", or just prefix when name is empty
 * (used for "xmlns" versus "xmlns:<prefix>").
 */
HTMLSerializer.prototype._rjoin = function(prefix, name) {
    return !!name ? prefix + ":" + name : prefix;
};
314
/**
 * Writes the opening tag of an element to this.result and schedules its
 * children, closing tag and namespace clean-up on the stack.
 *
 * The tag name comes from x-node and x-ns. Attributes are rebuilt from the
 * x-attr-name-<id> / x-attr-value-<id> / x-attr-ns-<id> triples. Ignored
 * elements (see _nodeIgnored) write no tag, only their children.
 *
 * Attribute values and namespace URIs are escaped (&, <, ") so that the
 * produced markup stays well-formed.
 *
 * @param node  element carrying an x-node attribute
 */
HTMLSerializer.prototype._serializeElement = function(node) {
    var self = this;
    var newNamespaces = [];

    // Escapes a value for use inside a double-quoted attribute.
    function escapeAttribute(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/"/g, '&quot;');
    }

    // Resolves a namespace; a binding created here is declared on this
    // element and released again once the element has been closed.
    function bindNamespace(uri) {
        var nsData = self._assignNamespace(uri);
        if (nsData.fresh) {
            newNamespaces.push(nsData);
            self.stack.push({
                "type": NS_END,
                "namespace": nsData
            });
        }
        return nsData;
    }

    if (self._nodeIgnored(node)) {
        // No tag is written; the empty end marker still flushes buffered text.
        self._pushTagEnd('');
        self._pushChildren(node);
        return;
    }

    var tagName = self._join(bindNamespace(node.getAttribute('x-ns')).prefix,
                             node.getAttribute('x-node'));

    /* retrieve attributes */
    var attributeIDs = [];
    for (var i = 0; i < node.attributes.length; i++) {
        // check if name starts with "x-attr-name"
        var m = node.attributes.item(i).name.match(XATTR_RE);
        if (m !== null)
            attributeIDs.push(m[1]);
    }

    /* print out */
    self.result += '<' + tagName;

    for (var a = 0; a < attributeIDs.length; a++) {
        var id = attributeIDs[a];
        var attrNs = bindNamespace(node.getAttribute('x-attr-ns-' + id));

        self.result += ' ' + self._join(attrNs.prefix, node.getAttribute('x-attr-name-' + id));
        self.result += '="' + escapeAttribute(node.getAttribute('x-attr-value-' + id)) + '"';
    }

    /* print new namespace declarations */
    for (var n = 0; n < newNamespaces.length; n++) {
        self.result += " " + self._rjoin("xmlns", newNamespaces[n].prefix);
        self.result += '="' + escapeAttribute(newNamespaces[n].uri) + '"';
    }

    if (node.childNodes.length > 0) {
        self.result += ">";
        self._pushTagEnd(tagName);
        self._pushChildren(node);
    }
    else {
        self.result += "/>";
    }
};
385
/**
 * Serializes an annotated HTML tree back to source text.
 *
 * @param {Object}   params
 * @param            params.element     root of the tree to serialize
 * @param            params.stripOuter  passed to HTMLSerializer.serialize()
 * @param {function} params.success     receives the serialized text
 * @param {function} params.error       receives a message when serialization
 *                                      or the success callback throws
 */
function html2text(params) {
    try {
        var serializer = new HTMLSerializer();
        var text = serializer.serialize(params.element, params.stripOuter);
        params.success(text);
    } catch (err) {
        params.error("Nie udało się zserializować tekstu:" + err);
    }
}