Catalogue: wikidata suggestions
[redakcja.git] / src / redakcja / static / js / wiki / xslt.js
1 /*
2  *
3  * XSLT STUFF
4  *
5  */
/**
 * Builds an XSLT processor with the given stylesheet already imported.
 *
 * @param {Node} xsl - stylesheet handed to XSLTProcessor.importStylesheet
 * @returns {XSLTProcessor} the processor holding that stylesheet
 */
function createXSLT(xsl) {
    const processor = new XSLTProcessor();
    processor.importStylesheet(xsl);
    return processor;
}
11
// Cached XSLT processor for the XML -> HTML stylesheet; null until first loaded.
var xml2htmlStylesheet = null;

/**
 * Runs code_block once the XML -> HTML stylesheet is available.
 *
 * If the stylesheet is already cached, code_block runs synchronously.
 * Otherwise the UI is blocked, the stylesheet is requested from
 * '/wlxml/wl2html.xsl', and code_block runs from the success callback.
 *
 * @param {Function} code_block - called with no arguments when the stylesheet is ready
 * @param {Function} [onError] - called with jQuery's ajax error arguments if loading fails
 */
function withStylesheets(code_block, onError)
{
    if (xml2htmlStylesheet) {
        code_block();
        return;
    }

    $.blockUI({message: 'Ładowanie arkuszy stylów...'});
    $.ajax({
        url: '/wlxml/wl2html.xsl',
        dataType: 'xml',
        timeout: 10000,
        success: function(data) {
            // Unblock first so a stylesheet that fails to compile cannot leave the overlay up.
            $.unblockUI();
            xml2htmlStylesheet = createXSLT(data);
            code_block();
        },
        error: function() {
            // The overlay was raised above; it must come down on failure too,
            // otherwise the page stays blocked after a failed or timed-out load.
            $.unblockUI();
            if (onError) {
                onError.apply(this, arguments);
            }
        }
    });
}
35
36
/**
 * Converts XML source text to HTML nodes using the cached XSLT stylesheet.
 *
 * Reads from options:
 *   - xml:     the source text to convert
 *   - base:    URL (resolved against the current page) that image src values are resolved against
 *   - success: called with the child nodes of the transformed fragment
 *   - error:   optional; called with a message (and, for parse errors, the
 *              text of the 'sourcetext' element) when conversion fails
 *
 * If a parse error is found but no error callback was supplied, the
 * document is still passed on to the success path (original behaviour).
 */
function xml2html(options) {
    withStylesheets(function() {
        // A slash followed by whitespace is turned into an explicit <br /> element.
        let xml = options.xml.replace(/\/(\s+)/g, '<br />$1');
        // Runs of characters outside the expected set are wrapped in <alien>.
        xml = xml.replace(/([^a-zA-Z0-9ąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s<>«»\\*_!,:;?&%."'=#()\/-]+)/g, '<alien>$1</alien>');

        const parser = new DOMParser();
        let doc = parser.parseFromString(xml, 'text/xml');
        let error = $('parsererror', doc);

        if (error.length == 0) {
            doc = xml2htmlStylesheet.transformToFragment(doc, document);

            // Some engines return null instead of an empty fragment when the transform fails.
            if (!doc || doc.firstChild === null) {
                if (options.error) {
                    options.error("Błąd w przetwarzaniu XML.");
                }
                return;
            }

            error = $('parsererror', doc);
        }

        if (error.length > 0 && options.error) {
            const source = $('sourcetext', doc);
            const sourceText = source.text();
            // Emptied so that error.text() below does not repeat the source excerpt.
            source.text('');
            options.error(error.text(), sourceText);
            return;
        }

        const galleryUrl = new URL(options.base, window.location.href);
        $("img", $(doc.childNodes)).each(function() {
            const image = $(this);
            image.attr('src', new URL(image.attr('src'), galleryUrl).href);
        });

        options.success(doc.childNodes);

        $.themes.withCanon(function(canonThemes) {
            if (canonThemes != null) {
                $('.theme-text-list').addClass('canon').each(function() {
                    const themes = $(this).html().split(',');
                    for (let i = 0; i < themes.length; i++) {
                        themes[i] = $.trim(themes[i]);
                        // Themes missing from the canonical list get a marker span;
                        // x-pass-thru keeps the span itself out of the serialized output.
                        if (canonThemes.indexOf(themes[i]) == -1) {
                            themes[i] = '<span x-pass-thru="true" class="noncanon">' + themes[i] + "</span>";
                        }
                    }
                    $(this).html(themes.join(', '));
                });
            }
        });
    }, function() { options.error && options.error('Nie udało się załadować XSLT'); });
}
95
/* USEFUL CONSTANTS */

// Numeric codes compared against node.nodeType by the serializer below.
const ELEMENT_NODE                   = 1;
const ATTRIBUTE_NODE                 = 2;
const TEXT_NODE                      = 3;
const CDATA_SECTION_NODE             = 4;
const ENTITY_REFERENCE_NODE          = 5;
const ENTITY_NODE                    = 6;
const PROCESSING_INSTRUCTION_NODE    = 7;
const COMMENT_NODE                   = 8;
const DOCUMENT_NODE                  = 9;
const DOCUMENT_TYPE_NODE             = 10;
const DOCUMENT_FRAGMENT_NODE         = 11;
const NOTATION_NODE                  = 12;

// "x-attr-name-<id>": captures the id of an attribute stored as name/value/ns triples.
const XATTR_RE = /^x-attr-name-(.*)$/;
// "x-a-<prefix>-<name>": captures the namespace prefix and the attribute name.
const XATTR_KNOWN_RE = /^x-a-([a-z]+)-(.*)$/;

// Token types kept on the serializer's stack.
const ELEM_START = 1;
const ELEM_END = 2;
const NS_END = 3;

// Namespace URI -> preferred prefix.
const NAMESPACES = {
    // namespaces not listed here will be assigned random names
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://purl.org/dc/elements/1.1/": "dc",
    "http://www.w3.org/XML/1998/namespace": "xml"
};

// Prefix -> namespace URI: the inverse of NAMESPACES, plus 'wl' mapped to
// the empty string (treated as "no namespace" when attributes are written).
const NS_PREFIXES = {
    'wl': ''
};
Object.keys(NAMESPACES).forEach(function(uri) {
    NS_PREFIXES[NAMESPACES[uri]] = uri;
});
129
/**
 * Serializer that turns the editor's HTML tree back into XML text.
 * Construction sets nothing up; working state is created by _prepare()
 * at the start of each serialize() call.
 */
function HTMLSerializer() {
    // intentionally empty
}
133
134
135
/**
 * Resets the working state for a new serialization run: an empty token
 * stack, an empty output string, a namespace map holding only the XML
 * namespace, and the generated-prefix counter restarted at 1.
 */
HTMLSerializer.prototype._prepare = function() {
    Object.assign(this, {
        stack: [],
        // the XML namespace is implicit and never needs declaring
        nsMap: {"http://www.w3.org/XML/1998/namespace": "xml"},
        result: "",
        nsCounter: 1
    });
};
145
/**
 * Schedules a node for processing by pushing an ELEM_START token
 * that references it onto the stack.
 *
 * @param {Node} element - node to be visited later
 */
HTMLSerializer.prototype._pushElement = function(element) {
    const startToken = {"type": ELEM_START, "node": element};
    this.stack.push(startToken);
};
152
/**
 * Schedules every child of element for processing. Children are pushed
 * last-to-first so that popping the stack yields them in document order.
 *
 * @param {Node} element - node whose childNodes are pushed
 */
HTMLSerializer.prototype._pushChildren = function(element) {
    var remaining = element.childNodes.length;
    while (remaining > 0) {
        remaining -= 1;
        this._pushElement(element.childNodes.item(remaining));
    }
};
157
/**
 * Pushes an ELEM_END token so that the closing tag for tagName is
 * written after everything pushed on top of it has been processed.
 *
 * @param {string} tagName - name for the closing tag; serialize() prints
 *     no closing tag when this is the empty string
 */
HTMLSerializer.prototype._pushTagEnd = function(tagName) {
    const endToken = {"type": ELEM_END, "tagName": tagName};
    this.stack.push(endToken);
};
164
/**
 * Tells whether the element immediately preceding node (skipping
 * non-element siblings) is a verse of the same stanza.
 *
 * @param {Element} node - element being serialized
 * @returns {boolean} true only if node's parent is an element whose
 *     x-node attribute is 'strofa' and the previous element sibling
 *     carries an x-verse attribute
 */
HTMLSerializer.prototype._verseBefore = function(node) {
    const parent = node.parentNode;

    // A Document or DocumentFragment parent has no attribute methods at all,
    // so the node type is checked before the attribute is read.
    if (!parent || parent.nodeType !== ELEMENT_NODE || parent.getAttribute('x-node') !== 'strofa') {
        return false;
    }

    let prev = node.previousSibling;
    while (prev !== null && prev.nodeType !== ELEMENT_NODE) {
        prev = prev.previousSibling;
    }

    return prev !== null && prev.hasAttribute('x-verse');
};
179
/**
 * Tells whether node is marked with x-auto-node="true"; such nodes
 * are serialized without tags of their own, only their children are kept.
 *
 * @param {Element} node
 * @returns {boolean}
 */
HTMLSerializer.prototype._nodeIgnored = function(node) {
    var marker = node.getAttribute('x-auto-node');
    return marker == 'true';
};
183
/**
 * Descends through ignored elements along their first children and
 * reports whether the node reached is text that begins with whitespace.
 *
 * @param {Node} node - starting node
 * @returns {Array|null|boolean} the match result of /^\s/ on the text
 *     (truthy when it starts with whitespace, null when it does not),
 *     or false when the node reached is not a text node
 */
HTMLSerializer.prototype._ignoredWithWhitespace = function(node) {
    var current = node;
    for (;;) {
        var canDescend = current.nodeType == ELEMENT_NODE
            && this._nodeIgnored(current)
            && current.childNodes.length > 0;
        if (!canDescend) {
            break;
        }
        current = current.childNodes[0];
    }

    if (current.nodeType != TEXT_NODE) {
        return false;
    }
    return current.nodeValue.match(/^\s/);
};
191
192
/**
 * Serializes the tree under rootElement into XML text.
 *
 * Works off an explicit stack of tokens instead of recursing. Text is
 * held back in a one-item buffer so that the '/' separating two verses
 * can be written before the text that precedes the next verse.
 *
 * @param {Node} rootElement - root of the tree to serialize
 * @param {boolean} stripOuter - when truthy only the children of
 *     rootElement are serialized, not rootElement itself
 * @returns {string} the serialized text
 */
HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
{
    var serializer = this;
    var pendingText = '';

    // Moves the buffered text to the output and empties the buffer.
    function flushText() {
        serializer.result += pendingText;
        pendingText = '';
    }

    serializer._prepare();

    if (stripOuter) {
        serializer._pushChildren(rootElement);
    } else {
        serializer._pushElement(rootElement);
    }

    while (serializer.stack.length > 0) {
        var entry = serializer.stack.pop();

        if (entry.type === ELEM_END) {
            flushText();
            // ignored elements push an empty tag name: flush only, no closing tag
            if (entry.tagName != '') {
                serializer.result += "</" + entry.tagName + ">";
            }
            continue;
        }

        if (entry.type === NS_END) {
            serializer._unassignNamespace(entry.namespace);
            continue;
        }

        var current = entry.node;
        var kind = current.nodeType;

        if (kind === TEXT_NODE) {
            // write out the previously buffered text, then buffer this one
            serializer.result += pendingText;
            pendingText = current.nodeValue.replace(/&/g, '&amp;').replace(/</g, '&lt;');
            continue;
        }

        if (kind === COMMENT_NODE) {
            flushText();
            serializer.result += '<!--' + current.nodeValue + '-->';
            continue;
        }

        if (kind !== ELEMENT_NODE) {
            // other node types produce no output
            continue;
        }

        var passThrough = current.hasAttribute('x-pass-thru')
            || current.hasAttribute('data-pass-thru');
        if (passThrough) {
            // wrapper itself is dropped, its content is kept
            serializer._pushChildren(current);
            continue;
        }

        // elements without x-node are skipped together with their content
        if (!current.hasAttribute('x-node')) {
            continue;
        }

        if (current.getAttribute('x-node') === 'out-of-flow-text') {
            serializer._pushChildren(current);
            continue;
        }

        if (current.hasAttribute('x-verse') && serializer._verseBefore(current)) {
            serializer.result += '/';
            // add whitespace if there's none
            var startsWithSpace = pendingText.match(/^\s/)
                || serializer._ignoredWithWhitespace(current);
            if (!startsWithSpace) {
                serializer.result += ' ';
            }
        }

        flushText();
        serializer._serializeElement(current);
    }

    flushText();

    return serializer.result;
};
266
/*
 * Forgets the prefix registered for nsData.uri, so that the namespace
 * gets declared again the next time it is needed.
 *
 * TODO: this doesn't support prefix redefinitions
 */
HTMLSerializer.prototype._unassignNamespace = function(nsData) {
    var registry = this.nsMap;
    var uri = nsData.uri;
    registry[uri] = undefined;
};
273
/**
 * Finds the prefix to use for a namespace URI, registering one if the
 * namespace is not known in the current context.
 *
 * @param {string|null} uri - namespace URI; null means the default namespace
 * @returns {{prefix: string, uri: string, fresh: boolean}} fresh is true
 *     when the prefix was registered by this call and so still has to
 *     be declared in the output
 */
HTMLSerializer.prototype._assignNamespace = function(uri) {
    if (uri === null) {
        // default namespace
        return {"prefix": "", "uri": "", "fresh": false};
    }

    var registered = this.nsMap[uri];
    if (registered !== undefined) {
        return {"prefix": registered, "uri": uri, "fresh": false};
    }

    // not defined yet in the current context: take the predefined
    // prefix if there is one, otherwise generate "ns<counter>"
    var chosen = NAMESPACES[uri];
    if (chosen === undefined) {
        chosen = "ns" + this.nsCounter;
        this.nsCounter += 1;
    }

    this.nsMap[uri] = chosen;
    return {"prefix": chosen, "uri": uri, "fresh": true};
};
299
/**
 * Builds a qualified name: "prefix:name", or just name when the
 * prefix is empty or missing.
 */
HTMLSerializer.prototype._join = function(prefix, name) {
    return prefix ? prefix + ":" + name : name;
};
305
/**
 * Like _join, but the second part is the optional one: yields
 * "prefix:name", or just prefix when name is empty or missing.
 */
HTMLSerializer.prototype._rjoin = function(prefix, name) {
    return name ? prefix + ":" + name : prefix;
};
311
/**
 * Writes the opening tag of node (name, attributes, new namespace
 * declarations) to this.result and schedules its closing tag and
 * children on the stack.
 *
 * The tag name comes from the x-node attribute and its namespace from
 * x-ns. Attributes are recovered from two encodings:
 *   - x-a-<prefix>-<name>="value", with <prefix> looked up in NS_PREFIXES;
 *   - x-attr-name-<id> together with x-attr-value-<id> and an optional
 *     x-attr-ns-<id>.
 *
 * Nodes for which _nodeIgnored() is true get no tag at all; only their
 * children are scheduled.
 *
 * @param {Element} node - element to serialize
 */
HTMLSerializer.prototype._serializeElement = function(node) {
    const self = this;

    if (self._nodeIgnored(node)) {
        // The empty tag name makes serialize() flush buffered text at the
        // end of the content without printing a closing tag.
        self._pushTagEnd('');
        self._pushChildren(node);
        return;
    }

    // Namespaces first used on this element; declared with xmlns below.
    const newNamespaces = [];

    // Resolves the prefix for uri. A freshly registered namespace is noted
    // for declaration and an NS_END token is pushed so that it is
    // unregistered once this element's content has been processed.
    function claimNamespace(uri) {
        const nsData = self._assignNamespace(uri);
        if (nsData.fresh) {
            newNamespaces.push(nsData);
            self.stack.push({
                "type": NS_END,
                "namespace": nsData
            });
        }
        return nsData;
    }

    // '<' is not allowed in XML attribute values, so it is escaped
    // alongside '&' and the quote character.
    function escapeAttr(value) {
        return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/"/g, '&quot;');
    }

    function writeAttr(ns, tag, value) {
        const name = ns ? self._join(claimNamespace(ns).prefix, tag) : tag;
        self.result += ' ' + name + '="' + escapeAttr(value) + '"';
    }

    const tagName = self._join(
        claimNamespace(node.getAttribute('x-ns')).prefix,
        node.getAttribute('x-node')
    );

    /* retrieve attributes */
    const knownAttributes = [];
    const attributeIDs = [];
    for (let i = 0; i < node.attributes.length; i++) {
        const attr = node.attributes.item(i);

        const known = attr.name.match(XATTR_KNOWN_RE);
        if (known !== null) {
            knownAttributes.push([NS_PREFIXES[known[1]], known[2], attr.value]);
            continue;
        }

        // check if name starts with "x-attr-name"
        const generic = attr.name.match(XATTR_RE);
        if (generic !== null) {
            attributeIDs.push(generic[1]);
        }
    }

    /* print out */
    self.result += '<' + tagName;

    knownAttributes.forEach(function(entry) {
        writeAttr(entry[0], entry[1], entry[2]);
    });

    attributeIDs.forEach(function(id) {
        writeAttr(
            node.getAttribute('x-attr-ns-' + id),
            node.getAttribute('x-attr-name-' + id),
            node.getAttribute('x-attr-value-' + id)
        );
    });

    /* print new namespace declarations */
    newNamespaces.forEach(function(nsData) {
        self.result += " " + self._rjoin("xmlns", nsData.prefix);
        self.result += '="' + escapeAttr(nsData.uri) + '"';
    });

    if (node.childNodes.length > 0) {
        self.result += ">";
        self._pushTagEnd(tagName);
        self._pushChildren(node);
    } else {
        self.result += "/>";
    }
};
411
/**
 * Serializes params.element with a fresh HTMLSerializer and reports
 * the outcome through callbacks.
 *
 * Reads from params:
 *   - element:    root node to serialize
 *   - stripOuter: passed through to HTMLSerializer.serialize
 *   - success:    called with the serialized text
 *   - error:      called with a message if anything in the try block
 *                 throws (including the success callback itself)
 */
function html2text(params) {
    try {
        var serializer = new HTMLSerializer();
        var text = serializer.serialize(params.element, params.stripOuter);
        params.success(text);
    } catch(e) {
        params.error("Nie udało się zserializować tekstu:" + e)
    }
}