Obey length limits for wikidata import.
[redakcja.git] / src / redakcja / static / js / wiki / xslt.js
1 /*
2  *
3  * XSLT STUFF
4  *
5  */
/**
 * Builds an XSLT processor with the given stylesheet already imported.
 *
 * @param {Node} xsl - stylesheet node handed to XSLTProcessor.importStylesheet.
 * @returns {XSLTProcessor} processor ready to run transformations.
 */
function createXSLT(xsl) {
    const processor = new XSLTProcessor();
    processor.importStylesheet(xsl);
    return processor;
}
11
// Cached processor for the XML -> HTML stylesheet; stays null until the
// first successful download.
var xml2htmlStylesheet = null;

/**
 * Runs code_block once the XML -> HTML stylesheet has been loaded.
 *
 * On the first call the stylesheet is fetched from '/wlxml/wl2html.xsl'
 * while the UI is blocked; later calls reuse the cached processor and run
 * code_block synchronously.
 *
 * @param {Function} code_block - called with no arguments once the
 *     stylesheet is available.
 * @param {Function} [onError] - called with the arguments of the $.ajax
 *     error callback when the download fails.
 */
function withStylesheets(code_block, onError) {
    if (xml2htmlStylesheet) {
        code_block();
        return;
    }

    $.blockUI({message: 'Ładowanie arkuszy stylów...'});
    $.ajax({
        url: '/wlxml/wl2html.xsl',
        dataType: 'xml',
        timeout: 10000,
        success: function(data) {
            try {
                xml2htmlStylesheet = createXSLT(data);
            } finally {
                // Never leave the page blocked, even if the stylesheet
                // turns out to be unusable.
                $.unblockUI();
            }
            code_block();
        },
        error: function() {
            // The overlay was raised before the request; without this a
            // failed download would leave the UI blocked forever.
            $.unblockUI();
            if (typeof onError === 'function') {
                onError.apply(this, arguments);
            }
        }
    });
}
35
36
/**
 * Converts source XML text into HTML nodes using the XML -> HTML stylesheet.
 *
 * Before parsing, every "/" followed by whitespace is turned into a
 * "<br />" and every run of characters outside the accepted set is wrapped
 * in <alien>...</alien>.
 *
 * @param {Object} options
 * @param {string} options.xml - source text to convert.
 * @param {string} options.base - URL (resolved against the current page)
 *     that relative image sources are resolved against.
 * @param {Function} options.success - receives the child nodes of the
 *     transformed fragment.
 * @param {Function} [options.error] - receives an error message and, for
 *     parser errors, the text of the parser's <sourcetext> element.
 */
function xml2html(options) {
    withStylesheets(function() {
        let xml = options.xml.replace(/\/(\s+)/g, '<br />$1');
        xml = xml.replace(/([^a-zA-Z0-9ąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s<>«»\\*_!,:;?&%."'=#()\/-]+)/g, '<alien>$1</alien>');

        let doc = new DOMParser().parseFromString(xml, 'text/xml');
        let error = $('parsererror', doc);

        if (error.length === 0) {
            doc = xml2htmlStylesheet.transformToFragment(doc, document);

            // A failed transformation yields nothing to display.
            if (!doc || doc.firstChild === null) {
                if (options.error) {
                    options.error("Błąd w przetwarzaniu XML.");
                }
                return;
            }

            error = $('parsererror', doc);
        }

        // Without an error handler a parser error falls through to the
        // success branch, as it always did.
        if (error.length > 0 && options.error) {
            const source = $('sourcetext', doc);
            const sourceText = source.text();
            source.text('');
            options.error(error.text(), sourceText);
            return;
        }

        const galleryUrl = new URL(options.base, window.location.href);
        $("img", $(doc.childNodes)).each(function() {
            const src = $(this).attr('src');
            // An image without src has nothing to resolve.
            if (src == null) {
                return;
            }
            $(this).attr('src', new URL(src, galleryUrl).href);
        });

        options.success(doc.childNodes);

        $.themes.withCanon(function(canonThemes) {
            if (canonThemes == null) {
                return;
            }
            $('.theme-text-list').addClass('canon').each(function() {
                // Mark every theme that is not on the canonical list.
                const themes = $(this).html().split(',').map(function(raw) {
                    const theme = raw.trim();
                    if (canonThemes.indexOf(theme) === -1) {
                        return '<span x-pass-thru="true" class="noncanon">' + theme + "</span>";
                    }
                    return theme;
                });
                $(this).html(themes.join(', '));
            });
        });
    }, function() { options.error && options.error('Nie udało się załadować XSLT'); });
}
95
/* USEFUL CONSTANTS */

/* DOM Node.nodeType values. */
const ELEMENT_NODE                   = 1;
const ATTRIBUTE_NODE                 = 2;
const TEXT_NODE                      = 3;
const CDATA_SECTION_NODE             = 4;
const ENTITY_REFERENCE_NODE          = 5;
const ENTITY_NODE                    = 6;
const PROCESSING_INSTRUCTION_NODE    = 7;
const COMMENT_NODE                   = 8;
const DOCUMENT_NODE                  = 9;
const DOCUMENT_TYPE_NODE             = 10;
const DOCUMENT_FRAGMENT_NODE         = 11;
const NOTATION_NODE                  = 12;

/* Attribute triple: x-attr-ns-ID / x-attr-name-ID / x-attr-value-ID. */
const XATTR_RE = /^x-attr-name-(.*)$/;
/* Attribute with a well-known prefix: x-a-PREFIX-NAME. */
const XATTR_KNOWN_RE = /^x-a-([a-z]+)-(.*)$/;

/* Token types kept on the serializer's work stack. */
const ELEM_START = 1;
const ELEM_END = 2;
const NS_END = 3;

/* Namespace URI -> preferred prefix. */
const NAMESPACES = {
        // namespaces not listed here will be assigned random names
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
        "http://purl.org/dc/elements/1.1/": "dc",
        "http://www.w3.org/XML/1998/namespace": "xml"
};

/* Prefix -> namespace URI; 'wl' maps to "no namespace". */
const NS_PREFIXES = {
    'wl': ''
};
for (const uri of Object.keys(NAMESPACES)) {
    NS_PREFIXES[NAMESPACES[uri]] = uri;
}

/**
 * Serializes an HTML tree annotated with x-* attributes back into XML text.
 *
 * Elements carrying an `x-node` attribute become XML elements of that name
 * (in the namespace given by `x-ns`); elements without it are dropped
 * unless they are marked as pass-through, in which case only their
 * children are serialized.
 */
class HTMLSerializer {
    /** Resets all per-run state. */
    _prepare() {
        this.stack = [];

        // XML namespace is implicit
        this.nsMap = {"http://www.w3.org/XML/1998/namespace": "xml"};

        this.result = "";
        this.nsCounter = 1;
    }

    /** Schedules a node for serialization. */
    _pushElement(element) {
        this.stack.push({
            "type": ELEM_START,
            "node": element
        });
    }

    /** Schedules all children of element, in document order. */
    _pushChildren(element) {
        // Pushed in reverse so that the first child is popped first.
        for (let i = element.childNodes.length - 1; i >= 0; i--) {
            this._pushElement(element.childNodes.item(i));
        }
    }

    /** Schedules a closing tag; an empty tagName emits no tag at all. */
    _pushTagEnd(tagName) {
        this.stack.push({
            "type": ELEM_END,
            "tagName": tagName
        });
    }

    /** True if the previous element sibling is a verse of the same stanza. */
    _verseBefore(node) {
        const parent = node.parentNode;
        if (!parent || !parent.hasAttribute('x-node') || parent.getAttribute('x-node') != 'strofa') {
            return false;
        }

        let prev = node.previousSibling;
        while ((prev !== null) && (prev.nodeType != ELEMENT_NODE)) {
            prev = prev.previousSibling;
        }

        return (prev !== null) && prev.hasAttribute('x-verse');
    }

    /** True for elements whose own tag must not be written out. */
    _nodeIgnored(node) {
        return node.getAttribute('x-auto-node') == 'true';
    }

    /**
     * True if the first text reachable through a chain of ignored elements
     * starts with whitespace.
     */
    _ignoredWithWhitespace(node) {
        while (node.nodeType == ELEMENT_NODE && this._nodeIgnored(node) && node.childNodes.length > 0) {
            node = node.childNodes[0];
        }
        return node.nodeType == TEXT_NODE && /^\s/.test(node.nodeValue);
    }

    /**
     * Serializes rootElement to XML text.
     *
     * @param {Element} rootElement - root of the annotated HTML tree.
     * @param {boolean} [stripOuter] - when truthy, only the children of
     *     rootElement are serialized.
     * @returns {string} the serialized XML.
     */
    serialize(rootElement, stripOuter) {
        this._prepare();

        if (stripOuter) {
            this._pushChildren(rootElement);
        } else {
            this._pushElement(rootElement);
        }

        // Text of the most recent text node is held back until the next
        // token, because a verse separator may have to go in front of it.
        let textBuffer = '';

        while (this.stack.length > 0) {
            const token = this.stack.pop();

            if (token.type === ELEM_END) {
                this.result += textBuffer;
                textBuffer = '';
                if (token.tagName != '') {
                    this.result += "</" + token.tagName + ">";
                }
                continue;
            }

            if (token.type === NS_END) {
                this._unassignNamespace(token.namespace);
                continue;
            }

            const node = token.node;
            switch (node.nodeType) {
            case ELEMENT_NODE:
                if (node.hasAttribute('x-pass-thru') || node.hasAttribute('data-pass-thru')) {
                    this._pushChildren(node);
                    break;
                }

                if (!node.hasAttribute('x-node')) {
                    break;
                }

                if (node.getAttribute('x-node') === 'out-of-flow-text') {
                    this._pushChildren(node);
                    break;
                }

                if (node.hasAttribute('x-verse') && this._verseBefore(node)) {
                    this.result += '/';
                    // add whitespace if there's none
                    if (!(textBuffer.match(/^\s/) || this._ignoredWithWhitespace(node))) {
                        this.result += ' ';
                    }
                }

                this.result += textBuffer;
                textBuffer = '';
                this._serializeElement(node);
                break;
            case TEXT_NODE:
                this.result += textBuffer;
                textBuffer = node.nodeValue.replace(/&/g, '&amp;').replace(/</g, '&lt;');
                break;
            case COMMENT_NODE:
                this.result += textBuffer;
                textBuffer = '';
                this.result += '<!--' + node.nodeValue + '-->';
                break;
            }
        }
        this.result += textBuffer;

        return this.result;
    }

    /*
     * TODO: this doesn't support prefix redefinitions
     */
    _unassignNamespace(nsData) {
        this.nsMap[nsData.uri] = undefined;
    }

    /**
     * Looks up (or allocates) the prefix for a namespace URI.
     *
     * @returns {{prefix: string, uri: string, fresh: boolean}} `fresh` is
     *     true when the prefix was allocated by this call and therefore
     *     still has to be declared.
     */
    _assignNamespace(uri) {
        if (uri === null) {
            // default namespace
            return {"prefix": "", "uri": "", "fresh": false};
        }

        if (this.nsMap[uri] !== undefined) {
            return {"prefix": this.nsMap[uri], "uri": uri, "fresh": false};
        }

        // this prefix hasn't been defined yet in current context
        let prefix = NAMESPACES[uri];
        if (prefix === undefined) { // not predefined
            prefix = "ns" + this.nsCounter;
            this.nsCounter += 1;
        }

        this.nsMap[uri] = prefix;
        return {"prefix": prefix, "uri": uri, "fresh": true};
    }

    /** "prefix:name", or just name when prefix is empty. */
    _join(prefix, name) {
        if (prefix) {
            return prefix + ":" + name;
        }
        return name;
    }

    /** "prefix:name", or just prefix when name is empty. */
    _rjoin(prefix, name) {
        if (name) {
            return prefix + ":" + name;
        }
        return prefix;
    }

    /** Escapes a string for use inside a double-quoted XML attribute. */
    _escapeAttr(value) {
        // "<" is not allowed in attribute values, so it is escaped as well.
        return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/"/g, '&quot;');
    }

    /**
     * Writes the start tag of node (with attributes and any namespace
     * declarations it introduces) and schedules its children and end tag.
     */
    _serializeElement(node) {
        if (this._nodeIgnored(node)) {
            this._pushTagEnd('');
            this._pushChildren(node);
            return;
        }

        // Namespaces first used on this element; declared on its start tag
        // and released again after its end tag has been written.
        const newNamespaces = [];
        const resolveNamespace = (uri) => {
            const nsData = this._assignNamespace(uri);
            if (nsData.fresh) {
                newNamespaces.push(nsData);
                this.stack.push({
                    "type": NS_END,
                    "namespace": nsData
                });
            }
            return nsData;
        };

        const elementNs = resolveNamespace(node.getAttribute('x-ns'));
        const tagName = this._join(elementNs.prefix, node.getAttribute('x-node'));

        /* retrieve attributes as [namespace URI, name, value] triples */
        const attributes = [];
        const attributeIDs = [];
        for (let i = 0; i < node.attributes.length; i++) {
            const attr = node.attributes.item(i);

            const known = attr.name.match(XATTR_KNOWN_RE);
            if (known !== null) {
                attributes.push([NS_PREFIXES[known[1]], known[2], attr.value]);
                continue;
            }

            // check if name starts with "x-attr-name"
            const generic = attr.name.match(XATTR_RE);
            if (generic !== null) {
                attributeIDs.push(generic[1]);
            }
        }
        // Prefixed attributes are written first, then the ID-based ones.
        for (const id of attributeIDs) {
            attributes.push([
                node.getAttribute('x-attr-ns-' + id),
                node.getAttribute('x-attr-name-' + id),
                node.getAttribute('x-attr-value-' + id)
            ]);
        }

        /* print out */
        this.result += '<' + tagName;

        for (const [ns, name, value] of attributes) {
            let qualifiedName = name;
            if (ns) {
                qualifiedName = this._join(resolveNamespace(ns).prefix, name);
            }
            this.result += ' ' + qualifiedName;
            this.result += '="' + this._escapeAttr(value) + '"';
        }

        /* print new namespace declarations */
        for (const nsData of newNamespaces) {
            this.result += " " + this._rjoin("xmlns", nsData.prefix);
            this.result += '="' + this._escapeAttr(nsData.uri) + '"';
        }

        if (node.childNodes.length > 0) {
            this.result += ">";
            this._pushTagEnd(tagName);
            this._pushChildren(node);
        } else {
            this.result += "/>";
        }
    }
}
406
/**
 * Serializes an annotated HTML element back to text.
 *
 * @param {Object} params
 * @param {Element} params.element - root element to serialize.
 * @param {boolean} [params.stripOuter] - passed through to
 *     HTMLSerializer.serialize.
 * @param {Function} params.success - receives the serialized text.
 * @param {Function} params.error - receives a message when serialization
 *     (or the success callback) throws.
 */
function html2text(params) {
    try {
        const serializer = new HTMLSerializer();
        const text = serializer.serialize(params.element, params.stripOuter);
        params.success(text);
    } catch (failure) {
        params.error("Nie udało się zserializować tekstu:" + failure);
    }
}