Fix dictionary auto-tagging
[redakcja.git] / redakcja / static / js / wiki / xslt.js
index dda12a1..1327fc6 100644 (file)
@@ -17,8 +17,9 @@ function withStylesheets(code_block, onError)
     if (!xml2htmlStylesheet) {
        $.blockUI({message: 'Ładowanie arkuszy stylów...'});
        $.ajax({
-               url: STATIC_URL + 'xsl/wl2html_client.xsl',
+               url: STATIC_URL + 'xsl/wl2html_client.xsl?20110520',
                dataType: 'xml',
+               timeout: 10000,
                success: function(data) {
                xml2htmlStylesheet = createXSLT(data);
                 $.unblockUI();
@@ -34,9 +35,33 @@ function withStylesheets(code_block, onError)
 }
 
 
+// Runs code_block with the canonical themes: an array of lines from /editor/themes, or null if loading failed
+function withThemes(code_block, onError) // NOTE(review): onError is never used in this body; a failed load is signalled by passing null to code_block
+{
+    if (typeof withThemes.canon == 'undefined') { // nothing stored on the function object yet
+        $.ajax({ // NOTE(review): canon is only assigned in the callbacks below, so calls made before a response arrives would each start another request - confirm this is acceptable
+            url: '/editor/themes',
+            dataType: 'text',
+            success: function(data) {
+                withThemes.canon = data.split('\n'); // one array entry per line of the response text
+                code_block(withThemes.canon);
+            },
+            error: function() {
+                withThemes.canon = null; // null is no longer 'undefined', so later calls skip the request and receive null
+                code_block(withThemes.canon);
+            }
+        })
+    }
+    else {
+        code_block(withThemes.canon); // already resolved (list or null): reuse the stored value
+    }
+}
+
+
 function xml2html(options) {
     withStylesheets(function() {
-        var xml = options.xml.replace(/\/\s+/g, '<br />');
+        var xml = options.xml.replace(/\/(\s+)/g, '<br />$1');
+        xml = xml.replace(/([^a-zA-Z0-9ąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s<>«»\\*_!,:;?&%."'=#()\/-]+)/g, '<alien>$1</alien>');
         var parser = new DOMParser();
         var serializer = new XMLSerializer();
         var doc = parser.parseFromString(xml, 'text/xml');
@@ -44,20 +69,37 @@ function xml2html(options) {
 
         if (error.length == 0) {
             doc = xml2htmlStylesheet.transformToFragment(doc, document);
-                       console.log(doc.firstChild);
+            console.log(doc.firstChild);
 
-                       if(doc.firstChild === null) {
-                               options.error("Błąd w przetwarzaniu XML.");
-                               return;
-                       }
+        if(doc.firstChild === null) {
+            options.error("Błąd w przetwarzaniu XML.");
+                return;
+            }
 
             error = $('parsererror', doc);
         }
 
         if (error.length > 0 && options.error) {
-            options.error(error.text());
+            source = $('sourcetext', doc);
+            source_text = source.text();
+            source.text('');
+            options.error(error.text(), source_text);
         } else {
-            options.success(doc.firstChild);
+            options.success(doc.childNodes);
+
+            withThemes(function(canonThemes) {
+                if (canonThemes != null) {
+                    $('.theme-text-list').addClass('canon').each(function(){
+                        var themes = $(this).html().split(',');
+                        for (i in themes) {
+                            themes[i] = $.trim(themes[i]);
+                            if (canonThemes.indexOf(themes[i]) == -1)
+                                themes[i] = '<span x-pass-thru="true" class="noncanon">' + themes[i] + "</span>"
+                        }
+                        $(this).html(themes.join(', '));
+                    });
+                }
+            });
         }
     }, function() { options.error && options.error('Nie udało się załadować XSLT'); });
 }
@@ -88,62 +130,6 @@ const NAMESPACES = {
        "http://www.w3.org/XML/1998/namespace": "xml"
 };
 
-/*
- * PADDING for pretty-printing
- */
-const PADDING = {
-    dramat_wierszowany_l: 4,
-    dramat_wierszowany_lp: 4,
-    dramat_wspolczesny: 4,
-    wywiad: 4,
-    opowiadanie: 4,
-    powiesc: 4,
-    liryka_l: 4,
-    liryka_lp: 4,
-    naglowek_czesc: 4,
-    naglowek_akt: 4,
-    naglowek_rozdzial: 4,
-    naglowek_osoba: 4,
-    lista_osob: 4,
-
-    akap: 3,
-    akap_cd: 3,
-    akap_dialog: 3,
-    strofa: 3,
-    motto: 3,
-    miejsce_czas: 3,
-
-    autor_utworu: 2,
-    nazwa_utworu: 2,
-    dzielo_nadrzedne: 2,
-
-    didaskalia: 2,
-    motto_podpis: 2,
-    naglowek_listy: 2,
-    kwestia: 1,
-    lista_osoba: 1,
-
-       "podpis": 1,
-       "wers": 0,
-       "wers_cd": 0,
-       "wers_akap": 0,
-       "wers_wciety": 0,
-
-       "rdf:RDF": 3,
-       "rdf:Description": 1,
-};
-
-function getPadding(name) {
-
-       if(name.match(/^dc:.*$/))
-               return -1;
-
-       if(PADDING[name])
-               return PADDING[name];
-
-       return 0;
-}
-
 function HTMLSerializer() {
        // empty constructor
 }
@@ -180,6 +166,11 @@ HTMLSerializer.prototype._pushTagEnd = function(tagName) {
 }
 
 HTMLSerializer.prototype._verseBefore = function(node) {
+    /* true if previous element is a previous verse of a stanza */
+    var parent = node.parentNode;
+    if (!parent || !parent.hasAttribute('x-node') || parent.getAttribute('x-node') != 'strofa')
+        return false;
+
        var prev = node.previousSibling;
 
        while((prev !== null) && (prev.nodeType != ELEMENT_NODE)) {
@@ -189,6 +180,19 @@ HTMLSerializer.prototype._verseBefore = function(node) {
        return (prev !== null) && prev.hasAttribute('x-verse');
 }
 
+HTMLSerializer.prototype._nodeIgnored = function(node) { // True when the node's x-node attribute is 'wers'; _serializeElement writes no tag for such nodes, only their children
+    return node.getAttribute('x-node') == 'wers';
+}
+
+HTMLSerializer.prototype._ignoredWithWhitespace = function(node) { // Follows first children through ignored elements, then reports whether the node reached is a text node that starts with whitespace
+    while (node.nodeType == ELEMENT_NODE && this._nodeIgnored(node) && node.childNodes.length > 0)
+        node = node.childNodes[0]; // descend into the first child of the ignored element
+    if (node.nodeType == TEXT_NODE)
+        return node.nodeValue.match(/^\s/)
+    else return false; // note: the text-node branch above returns the match() result (array or null), not a strict boolean
+}
+
+
 HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
 {
        var self = this;
@@ -199,16 +203,18 @@ HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
        else
                self._pushChildren(rootElement);
 
+    var text_buffer = '';
+
        while(self.stack.length > 0) {
                var token = self.stack.pop();
 
-               if(token.type === ELEM_END) {
-                       self.result += "</" + token.tagName + ">";
-                       for(var padding = getPadding(token.tagName); padding > 0; padding--) {
-                               self.result += "\n";
-                       }
-                       continue;
-               };
+        if(token.type === ELEM_END) {
+            self.result += text_buffer;
+            text_buffer = '';
+            if (token.tagName != '')
+                self.result += "</" + token.tagName + ">";
+            continue;
+        };
 
                if(token.type === NS_END) {
                        self._unassignNamespace(token.namespace);
@@ -218,7 +224,8 @@ HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
 
                switch(token.node.nodeType) {
                        case ELEMENT_NODE:
-                               if(token.node.hasAttribute('x-pass-thru')) {
+                               if(token.node.hasAttribute('x-pass-thru')
+                                || token.node.hasAttribute('data-pass-thru')) {
                                        self._pushChildren(token.node);
                                        break;
                                }
@@ -228,30 +235,34 @@ HTMLSerializer.prototype.serialize = function(rootElement, stripOuter)
 
                                var xnode = token.node.getAttribute('x-node');
 
-                               if(xnode === 'wers') {
-                                       /* push children */
-                                       if(self._verseBefore(token.node))
-                                               self.result += '/\n';
-                                       self._pushChildren(token.node);
-                                       break;
-                               };
-
                                if(xnode === 'out-of-flow-text') {
                                        self._pushChildren(token.node);
                                        break;
                                }
 
-                               if(token.node.hasAttribute('x-verse') && self._verseBefore(token.node)) {
-                                       self.result += '/\n';
-                               };
+                if(token.node.hasAttribute('x-verse') && self._verseBefore(token.node)) {
+                    self.result += '/';
+                    // add whitespace if there's none
+                    if (!(text_buffer.match(/^\s/) || self._ignoredWithWhitespace(token.node)))
+                        self.result += ' ';
+                }
 
+                self.result += text_buffer;
+                text_buffer = '';
                                self._serializeElement(token.node);
                                break;
                        case TEXT_NODE:
-                               self.result += token.node.nodeValue;
+                               self.result += text_buffer;
+                               text_buffer = token.node.nodeValue.replace(/&/g, '&amp;').replace(/</g, '&lt;');
                                break;
+            case COMMENT_NODE:
+                self.result += text_buffer;
+                text_buffer = '';
+                self.result += '<!--' + token.node.nodeValue + '-->';
+                break;
                };
        };
+    self.result += text_buffer;
 
        return this.result;
 }
@@ -302,70 +313,74 @@ HTMLSerializer.prototype._rjoin = function(prefix, name) {
 };
 
 HTMLSerializer.prototype._serializeElement = function(node) {
-       var self = this;
-
-       var ns = node.getAttribute('x-ns');
-       var nsPrefix = null;
-       var newNamespaces = [];
-
-       var nsData = self._assignNamespace(node.getAttribute('x-ns'));
-
-       if(nsData.fresh) {
-               newNamespaces.push(nsData);
-               self.stack.push({
-                       "type": NS_END,
-                       "namespace": nsData
-               });
-       }
-
-       var tagName = self._join(nsData.prefix, node.getAttribute('x-node'));
-
-       /* retrieve attributes */
-       var attributeIDs = [];
-       for (var i = 0; i < node.attributes.length; i++) {
-               var attr = node.attributes.item(i);
-
-               // check if name starts with "x-attr-name"
-               var m = attr.name.match(XATTR_RE);
-               if (m !== null)
-                       attributeIDs.push(m[1]);
-       };
-
-       /* print out */
-       if (getPadding(tagName))
-               self.result += '\n';
-
-       self.result += '<' + tagName;
+    var self = this; // Appends this element to self.result: ignored elements contribute only their children; others get a tag with x-attr-* attributes and xmlns declarations
 
-       $.each(attributeIDs, function() {
-               var nsData = self._assignNamespace(node.getAttribute('x-attr-ns-'+this));
-
-               if(nsData.fresh) {
-                       newNamespaces.push(nsData);
-                       self.stack.push({
-                               "type": NS_END,
-                               "namespace": nsData
-                       });
-               };
-
-               self.result += ' ' + self._join(nsData.prefix, node.getAttribute('x-attr-name-'+this));
-               self.result += '="'+node.getAttribute('x-attr-value-'+this) +'"';
-       });
-
-       /* print new namespace declarations */
-       $.each(newNamespaces, function() {
-               self.result += " " + self._rjoin("xmlns", this.prefix);
-               self.result += '="' + this.uri + '"';
-       });
-
-       if (node.childNodes.length > 0) {
-               self.result += ">";
-               self._pushTagEnd(tagName);
-               self._pushChildren(node);
-       }
-       else {
-               self.result += "/>";
-       };
+    if (self._nodeIgnored(node)) { // ignored element: write no tag, only queue its children
+        self._pushTagEnd(''); // empty tag name; NOTE(review): presumably yields an end token for which serialize() flushes text but writes no closing tag - confirm against _pushTagEnd
+        self._pushChildren(node);
+    }
+    else {
+       var ns = node.getAttribute('x-ns'); // NOTE(review): ns and nsPrefix are not read again in this function
+       var nsPrefix = null;
+       var newNamespaces = [];
+
+       var nsData = self._assignNamespace(node.getAttribute('x-ns'));
+
+       if(nsData.fresh) { // reported as fresh: queue an NS_END token and remember it for the xmlns output below
+               newNamespaces.push(nsData);
+               self.stack.push({
+                       "type": NS_END,
+                       "namespace": nsData
+               });
+       }
+
+       var tagName = self._join(nsData.prefix, node.getAttribute('x-node')); // output tag name built from the namespace prefix and the x-node attribute
+
+       /* retrieve attributes */
+       var attributeIDs = [];
+       for (var i = 0; i < node.attributes.length; i++) {
+               var attr = node.attributes.item(i);
+
+               // check if name starts with "x-attr-name"
+               var m = attr.name.match(XATTR_RE);
+               if (m !== null)
+                       attributeIDs.push(m[1]);
+       };
+
+       /* print out */
+
+       self.result += '<' + tagName;
+
+       $.each(attributeIDs, function() {
+               var nsData = self._assignNamespace(node.getAttribute('x-attr-ns-'+this));
+
+               if(nsData.fresh) {
+                       newNamespaces.push(nsData);
+                       self.stack.push({
+                               "type": NS_END,
+                               "namespace": nsData
+                       });
+               };
+
+               self.result += ' ' + self._join(nsData.prefix, node.getAttribute('x-attr-name-'+this));
+               self.result += '="'+node.getAttribute('x-attr-value-'+this) +'"'; // NOTE(review): value is concatenated between double quotes with no escaping here - confirm it is already escaped upstream
+       });
+
+       /* print new namespace declarations */
+       $.each(newNamespaces, function() {
+               self.result += " " + self._rjoin("xmlns", this.prefix);
+               self.result += '="' + this.uri + '"';
+       });
+
+       if (node.childNodes.length > 0) { // has children: close the start tag, then queue the end tag and the children
+               self.result += ">";
+               self._pushTagEnd(tagName);
+               self._pushChildren(node);
+       }
+       else { // no children: write a self-closing tag
+               self.result += "/>";
+       };
+    }
 };
 
 function html2text(params) {
@@ -375,4 +390,4 @@ function html2text(params) {
        } catch(e) {
                params.error("Nie udało się zserializować tekstu:" + e)
        }
-}
\ No newline at end of file
+}