wip: wlxml formatting support - first test passing

author Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>

Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)

committer Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>

Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)
author Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)
committer Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)
diff --git a/src/smartxml/smartxml.js b/src/smartxml/smartxml.js

index b90a042..f4aa9fe 100644 (file)
--- a/src/smartxml/smartxml.js
+++ b/src/smartxml/smartxml.js
@@ -189,8 +189,12 @@ $.extend(ElementNode.prototype, DocumentNode.prototype, {
  
      toXML: function() {
          var wrapper = $('<div>');
-        wrapper.append(this._$);
+        wrapper.append(this._getXMLDOMToDump());
          return wrapper.html();
+    },
+    // Hook called by toXML(): supplies the jQuery object that is appended to the wrapper and serialized.
+    _getXMLDOMToDump: function() { // base implementation: hand back the node's own DOM (this._$) unchanged
+        return this._$;
      }
  });
  
diff --git a/src/wlxml/wlxml.js b/src/wlxml/wlxml.js

index 40f070c..51655d4 100644 (file)
--- a/src/wlxml/wlxml.js
+++ b/src/wlxml/wlxml.js
@@ -41,10 +41,59 @@ $.extend(WLXMLElementNode.prototype, smartxml.ElementNode.prototype, {
              }
          });
          return toret;
+    },
+    // Builds the DOM that gets serialized: a deep clone of this node's DOM with the text stored under the formatter_prefix data keys put back.
+    _getXMLDOMToDump: function() {
+        var DOM = this._$.clone(true, true); // clone together with jQuery data, so the data read below is available and this._$ itself is left untouched
+        // Walk every descendant element plus the cloned root itself.
+        DOM.find('*').addBack().each(function() {
+            var el = $(this),
+                parent = el.parent(),
+                contents = parent.contents(),
+                idx = contents.index(el),
+                data = el.data();
+            // contents/idx are a snapshot of the siblings taken before anything is inserted below.
+            // When el has no parent, contents is empty and idx is -1, so neither sibling lookup finds a text node.
+            var txt;
+            // orig_before: if the previous sibling is a text node still equal to the recorded transformed text, swap in the recorded original; otherwise insert the saved text before el.
+            if(data[formatter_prefix+ 'orig_before']) {
+                txt = idx > 0 && contents[idx-1].nodeType === Node.TEXT_NODE ? contents[idx-1] : null;
+                if(txt && txt.data === data[formatter_prefix + 'orig_before_transformed']) {
+                    txt.data = data[formatter_prefix+ 'orig_before_original'];
+                } else {
+                    el.before(data[formatter_prefix+ 'orig_before']);
+                }
+            }
+            if(data[formatter_prefix+ 'orig_after']) { // orig_after: same handling, applied to the next sibling
+                txt = idx < contents.length-1 && contents[idx+1].nodeType === Node.TEXT_NODE ? contents[idx+1] : null;
+                if(txt && txt.data === data[formatter_prefix + 'orig_after_transformed']) {
+                    txt.data = data[formatter_prefix+ 'orig_after_original'];
+                } else {
+                    el.after(data[formatter_prefix+ 'orig_after']);
+                }
+            }
+            if(data[formatter_prefix+ 'orig_begin']) { // orig_begin: always prepended; there is no transformed/original comparison for this key
+                el.prepend(data[formatter_prefix+ 'orig_begin']);
+            }
+            if(data[formatter_prefix+ 'orig_end']) { // orig_end: same handling as orig_before, applied to el's own last child node
+                contents = el.contents(); // re-read: from here on these are el's children (including anything just prepended)
+                txt = (contents.length && contents[contents.length-1].nodeType === Node.TEXT_NODE) ? contents[contents.length-1] : null;
+                if(txt && txt.data === data[formatter_prefix + 'orig_end_transformed']) {
+                    txt.data = data[formatter_prefix+ 'orig_end_original'];
+                } else {
+                    el.append(data[formatter_prefix+ 'orig_end']);
+                }
+            }
+        });
+        // The modified clone is returned; serializing it is left to the caller.
+        return DOM;
      }
  });
  
  
+
+
+
  var WLXMLDocument = function(xml) {
      smartxml.Document.call(this, xml);
  
@@ -59,6 +108,30 @@ var WLXMLDocument = function(xml) {
              hasSpanAfter = el.next().length && $(el.next()).prop('tagName') === 'SPAN';
  
  
+        var addInfo = function(toAdd, where, transformed, original) {
+            var parentContents = elParent.contents(),
+                idx = parentContents.index(el[0]),
+                prev = idx > 0 ? parentContents[idx-1] : null,
+                next = idx < parentContents.length - 1 ? parentContents[idx+1] : null,
+                target, key;
+
+            if(where === 'above') {
+                target = prev ? $(prev) : elParent;
+                key = prev ? 'orig_after' : 'orig_begin';
+            } else if(where === 'below') {
+                target = next ? $(next) : elParent;
+                key = next ? 'orig_before' : 'orig_end';
+            } else { throw new Object;}
+
+            target.data(formatter_prefix + key, toAdd);
+            if(transformed !== undefined) {
+                target.data(formatter_prefix + key + '_transformed', transformed);
+            }
+            if(original !== undefined) {
+                target.data(formatter_prefix + key + '_original', original);   
+            }
+        }
+
          text.transformed = text.trimmed;
  
          if(hasSpanParent || hasSpanBefore || hasSpanAfter) {
@@ -74,15 +147,43 @@ var WLXMLDocument = function(xml) {
          }
  
          if(!text.transformed) {
+            addInfo(text.original, 'below');
              el.remove();
              return true; // continue
          }
+
+        if(text.transformed !== text.original) {
+            // if(!text.trimmed) {
+            //     addInfo(text.original, 'below');
+            // } else {
+                var startingMatch = text.original.match(/^\s+/g),
+                    endingMatch = text.original.match(/\s+$/g),
+                    startingWhiteSpace = startingMatch ? startingMatch[0] : null,
+                    endingWhiteSpace = endingMatch ? endingMatch[0] : null;
+
+                if(endingWhiteSpace) {
+                    if(text.transformed[text.transformed.length - 1] === ' ' && endingWhiteSpace[0] === ' ')
+                        endingWhiteSpace = endingWhiteSpace.substr(1);
+                    addInfo(endingWhiteSpace, 'below', !text.trimmed ? text.transformed : undefined, !text.trimmed ? text.original : undefined);
+                }
+
+                if(startingWhiteSpace && text.trimmed) {
+                    if(text.transformed[0] === ' ' && startingWhiteSpace[startingWhiteSpace.length-1] === ' ')
+                        startingWhiteSpace = startingWhiteSpace.substr(0, startingWhiteSpace.length -1);
+                    addInfo(startingWhiteSpace, 'above', !text.trimmed ? text.transformed : undefined, !text.trimmed ? text.original : undefined);
+                }
+            //}
+        }
+
          el.replaceWith(document.createTextNode(text.transformed));
      });
  };
+
+var formatter_prefix = '_wlxml_formatter_';
+
  WLXMLDocument.prototype = Object.create(smartxml.Document.prototype);
  $.extend(WLXMLDocument.prototype, {
-    ElementNodeFactory: WLXMLElementNode
+    ElementNodeFactory: WLXMLElementNode,
  });
  
  
diff --git a/src/wlxml/wlxml.test.js b/src/wlxml/wlxml.test.js

index 7628969..86fe462 100644 (file)
--- a/src/wlxml/wlxml.test.js
+++ b/src/wlxml/wlxml.test.js
@@ -13,6 +13,10 @@ var nodeFromXML = function(xml) {
      return wlxml.WLXMLElementNodeFromXML(xml);
  };
  
+var getDocumentFromXML = function(xml) {
+    return wlxml.WLXMLDocumentFromXML(xml);
+};
+
  
  describe('WLXMLDocument', function() {
      
@@ -67,6 +71,120 @@ describe('WLXMLDocument', function() {
          });
      });
  
+    describe('formatting output xml', function() {
+        it('keeps white space between XML nodes', function() {
+            var xmlIn = '<section>\n\n\n<div></div>\n\n\n<div></div>\n\n\n</section>',
+            doc = getDocumentFromXML(xmlIn),
+            xmlOut = doc.toXML();
+
+            var partsIn = xmlIn.split('\n\n\n'),
+                partsOut = xmlOut.split('\n\n\n');
+
+            expect(partsIn).to.deep.equal(partsOut);
+        });
+
+        it('keeps white space between XML nodes - inline case', function() {
+            var xmlIn = '<section>\n\n\n<span></span>\n\n\n<span></span>\n\n\n</section>',
+                doc = getDocumentFromXML(xmlIn),
+                xmlOut = doc.toXML();
+
+            var partsIn = xmlIn.split('\n\n\n'),
+                partsOut = xmlOut.split('\n\n\n');
+            console.log(xmlIn);
+            console.log(xmlOut);
+            expect(partsIn).to.deep.equal(partsOut);
+        });
+
+        it('keeps white space at the beginning of text', function() {
+            var xmlIn = '<section>    abc<div>some div</div>    abc</section>',
+                doc = getDocumentFromXML(xmlIn),
+                xmlOut = doc.toXML();
+            // Round trip: the serialized document must equal the input, including the four spaces in front of each "abc".
+            expect(xmlOut).to.equal(xmlIn);
+        });
+
+        // it('nests new children block elements', function() {
+        //     var doc = getDocumentFromXML('<section></section>');
+    
+        //     doc.root.append({tag: 'header'});
+
+        //     var xmlOut = doc.toXML();
+        //     expect(xmlOut.split('\n  ')[0]).to.equal('<section>', 'nesting start ok');
+        //     expect(xmlOut.split('\n').slice(-1)[0]).to.equal('</section>', 'nesting end ok');
+
+        // });
+
+        // it('doesn\'t nest new children inline elements', function() {
+        //     var doc = getDocumentFromXML('<section></section>');
+    
+        //     doc.root.append({tag: 'span'});
+
+        //     var xmlOut = doc.toXML();
+        //     expect(xmlOut).to.equal('<section><span></span></section>');
+        // });
+
+        it('keeps original white space at the end of text', function() {
+            // The fixture uses backslash line continuations: no newline characters end up in the string, only the spaces that indent each continued line.
+            var xmlIn = '<header>    Some text ended with white space \
+            \
+            <span class="uri">Some text</span> some text\
+        \
+        </header>',
+                doc = getDocumentFromXML(xmlIn),
+                xmlOut = doc.toXML();
+            // Round trip: the serialized document must equal the input exactly.
+            expect(xmlOut).to.equal(xmlIn);
+        });
+
+        it('keeps white space around text node', function() { // fixture: a text run between two <header> elements, separated from them only by spaces
+            var xmlIn = '<section>\
+            <header>header1</header>\
+            Some text surrounded by white space\
+            <header>header2</header>\
+        </section>',
+                doc = getDocumentFromXML(xmlIn),
+                xmlOut = doc.toXML();
+            expect(xmlOut).to.equal(xmlIn); // round trip must reproduce the input exactly
+        });
+
+        it('keeps white space around text node - last node case', function() {
+            var xmlIn = '<section>\
+            <header>header</header>\
+                \
+            Some text surrounded by white space\
+                \
+        </section>',
+                doc = getDocumentFromXML(xmlIn),
+                xmlOut = doc.toXML();
+            // Here the text is the last child of <section>, with only spaces between it and the closing tag; the round trip must still be exact.
+            expect(xmlOut).to.equal(xmlIn);
+        });
+
+        it('keeps white space after detaching text element', function() {
+            var xmlIn = '<section><header>header</header>\n\
+                \n\
+            text1\n\
+                \n\
+        </section>',
+                expectedXmlOut = '<section><header>header</header>\n\
+                \n\
+            \n\
+                \n\
+        </section>',
+                doc = getDocumentFromXML(xmlIn),
+                contents = doc.root.contents(),
+                text = contents[contents.length-1];
+            // The last child of the root is expected to expose only 'text1', without the surrounding white space.
+            expect(text.getText()).to.equal('text1');
+            // Remove the text node from the document.
+            text.detach();
+            // expectedXmlOut is xmlIn with just the characters 'text1' removed: the white space around the detached text must still be serialized.
+            var xmlOut = doc.toXML();
+            expect(xmlOut).to.equal(expectedXmlOut);
+        });
+
+    });
+
  });
  
  });
 \ No newline at end of file
author	Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
	Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)
committer	Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
	Wed, 23 Oct 2013 14:56:36 +0000 (16:56 +0200)
src/smartxml/smartxml.js		patch \| blob \| history
src/wlxml/wlxml.js		patch \| blob \| history
src/wlxml/wlxml.test.js		patch \| blob \| history