wlxml: moving white space handling from canvas code base

author Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>

Tue, 8 Oct 2013 14:12:19 +0000 (16:12 +0200)

committer Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>

Wed, 9 Oct 2013 14:58:40 +0000 (16:58 +0200)
author Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
Tue, 8 Oct 2013 14:12:19 +0000 (16:12 +0200)
committer Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
Wed, 9 Oct 2013 14:58:40 +0000 (16:58 +0200)
diff --git a/src/smartxml/smartxml.js b/src/smartxml/smartxml.js

index 248333e..477166b 100644 (file)
--- a/src/smartxml/smartxml.js
+++ b/src/smartxml/smartxml.js
@@ -174,6 +174,9 @@ var Document = function(xml) {
      Object.defineProperty(this, 'root', {get: function() {
          return doc.createElementNode($document[0]);
      }});
+    Object.defineProperty(this, 'dom', {get: function() {
+        return $document[0];
+    }});
  };
  $.extend(Document.prototype, {
      ElementNodeFactory: ElementNode,
diff --git a/src/wlxml/wlxml.js b/src/wlxml/wlxml.js

index 7dfbd99..40f070c 100644 (file)
--- a/src/wlxml/wlxml.js
+++ b/src/wlxml/wlxml.js
@@ -47,6 +47,38 @@ $.extend(WLXMLElementNode.prototype, smartxml.ElementNode.prototype, {
  
  var WLXMLDocument = function(xml) {
      smartxml.Document.call(this, xml);
+    // Normalize white space in the document's text nodes, in place, right after the base constructor has run.
+    $(this.dom).find(':not(iframe)').addBack().contents() // every non-iframe descendant element plus the root itself, then all of their child nodes
+    .filter(function() {return this.nodeType === Node.TEXT_NODE;}) // keep only the text nodes
+    .each(function() {
+        var el = $(this),
+            text = {original: el.text(), trimmed: $.trim(el.text())}, // node text before and after $.trim()
+            elParent = el.parent(),
+            hasSpanParent = elParent.prop('tagName') === 'SPAN', // NOTE(review): compares against upper-case 'SPAN'; whether tagName is upper-cased depends on how the XML was parsed - confirm
+            hasSpanBefore = el.prev().length && $(el.prev()).prop('tagName') === 'SPAN', // jQuery .prev() yields the preceding sibling element, if any
+            hasSpanAfter = el.next().length && $(el.next()).prop('tagName') === 'SPAN'; // jQuery .next() yields the following sibling element, if any
+
+        // Default result: the text with its leading and trailing white space removed.
+        text.transformed = text.trimmed;
+        // Inside a span, or next to one, white space at the text boundary is reduced to a single space instead of being dropped.
+        if(hasSpanParent || hasSpanBefore || hasSpanAfter) {
+            var startSpace = /\s/g.test(text.original.substr(0,1)), // does the original text start with white space?
+                endSpace = /\s/g.test(text.original.substr(-1)) && text.original.length > 1; // does it end with white space? length > 1 keeps a lone character from counting at both ends
+            text.transformed = (startSpace && (hasSpanParent || hasSpanBefore) ? ' ' : ''); // leading space survives only inside a span or after one
+            text.transformed += text.trimmed;
+            text.transformed += (endSpace && (hasSpanParent || hasSpanAfter) ? ' ' : ''); // trailing space survives only inside a span or before one
+        } else {
+            if(text.trimmed.length === 0 && text.original.length > 0 && elParent.contents().length === 1) { // white-space-only text that is its parent's sole child node
+                text.transformed = ' '; // kept as one space rather than removed
+            }
+        }
+        // An empty result means nothing is left of this text node: remove it from the tree.
+        if(!text.transformed) {
+            el.remove();
+            return true; // continue with the next text node
+        }
+        el.replaceWith(document.createTextNode(text.transformed)); // NOTE(review): the new node comes from `document` (presumably the global page document), not from this.dom's own document - confirm this is intended
+    });
  };
  WLXMLDocument.prototype = Object.create(smartxml.Document.prototype);
  $.extend(WLXMLDocument.prototype, {
diff --git a/src/wlxml/wlxml.test.js b/src/wlxml/wlxml.test.js

index 9bca9f0..7628969 100644 (file)
--- a/src/wlxml/wlxml.test.js
+++ b/src/wlxml/wlxml.test.js
@@ -33,6 +33,40 @@ describe('WLXMLDocument', function() {
          });
      });
  
+    describe('White space handling', function() {
+        it('ignores white space surrounding block elements', function() {
+            var node = nodeFromXML('<section> <div></div> </section>'), // nodeFromXML is defined elsewhere in this file; presumably returns the root node of a document built from the XML - confirm
+                contents = node.contents();
+            expect(contents).to.have.length(1); // both white-space-only text nodes are gone, only the <div> remains
+            expect(contents[0].nodeType).to.equal(Node.ELEMENT_NODE);
+        });
+        it('ignores white space between block elements', function() {
+            var node = nodeFromXML('<section><div></div> <div></div></section>'),
+            contents = node.contents();
+            expect(contents).to.have.length(2); // the white space between the two <div>s is gone
+            [0,1].forEach(function(idx) {
+                expect(contents[idx].nodeType).to.equal(Node.ELEMENT_NODE);
+            });
+        });
+        it('trims white space from the beginning and the end of the block elements', function() {
+            var node = nodeFromXML('<section> Alice <span>has</span> a cat </section>');
+            expect(node.contents()[0].getText()).to.equal('Alice '); // text before the span: leading space dropped, the space next to the span kept
+            expect(node.contents()[2].getText()).to.equal(' a cat'); // text after the span: the space next to the span kept, trailing space dropped
+        });
+        it('normalizes string of white characters to one space at the inline element boundries', function() {
+            var node = nodeFromXML('<span>   Alice has a cat   </span>');
+            expect(node.contents()[0].getText()).to.equal(' Alice has a cat '); // three spaces at each end collapse to one
+        });
+        it('normalizes string of white characters to one space before inline element', function() {
+            var node = nodeFromXML('<div>Alice has  <span>a cat</span></div>');
+            expect(node.contents()[0].getText()).to.equal('Alice has '); // two spaces before the span collapse to one
+        });
+        it('normalizes string of white characters to one space after inline element', function() {
+            var node = nodeFromXML('<div>Alice has <span>a</span>  cat</div>');
+            expect(node.contents()[2].getText()).to.equal(' cat'); // two spaces after the span collapse to one
+        });
+    });
+
  });
  
  });
 \ No newline at end of file
author	Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
	Tue, 8 Oct 2013 14:12:19 +0000 (16:12 +0200)
committer	Aleksander Łukasz <aleksander.lukasz@nowoczesnapolska.org.pl>
	Wed, 9 Oct 2013 14:58:40 +0000 (16:58 +0200)
src/smartxml/smartxml.js		patch \| blob \| history
src/wlxml/wlxml.js		patch \| blob \| history
src/wlxml/wlxml.test.js		patch \| blob \| history