[kupu-checkins] r43407 - in kupu/trunk/kupu: common tests

duncan at codespeak.net duncan at codespeak.net
Tue May 15 15:55:16 CEST 2007


Author: duncan
Date: Tue May 15 15:55:16 2007
New Revision: 43407

Modified:
   kupu/trunk/kupu/common/kupucontentfilters.js
   kupu/trunk/kupu/tests/test_xhtml.js
Log:
Improved filtering code: <p> tags can no longer contain tables, captioned images, or other things which make the html go horribly wrong.

Modified: kupu/trunk/kupu/common/kupucontentfilters.js
==============================================================================
--- kupu/trunk/kupu/common/kupucontentfilters.js	(original)
+++ kupu/trunk/kupu/common/kupucontentfilters.js	Tue May 15 15:55:16 2007
@@ -312,7 +312,7 @@
         this.misc = ['noscript'].concat(this.misc_inline);
         this.inline = ['a'].concat(this.special, this.fontstyle, this.phrase, this.inline_forms);
 
-        this.Inline = ['#PCDATA'].concat(this.inline, this.misc_inline);
+        this.Inline = ['#text'].concat(this.inline, this.misc_inline);
 
         this.heading = ['h1','h2','h3','h4','h5','h6'];
         this.lists = ['ul','ol','dl','menu','dir'];
@@ -320,7 +320,7 @@
         this.block = ['p','div','isindex','fieldset','table'].concat(
                      this.heading, this.lists, this.blocktext);
 
-        this.Flow = ['#PCDATA','form'].concat(this.block, this.inline, this.misc);
+        this.Flow = ['#text','form'].concat(this.block, this.inline, this.misc);
     }(this);
 
     this._commonsetting = function(self, names, value) {
@@ -415,7 +415,7 @@
             'base', 'meta', 'link', 'hr', 'param', 'img', 'area', 'input',
             'br', 'basefont', 'isindex', 'col'], []);
 
-        setStates(['title','style','script','option','textarea'], ['#PCDATA']);
+        setStates(['title','style','script','option','textarea'], ['#text']);
         setStates([ 'noscript', 'iframe', 'noframes', 'body', 'div',
             'li', 'dd', 'blockquote', 'center', 'ins', 'del', 'td', 'th'], el.Flow);
 
@@ -430,12 +430,12 @@
         setStates(['dl'], ['dt','dd']);
         setStates(['pre'], validation._exclude(el.Inline, "img|object|embed|applet|big|small|sub|sup|font|basefont"));
         setStates(['a'], validation._exclude(el.Inline, "a"));
-        setStates(['applet', 'object','embed'], ['#PCDATA', 'param','form'].concat(el.block, el.inline, el.misc));
+        setStates(['applet', 'object','embed'], ['#text', 'param','form'].concat(el.block, el.inline, el.misc));
         setStates(['map'], ['form', 'area'].concat(el.block, el.misc));
         setStates(['form'], validation._exclude(el.Flow, ['form']));
         setStates(['select'], ['optgroup','option']);
         setStates(['optgroup'], ['option']);
-        setStates(['fieldset'], ['#PCDATA','legend','form'].concat(el.block,el.inline,el.misc));
+        setStates(['fieldset'], ['#text','legend','form'].concat(el.block,el.inline,el.misc));
         setStates(['button'], validation._exclude(el.Flow, ['a','form','iframe'].concat(el.inline_forms)));
         setStates(['table'], ['caption','col','colgroup','thead','tfoot','tbody','tr']);
         setStates(['thead', 'tfoot', 'tbody'], ['tr']);
@@ -542,26 +542,6 @@
         };
     }(this, editor);
 
-    // Node filtering. May modify html node or xhtml parentNode.
-    // Return true to continue processing html node, false to skip it.
-    this.nodefilters = new function(editor) {
-        // Strip <br> at end of paragraph.
-        // Top level <br>: enclose preceding text (if any) in <p> and
-        // drop the <br>
-        this['br'] = function(node, parentNode) {
-            if (parentNode.tagName=='body') {
-                var p = parentNode.ownerDocument.createElement('p');
-                if (parentNode.lastChild && parentNode.lastChild.nodeType==3) {
-                    p.appendChild(parentNode.lastChild);
-                }
-                parentNode.appendChild(p);
-                return false;
-            }
-            if (!node.nextSibling && /p|div/i.test(parentNode.nodeName)) return false;
-            return true;
-        };
-    }
-
     // Exclude unwanted tags.
     this.excludeTags(['center']);
 
@@ -610,9 +590,101 @@
             if (filter) filter(name, htmlnode, xhtmlnode);
         }
     };
+    this._xmlCopyAttr = function(srcnode, target) {
+        var valid = this.tagAttributes[srcnode.nodeName];
+        for (var i = 0; i < valid.length; i++) {
+            var val = srcnode.getAttribute(valid[i]);
+            if (val) {
+                target.setAttribute(valid[i], val);
+            }
+        };
+    }
+
+    this._convertToSarissaNode = function(ownerdoc, htmlnode) {
+        var root = this._convertNodes(ownerdoc, htmlnode, null, new this.Set(['html']));
+        this._cleanupBr(ownerdoc, root);
+        this._cleanupParas(ownerdoc, root);
+        return root;
+    };
+
+    // Clean up a paragraph. Any direct child which is not allowed in
+    // the paragraph is moved to the parent. This may involve
+    // splitting the paragraph, or if it is at the beginning or end it
+    // may simply mean moving it out of the paragraph.
+    this._cleanupPara = function(ownerdoc, para) {
+        var permitted = this.States.p;
+        var nodes = [[]];
+        var idx = 0;
+        for (var child = para.firstChild; child; child = child.nextSibling) {
+            var nn = child.nodeName.toLowerCase();
+            if (permitted[nn] && (nn != 'img' || !(/\bcaptioned\b/i.test(child.getAttribute('class'))))) {
+                nodes[idx].push(child);
+            } else {
+                if (nodes[idx].length) {
+                    nodes.push(child);
+                } else {
+                    nodes[idx] = child;
+                }
+                nodes.push([]);
+                idx = nodes.length-1;
+            }
+        }
+        if (!nodes[idx].length) {
+            nodes.splice(idx,1);
+        };
+        if (nodes.length > 0 && nodes[0] instanceof Array && !nodes[0].length) {
+            nodes.splice(0,1);
+        }
+        if (nodes.length==0 || (nodes.length==1 && nodes[0] instanceof Array)) {
+            return; /* No change */
+        }
+        /* Need to cleanup this paragraph */
+        var parentnode = para.parentNode;
+        for (var idx = 0; idx < nodes.length; idx++) {
+            var n = nodes[idx];
+            if (n instanceof Array) {
+                var newp = ownerdoc.createElement('p');
+                this._xmlCopyAttr(para, newp);
+                var ln = n.length-1;
+                if (/br/i.test(n[ln].nodeName)) {
+                    n.splice(ln,1);
+                }
+                for (var j = 0; j < n.length; j++) {
+                    newp.appendChild(n[j]);
+                }
+                n = newp;
+            }
+            parentnode.insertBefore(n,para);
+        }
+        parentnode.removeChild(para);
+    };
 
-    this._convertToSarissaNode = function(ownerdoc, htmlnode, xhtmlparent) {
-        return this._convertNodes(ownerdoc, htmlnode, xhtmlparent, new this.Set(['html']));
+    this._cleanupParas = function(ownerdoc, root) {
+        var paras = root.getElementsByTagName('p');
+        for (var i = 0; i < paras.length; i++) {
+            this._cleanupPara(ownerdoc, paras[i]);
+        }
+    };
+    /* Cleanup br tags: br at top level is replaced by a paragraph,
+     * br at end of p|div is dropped.
+     */
+    this._cleanupBr = function(ownerdoc, root) {
+        var breaks = root.getElementsByTagName('br');
+        for (var i = 0; i < breaks.length; i++) {
+            var node = breaks[i];
+            var parentNode = node.parentNode;
+            if (parentNode.tagName=='body') {
+                var p = ownerdoc.createElement('p');
+                var prev = node.previousSibling;
+                if (prev && prev.nodeType==3) {
+                    p.appendChild(prev);
+                }
+                parentNode.insertBefore(p,node);
+                parentNode.removeChild(node);
+            } else if (!node.nextSibling && (/p|div/i.test(parentNode.nodeName))) {
+                parentNode.removeChild(node);
+            }
+        }
     };
 
     this._convertNodes = function(ownerdoc, htmlnode, xhtmlparent, permitted) {
@@ -620,12 +692,6 @@
         var nodename = this._getTagName(htmlnode);
         var nostructure = !this.filterstructure;
 
-        var filter = this.nodefilters[nodename];
-        if (filter) {
-            if (!filter(htmlnode, xhtmlparent)) {
-                return;
-            }
-        }
         // TODO: This permits valid tags anywhere. it should use the state
         // table in xhtmlvalid to only permit tags where the XHTML DTD
         // says they are valid.
@@ -646,7 +712,7 @@
 
         if (kids.length == 0) {
             if (htmlnode.text && htmlnode.text != "" &&
-                (nostructure || permittedChildren['#PCDATA'])) {
+                (nostructure || permittedChildren['#text'])) {
                 var text = htmlnode.text;
                 var tnode = ownerdoc.createTextNode(text);
                 parentnode.appendChild(tnode);
@@ -669,11 +735,11 @@
                         parentnode.appendChild(newkid);
                     };
                 } else if (kid.nodeType == 3) {
-                    if (nostructure || permittedChildren['#PCDATA']) {
+                    if (nostructure || permittedChildren['#text']) {
                         parentnode.appendChild(ownerdoc.createTextNode(kid.nodeValue));
                     }
                 } else if (kid.nodeType == 4) {
-                    if (nostructure || permittedChildren['#PCDATA']) {
+                    if (nostructure || permittedChildren['#text']) {
                         parentnode.appendChild(ownerdoc.createCDATASection(kid.nodeValue));
                     }
                 }

Modified: kupu/trunk/kupu/tests/test_xhtml.js
==============================================================================
--- kupu/trunk/kupu/tests/test_xhtml.js	(original)
+++ kupu/trunk/kupu/tests/test_xhtml.js	Tue May 15 15:55:16 2007
@@ -256,8 +256,45 @@
             '<basefont size="2"/>';
         this.editor.xhtmlvalid.filterstructure = true;
         this.conversionTest(data, data);
+    }
+
+    // Some tests to ensure that we don't put anything in a <p> tag
+    // which isn't allowed in a <p> tag.
+    // Can't test with <p><div>x</div></p> as Firefox DOM fixes that
+    // for us (but not as we want it fixed) so we use something FF doesn't already fix.
+    this.testPcleanup = function() {
+        var data = '<p><table><tbody><tr><td>oops</td></tr></tbody></table></p>';
+        var expected = '<table><tbody><tr><td>oops</td></tr></tbody></table>';
+        this.conversionTest(data, expected);
+    };
+
+    this.testPcleanup2 = function() {
+        var data = '<p class="blue">some text<br/><table><tbody><tr><td>oops</td></tr></tbody></table>more text<br/></p>';
+        var expected = '<p class="blue">some text</p><table><tbody><tr><td>oops</td></tr></tbody></table><p class="blue">more text</p>';
+        this.conversionTest(data, expected);
+    };
+
+    this.testPWithCaptionedImg = function() {
+        // Captioned images are converted to block tags so they must
+        // not be inside a paragraph.
+        var data = '<p>some text<br/><img class="image-inline captioned" src="javascript:" alt="xyzzy" height="1" width="1"/>blah</p>';
+        var expected = '<p>some text</p><img class="image-inline captioned" src="javascript:" alt="xyzzy" height="1" width="1"/><p>blah</p>';
+        this.conversionTest(data, expected);
+        // If there is no surrounding text we don't want empty paras.
+        var data = '<p><img class="image-inline captioned" src="javascript:" alt="xyzzy" height="1" width="1"/></p>';
+        var expected = '<img class="image-inline captioned" src="javascript:" alt="xyzzy" height="1" width="1"/>';
+        this.conversionTest(data, expected);
+        // But ordinary images are fine.
+        var data = '<p>some text<br/><img class="image-inline" src="javascript:" alt="xyzzy" height="1" width="1"/>blah</p>';
+        var expected = '<p>some text<br/><img class="image-inline" src="javascript:" alt="xyzzy" height="1" width="1"/>blah</p>';
+        this.conversionTest(data, expected);
     };
 
+    // Firefox is broken with respect to <br> tags and newlines inside <pre>.
+    // It thinks that <br>\n is two newlines but in fact the HTML spec
+    // says it should ignore any whitespace following a <br>.
+    this.testBrInsidePre = function() {
+    }
     this.tearDown = function() {
         this.body.innerHTML = '';
     };


More information about the kupu-checkins mailing list