[Lxml-checkins] r43969 - lxml/branch/html/src/lxml/html

ianb at codespeak.net ianb at codespeak.net
Fri Jun 1 08:39:16 CEST 2007


Author: ianb
Date: Fri Jun  1 08:39:16 2007
New Revision: 43969

Modified:
   lxml/branch/html/src/lxml/html/clean.py
Log:
Don't remove <fieldset> and <legend> tags when stripping forms.  Do remove <layer> (treated as embedded content).

Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py	(original)
+++ lxml/branch/html/src/lxml/html/clean.py	Fri Jun  1 08:39:16 2007
@@ -9,7 +9,6 @@
 # Other on* attributes that aren't standard?
 # Try these tests: http://feedparser.org/tests/wellformed/sanitize/
 # Also http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl
-# <layer>...?
 # <head> and <title> is fishy in a fragment
 # max width for words
 # max height?
@@ -141,13 +140,12 @@
     if meta:
         kill_tags.append('meta')
     if embedded:
-        kill_tags.extend(['object', 'embed', 'iframe', 'applet'])
+        # FIXME: is <layer> really embedded?
+        kill_tags.extend(['object', 'embed', 'iframe', 'applet', 'layer'])
     if frames:
         kill_tags.extend(defs.frame_tags)
     if forms:
-        # FIXME: do I even care about fieldset and legend?  I don't
-        # care about label.
-        remove_tags.extend(['form', 'fieldset', 'legend'])
+        remove_tags.extend(['form'])
         kill_tags.extend(['button', 'input', 'select', 'textarea'])
     bad = []
     for el in doc.iterdescendants():


More information about the lxml-checkins mailing list