[Lxml-checkins] r43969 - lxml/branch/html/src/lxml/html
ianb at codespeak.net
ianb at codespeak.net
Fri Jun 1 08:39:16 CEST 2007
Author: ianb
Date: Fri Jun 1 08:39:16 2007
New Revision: 43969
Modified:
lxml/branch/html/src/lxml/html/clean.py
Log:
Don't remove <fieldset> and <legend> tags when cleaning forms. Do kill <layer> along with the other embedded-content tags.
Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py (original)
+++ lxml/branch/html/src/lxml/html/clean.py Fri Jun 1 08:39:16 2007
@@ -9,7 +9,6 @@
# Other on* attributes that aren't standard?
# Try these tests: http://feedparser.org/tests/wellformed/sanitize/
# Also http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl
-# <layer>...?
# <head> and <title> is fishy in a fragment
# max width for words
# max height?
@@ -141,13 +140,12 @@
if meta:
kill_tags.append('meta')
if embedded:
- kill_tags.extend(['object', 'embed', 'iframe', 'applet'])
+ # FIXME: is <layer> really embedded?
+ kill_tags.extend(['object', 'embed', 'iframe', 'applet', 'layer'])
if frames:
kill_tags.extend(defs.frame_tags)
if forms:
- # FIXME: do I even care about fieldset and legend? I don't
- # care about label.
- remove_tags.extend(['form', 'fieldset', 'legend'])
+ remove_tags.extend(['form'])
kill_tags.extend(['button', 'input', 'select', 'textarea'])
bad = []
for el in doc.iterdescendants():
More information about the lxml-checkins mailing list