[Lxml-checkins] r44015 - lxml/branch/html/src/lxml/html
scoder at codespeak.net
scoder at codespeak.net
Mon Jun 4 11:13:16 CEST 2007
Author: scoder
Date: Mon Jun 4 11:13:15 2007
New Revision: 44015
Modified:
lxml/branch/html/src/lxml/html/clean.py
Log:
some cleanup
Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py (original)
+++ lxml/branch/html/src/lxml/html/clean.py Mon Jun 4 11:13:15 2007
@@ -151,32 +151,34 @@
kill_tags.append('script')
if safe_attrs_only:
safe_attrs = set(defs.safe_attrs)
- for el in _itertree(doc):
- for aname in el.attrib.keys():
+ for el in doc.getiterator():
+ attrib = el.attrib
+ for aname in attrib.keys():
if aname not in defs.safe_attrs:
- del el.attrib[aname]
+ del attrib[aname]
if javascript:
if not safe_attrs_only:
# safe_attrs handles events attributes itself
- for el in _itertree(doc):
- for aname in el.attrib.keys():
+ for el in doc.getiterator():
+ attrib = el.attrib
+ for aname in attrib.keys():
if aname.startswith('on'):
- del el.attrib[aname]
+ del attrib[aname]
doc.rewrite_links(_remove_javascript, resolve_base_href=False)
if not style:
# If we're deleting style then we don't have to remove JS links
# from styles, otherwise...
for el in doc.xpath('descendant-or-self::*[@style]'):
- old = el.attrib['style']
+ old = el.get('style')
new = _css_javascript_re.sub('', old)
new = _css_import_re.sub('', old)
if _has_sneaky_javascript(new):
# Something tricky is going on...
del el.attrib['style']
elif new != old:
- el.attrib['style'] = new
+ el.set('style', new)
for el in doc.xpath('descendant-or-self::style'):
- if el.attrib.get('type', '').lower().strip() == 'text/javascript':
+ if el.get('type', '').lower().strip() == 'text/javascript':
el.drop_element()
continue
old = el.text or ''
More information about the lxml-checkins
mailing list