[Lxml-checkins] r44015 - lxml/branch/html/src/lxml/html

scoder at codespeak.net scoder at codespeak.net
Mon Jun 4 11:13:16 CEST 2007


Author: scoder
Date: Mon Jun  4 11:13:15 2007
New Revision: 44015

Modified:
   lxml/branch/html/src/lxml/html/clean.py
Log:
some cleanup

Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py	(original)
+++ lxml/branch/html/src/lxml/html/clean.py	Mon Jun  4 11:13:15 2007
@@ -151,32 +151,34 @@
         kill_tags.append('script')
     if safe_attrs_only:
         safe_attrs = set(defs.safe_attrs)
-        for el in _itertree(doc):
-            for aname in el.attrib.keys():
+        for el in doc.getiterator():
+            attrib = el.attrib
+            for aname in attrib.keys():
                 if aname not in defs.safe_attrs:
-                    del el.attrib[aname]
+                    del attrib[aname]
     if javascript:
         if not safe_attrs_only:
             # safe_attrs handles events attributes itself
-            for el in _itertree(doc):
-                for aname in el.attrib.keys():
+            for el in doc.getiterator():
+                attrib = el.attrib
+                for aname in attrib.keys():
                     if aname.startswith('on'):
-                        del el.attrib[aname]
+                        del attrib[aname]
         doc.rewrite_links(_remove_javascript, resolve_base_href=False)
         if not style:
             # If we're deleting style then we don't have to remove JS links
             # from styles, otherwise...
             for el in doc.xpath('descendant-or-self::*[@style]'):
-                old = el.attrib['style']
+                old = el.get('style')
                 new = _css_javascript_re.sub('', old)
                 new = _css_import_re.sub('', old)
                 if _has_sneaky_javascript(new):
                     # Something tricky is going on...
                     del el.attrib['style']
                 elif new != old:
-                    el.attrib['style'] = new
+                    el.set('style', new)
             for el in doc.xpath('descendant-or-self::style'):
-                if el.attrib.get('type', '').lower().strip() == 'text/javascript':
+                if el.get('type', '').lower().strip() == 'text/javascript':
                     el.drop_element()
                     continue
                 old = el.text or ''


More information about the lxml-checkins mailing list