[Lxml-checkins] r48303 - lxml/trunk/src/lxml/html

ianb at codespeak.net ianb at codespeak.net
Mon Nov 5 03:04:47 CET 2007


Author: ianb
Date: Mon Nov  5 03:04:45 2007
New Revision: 48303

Modified:
   lxml/trunk/src/lxml/html/clean.py
Log:
Handle the case of <applet> in lxml.html.clean, where more than one attribute can contain a link

Modified: lxml/trunk/src/lxml/html/clean.py
==============================================================================
--- lxml/trunk/src/lxml/html/clean.py	(original)
+++ lxml/trunk/src/lxml/html/clean.py	Mon Nov  5 03:04:45 2007
@@ -368,10 +368,20 @@
     def allow_element(self, el):
         if el.tag not in self._tag_link_attrs:
             return False
-        url = el.get(self._tag_link_attrs[el.tag])
-        if not url:
-            return False
-        return self.allow_embedded_url(el, url)
+        attr = self._tag_link_attrs[el.tag]
+        if isinstance(attr, (list, tuple)):
+            for one_attr in attr:
+                url = el.get(one_attr)
+                if not url:
+                    return False
+                if not self.allow_embedded_url(el, url):
+                    return False
+            return True
+        else:
+            url = el.get(attr)
+            if not url:
+                return False
+            return self.allow_embedded_url(el, url)
 
     def allow_embedded_url(self, el, url):
         if (self.whitelist_tags is not None


More information about the lxml-checkins mailing list