[Lxml-checkins] r48303 - lxml/trunk/src/lxml/html
ianb at codespeak.net
ianb at codespeak.net
Mon Nov 5 03:04:47 CET 2007
Author: ianb
Date: Mon Nov 5 03:04:45 2007
New Revision: 48303
Modified:
lxml/trunk/src/lxml/html/clean.py
Log:
Handle the case of <applet> in lxml.html.clean, where more than one attribute can contain a link
Modified: lxml/trunk/src/lxml/html/clean.py
==============================================================================
--- lxml/trunk/src/lxml/html/clean.py (original)
+++ lxml/trunk/src/lxml/html/clean.py Mon Nov 5 03:04:45 2007
@@ -368,10 +368,20 @@
def allow_element(self, el):
if el.tag not in self._tag_link_attrs:
return False
- url = el.get(self._tag_link_attrs[el.tag])
- if not url:
- return False
- return self.allow_embedded_url(el, url)
+ attr = self._tag_link_attrs[el.tag]
+ if isinstance(attr, (list, tuple)):
+ for one_attr in attr:
+ url = el.get(one_attr)
+ if not url:
+ return False
+ if not self.allow_embedded_url(el, url):
+ return False
+ return True
+ else:
+ url = el.get(attr)
+ if not url:
+ return False
+ return self.allow_embedded_url(el, url)
def allow_embedded_url(self, el, url):
if (self.whitelist_tags is not None
More information about the lxml-checkins mailing list