[Lxml-checkins] r43963 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
Fri Jun 1 07:12:30 CEST 2007
Author: ianb
Date: Fri Jun 1 07:12:30 2007
New Revision: 43963
Modified:
lxml/branch/html/src/lxml/html/clean.py
lxml/branch/html/src/lxml/html/tests/test_clean.txt
Log:
Handle <image> in clean
Modified: lxml/branch/html/src/lxml/html/clean.py
==============================================================================
--- lxml/branch/html/src/lxml/html/clean.py (original)
+++ lxml/branch/html/src/lxml/html/clean.py Fri Jun 1 07:12:30 2007
@@ -11,7 +11,6 @@
# Other on* attributes that aren't standard?
# Try these tests: http://feedparser.org/tests/wellformed/sanitize/
# Also http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl
-# IE treats <image> like <img>
# <layer>...?
# <head> and <title> is fishy in a fragment
# max width for words
Modified: lxml/branch/html/src/lxml/html/tests/test_clean.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_clean.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_clean.txt Fri Jun 1 07:12:30 2007
@@ -19,6 +19,7 @@
... </form>
... <blink>annoying EVIL!</blink>
... <a href="evil-site">spam spam SPAM!</a>
+... <image src="evil!">
... </body>
... </html>'''
>>> print doc
@@ -40,6 +41,7 @@
</form>
<blink>annoying EVIL!</blink>
<a href="evil-site">spam spam SPAM!</a>
+ <image src="evil!">
</body>
</html>
>>> print tostring(HTML(doc))
@@ -61,6 +63,7 @@
</form>
<blink>annoying EVIL!</blink>
<a href="evil-site">spam spam SPAM!</a>
+ <image src="evil!">
</body>
</html>
>>> print clean_html(doc)
@@ -76,6 +79,7 @@
Password:
<blink>annoying EVIL!</blink>
<a href="evil-site">spam spam SPAM!</a>
+ <img src="evil!">
</body>
</html>
>>> print clean_html(doc, style=True, links=True, add_nofollow=True)
@@ -90,5 +94,6 @@
Password:
<blink>annoying EVIL!</blink>
<a href="evil-site" rel="nofollow">spam spam SPAM!</a>
+ <img src="evil!">
</body>
</html>
More information about the lxml-checkins mailing list