[Lxml-checkins] r43959 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
ianb at codespeak.net
Fri Jun 1 06:58:08 CEST 2007
Author: ianb
Date: Fri Jun 1 06:58:08 2007
New Revision: 43959
Modified:
lxml/branch/html/src/lxml/html/__init__.py
lxml/branch/html/src/lxml/html/tests/test_basic.txt
Log:
Fix find_rel_links
Modified: lxml/branch/html/src/lxml/html/__init__.py
==============================================================================
--- lxml/branch/html/src/lxml/html/__init__.py (original)
+++ lxml/branch/html/src/lxml/html/__init__.py Fri Jun 1 06:58:08 2007
@@ -9,7 +9,7 @@
'find_rel_links', 'find_class', 'make_links_absolute',
'resolve_base_href', 'iter_links', 'rewrite_links']
-_rel_links_xpath = etree.XPath("descendant-or-self::a[fn:upper-case(@rel)=$rel]")
+_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]")
#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
_class_xpath = etree.XPath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]")
_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]")
@@ -71,13 +71,15 @@
"""
Find any links like ``<a rel="{rel}">...</a>``; returns a list of elements.
"""
- return _rel_links_xpath(self, rel=rel.lower())
+ rel = rel.lower()
+ return [el for el in _rel_links_xpath(self)
+ if el.attrib['rel'].lower() == rel]
def find_class(self, class_name):
"""
Find any elements with the given class name.
"""
- return _class_xpath(self, class_name=class_name.lower())
+ return _class_xpath(self, class_name=class_name)
def get_element_by_id(self, id, default=None):
"""
Modified: lxml/branch/html/src/lxml/html/tests/test_basic.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_basic.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_basic.txt Fri Jun 1 06:58:08 2007
@@ -27,16 +27,14 @@
Also added is a find_rel_links, which you can use to search for links
like ``<a rel="$something">``:
- >>> h = HTML('''
- ... <a href="1">test 1</a>
- ... <a href="2" rel="tag">item 2</a>
- ... <a href="3" rel="tagging">item 3</a>
- ... <a href="4" rel="TAG">item 4</a>''')
- >>> print [e.attrib['href'] for e in h.find_rel_links('tag')]
- ['2']
- >>> print [e.attrib['href'] for e in h.find_rel_links('nofollow')]
- []
-
-FIXME: actually that should have returned ['2', '4']
+ >>> h = HTML('''
+ ... <a href="1">test 1</a>
+ ... <a href="2" rel="tag">item 2</a>
+ ... <a href="3" rel="tagging">item 3</a>
+ ... <a href="4" rel="TAG">item 4</a>''')
+ >>> print [e.attrib['href'] for e in h.find_rel_links('tag')]
+ ['2', '4']
+ >>> print [e.attrib['href'] for e in h.find_rel_links('nofollow')]
+ []
More information about the lxml-checkins
mailing list