[Lxml-checkins] r44639 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
ianb at codespeak.net
Sat Jun 30 01:36:03 CEST 2007
Author: ianb
Date: Sat Jun 30 01:36:03 2007
New Revision: 44639
Modified:
lxml/branch/html/src/lxml/html/css.py
lxml/branch/html/src/lxml/html/tests/test_css.txt
lxml/branch/html/src/lxml/html/tests/test_css_select.txt
Log:
More improvements to the selectors
Modified: lxml/branch/html/src/lxml/html/css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/css.py (original)
+++ lxml/branch/html/src/lxml/html/css.py Sat Jun 30 01:36:03 2007
@@ -232,7 +232,7 @@
return xpath
def _xpath_empty(self, xpath):
- xpath.add_condition("count(./children::*) = 0 and string(.) = ''")
+ xpath.add_condition("count(./child::*) = 0 and normalize-space(.) = ''")
return xpath
class Attrib(object):
@@ -454,11 +454,13 @@
class XPath(object):
- def __init__(self, prefix=None, path=None, element='*', condition=None):
+ def __init__(self, prefix=None, path=None, element='*', condition=None,
+ star_prefix=False):
self.prefix = prefix
self.path = path
self.element = element
self.condition = condition
+ self.star_prefix = star_prefix
def __str__(self):
path = ''
@@ -502,15 +504,24 @@
self.element = '*'
def add_star_prefix(self):
+ """
+ Adds a /* prefix if there is no prefix. This is when you need
+ to keep contexts constrained to a single parent.
+ """
if self.path:
self.path += '*/'
else:
self.path = '*/'
+ self.star_prefix = True
def join(self, combiner, other):
prefix = str(self)
prefix += combiner
path = (other.prefix or '') + (other.path or '')
+ # We don't need a star prefix if we are joining to this other
+ # prefix; so we'll get rid of it
+ if other.star_prefix and path == '*/':
+ path = ''
self.prefix = prefix
self.path = path
self.element = other.element
Modified: lxml/branch/html/src/lxml/html/tests/test_css.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.txt Sat Jun 30 01:36:03 2007
@@ -91,7 +91,7 @@
>>> xpath('E:only-of-type')
e[last() = 1]
>>> xpath('E:empty')
- e[count(./children::*) = 0 and string(.) = '']
+ e[count(./child::*) = 0 and normalize-space(.) = '']
>>> xpath('E:contains("foo")')
e[contains(css:lower-case(string(.)), 'foo')]
>>> xpath('E.warning')
Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css_select.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt Sat Jun 30 01:36:03 2007
@@ -1,7 +1,7 @@
This is a test of CSS selectors. We set up a document we'll use for
all our selections, and a function to make querying simpler:
- >>> from lxml.html.css import run_css
+ >>> from lxml.html.css import run_css, xpath
>>> from lxml.html import HTML
>>> doc = HTML('''
... <html><head></head><body>
@@ -20,7 +20,7 @@
... c"></li>
... <li id="fifth-li"></li>
... <li id="sixth-li"></li>
- ... <li id="seventh-li"></li>
+ ... <li id="seventh-li"> </li>
... </ol>
... <p id="paragraph">
... <b id="p-b">hi</b> <em id="p-em">there</em>
@@ -120,7 +120,7 @@
>>> pcss('a:empty')
name-anchor
>>> pcss('li:empty')
- third-li, fourth-li, fifth-li, sixth-li
+ third-li, fourth-li, fifth-li, sixth-li, seventh-li
>>> pcss('*:contains("link")')
nil, nil, outer-div, tag-anchor, nofollow-anchor
>>> pcss('*:contains("E")')
@@ -129,7 +129,7 @@
first-ol
>>> pcss('.c', '*.c')
first-ol, third-li, fourth-li
- >>> pcss('ol *.c', 'ol li.c', 'ol ~ li.c', 'ol > li.c')
+ >>> pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c')
third-li, fourth-li
>>> pcss('#first-li', 'li#first-li', '*#first-li')
first-li
More information about the lxml-checkins
mailing list