[Lxml-checkins] r44639 - in lxml/branch/html/src/lxml/html: . tests

ianb at codespeak.net ianb at codespeak.net
Sat Jun 30 01:36:03 CEST 2007


Author: ianb
Date: Sat Jun 30 01:36:03 2007
New Revision: 44639

Modified:
   lxml/branch/html/src/lxml/html/css.py
   lxml/branch/html/src/lxml/html/tests/test_css.txt
   lxml/branch/html/src/lxml/html/tests/test_css_select.txt
Log:
More improvements to the selectors

Modified: lxml/branch/html/src/lxml/html/css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/css.py	(original)
+++ lxml/branch/html/src/lxml/html/css.py	Sat Jun 30 01:36:03 2007
@@ -232,7 +232,7 @@
         return xpath
 
     def _xpath_empty(self, xpath):
-        xpath.add_condition("count(./children::*) = 0 and string(.) = ''")
+        xpath.add_condition("count(./child::*) = 0 and normalize-space(.) = ''")
         return xpath
 
 class Attrib(object):
@@ -454,11 +454,13 @@
 
 class XPath(object):
 
-    def __init__(self, prefix=None, path=None, element='*', condition=None):
+    def __init__(self, prefix=None, path=None, element='*', condition=None,
+                 star_prefix=False):
         self.prefix = prefix
         self.path = path
         self.element = element
         self.condition = condition
+        self.star_prefix = star_prefix
 
     def __str__(self):
         path = ''
@@ -502,15 +504,24 @@
         self.element = '*'
 
     def add_star_prefix(self):
+        """
+        Adds a */ prefix to the path (after any existing path).  Use this
+        when you need to keep contexts constrained to a single parent.
+        """
         if self.path:
             self.path += '*/'
         else:
             self.path = '*/'
+        self.star_prefix = True
 
     def join(self, combiner, other):
         prefix = str(self)
         prefix += combiner
         path = (other.prefix or '') + (other.path or '')
+        # The other path's star prefix is redundant once it is joined onto
+        # this prefix, so when that is all its path contains we drop it.
+        if other.star_prefix and path == '*/':
+            path = ''
         self.prefix = prefix
         self.path = path
         self.element = other.element

Modified: lxml/branch/html/src/lxml/html/tests/test_css.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.txt	Sat Jun 30 01:36:03 2007
@@ -91,7 +91,7 @@
     >>> xpath('E:only-of-type')
     e[last() = 1]
     >>> xpath('E:empty')
-    e[count(./children::*) = 0 and string(.) = '']
+    e[count(./child::*) = 0 and normalize-space(.) = '']
     >>> xpath('E:contains("foo")')
     e[contains(css:lower-case(string(.)), 'foo')]
     >>> xpath('E.warning')

Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css_select.txt	(original)
+++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt	Sat Jun 30 01:36:03 2007
@@ -1,7 +1,7 @@
 This is a test of CSS selectors.  We setup a document we'll use for
 all our selections, and a function make querying simpler:
 
-    >>> from lxml.html.css import run_css
+    >>> from lxml.html.css import run_css, xpath
     >>> from lxml.html import HTML
     >>> doc = HTML('''
     ... <html><head></head><body>
@@ -20,7 +20,7 @@
     ... c"></li>
     ...    <li id="fifth-li"></li>
     ...    <li id="sixth-li"></li>
-    ...    <li id="seventh-li"></li>
+    ...    <li id="seventh-li">  </li>
     ...  </ol>
     ...  <p id="paragraph">
     ...    <b id="p-b">hi</b> <em id="p-em">there</em>
@@ -120,7 +120,7 @@
     >>> pcss('a:empty')
     name-anchor
     >>> pcss('li:empty')
-    third-li, fourth-li, fifth-li, sixth-li
+    third-li, fourth-li, fifth-li, sixth-li, seventh-li
     >>> pcss('*:contains("link")')
     nil, nil, outer-div, tag-anchor, nofollow-anchor
     >>> pcss('*:contains("E")')
@@ -129,7 +129,7 @@
     first-ol
     >>> pcss('.c', '*.c')
     first-ol, third-li, fourth-li
-    >>> pcss('ol *.c', 'ol li.c', 'ol ~ li.c', 'ol > li.c')
+    >>> pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c')
     third-li, fourth-li
     >>> pcss('#first-li', 'li#first-li', '*#first-li')
     first-li


More information about the lxml-checkins mailing list