[Lxml-checkins] r44681 - in lxml/branch/html/src/lxml/html: . tests
ianb at codespeak.net
ianb at codespeak.net
Tue Jul 3 00:54:34 CEST 2007
Author: ianb
Date: Tue Jul 3 00:54:33 2007
New Revision: 44681
Modified:
lxml/branch/html/src/lxml/html/css.py
lxml/branch/html/src/lxml/html/tests/test_css.py
lxml/branch/html/src/lxml/html/tests/test_css.txt
lxml/branch/html/src/lxml/html/tests/test_css_select.txt
Log:
Created CSSSelector, added a public __all__ for the css module; renamed the xpath() function; put in some tests for parse_series and fixed the results; added NotImplemented errors for *:something-of-type psuedoclasses, which I can't well implement with XPath; change inheritance of the exceptions
Modified: lxml/branch/html/src/lxml/html/css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/css.py (original)
+++ lxml/branch/html/src/lxml/html/css.py Tue Jul 3 00:54:33 2007
@@ -1,12 +1,31 @@
import re
from lxml import etree
-class SelectorSyntaxError(Exception):
+__all__ = ['SelectorSyntaxError', 'ExpressionError',
+ 'CSSSelector']
+
+class SelectorSyntaxError(SyntaxError):
pass
-class ExpressionError(Exception):
+class ExpressionError(RuntimeError):
pass
+class CSSSelector(etree.XPath):
+
+ def __init__(self, css):
+ path = css_to_xpath(css)
+ etree.XPath.__init__(self, path)
+ self.css = css
+
+ def __repr__(self):
+ return '<%s %s for %r>' % (
+ self.__class__.__name__,
+ hex(abs(id(self)))[2:],
+ self.css)
+
+##############################
+## Token objects:
+
class _UniToken(unicode):
def __new__(cls, contents, pos):
obj = unicode.__new__(cls, contents)
@@ -91,14 +110,14 @@
def _xpath_nth_child(self, xpath, expr, last=False,
add_name_test=True):
a, b = parse_series(expr)
- if not a:
+ if not a and not b:
# a=0 means nothing is returned...
xpath.add_condition('false() and position() = 0')
return xpath
if add_name_test:
xpath.add_name_test()
xpath.add_star_prefix()
- if a == 1:
+ if a == 0:
if last:
b = 'last() - %s' % b
xpath.add_condition('position() = %s' % b)
@@ -111,12 +130,17 @@
b_neg = str(-b)
else:
b_neg = '+%s' % (-b)
- expr = '(position() %s) mod %s = 0' % (b_neg, a)
+ if a != 1:
+ expr = ['(position() %s) mod %s = 0' % (b_neg, a)]
+ else:
+ expr = []
if b >= 0:
- expr += ' and position() >= %s' % b
+ expr.append('position() >= %s' % b)
elif b < 0 and last:
- expr += ' and position() < (last() %s)' % b
- xpath.add_condition(expr)
+ expr.append('position() < (last() %s)' % b)
+ expr = ' and '.join(expr)
+ if expr:
+ xpath.add_condition(expr)
return xpath
# FIXME: handle an+b, odd, even
# an+b means every-a, plus b, e.g., 2n+1 means odd
@@ -130,6 +154,9 @@
return self._xpath_nth_child(xpath, expr, last=True)
def _xpath_nth_of_type(self, xpath, expr):
+ if xpath.element == '*':
+ raise NotImplementedError(
+ "*:nth-of-type() is not implemented")
return self._xpath_nth_child(xpath, expr, add_name_test=False)
def _xpath_nth_last_of_type(self, xpath, expr):
@@ -215,11 +242,17 @@
return xpath
def _xpath_first_of_type(self, xpath):
+ if xpath.element == '*':
+ raise NotImplementedError(
+ "*:first-of-type is not implemented")
xpath.add_star_prefix()
xpath.add_condition('position() = 1')
return xpath
def _xpath_last_of_type(self, xpath):
+ if xpath.element == '*':
+ raise NotImplementedError(
+ "*:last-of-type is not implemented")
xpath.add_star_prefix()
xpath.add_condition('position() = last()')
return xpath
@@ -230,6 +263,9 @@
return xpath
def _xpath_only_of_type(self, xpath):
+ if xpath.element == '*':
+ raise NotImplementedError(
+ "*:only-of-type is not implemented")
xpath.add_condition('last() = 1')
return xpath
@@ -343,7 +379,7 @@
else:
# FIXME: Should we lowercase here?
el = '%s:%s' % (self.namespace, self.element)
- return XPath(element=el)
+ return XPathExpr(element=el)
class Hash(object):
"""
@@ -375,7 +411,7 @@
def xpath(self):
paths = [item.xpath() for item in self.items]
- return XPathOr(paths)
+ return XPathExprOr(paths)
class CombinedSelector(object):
@@ -435,9 +471,9 @@
return xpath
##############################
-## XPath objects:
+## XPathExpr objects:
-def xpath(css_expr, prefix='descendant-or-self::'):
+def css_to_xpath(css_expr, prefix='descendant-or-self::'):
if isinstance(css_expr, basestring):
css_expr = parse(css_expr)
expr = css_expr.xpath()
@@ -447,14 +483,7 @@
expr.add_prefix(prefix)
return str(expr)
-def run_xpath(doc, xpath):
- return [el for el in doc.xpath(xpath)
- if isinstance(el, etree.ElementBase)]
-
-def run_css(doc, css):
- return run_xpath(doc, xpath(css))
-
-class XPath(object):
+class XPathExpr(object):
def __init__(self, prefix=None, path=None, element='*', condition=None,
star_prefix=False):
@@ -529,7 +558,7 @@
self.element = other.element
self.condition = other.condition
-class XPathOr(XPath):
+class XPathExprOr(XPathExpr):
"""
Represents on |'d expressions. Note that unfortunately it isn't
@@ -547,7 +576,9 @@
return ' | '.join([prefix + str(i) for i in self.items])
def xpath_repr(s):
- # FIXME: I don't think this is right
+ # FIXME: I don't think this is right, but lacking any reasonable
+ # specification on what XPath literals look like (which doesn't seem
+ # to be in the XPath specification) it is hard to do 'right'
if isinstance(s, Element):
# This is probably a symbol that looks like an expression...
s = s._format_element()
@@ -703,11 +734,11 @@
if isinstance(s, Element):
s = s._format_element()
if not s or s == '*':
- # Happens when there's nothing, which CSS things of as *
- return (1, 0)
+ # Happens when there's nothing, which the CSS parser thinks of as *
+ return (0, 0)
if isinstance(s, int):
# Happens when you just get a number
- return (1, s)
+ return (0, s)
if s == 'odd':
return (2, 1)
elif s == 'even':
@@ -716,7 +747,7 @@
return (1, 0)
if 'n' not in s:
# Just a b
- return int(s)
+ return (0, int(s))
a, b = s.split('n', 1)
if not a:
a = 1
Modified: lxml/branch/html/src/lxml/html/tests/test_css.py
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.py (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.py Tue Jul 3 00:54:33 2007
@@ -69,7 +69,7 @@
body = doc.xpath('//body')[0]
bad = []
selector, count = self.selectors[self.index]
- xpath = css.xpath(css.parse(selector))
+ xpath = css.css_to_xpath(css.parse(selector))
try:
results = body.xpath(xpath)
except Exception, e:
Modified: lxml/branch/html/src/lxml/html/tests/test_css.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css.txt Tue Jul 3 00:54:33 2007
@@ -110,6 +110,23 @@
e/following-sibling::f
>>> xpath('div#container p')
div[@id = 'container']/descendant::p
- >>> # FIXME: This isn't right, but I don't know what *is* right
>>> xpath('p *:only-of-type')
- p/descendant::*[last() = 1]
+ Traceback (most recent call last):
+ ...
+ NotImplementedError: *:only-of-type is not implemented
+
+Then of parse_series:
+
+ >>> from lxml.html.css import parse_series
+ >>> parse_series('1n+3')
+ (1, 3)
+ >>> parse_series('n-5')
+ (1, -5)
+ >>> parse_series('odd')
+ (2, 1)
+ >>> parse_series('3n')
+ (3, 0)
+ >>> parse_series('n')
+ (1, 0)
+ >>> parse_series('5')
+ (0, 5)
\ No newline at end of file
Modified: lxml/branch/html/src/lxml/html/tests/test_css_select.txt
==============================================================================
--- lxml/branch/html/src/lxml/html/tests/test_css_select.txt (original)
+++ lxml/branch/html/src/lxml/html/tests/test_css_select.txt Tue Jul 3 00:54:33 2007
@@ -1,7 +1,7 @@
This is a test of CSS selectors. We setup a document we'll use for
all our selections, and a function make querying simpler:
- >>> from lxml.html.css import run_css, xpath
+ >>> from lxml.html.css import CSSSelector
>>> from lxml.html import HTML
>>> doc = HTML('''
... <html><head></head><body>
@@ -35,10 +35,10 @@
>>> for count, el in enumerate(doc.getiterator()):
... order[el] = count
>>> def select_ids(selector):
- ... items = run_css(doc, selector)
+ ... items = CSSSelector(selector)(doc)
... if not items:
... return 'empty'
- ... items = run_css(doc, selector)
+ ... items = CSSSelector(selector)(doc)
... items.sort(key=lambda el: order[el])
... return ', '.join([el.get('id', 'nil') for el in items])
>>> def pcss(main, *selectors):
@@ -114,7 +114,9 @@
>>> pcss('div *:only-child')
foobar-span
>>> pcss('p *:only-of-type')
- p-em
+ Traceback (most recent call last):
+ ...
+ NotImplementedError: *:only-of-type is not implemented
>>> pcss('p:only-of-type')
paragraph
>>> pcss('a:empty')
More information about the lxml-checkins
mailing list