[Lxml-checkins] r50512 - in lxml/trunk: . doc src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Fri Jan 11 11:55:09 CET 2008
Author: scoder
Date: Fri Jan 11 11:55:07 2008
New Revision: 50512
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/doc/tutorial.txt
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/python.pxd
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
r3237 at delle: sbehnel | 2008-01-11 11:54:56 +0100
subtyping PyStringObject does not work in Cython/Pyrex, so XPath string results will just have to be unicode
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Jan 11 11:55:07 2008
@@ -12,7 +12,8 @@
* XPath string results of the ``text()`` function and attribute
selection make their Element container accessible through a
- ``getparent()`` method.
+ ``getparent()`` method. As a side effect, they are now always
+ unicode objects (even for plain ASCII strings).
* ``XSLT`` objects are usable in any thread - at the cost of a deep
copy if they were not created in that thread.
Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt (original)
+++ lxml/trunk/doc/tutorial.txt Fri Jan 11 11:55:07 2008
@@ -281,13 +281,39 @@
>>> print html.xpath("string()") # lxml.etree only!
TEXTTAIL
>>> print html.xpath("//text()") # lxml.etree only!
- ['TEXT', 'TAIL']
+ [u'TEXT', u'TAIL']
If you want to use this more often, you can wrap it in a function::
>>> build_text_list = etree.XPath("//text()") # lxml.etree only!
>>> print build_text_list(html)
- ['TEXT', 'TAIL']
+ [u'TEXT', u'TAIL']
+
+Note that the ``text()`` function in XPath always returns unicode
+strings. This is because each returned string is actually a special
+object that knows about its origins. You can ask it where it came from
+through its ``getparent()`` method, just as you would with Elements::
+
+ >>> texts = build_text_list(html)
+ >>> print texts[0]
+ TEXT
+ >>> parent = texts[0].getparent()
+ >>> print parent.tag
+ body
+
+ >>> print texts[1]
+ TAIL
+ >>> print texts[1].getparent().tag
+ br
+
+You can also find out whether a result is normal text content or tail text::
+
+ >>> print texts[0].is_text
+ True
+ >>> print texts[1].is_text
+ False
+ >>> print texts[1].is_tail
+ True
.. _XPath: xpathxslt.html#xpath
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Fri Jan 11 11:55:07 2008
@@ -560,16 +560,7 @@
################################################################################
# special str/unicode subclasses
-cdef class _ElementUnicodeResult(python.unicode):
- cdef _Element parent
- cdef readonly object is_tail
- cdef readonly object is_text
- cdef readonly object is_attribute
-
- def getparent(self):
- return self.parent
-
-cdef class _ElementStringResult(python.str):
+cdef class _ElementStringResult(python.unicode):
cdef _Element parent
cdef readonly object is_tail
cdef readonly object is_text
@@ -579,22 +570,17 @@
return self.parent
cdef object _newElementStringResult(_Document doc, xmlNode* c_node):
- cdef _ElementUnicodeResult element_unicode
- cdef _ElementStringResult element_str
+ cdef _ElementStringResult result
cdef xmlNode* c_element
cdef char* s
- cdef bint is_attribute, is_tail, is_utf8
+ cdef bint is_attribute, is_tail
if c_node.type == tree.XML_ATTRIBUTE_NODE:
is_attribute = 1
is_tail = 0
s = tree.xmlNodeGetContent(c_node)
- is_utf8 = isutf8(s)
try:
- if is_utf8:
- value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
- else:
- value = s
+ value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
finally:
tree.xmlFree(s)
c_element = NULL
@@ -602,12 +588,8 @@
#assert c_node.type == tree.XML_TEXT_NODE, "invalid node type"
is_attribute = 0
# tail text?
- is_utf8 = isutf8(c_node.content)
- if is_utf8:
- value = python.PyUnicode_DecodeUTF8(
- c_node.content, cstd.strlen(c_node.content), NULL)
- else:
- value = c_node.content
+ value = python.PyUnicode_DecodeUTF8(
+ c_node.content, cstd.strlen(c_node.content), NULL)
c_element = _previousElement(c_node)
is_tail = c_element is not NULL
@@ -620,20 +602,12 @@
if c_element is NULL:
return value
- if is_utf8:
- element_unicode = _ElementUnicodeResult(value)
- element_unicode.parent = _fakeDocElementFactory(doc, c_element)
- element_unicode.is_attribute = is_attribute
- element_unicode.is_tail = is_tail
- element_unicode.is_text = not (is_tail or is_attribute)
- return element_unicode
- else:
- element_str = _ElementStringResult(value)
- element_str.parent = _fakeDocElementFactory(doc, c_element)
- element_str.is_attribute = is_attribute
- element_str.is_tail = is_tail
- element_str.is_text = not (is_tail or is_attribute)
- return element_str
+ result = _ElementStringResult(value)
+ result.parent = _fakeDocElementFactory(doc, c_element)
+ result.is_attribute = is_attribute
+ result.is_tail = is_tail
+ result.is_text = not (is_tail or is_attribute)
+ return result
################################################################################
# callbacks for XPath/XSLT extension functions
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri Jan 11 11:55:07 2008
@@ -19,9 +19,6 @@
ctypedef class __builtin__.unicode [object PyUnicodeObject]:
pass
- ctypedef class __builtin__.str [object PyStringObject]:
- pass
-
cdef FILE* PyFile_AsFile(object p)
cdef int PyFile_Check(object p)
cdef object PyFile_Name(object p)
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Jan 11 11:55:07 2008
@@ -67,16 +67,33 @@
self.assertEquals(['Foo', 'Bar'],
tree.xpath('/a/b/text()'))
+ def test_xpath_list_text_parent(self):
+ tree = self.parse('<a><b>FooBar</b><b>BarFoo</b></a>')
+ root = tree.getroot()
+ self.assertEquals(['FooBar', 'BarFoo'],
+ tree.xpath('/a/b/text()'))
+ self.assertEquals([root[0], root[1]],
+ [r.getparent() for r in tree.xpath('/a/b/text()')])
+
+ def test_xpath_list_unicode_text_parent(self):
+ xml = u'<a><b>FooBar\u0680\u3120</b><b>BarFoo\u0680\u3120</b></a>'
+ tree = self.parse(xml.encode('utf-8'))
+ root = tree.getroot()
+ self.assertEquals([u'FooBar\u0680\u3120', u'BarFoo\u0680\u3120'],
+ tree.xpath('/a/b/text()'))
+ self.assertEquals([root[0], root[1]],
+ [r.getparent() for r in tree.xpath('/a/b/text()')])
+
def test_xpath_list_attribute(self):
tree = self.parse('<a b="B" c="C"/>')
self.assertEquals(['B'],
tree.xpath('/a/@b'))
def test_xpath_list_attribute_parent(self):
- tree = self.parse('<a b="B" c="C"/>')
+ tree = self.parse('<a b="BaSdFgHjKl" c="CqWeRtZuI"/>')
results = tree.xpath('/a/@c')
self.assertEquals(1, len(results))
- self.assertEquals('C', results[0])
+ self.assertEquals('CqWeRtZuI', results[0])
self.assertEquals(tree.getroot().tag, results[0].getparent().tag)
def test_xpath_list_comment(self):
More information about the lxml-checkins
mailing list