[Lxml-checkins] r50512 - in lxml/trunk: . doc src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Fri Jan 11 11:55:09 CET 2008


Author: scoder
Date: Fri Jan 11 11:55:07 2008
New Revision: 50512

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/CHANGES.txt
   lxml/trunk/doc/tutorial.txt
   lxml/trunk/src/lxml/extensions.pxi
   lxml/trunk/src/lxml/python.pxd
   lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
 r3237 at delle:  sbehnel | 2008-01-11 11:54:56 +0100
 subtyping PyStringObject does not work in Cython/Pyrex, so XPath string results will just have to be unicode


Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Fri Jan 11 11:55:07 2008
@@ -12,7 +12,8 @@
 
 * XPath string results of the ``text()`` function and attribute
   selection make their Element container accessible through a
-  ``getparent()`` method.
+  ``getparent()`` method.  As a side-effect, they are now always
+  unicode objects (even ASCII strings).
 
 * ``XSLT`` objects are usable in any thread - at the cost of a deep
   copy if they were not created in that thread.

Modified: lxml/trunk/doc/tutorial.txt
==============================================================================
--- lxml/trunk/doc/tutorial.txt	(original)
+++ lxml/trunk/doc/tutorial.txt	Fri Jan 11 11:55:07 2008
@@ -281,13 +281,39 @@
     >>> print html.xpath("string()") # lxml.etree only!
     TEXTTAIL
     >>> print html.xpath("//text()") # lxml.etree only!
-    ['TEXT', 'TAIL']
+    [u'TEXT', u'TAIL']
 
 If you want to use this more often, you can wrap it in a function::
 
     >>> build_text_list = etree.XPath("//text()") # lxml.etree only!
     >>> print build_text_list(html)
-    ['TEXT', 'TAIL']
+    [u'TEXT', u'TAIL']
+
+Note that the strings returned by the ``text()`` function in XPath are
+always unicode strings.  This is because each of them is actually a special
+string object that knows about its origins.  You can ask such a result where
+it came from through its ``getparent()`` method, just as you would with Elements::
+
+    >>> texts = build_text_list(html)
+    >>> print texts[0]
+    TEXT
+    >>> parent = texts[0].getparent()
+    >>> print parent.tag
+    body
+
+    >>> print texts[1]
+    TAIL
+    >>> print texts[1].getparent().tag
+    br
+
+You can also find out if it's normal text content or tail text::
+
+    >>> print texts[0].is_text
+    True
+    >>> print texts[1].is_text
+    False
+    >>> print texts[1].is_tail
+    True
 
 .. _XPath: xpathxslt.html#xpath
 

Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi	(original)
+++ lxml/trunk/src/lxml/extensions.pxi	Fri Jan 11 11:55:07 2008
@@ -560,16 +560,7 @@
 ################################################################################
 # special str/unicode subclasses
 
-cdef class _ElementUnicodeResult(python.unicode):
-    cdef _Element parent
-    cdef readonly object is_tail
-    cdef readonly object is_text
-    cdef readonly object is_attribute
-
-    def getparent(self):
-        return self.parent
-
-cdef class _ElementStringResult(python.str):
+cdef class _ElementStringResult(python.unicode):
     cdef _Element parent
     cdef readonly object is_tail
     cdef readonly object is_text
@@ -579,22 +570,17 @@
         return self.parent
 
 cdef object _newElementStringResult(_Document doc, xmlNode* c_node):
-    cdef _ElementUnicodeResult element_unicode
-    cdef _ElementStringResult element_str
+    cdef _ElementStringResult result
     cdef xmlNode* c_element
     cdef char* s
-    cdef bint is_attribute, is_tail, is_utf8
+    cdef bint is_attribute, is_tail
 
     if c_node.type == tree.XML_ATTRIBUTE_NODE:
         is_attribute = 1
         is_tail = 0
         s = tree.xmlNodeGetContent(c_node)
-        is_utf8 = isutf8(s)
         try:
-            if is_utf8:
-                value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
-            else:
-                value = s
+            value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
         finally:
             tree.xmlFree(s)
         c_element = NULL
@@ -602,12 +588,8 @@
         #assert c_node.type == tree.XML_TEXT_NODE, "invalid node type"
         is_attribute = 0
         # tail text?
-        is_utf8 = isutf8(c_node.content)
-        if is_utf8:
-            value = python.PyUnicode_DecodeUTF8(
-                c_node.content, cstd.strlen(c_node.content), NULL)
-        else:
-            value = c_node.content
+        value = python.PyUnicode_DecodeUTF8(
+            c_node.content, cstd.strlen(c_node.content), NULL)
         c_element = _previousElement(c_node)
         is_tail = c_element is not NULL
 
@@ -620,20 +602,12 @@
     if c_element is NULL:
         return value
 
-    if is_utf8:
-        element_unicode = _ElementUnicodeResult(value)
-        element_unicode.parent = _fakeDocElementFactory(doc, c_element)
-        element_unicode.is_attribute = is_attribute
-        element_unicode.is_tail = is_tail
-        element_unicode.is_text = not (is_tail or is_attribute)
-        return element_unicode
-    else:
-        element_str = _ElementStringResult(value)
-        element_str.parent = _fakeDocElementFactory(doc, c_element)
-        element_str.is_attribute = is_attribute
-        element_str.is_tail = is_tail
-        element_str.is_text = not (is_tail or is_attribute)
-        return element_str
+    result = _ElementStringResult(value)
+    result.parent = _fakeDocElementFactory(doc, c_element)
+    result.is_attribute = is_attribute
+    result.is_tail = is_tail
+    result.is_text = not (is_tail or is_attribute)
+    return result
 
 ################################################################################
 # callbacks for XPath/XSLT extension functions

Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd	(original)
+++ lxml/trunk/src/lxml/python.pxd	Fri Jan 11 11:55:07 2008
@@ -19,9 +19,6 @@
     ctypedef class __builtin__.unicode [object PyUnicodeObject]:
         pass
 
-    ctypedef class __builtin__.str [object PyStringObject]:
-        pass
-
     cdef FILE* PyFile_AsFile(object p)
     cdef int PyFile_Check(object p)
     cdef object PyFile_Name(object p)

Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py	(original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py	Fri Jan 11 11:55:07 2008
@@ -67,16 +67,33 @@
         self.assertEquals(['Foo', 'Bar'],
                           tree.xpath('/a/b/text()'))
 
+    def test_xpath_list_text_parent(self):
+        tree = self.parse('<a><b>FooBar</b><b>BarFoo</b></a>')
+        root = tree.getroot()
+        self.assertEquals(['FooBar', 'BarFoo'],
+                          tree.xpath('/a/b/text()'))
+        self.assertEquals([root[0], root[1]],
+                          [r.getparent() for r in tree.xpath('/a/b/text()')])
+
+    def test_xpath_list_unicode_text_parent(self):
+        xml = u'<a><b>FooBar\u0680\u3120</b><b>BarFoo\u0680\u3120</b></a>'
+        tree = self.parse(xml.encode('utf-8'))
+        root = tree.getroot()
+        self.assertEquals([u'FooBar\u0680\u3120', u'BarFoo\u0680\u3120'],
+                          tree.xpath('/a/b/text()'))
+        self.assertEquals([root[0], root[1]],
+                          [r.getparent() for r in tree.xpath('/a/b/text()')])
+
     def test_xpath_list_attribute(self):
         tree = self.parse('<a b="B" c="C"/>')
         self.assertEquals(['B'],
                           tree.xpath('/a/@b'))
 
     def test_xpath_list_attribute_parent(self):
-        tree = self.parse('<a b="B" c="C"/>')
+        tree = self.parse('<a b="BaSdFgHjKl" c="CqWeRtZuI"/>')
         results = tree.xpath('/a/@c')
         self.assertEquals(1, len(results))
-        self.assertEquals('C', results[0])
+        self.assertEquals('CqWeRtZuI', results[0])
         self.assertEquals(tree.getroot().tag, results[0].getparent().tag)
 
     def test_xpath_list_comment(self):


More information about the lxml-checkins mailing list