[Lxml-checkins] r50506 - in lxml/trunk: . src/lxml

scoder at codespeak.net
Fri Jan 11 09:50:59 CET 2008


Author: scoder
Date: Fri Jan 11 09:50:59 2008
New Revision: 50506

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/src/lxml/extensions.pxi
   lxml/trunk/src/lxml/python.pxd
Log:
 r3222 at delle:  sbehnel | 2008-01-10 00:09:11 +0100
 separate ElementStringResult implementations for str and unicode values, requires Cython > 0.9.6.10b


Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi	(original)
+++ lxml/trunk/src/lxml/extensions.pxi	Fri Jan 11 09:50:59 2008
@@ -560,7 +560,16 @@
 ################################################################################
 # special str/unicode subclasses
 
-cdef class _ElementStringResult(python.unicode):
+cdef class _ElementUnicodeResult(python.unicode):
+    cdef _Element parent
+    cdef readonly object is_tail
+    cdef readonly object is_text
+    cdef readonly object is_attribute
+
+    def getparent(self):
+        return self.parent
+
+cdef class _ElementStringResult(python.str):
     cdef _Element parent
     cdef readonly object is_tail
     cdef readonly object is_text
@@ -570,17 +579,22 @@
         return self.parent
 
 cdef object _newElementStringResult(_Document doc, xmlNode* c_node):
-    cdef _ElementStringResult element_string
+    cdef _ElementUnicodeResult element_unicode
+    cdef _ElementStringResult element_str
     cdef xmlNode* c_element
     cdef char* s
-    cdef bint is_attribute, is_tail
+    cdef bint is_attribute, is_tail, is_utf8
 
     if c_node.type == tree.XML_ATTRIBUTE_NODE:
         is_attribute = 1
         is_tail = 0
         s = tree.xmlNodeGetContent(c_node)
+        is_utf8 = isutf8(s)
         try:
-            value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
+            if is_utf8:
+                value = python.PyUnicode_DecodeUTF8(s, cstd.strlen(s), NULL)
+            else:
+                value = s
         finally:
             tree.xmlFree(s)
         c_element = NULL
@@ -588,8 +602,12 @@
         #assert c_node.type == tree.XML_TEXT_NODE, "invalid node type"
         is_attribute = 0
         # tail text?
-        value = python.PyUnicode_DecodeUTF8(
-            c_node.content, cstd.strlen(c_node.content), NULL)
+        is_utf8 = isutf8(c_node.content)
+        if is_utf8:
+            value = python.PyUnicode_DecodeUTF8(
+                c_node.content, cstd.strlen(c_node.content), NULL)
+        else:
+            value = c_node.content
         c_element = _previousElement(c_node)
         is_tail = c_element is not NULL
 
@@ -599,15 +617,23 @@
         while c_element is not NULL and not _isElement(c_element):
             c_element = c_element.parent
 
-    #if c_element is NULL:
-    return value
+    if c_element is NULL:
+        return value
 
-    element_string = _ElementStringResult(value)
-    element_string.parent = _fakeDocElementFactory(doc, c_element)
-    element_string.is_attribute = is_attribute
-    element_string.is_tail = is_tail
-    element_string.is_text = not (is_tail or is_attribute)
-    return element_string
+    if is_utf8:
+        element_unicode = _ElementUnicodeResult(value)
+        element_unicode.parent = _fakeDocElementFactory(doc, c_element)
+        element_unicode.is_attribute = is_attribute
+        element_unicode.is_tail = is_tail
+        element_unicode.is_text = not (is_tail or is_attribute)
+        return element_unicode
+    else:
+        element_str = _ElementStringResult(value)
+        element_str.parent = _fakeDocElementFactory(doc, c_element)
+        element_str.is_attribute = is_attribute
+        element_str.is_tail = is_tail
+        element_str.is_text = not (is_tail or is_attribute)
+        return element_str
 
 ################################################################################
 # callbacks for XPath/XSLT extension functions

Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd	(original)
+++ lxml/trunk/src/lxml/python.pxd	Fri Jan 11 09:50:59 2008
@@ -19,6 +19,9 @@
     ctypedef class __builtin__.unicode [object PyUnicodeObject]:
         pass
 
+    ctypedef class __builtin__.str [object PyStringObject]:
+        pass
+
     cdef FILE* PyFile_AsFile(object p)
     cdef int PyFile_Check(object p)
     cdef object PyFile_Name(object p)


More information about the lxml-checkins mailing list