[Lxml-checkins] r44174 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Tue Jun 12 18:15:00 CEST 2007


Author: scoder
Date: Tue Jun 12 18:14:58 2007
New Revision: 44174

Modified:
   lxml/branch/lxml-1.3/CHANGES.txt
   lxml/branch/lxml-1.3/TODO.txt
   lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
   lxml/branch/lxml-1.3/src/lxml/objectify.pyx
   lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
Log:
merged in revs 41642:41648 from trunk

Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.3/CHANGES.txt	Tue Jun 12 18:14:58 2007
@@ -13,6 +13,8 @@
 Bugs fixed
 ----------
 
+* Raise AssertionError when passing strings containing '\0' bytes
+
 
 1.3beta (2007-02-27)
 ====================

Modified: lxml/branch/lxml-1.3/TODO.txt
==============================================================================
--- lxml/branch/lxml-1.3/TODO.txt	(original)
+++ lxml/branch/lxml-1.3/TODO.txt	Tue Jun 12 18:14:58 2007
@@ -16,8 +16,6 @@
 
 * more testing on multi-threading
 
-* the code on extension functions and XSLT needs some refactoring
-
 
 ElementTree
 -----------
@@ -34,8 +32,8 @@
 Objectify
 ---------
 
-* set special __attributes__ on ObjectifiedElement's as Python attributes, not
-  XML children
+* emulate setting special __attributes__ on ObjectifiedElement's as Python
+  attributes, not XML children
 
 
 Features

Modified: lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi	(original)
+++ lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi	Tue Jun 12 18:14:58 2007
@@ -537,6 +537,21 @@
         c = s[0]
     return 0
 
+cdef int isutf8py(pystring):
+    cdef char* s
+    cdef char* c_end
+    cdef char c
+    s = _cstr(pystring)
+    c_end = s + python.PyString_GET_SIZE(pystring)
+    while s < c_end:
+        c = s[0]
+        if c == c'\0':
+            return -1 # invalid!
+        if c & 0x80:
+            return 1  # non-ASCII
+        s = s + 1
+    return 0          # plain 7-bit ASCII
+
 cdef object funicode(char* s):
     cdef Py_ssize_t slen
     cdef char* spos
@@ -555,7 +570,8 @@
 
 cdef object _utf8(object s):
     if python.PyString_Check(s):
-        assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
+        assert not isutf8py(s), \
+               "All strings must be Unicode or ASCII"
         return s
     elif python.PyUnicode_Check(s):
         return python.PyUnicode_AsUTF8String(s)
@@ -581,10 +597,10 @@
     if filename is None:
         return None
     elif python.PyString_Check(filename):
-        c_filename = _cstr(filename)
-        if not isutf8(c_filename):
+        if not isutf8py(filename):
             # plain ASCII!
             return filename
+        c_filename = _cstr(filename)
         try:
             # try to decode with default encoding
             filename = python.PyUnicode_Decode(

Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/objectify.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx	Tue Jun 12 18:14:58 2007
@@ -42,8 +42,17 @@
 
 cdef object AttributeError
 AttributeError = __builtin__.AttributeError
+cdef object TypeError
+TypeError = __builtin__.TypeError
+cdef object ValueError
+ValueError = __builtin__.ValueError
 cdef object IndexError
 IndexError = __builtin__.IndexError
+cdef object StopIteration
+StopIteration = __builtin__.StopIteration
+
+cdef object IGNORABLE_ERRORS
+IGNORABLE_ERRORS = (ValueError, TypeError)
 
 cdef object list
 list = __builtin__.list
@@ -202,7 +211,7 @@
         """Return the (first) child with the given tag name.  If no namespace
         is provided, the child will be looked up in the same one as self.
         """
-        return _lookupChild(self, tag)
+        return _lookupChildOrRaise(self, tag)
 
     def __setattr__(self, tag, value):
         """Set the value of the (first) child with the given tag name.  If no
@@ -223,15 +232,14 @@
             return
 
         tag = _buildChildTag(self, tag)
-        try:
-            element = _lookupChild(self, tag)
-        except AttributeError:
+        element = _lookupChild(self, tag)
+        if element is None:
             _appendValue(self, tag, value)
         else:
             _replaceElement(element, value)
 
     def __delattr__(self, tag):
-        child = _lookupChild(self, tag)
+        child = _lookupChildOrRaise(self, tag)
         self.remove(child)
 
     def addattr(self, tag, value):
@@ -253,7 +261,7 @@
         cdef tree.xmlNode* c_parent
         cdef tree.xmlNode* c_node
         if python._isString(key):
-            return _lookupChild(self, key)
+            return _lookupChildOrRaise(self, key)
         c_self_node = self._c_node
         c_parent = c_self_node.parent
         if c_parent is NULL:
@@ -290,9 +298,8 @@
         cdef tree.xmlNode* c_node
         if python._isString(key):
             key = _buildChildTag(self, key)
-            try:
-                element = _lookupChild(self, key)
-            except AttributeError:
+            element = _lookupChild(self, key)
+            if element is None:
                 _appendValue(self, key, value)
             else:
                 _replaceElement(element, value)
@@ -421,10 +428,16 @@
         c_href = _cstr(ns)
     c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
     if c_result is NULL:
-        raise AttributeError, "no such child: " + \
-              cetree.namespacedNameFromNsName(c_href, c_tag)
+        return None
     return elementFactory(parent._doc, c_result)
 
+cdef object _lookupChildOrRaise(_Element parent, tag):
+    element = _lookupChild(parent, tag)
+    if element is None:
+        raise AttributeError, "no such child: " + \
+              _buildChildTag(parent, tag)
+    return element
+
 cdef object _buildChildTag(_Element parent, tag):
     cdef char* c_href
     cdef char* c_tag
@@ -910,16 +923,17 @@
     """
     types = []
     known = set()
+    add_to_known = known.add
     for check, pytype in _TYPE_CHECKS:
         name = pytype.name
         if name not in known:
-            known.add(name)
-            types.append(pytype)
+            add_to_known(name)
+            python.PyList_Append(types, pytype)
     for pytype in _PYTYPE_DICT.itervalues():
         name = pytype.name
         if name not in known:
-            known.add(name)
-            types.append(pytype)
+            add_to_known(name)
+            python.PyList_Append(types, pytype)
     return types
 
 cdef object _guessElementClass(tree.xmlNode* c_node):
@@ -928,12 +942,11 @@
         return None
     if value == '':
         return StringElement
-    errors = (ValueError, TypeError)
     for type_check, pytype in _TYPE_CHECKS:
         try:
             type_check(value)
             return (<PyType>pytype)._type
-        except errors:
+        except IGNORABLE_ERRORS:
             pass
     return None
 
@@ -1426,7 +1439,6 @@
     doc = element._doc
     ignore = bool(ignore_old)
 
-    _ValueError = ValueError
     StrType = _PYTYPE_DICT.get('str')
     c_node = element._c_node
     tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
@@ -1443,7 +1455,7 @@
                 try:
                     if not (<PyType>pytype).type_check(value):
                         pytype = None
-                except _ValueError:
+                except ValueError:
                     pytype = None
 
     if pytype is None:
@@ -1474,7 +1486,7 @@
                         if type_check(value) is not False:
                             pytype = tested_pytype
                             break
-                    except _ValueError:
+                    except ValueError:
                         pass
                 else:
                     pytype = StrType
@@ -1579,13 +1591,12 @@
         strval = str(_value)
 
     if _pytype is None:
-        errors = (ValueError, TypeError)
         for type_check, pytype in _TYPE_CHECKS:
             try:
                 type_check(strval)
                 _pytype = (<PyType>pytype).name
                 break
-            except errors:
+            except IGNORABLE_ERRORS:
                 pass
         if _pytype is None:
             if _value is None:

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	Tue Jun 12 18:14:58 2007
@@ -1196,6 +1196,15 @@
         self.assertEquals(docinfo.root_name,   'html')
         self.assertEquals(docinfo.doctype, '')
 
+    def test_byte_zero(self):
+        Element = self.etree.Element
+
+        a = Element('a')
+        self.assertRaises(AssertionError, setattr, a, "text", 'ha\0ho')
+        self.assertRaises(AssertionError, setattr, a, "tail", 'ha\0ho')
+
+        self.assertRaises(AssertionError, Element, 'ha\0ho')
+
     def test_encoding_tostring_utf16(self):
         # ElementTree fails to serialize this
         tostring = self.etree.tostring


More information about the lxml-checkins mailing list