[Lxml-checkins] r44174 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Tue Jun 12 18:15:00 CEST 2007
Author: scoder
Date: Tue Jun 12 18:14:58 2007
New Revision: 44174
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/TODO.txt
lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
lxml/branch/lxml-1.3/src/lxml/objectify.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
Log:
merged in revs 41642:41648 from trunk
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Tue Jun 12 18:14:58 2007
@@ -13,6 +13,8 @@
Bugs fixed
----------
+* Raise AssertionError when passing strings containing '\0' bytes
+
1.3beta (2007-02-27)
====================
Modified: lxml/branch/lxml-1.3/TODO.txt
==============================================================================
--- lxml/branch/lxml-1.3/TODO.txt (original)
+++ lxml/branch/lxml-1.3/TODO.txt Tue Jun 12 18:14:58 2007
@@ -16,8 +16,6 @@
* more testing on multi-threading
-* the code on extension functions and XSLT needs some refactoring
-
ElementTree
-----------
@@ -34,8 +32,8 @@
Objectify
---------
-* set special __attributes__ on ObjectifiedElement's as Python attributes, not
- XML children
+* emulate setting special __attributes__ on ObjectifiedElement's as Python
+ attributes, not XML children
Features
Modified: lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi Tue Jun 12 18:14:58 2007
@@ -537,6 +537,21 @@
c = s[0]
return 0
+cdef int isutf8py(pystring):
+ cdef char* s
+ cdef char* c_end
+ cdef char c
+ s = _cstr(pystring)
+ c_end = s + python.PyString_GET_SIZE(pystring)
+ while s < c_end:
+ c = s[0]
+ if c == c'\0':
+ return -1 # invalid!
+ if c & 0x80:
+ return 1 # non-ASCII
+ s = s + 1
+ return 0 # plain 7-bit ASCII
+
cdef object funicode(char* s):
cdef Py_ssize_t slen
cdef char* spos
@@ -555,7 +570,8 @@
cdef object _utf8(object s):
if python.PyString_Check(s):
- assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
+ assert not isutf8py(s), \
+ "All strings must be Unicode or ASCII"
return s
elif python.PyUnicode_Check(s):
return python.PyUnicode_AsUTF8String(s)
@@ -581,10 +597,10 @@
if filename is None:
return None
elif python.PyString_Check(filename):
- c_filename = _cstr(filename)
- if not isutf8(c_filename):
+ if not isutf8py(filename):
# plain ASCII!
return filename
+ c_filename = _cstr(filename)
try:
# try to decode with default encoding
filename = python.PyUnicode_Decode(
Modified: lxml/branch/lxml-1.3/src/lxml/objectify.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/objectify.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/objectify.pyx Tue Jun 12 18:14:58 2007
@@ -42,8 +42,17 @@
cdef object AttributeError
AttributeError = __builtin__.AttributeError
+cdef object TypeError
+TypeError = __builtin__.TypeError
+cdef object ValueError
+ValueError = __builtin__.ValueError
cdef object IndexError
IndexError = __builtin__.IndexError
+cdef object StopIteration
+StopIteration = __builtin__.StopIteration
+
+cdef object IGNORABLE_ERRORS
+IGNORABLE_ERRORS = (ValueError, TypeError)
cdef object list
list = __builtin__.list
@@ -202,7 +211,7 @@
"""Return the (first) child with the given tag name. If no namespace
is provided, the child will be looked up in the same one as self.
"""
- return _lookupChild(self, tag)
+ return _lookupChildOrRaise(self, tag)
def __setattr__(self, tag, value):
"""Set the value of the (first) child with the given tag name. If no
@@ -223,15 +232,14 @@
return
tag = _buildChildTag(self, tag)
- try:
- element = _lookupChild(self, tag)
- except AttributeError:
+ element = _lookupChild(self, tag)
+ if element is None:
_appendValue(self, tag, value)
else:
_replaceElement(element, value)
def __delattr__(self, tag):
- child = _lookupChild(self, tag)
+ child = _lookupChildOrRaise(self, tag)
self.remove(child)
def addattr(self, tag, value):
@@ -253,7 +261,7 @@
cdef tree.xmlNode* c_parent
cdef tree.xmlNode* c_node
if python._isString(key):
- return _lookupChild(self, key)
+ return _lookupChildOrRaise(self, key)
c_self_node = self._c_node
c_parent = c_self_node.parent
if c_parent is NULL:
@@ -290,9 +298,8 @@
cdef tree.xmlNode* c_node
if python._isString(key):
key = _buildChildTag(self, key)
- try:
- element = _lookupChild(self, key)
- except AttributeError:
+ element = _lookupChild(self, key)
+ if element is None:
_appendValue(self, key, value)
else:
_replaceElement(element, value)
@@ -421,10 +428,16 @@
c_href = _cstr(ns)
c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
if c_result is NULL:
- raise AttributeError, "no such child: " + \
- cetree.namespacedNameFromNsName(c_href, c_tag)
+ return None
return elementFactory(parent._doc, c_result)
+cdef object _lookupChildOrRaise(_Element parent, tag):
+ element = _lookupChild(parent, tag)
+ if element is None:
+ raise AttributeError, "no such child: " + \
+ _buildChildTag(parent, tag)
+ return element
+
cdef object _buildChildTag(_Element parent, tag):
cdef char* c_href
cdef char* c_tag
@@ -910,16 +923,17 @@
"""
types = []
known = set()
+ add_to_known = known.add
for check, pytype in _TYPE_CHECKS:
name = pytype.name
if name not in known:
- known.add(name)
- types.append(pytype)
+ add_to_known(name)
+ python.PyList_Append(types, pytype)
for pytype in _PYTYPE_DICT.itervalues():
name = pytype.name
if name not in known:
- known.add(name)
- types.append(pytype)
+ add_to_known(name)
+ python.PyList_Append(types, pytype)
return types
cdef object _guessElementClass(tree.xmlNode* c_node):
@@ -928,12 +942,11 @@
return None
if value == '':
return StringElement
- errors = (ValueError, TypeError)
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(value)
return (<PyType>pytype)._type
- except errors:
+ except IGNORABLE_ERRORS:
pass
return None
@@ -1426,7 +1439,6 @@
doc = element._doc
ignore = bool(ignore_old)
- _ValueError = ValueError
StrType = _PYTYPE_DICT.get('str')
c_node = element._c_node
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
@@ -1443,7 +1455,7 @@
try:
if not (<PyType>pytype).type_check(value):
pytype = None
- except _ValueError:
+ except ValueError:
pytype = None
if pytype is None:
@@ -1474,7 +1486,7 @@
if type_check(value) is not False:
pytype = tested_pytype
break
- except _ValueError:
+ except ValueError:
pass
else:
pytype = StrType
@@ -1579,13 +1591,12 @@
strval = str(_value)
if _pytype is None:
- errors = (ValueError, TypeError)
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(strval)
_pytype = (<PyType>pytype).name
break
- except errors:
+ except IGNORABLE_ERRORS:
pass
if _pytype is None:
if _value is None:
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py Tue Jun 12 18:14:58 2007
@@ -1196,6 +1196,15 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_byte_zero(self):
+ Element = self.etree.Element
+
+ a = Element('a')
+ self.assertRaises(AssertionError, setattr, a, "text", 'ha\0ho')
+ self.assertRaises(AssertionError, setattr, a, "tail", 'ha\0ho')
+
+ self.assertRaises(AssertionError, Element, 'ha\0ho')
+
def test_encoding_tostring_utf16(self):
# ElementTree fails to serialize this
tostring = self.etree.tostring
More information about the lxml-checkins mailing list