[Lxml-checkins] r41643 - lxml/trunk/src/lxml
scoder at codespeak.net
scoder at codespeak.net
Thu Mar 29 21:40:29 CEST 2007
Author: scoder
Date: Thu Mar 29 21:40:20 2007
New Revision: 41643
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
raise AssertionError when strings with '\0' bytes are passed into the API
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu Mar 29 21:40:20 2007
@@ -560,6 +560,21 @@
c = s[0]
return 0
+cdef int isutf8py(pystring):
+ cdef char* s
+ cdef char* c_end
+ cdef char c
+ s = _cstr(pystring)
+ c_end = s + python.PyString_GET_SIZE(pystring)
+ while s < c_end:
+ c = s[0]
+ if c == c'\0':
+ return -1 # invalid!
+ if c & 0x80:
+ return 1 # non-ASCII
+ s = s + 1
+ return 0 # plain 7-bit ASCII
+
cdef object funicode(char* s):
cdef Py_ssize_t slen
cdef char* spos
@@ -578,7 +593,8 @@
cdef object _utf8(object s):
if python.PyString_Check(s):
- assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
+ assert not isutf8py(s), \
+ "All strings must be Unicode or ASCII"
return s
elif python.PyUnicode_Check(s):
return python.PyUnicode_AsUTF8String(s)
@@ -604,10 +620,10 @@
if filename is None:
return None
elif python.PyString_Check(filename):
- c_filename = _cstr(filename)
- if not isutf8(c_filename):
+ if not isutf8py(filename):
# plain ASCII!
return filename
+ c_filename = _cstr(filename)
try:
# try to decode with default encoding
filename = python.PyUnicode_Decode(
More information about the lxml-checkins
mailing list