[Lxml-checkins] r44199 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Tue Jun 12 19:46:15 CEST 2007
Author: scoder
Date: Tue Jun 12 19:46:15 2007
New Revision: 44199
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
lxml/branch/lxml-1.3/src/lxml/tree.pxd
Log:
merged in trunk fixes from revs 44117 and 44165
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Tue Jun 12 19:46:15 2007
@@ -22,6 +22,12 @@
Bugs fixed
----------
+* Replacing the children slice of an Element would cut off the tails of the
+ original children
+
+* API functions now check incoming strings for XML conformity. Zero bytes or
+ low ASCII characters are no longer accepted.
+
* XSLT parsing failed to pass resolver context on to imported documents
* More ET compatible behaviour when writing out XML declarations or not
Modified: lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/lxml-1.3/src/lxml/apihelpers.pxi Tue Jun 12 19:46:15 2007
@@ -462,7 +462,10 @@
cdef void _removeNode(xmlNode* c_node):
"""Unlink and free a node and subnodes if possible.
"""
+ cdef xmlNode* c_next
+ c_next = c_node.next
tree.xmlUnlinkNode(c_node)
+ _moveTail(c_next, c_node)
attemptDeallocation(c_node)
cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target):
@@ -503,8 +506,8 @@
while c_node is not NULL and c < stop:
c_next = c_node.next
if _isElement(c_node):
- _removeText(c_node.next)
- c_next = c_node.next
+ while c_next is not NULL and not _isElement(c_next):
+ c_next = c_next.next
_removeNode(c_node)
c = c + 1
c_node = c_next
@@ -572,16 +575,20 @@
cdef char* s
cdef char* c_end
cdef char c
+ cdef int is_non_ascii
s = _cstr(pystring)
c_end = s + python.PyString_GET_SIZE(pystring)
+ is_non_ascii = 0
while s < c_end:
c = s[0]
+ if c & 0x80:
+ is_non_ascii = 1
if c == c'\0':
return -1 # invalid!
- if c & 0x80:
- return 1 # non-ASCII
+ if is_non_ascii == 0 and not tree.xmlIsChar_ch(c):
+ return -1 # invalid!
s = s + 1
- return 0 # plain 7-bit ASCII
+ return is_non_ascii
cdef object funicode(char* s):
cdef Py_ssize_t slen
@@ -602,12 +609,15 @@
cdef object _utf8(object s):
if python.PyString_Check(s):
assert not isutf8py(s), \
- "All strings must be Unicode or ASCII"
- return s
+ "All strings must be XML compatible, either Unicode or ASCII"
elif python.PyUnicode_Check(s):
- return python.PyUnicode_AsUTF8String(s)
+ # FIXME: we should test these strings, too ...
+ s = python.PyUnicode_AsUTF8String(s)
+ assert isutf8py(s) != -1, \
+ "All strings must be XML compatible, either Unicode or ASCII"
else:
raise TypeError, "Argument must be string or unicode."
+ return s
cdef object _encodeFilename(object filename):
if filename is None:
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Jun 12 19:46:15 2007
@@ -480,7 +480,7 @@
else:
c_node = _findChild(self._c_node, start)
# now delete the slice
- if start != stop:
+ if c_node is not NULL and start != stop:
c_node = _deleteSlice(c_node, start, stop)
# if the insertion point is at the end, append there
if c_node is NULL:
@@ -591,8 +591,8 @@
while c_node is not NULL:
c_node_next = c_node.next
if _isElement(c_node):
- _removeText(c_node_next)
- c_node_next = c_node.next
+ while c_node_next is not NULL and not _isElement(c_node_next):
+ c_node_next = c_node_next.next
_removeNode(c_node)
c_node = c_node_next
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py Tue Jun 12 19:46:15 2007
@@ -1151,6 +1151,26 @@
self.assertXML('<b><bs></bs></b>', b)
self.assertXML('<c><cs></cs></c>', c)
+ def test_delslice_tail(self):
+ XML = self.etree.XML
+ a = XML('<a><b></b>B2<c></c>C2</a>')
+ b, c = a
+
+ del a[:]
+
+ self.assertEquals("B2", b.tail)
+ self.assertEquals("C2", c.tail)
+
+ def test_replace_slice_tail(self):
+ XML = self.etree.XML
+ a = XML('<a><b></b>B2<c></c>C2</a>')
+ b, c = a
+
+ a[:] = []
+
+ self.assertEquals("B2", b.tail)
+ self.assertEquals("C2", c.tail)
+
def test_delitem_tail(self):
ElementTree = self.etree.ElementTree
f = StringIO('<a><b></b>B2<c></c>C2</a>')
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py Tue Jun 12 19:46:15 2007
@@ -1388,6 +1388,41 @@
self.assertRaises(AssertionError, Element, 'ha\0ho')
+ def test_unicode_byte_zero(self):
+ Element = self.etree.Element
+
+ a = Element('a')
+ self.assertRaises(AssertionError, setattr, a, "text", u'ha\0ho')
+ self.assertRaises(AssertionError, setattr, a, "tail", u'ha\0ho')
+
+ self.assertRaises(AssertionError, Element, u'ha\0ho')
+
+ def test_byte_invalid(self):
+ Element = self.etree.Element
+
+ a = Element('a')
+ self.assertRaises(AssertionError, setattr, a, "text", 'ha\x07ho')
+ self.assertRaises(AssertionError, setattr, a, "text", 'ha\x02ho')
+
+ self.assertRaises(AssertionError, setattr, a, "tail", 'ha\x07ho')
+ self.assertRaises(AssertionError, setattr, a, "tail", 'ha\x02ho')
+
+ self.assertRaises(AssertionError, Element, 'ha\x07ho')
+ self.assertRaises(AssertionError, Element, 'ha\x02ho')
+
+ def test_unicode_byte_invalid(self):
+ Element = self.etree.Element
+
+ a = Element('a')
+ self.assertRaises(AssertionError, setattr, a, "text", u'ha\x07ho')
+ self.assertRaises(AssertionError, setattr, a, "text", u'ha\x02ho')
+
+ self.assertRaises(AssertionError, setattr, a, "tail", u'ha\x07ho')
+ self.assertRaises(AssertionError, setattr, a, "tail", u'ha\x02ho')
+
+ self.assertRaises(AssertionError, Element, u'ha\x07ho')
+ self.assertRaises(AssertionError, Element, u'ha\x02ho')
+
def test_encoding_tostring_utf16(self):
# ElementTree fails to serialize this
tostring = self.etree.tostring
Modified: lxml/branch/lxml-1.3/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-1.3/src/lxml/tree.pxd Tue Jun 12 19:46:15 2007
@@ -41,6 +41,9 @@
cdef xmlCharEncoding xmlDetectCharEncoding(char* text, int len)
cdef char* xmlGetCharEncodingName(xmlCharEncoding enc)
+cdef extern from "libxml/chvalid.h":
+ cdef int xmlIsChar_ch(char c)
+
cdef extern from "libxml/hash.h":
ctypedef struct xmlHashTable
ctypedef void xmlHashScanner(void* payload, void* data, char* name)
More information about the lxml-checkins mailing list