[Lxml-checkins] r44186 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Tue Jun 12 19:11:48 CEST 2007
Author: scoder
Date: Tue Jun 12 19:11:47 2007
New Revision: 44186
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/selftest.py
lxml/branch/lxml-1.3/selftest2.py
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/iterparse.pxi
lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd
Log:
merged in revs 43159:43235 from trunk
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Tue Jun 12 19:11:47 2007
@@ -22,6 +22,12 @@
Bugs fixed
----------
+* More ET compatible behaviour when writing out XML declarations or not
+
+* ``Element.attrib`` was missing ``clear()`` method
+
+* More robust error handling in ``iterparse()``
+
* Documents lost their top-level PIs and comments on serialisation
* lxml.sax failed on comments and PIs. Comments are now properly ignored and
Modified: lxml/branch/lxml-1.3/selftest.py
==============================================================================
--- lxml/branch/lxml-1.3/selftest.py (original)
+++ lxml/branch/lxml-1.3/selftest.py Tue Jun 12 19:11:47 2007
@@ -272,28 +272,31 @@
## '<p>spam<b>egg</b></p>'
## """
-## def parseliteral():
-## r"""
-## >>> element = ElementTree.XML("<html><body>text</body></html>")
-## >>> ElementTree.ElementTree(element).write(sys.stdout)
-## <html><body>text</body></html>
-## >>> element = ElementTree.fromstring("<html><body>text</body></html>")
-## >>> ElementTree.ElementTree(element).write(sys.stdout)
-## <html><body>text</body></html>
-## >>> print ElementTree.tostring(element)
-## <html><body>text</body></html>
-## >>> print ElementTree.tostring(element, "ascii")
-## <?xml version='1.0' encoding='ascii'?>
-## <html><body>text</body></html>
-## >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
-## >>> len(ids)
-## 0
-## >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
-## >>> len(ids)
-## 1
-## >>> ids["body"].tag
-## 'body'
-## """
+def parseliteral():
+ r"""
+ >>> element = ElementTree.XML("<html><body>text</body></html>")
+ >>> ElementTree.ElementTree(element).write(sys.stdout)
+ <html><body>text</body></html>
+ >>> element = ElementTree.fromstring("<html><body>text</body></html>")
+ >>> ElementTree.ElementTree(element).write(sys.stdout)
+ <html><body>text</body></html>
+ >>> print ElementTree.tostring(element)
+ <html><body>text</body></html>
+
+# looks different in lxml
+# >>> print ElementTree.tostring(element, "ascii")
+# <?xml version='1.0' encoding='ascii'?>
+# <html><body>text</body></html>
+
+ >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
+ >>> len(ids)
+ 0
+ >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
+ >>> len(ids)
+ 1
+ >>> ids["body"].tag
+ 'body'
+ """
## def simpleparsefile():
## """
@@ -519,16 +522,18 @@
## """
-## def xmllang():
-## """
-## This appears to be a problem; in underlying libxml2?
+def xmllang():
+ """
+ This appears to be a problem; in underlying libxml2?
-## 1) xml namespace
+ 1) xml namespace
-## >>> elem = ElementTree.XML("<tag xml:lang='en' />")
-## >>> serialize(elem) # 1.1
-## '<tag xml:lang="en" />'
-## """
+ >>> elem = ElementTree.XML("<tag xml:lang='en' />")
+ >>> serialize(elem) # 1.1
+ '<tag xml:lang="en"/>'
+
+# '<tag xml:lang="en" />' # ElementTree produces an extra blank
+ """
def namespace():
"""
Modified: lxml/branch/lxml-1.3/selftest2.py
==============================================================================
--- lxml/branch/lxml-1.3/selftest2.py (original)
+++ lxml/branch/lxml-1.3/selftest2.py Tue Jun 12 19:11:47 2007
@@ -133,30 +133,30 @@
'<tag>text<subtag>subtext</subtag></tag>'
"""
-## def encoding():
-## r"""
-## Test encoding issues.
+def encoding():
+ r"""
+ Test encoding issues.
-## >>> elem = ElementTree.Element("tag")
-## >>> elem.text = u"abc"
-## >>> serialize(elem)
-## '<tag>abc</tag>'
-## >>> serialize(elem, "utf-8")
-## '<tag>abc</tag>'
-## >>> serialize(elem, "us-ascii")
-## '<tag>abc</tag>'
-## >>> serialize(elem, "iso-8859-1")
-## "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
+ >>> elem = ElementTree.Element("tag")
+ >>> elem.text = u"abc"
+ >>> serialize(elem)
+ '<tag>abc</tag>'
+ >>> serialize(elem, "utf-8")
+ '<tag>abc</tag>'
+ >>> serialize(elem, "us-ascii")
+ '<tag>abc</tag>'
+ >>> serialize(elem, "iso-8859-1").lower()
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
-## >>> elem.text = "<&\"\'>"
-## >>> serialize(elem)
-## '<tag>&lt;&amp;"\'&gt;</tag>'
-## >>> serialize(elem, "utf-8")
-## '<tag>&lt;&amp;"\'&gt;</tag>'
-## >>> serialize(elem, "us-ascii") # cdata characters
-## '<tag>&lt;&amp;"\'&gt;</tag>'
-## >>> serialize(elem, "iso-8859-1")
-## '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
+ >>> elem.text = "<&\"\'>"
+ >>> serialize(elem)
+ '<tag>&lt;&amp;"\'&gt;</tag>'
+ >>> serialize(elem, "utf-8")
+ '<tag>&lt;&amp;"\'&gt;</tag>'
+ >>> serialize(elem, "us-ascii") # cdata characters
+ '<tag>&lt;&amp;"\'&gt;</tag>'
+ >>> serialize(elem, "iso-8859-1").lower()
+ '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
## >>> elem.attrib["key"] = "<&\"\'>"
## >>> elem.text = None
@@ -169,16 +169,16 @@
## >>> serialize(elem, "iso-8859-1")
## '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;&apos;&gt;" />'
-## >>> elem.text = u'\xe5\xf6\xf6<>'
-## >>> elem.attrib.clear()
-## >>> serialize(elem)
-## '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
-## >>> serialize(elem, "utf-8")
-## '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
-## >>> serialize(elem, "us-ascii")
-## '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
-## >>> serialize(elem, "iso-8859-1")
-## "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
+ >>> elem.text = u'\xe5\xf6\xf6<>'
+ >>> elem.attrib.clear()
+ >>> serialize(elem)
+ '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
+ >>> serialize(elem, "utf-8")
+ '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
+ >>> serialize(elem, "us-ascii")
+ '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
+ >>> serialize(elem, "iso-8859-1").lower()
+ "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
## >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
## >>> elem.text = None
@@ -191,25 +191,25 @@
## >>> serialize(elem, "iso-8859-1")
## '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
-## """
+ """
-## def qname():
-## """
-## Test QName handling.
+def qname():
+ """
+ Test QName handling.
-## 1) decorated tags
+ 1) decorated tags
-## >>> elem = ElementTree.Element("{uri}tag")
-## >>> serialize(elem) # 1.1
-## '<ns0:tag xmlns:ns0="uri" />'
+ >>> elem = ElementTree.Element("{uri}tag")
+ >>> serialize(elem) # 1.1
+ '<ns0:tag xmlns:ns0="uri"/>'
## 2) decorated attributes
## >>> elem.attrib["{uri}key"] = "value"
## >>> serialize(elem) # 2.1
-## '<ns0:tag ns0:key="value" xmlns:ns0="uri" />'
+## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
-## """
+ """
def cdata():
"""
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Tue Jun 12 19:11:47 2007
@@ -1453,6 +1453,12 @@
_delAttribute(self._element, key)
return result
+ def clear(self):
+ cdef xmlNode* c_node
+ c_node = self._element._c_node
+ while c_node.properties is not NULL:
+ tree.xmlRemoveProp(c_node.properties)
+
# ACCESSORS
def __repr__(self):
return repr(dict( _attributeIteratorFactory(self._element, 3) ))
@@ -1871,17 +1877,15 @@
"""
cdef int write_declaration
cdef int c_pretty_print
- if encoding is None:
- encoding = 'ASCII'
- else:
- encoding = encoding.upper()
c_pretty_print = bool(pretty_print)
if xml_declaration is None:
# by default, write an XML declaration only for non-standard encodings
- write_declaration = encoding not in \
+ write_declaration = encoding is not None and encoding.upper() not in \
('ASCII', 'UTF-8', 'UTF8', 'US-ASCII')
else:
write_declaration = bool(xml_declaration)
+ if encoding is None:
+ encoding = 'ASCII'
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree,
Modified: lxml/branch/lxml-1.3/src/lxml/iterparse.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/iterparse.pxi (original)
+++ lxml/branch/lxml-1.3/src/lxml/iterparse.pxi Tue Jun 12 19:11:47 2007
@@ -48,7 +48,7 @@
c_ns = c_ns.next
return count
-cdef class _IterparseResolverContext(_ResolverContext):
+cdef class _IterparseContext(_ResolverContext):
cdef xmlparser.startElementNsSAX2Func _origSaxStart
cdef xmlparser.endElementNsSAX2Func _origSaxEnd
cdef _Element _root
@@ -64,8 +64,8 @@
cdef char* _tag_href
cdef char* _tag_name
- def __init__(self, *args):
- _ResolverContext.__init__(self, *args)
+ def __init__(self, _ResolverRegistry resolvers):
+ _ResolverContext.__init__(self, resolvers)
self._ns_stack = []
self._pop_ns = self._ns_stack.pop
self._node_stack = []
@@ -90,7 +90,7 @@
ITERPARSE_FILTER_END_NS):
sax.endElementNs = _saxEnd
- cdef void _setEventFilter(self, events, tag):
+ cdef _setEventFilter(self, events, tag):
self._event_filter = _buildIterparseEventFilter(events)
if tag is None or tag == '*':
self._tag_href = NULL
@@ -109,8 +109,7 @@
if self._tag_href is NULL and self._tag_name is NULL:
self._tag_tuple = None
- cdef void startNode(self, xmlNode* c_node):
- cdef _Element node
+ cdef int startNode(self, xmlNode* c_node) except -1:
cdef xmlNs* c_ns
cdef int ns_count
if self._event_filter & ITERPARSE_FILTER_START_NS:
@@ -129,9 +128,9 @@
python.PyList_Append(self._node_stack, node)
if self._event_filter & ITERPARSE_FILTER_START:
python.PyList_Append(self._events, ("start", node))
+ return 0
- cdef void endNode(self, xmlNode* c_node):
- cdef _Element node
+ cdef int endNode(self, xmlNode* c_node) except -1:
cdef xmlNs* c_ns
cdef int ns_count
if self._event_filter & ITERPARSE_FILTER_END:
@@ -141,7 +140,6 @@
ITERPARSE_FILTER_START_NS | \
ITERPARSE_FILTER_END_NS):
node = self._pop_node()
- assert node._c_node is c_node
else:
if self._doc is None:
self._doc = _documentFactory(c_node.doc, None)
@@ -155,23 +153,36 @@
event = ("end-ns", None)
for i from 0 <= i < ns_count:
python.PyList_Append(self._events, event)
+ return 0
cdef void _pushSaxStartEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
- cdef _IterparseResolverContext context
- context = <_IterparseResolverContext>c_ctxt._private
- context.startNode(c_node)
+ cdef _IterparseContext context
+ context = <_IterparseContext>c_ctxt._private
+ try:
+ context.startNode(c_node)
+ except:
+ if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ c_ctxt.disableSAX = 1
+ context._store_raised()
cdef void _pushSaxEndEvent(xmlparser.xmlParserCtxt* c_ctxt, xmlNode* c_node):
- cdef _IterparseResolverContext context
- context = <_IterparseResolverContext>c_ctxt._private
- context.endNode(c_node)
+ cdef _IterparseContext context
+ context = <_IterparseContext>c_ctxt._private
+ try:
+ context.endNode(c_node)
+ except:
+ if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ c_ctxt.disableSAX = 1
+ context._store_raised()
cdef xmlparser.startElementNsSAX2Func _getOrigStart(xmlparser.xmlParserCtxt* c_ctxt):
- return (<_IterparseResolverContext>c_ctxt._private)._origSaxStart
+ return (<_IterparseContext>c_ctxt._private)._origSaxStart
cdef xmlparser.endElementNsSAX2Func _getOrigEnd(xmlparser.xmlParserCtxt* c_ctxt):
- return (<_IterparseResolverContext>c_ctxt._private)._origSaxEnd
+ return (<_IterparseContext>c_ctxt._private)._origSaxEnd
cdef void _saxStart(void* ctxt, char* localname, char* prefix, char* URI,
int nb_namespaces, char** namespaces,
@@ -230,7 +241,7 @@
def __init__(self, source, events=("end",), tag=None,
attribute_defaults=False, dtd_validation=False,
load_dtd=False, no_network=False, remove_blank_text=False):
- cdef _IterparseResolverContext context
+ cdef _IterparseContext context
cdef char* c_filename
cdef int parse_options
if not hasattr(source, 'read'):
@@ -246,7 +257,7 @@
c_filename = NULL
self._source = source
- _BaseParser.__init__(self, _IterparseResolverContext)
+ _BaseParser.__init__(self, _IterparseContext)
parse_options = _XML_DEFAULT_PARSE_OPTIONS
if load_dtd:
@@ -263,7 +274,7 @@
parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS
self._parse_options = parse_options
- context = <_IterparseResolverContext>self._context
+ context = <_IterparseContext>self._context
context._setEventFilter(events, tag)
context._wrapCallbacks(self._parser_ctxt.sax)
xmlparser.xmlCtxtUseOptions(self._parser_ctxt, parse_options)
@@ -274,12 +285,12 @@
return self
def __next__(self):
- cdef _IterparseResolverContext context
+ cdef _IterparseContext context
cdef int error
cdef char* c_filename
if self._source is None:
raise StopIteration
- context = <_IterparseResolverContext>self._context
+ context = <_IterparseContext>self._context
if python.PyList_GET_SIZE(context._events) > context._event_index:
item = python.PyList_GET_ITEM(context._events, context._event_index)
python.Py_INCREF(item) # 'borrowed reference' from PyList_GET_ITEM
@@ -291,7 +302,6 @@
while python.PyList_GET_SIZE(context._events) == 0 and error == 0:
data = self._source.read(__ITERPARSE_CHUNK_SIZE)
if not python.PyString_Check(data):
- #xmlparser.xmlParseChunk(self._parser_ctxt, NULL, 0, 1)
self._source = None
raise TypeError, "reading file objects must return plain strings"
elif data:
@@ -307,6 +317,7 @@
_raiseParseError(self._parser_ctxt, self._filename)
if python.PyList_GET_SIZE(context._events) == 0:
self.root = context._root
+ self._source = None
raise StopIteration
context._event_index = 1
@@ -316,8 +327,8 @@
cdef class iterwalk:
- """A tree walker that generates ``iterparse()`` events from an existing
- tree as if it was parsing XML data.
+ """A tree walker that generates events from an existing tree as if it was
+ parsing XML data with ``iterparse()``.
"""
cdef object _node_stack
cdef object _pop_node
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_elementtree.py Tue Jun 12 19:11:47 2007
@@ -290,6 +290,27 @@
self.assertEquals(None, root.get('three'))
self.assertEquals('foo', root.get('three', 'foo'))
+ def test_attrib_clear(self):
+ XML = self.etree.XML
+
+ root = XML('<doc one="One" two="Two"/>')
+ self.assertEquals('One', root.get('one'))
+ self.assertEquals('Two', root.get('two'))
+ root.attrib.clear()
+ self.assertEquals(None, root.get('one'))
+ self.assertEquals(None, root.get('two'))
+
+ def test_attrib_set_clear(self):
+ Element = self.etree.Element
+
+ root = Element("root", one="One")
+ root.set("two", "Two")
+ self.assertEquals('One', root.get('one'))
+ self.assertEquals('Two', root.get('two'))
+ root.attrib.clear()
+ self.assertEquals(None, root.get('one'))
+ self.assertEquals(None, root.get('two'))
+
def test_attribute_update_dict(self):
XML = self.etree.XML
Modified: lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd (original)
+++ lxml/branch/lxml-1.3/src/lxml/xmlparser.pxd Tue Jun 12 19:11:47 2007
@@ -52,6 +52,8 @@
int wellFormed
int recovery
int options
+ int disableSAX
+ int errNo
xmlError lastError
xmlNode* node
xmlSAXHandler* sax
More information about the lxml-checkins
mailing list